Esempio n. 1
0
def test_array_xd_with_none():
    """Check NumPy extraction of an ``Array2D`` column that contains a ``None`` row.

    Two feature declarations are exercised with the same data
    (``[array, None, array]``):

    * fixed shape ``(2, 2)``: the extracted column is expected to be a single
      ``float64`` array of shape ``(3, 2, 2)`` whose missing row is all NaN;
    * dynamic shape ``(None, 2)``: the extracted column is expected to be a
      1-D ``object`` array of length 3 whose missing entry is a single NaN.
    """
    dummy_array = np.array([[1, 2], [3, 4]], dtype="int32")

    # Fixed shape
    features = datasets.Features(
        {"foo": datasets.Array2D(dtype="int32", shape=(2, 2))})
    dataset = datasets.Dataset.from_dict(
        {"foo": [dummy_array, None, dummy_array]}, features=features)
    arr = NumpyArrowExtractor().extract_column(dataset._data)
    # Separate asserts so a failure reports which property is wrong.
    assert isinstance(arr, np.ndarray)
    assert arr.dtype == np.float64
    assert arr.shape == (3, 2, 2)
    assert np.allclose(arr[0], dummy_array)
    assert np.allclose(arr[2], dummy_array)
    assert np.all(np.isnan(arr[1]))  # broadcasted np.nan - use np.all

    # Dynamic shape
    features = datasets.Features(
        {"foo": datasets.Array2D(dtype="int32", shape=(None, 2))})
    dataset = datasets.Dataset.from_dict(
        {"foo": [dummy_array, None, dummy_array]}, features=features)
    arr = NumpyArrowExtractor().extract_column(dataset._data)
    assert isinstance(arr, np.ndarray)
    # Use the builtin ``object``: the ``np.object`` alias was deprecated in
    # NumPy 1.20 and removed in 1.24, where it raises AttributeError.
    assert arr.dtype == object
    assert arr.shape == (3, )
    np.testing.assert_equal(arr[0], dummy_array)
    np.testing.assert_equal(arr[2], dummy_array)
    assert np.isnan(arr[1])  # a single np.nan value - np.all not needed
Esempio n. 2
0
 def test_numpy_extractor(self):
     """Row, column and batch extraction of the dummy table match the reference data."""
     table = self._create_dummy_table()
     numpy_extractor = NumpyArrowExtractor()

     # Row: compared against the first entry of each reference column.
     actual_row = numpy_extractor.extract_row(table)
     expected_row = {
         "a": _COL_A[0],
         "b": _COL_B[0],
         "c": np.array(_COL_C[0]),
     }
     np.testing.assert_equal(actual_row, expected_row)

     # Column: compared against the full "a" reference data.
     actual_col = numpy_extractor.extract_column(table)
     np.testing.assert_equal(actual_col, np.array(_COL_A))

     # Batch: every reference column, each converted to an array.
     actual_batch = numpy_extractor.extract_batch(table)
     expected_batch = {
         "a": np.array(_COL_A),
         "b": np.array(_COL_B),
         "c": np.array(_COL_C),
     }
     np.testing.assert_equal(actual_batch, expected_batch)
Esempio n. 3
0
 def test_numpy_extractor_np_array_kwargs(self):
     """A ``dtype`` passed to the extractor is reflected in row, column and batch outputs."""
     # Column "b" is dropped from the dummy table before extracting.
     table = self._create_dummy_table().drop(["b"])
     half_extractor = NumpyArrowExtractor(dtype=np.float16)
     half_dtype = np.dtype(np.float16)

     extracted_row = half_extractor.extract_row(table)
     self.assertEqual(extracted_row["c"].dtype, half_dtype)

     extracted_col = half_extractor.extract_column(table)
     self.assertEqual(extracted_col.dtype, np.float16)

     extracted_batch = half_extractor.extract_batch(table)
     for column_name in ("a", "c"):
         self.assertEqual(extracted_batch[column_name].dtype, half_dtype)
Esempio n. 4
0
def test_array_xd_numpy_arrow_extractor(dtype, dummy_value):
    """A one-row ``Array2D`` column extracts to an ndarray equal to the input.

    The single sample is a 2x2 grid filled with ``dummy_value``; the extracted
    column is compared against the same data built directly with NumPy using
    ``dtype``.
    """
    sample = [[dummy_value] * 2] * 2
    array_feature = datasets.Array2D(dtype=dtype, shape=(2, 2))
    features = datasets.Features({"foo": array_feature})
    dataset = datasets.Dataset.from_dict({"foo": [sample]}, features=features)

    extracted = NumpyArrowExtractor().extract_column(dataset._data)

    assert isinstance(extracted, np.ndarray)
    expected = np.array([sample], dtype=np.dtype(dtype))
    np.testing.assert_equal(extracted, expected)