def test_from_dask_array_compat_numpy_array_1d():
    """Check 1-D dask and NumPy inputs produce matching dask collections.

    ``from_dask_array`` (dask input) and ``from_array`` (NumPy input) are
    exercised side by side with three ``columns`` settings: omitted, a
    scalar string, and a one-element list.  For each, the result type,
    the computed values and the name/columns are asserted.
    """
    x = da.ones(10, chunks=3)

    # No ``columns``: an unnamed Series is expected from both constructors.
    d1 = from_dask_array(x)       # dask
    assert isinstance(d1, dd.Series)
    assert (d1.compute().values == x.compute()).all()
    assert d1.name is None

    d2 = from_array(x.compute())  # numpy
    # Check d2 here (not d1) so the NumPy path's type is actually verified.
    assert isinstance(d2, dd.Series)
    assert (d2.compute().values == x.compute()).all()
    assert d2.name is None

    # Scalar ``columns``: a Series carrying that name.
    d1 = from_dask_array(x, columns='name')       # dask
    assert isinstance(d1, dd.Series)
    assert (d1.compute().values == x.compute()).all()
    assert d1.name == 'name'

    d2 = from_array(x.compute(), columns='name')  # numpy
    assert isinstance(d2, dd.Series)
    assert (d2.compute().values == x.compute()).all()
    assert d2.name == 'name'

    # passing list via columns results in DataFrame
    d1 = from_dask_array(x, columns=['name'])       # dask
    assert isinstance(d1, dd.DataFrame)
    assert (d1.compute().values == x.compute()).all()
    tm.assert_index_equal(d1.columns, pd.Index(['name']))

    d2 = from_array(x.compute(), columns=['name'])  # numpy
    assert isinstance(d2, dd.DataFrame)
    assert (d2.compute().values == x.compute()).all()
    tm.assert_index_equal(d2.columns, pd.Index(['name']))
def test_from_dask_array_compat_numpy_array():
    """Check 2-D dask and NumPy inputs produce matching dask DataFrames.

    Covers: rejection of a 3-D array, default integer column labels,
    rejection of a ``columns`` list whose length differs from the number
    of array columns, and explicit column labels.  Each case is run for
    both ``from_dask_array`` (dask input) and ``from_array`` (NumPy input).
    """
    # A 3-D array is expected to raise ValueError from both constructors.
    x = da.ones((3, 3, 3), chunks=2)

    with pytest.raises(ValueError):
        from_dask_array(x)       # dask

    with pytest.raises(ValueError):
        from_array(x.compute())  # numpy

    # 2-D input without ``columns``: labels default to 0..2.
    x = da.ones((10, 3), chunks=(3, 3))
    d1 = from_dask_array(x)       # dask
    assert isinstance(d1, dd.DataFrame)
    assert (d1.compute().values == x.compute()).all()
    tm.assert_index_equal(d1.columns, pd.Index([0, 1, 2]))

    d2 = from_array(x.compute())  # numpy
    # Check d2 here (not d1) so the NumPy path's type is actually verified.
    assert isinstance(d2, dd.DataFrame)
    assert (d2.compute().values == x.compute()).all()
    tm.assert_index_equal(d2.columns, pd.Index([0, 1, 2]))

    # One label for three array columns must be rejected.
    with pytest.raises(ValueError):
        from_dask_array(x, columns=['a'])       # dask

    with pytest.raises(ValueError):
        from_array(x.compute(), columns=['a'])  # numpy

    # Explicit labels of matching length are applied as given.
    d1 = from_dask_array(x, columns=['a', 'b', 'c'])       # dask
    assert isinstance(d1, dd.DataFrame)
    assert (d1.compute().values == x.compute()).all()
    tm.assert_index_equal(d1.columns, pd.Index(['a', 'b', 'c']))

    d2 = from_array(x.compute(), columns=['a', 'b', 'c'])  # numpy
    assert isinstance(d2, dd.DataFrame)
    assert (d2.compute().values == x.compute()).all()
    tm.assert_index_equal(d2.columns, pd.Index(['a', 'b', 'c']))
def test_meta_from_array():
    """Exercise ``dd.io._meta_from_array`` and the divisions of ``from_array``.

    The helper is called on small int64 and float64 2-D arrays, checking
    the result type, per-column dtypes and column labels, and that a
    ``columns`` list of the wrong length raises ``ValueError``.  Finally a
    201-row array is chunked with ``chunksize=50`` and the number of
    divisions is checked.
    """
    # Integer input, default column labels.
    int_values = np.array([[1, 2], [3, 4]], dtype=np.int64)
    meta = dd.io._meta_from_array(int_values)
    assert isinstance(meta, pd.DataFrame)
    for label in (0, 1):
        assert meta[label].dtype == np.int64
    tm.assert_index_equal(meta.columns, pd.Index([0, 1]))

    # Float input, explicit column labels.
    float_values = np.array([[1., 2.], [3., 4.]], dtype=np.float64)
    meta = dd.io._meta_from_array(float_values, columns=['a', 'b'])
    assert isinstance(meta, pd.DataFrame)
    for label in ('a', 'b'):
        assert meta[label].dtype == np.float64
    tm.assert_index_equal(meta.columns, pd.Index(['a', 'b']))

    # Three labels for a two-column array must be rejected.
    with pytest.raises(ValueError):
        dd.io._meta_from_array(float_values, columns=['a', 'b', 'c'])

    np.random.seed(42)
    random_values = np.random.rand(201, 2)
    ddf = from_array(random_values, chunksize=50, columns=['a', 'b'])
    # Should be 5 partitions and the end
    assert len(ddf.divisions) == 6
def test_dummy_from_array():
    """Exercise ``dd.io._dummy_from_array`` and the divisions of ``from_array``.

    The helper is called on small int64 and float64 2-D arrays, checking
    the result type, per-column dtypes and column labels, and that a
    ``columns`` list of the wrong length raises ``ValueError`` with the
    expected "Length mismatch" message.  Finally a 201-row array is
    chunked with ``chunksize=50`` and the number of divisions is checked.
    """
    # Integer input, default column labels.
    int_values = np.array([[1, 2], [3, 4]], dtype=np.int64)
    dummy = dd.io._dummy_from_array(int_values)
    assert isinstance(dummy, pd.DataFrame)
    for label in (0, 1):
        assert dummy[label].dtype == np.int64
    tm.assert_index_equal(dummy.columns, pd.Index([0, 1]))

    # Float input, explicit column labels.
    float_values = np.array([[1., 2.], [3., 4.]], dtype=np.float64)
    dummy = dd.io._dummy_from_array(float_values, columns=['a', 'b'])
    assert isinstance(dummy, pd.DataFrame)
    for label in ('a', 'b'):
        assert dummy[label].dtype == np.float64
    tm.assert_index_equal(dummy.columns, pd.Index(['a', 'b']))

    # Three labels for a two-column array must be rejected.
    expected_error = r"""Length mismatch: Expected axis has 2 elements, new values have 3 elements"""
    with tm.assertRaisesRegexp(ValueError, expected_error):
        dd.io._dummy_from_array(float_values, columns=['a', 'b', 'c'])

    np.random.seed(42)
    random_values = np.random.rand(201, 2)
    ddf = from_array(random_values, chunksize=50, columns=['a', 'b'])
    # Should be 5 partitions and the end
    assert len(ddf.divisions) == 6
def test_from_dask_array_compat_numpy_array():
    """Check 2-D dask and NumPy inputs produce matching dask DataFrames.

    Covers: rejection of a 3-D array (with the expected message), default
    integer column labels, rejection of a ``columns`` list whose length
    differs from the number of array columns (with the expected "Length
    mismatch" message), and explicit column labels.  Each case is run for
    both ``from_dask_array`` (dask input) and ``from_array`` (NumPy input).
    """
    # NOTE(review): a test with this same name appears earlier in this
    # file; if both live in one module this later definition shadows the
    # earlier one -- confirm which should be kept.

    # A 3-D array is expected to raise ValueError from both constructors.
    x = da.ones((3, 3, 3), chunks=2)

    msg = r"from_array does not input more than 2D array, got array with shape \(3, 3, 3\)"
    with tm.assertRaisesRegexp(ValueError, msg):
        from_dask_array(x)       # dask

    with tm.assertRaisesRegexp(ValueError, msg):
        from_array(x.compute())  # numpy

    # 2-D input without ``columns``: labels default to 0..2.
    x = da.ones((10, 3), chunks=(3, 3))
    d1 = from_dask_array(x)       # dask
    assert isinstance(d1, dd.DataFrame)
    assert (d1.compute().values == x.compute()).all()
    tm.assert_index_equal(d1.columns, pd.Index([0, 1, 2]))

    d2 = from_array(x.compute())  # numpy
    # Check d2 here (not d1) so the NumPy path's type is actually verified.
    assert isinstance(d2, dd.DataFrame)
    assert (d2.compute().values == x.compute()).all()
    tm.assert_index_equal(d2.columns, pd.Index([0, 1, 2]))

    # One label for three array columns must be rejected.
    msg = r"""Length mismatch: Expected axis has 3 elements, new values have 1 elements"""
    with tm.assertRaisesRegexp(ValueError, msg):
        from_dask_array(x, columns=['a'])       # dask

    with tm.assertRaisesRegexp(ValueError, msg):
        from_array(x.compute(), columns=['a'])  # numpy

    # Explicit labels of matching length are applied as given.
    d1 = from_dask_array(x, columns=['a', 'b', 'c'])       # dask
    assert isinstance(d1, dd.DataFrame)
    assert (d1.compute().values == x.compute()).all()
    tm.assert_index_equal(d1.columns, pd.Index(['a', 'b', 'c']))

    d2 = from_array(x.compute(), columns=['a', 'b', 'c'])  # numpy
    assert isinstance(d2, dd.DataFrame)
    assert (d2.compute().values == x.compute()).all()
    tm.assert_index_equal(d2.columns, pd.Index(['a', 'b', 'c']))