def test_from_dask_array_compat_numpy_array_1d():
    """Check that 1-D dask and numpy inputs yield equivalent collections.

    ``from_dask_array`` (dask input) and ``from_array`` (numpy input) are
    exercised with no ``columns``, a scalar ``columns`` and a list ``columns``.
    """
    x = da.ones(10, chunks=3)

    d1 = from_dask_array(x)  # dask
    assert isinstance(d1, dd.Series)
    assert (d1.compute().values == x.compute()).all()
    assert d1.name is None

    d2 = from_array(x.compute())  # numpy
    # Verify the numpy-backed result itself rather than re-checking ``d1``.
    assert isinstance(d2, dd.Series)
    assert (d2.compute().values == x.compute()).all()
    assert d2.name is None

    d1 = from_dask_array(x, columns='name')  # dask
    assert isinstance(d1, dd.Series)
    assert (d1.compute().values == x.compute()).all()
    assert d1.name == 'name'

    d2 = from_array(x.compute(), columns='name')  # numpy
    assert isinstance(d2, dd.Series)
    assert (d2.compute().values == x.compute()).all()
    assert d2.name == 'name'

    # passing list via columns results in DataFrame
    d1 = from_dask_array(x, columns=['name'])  # dask
    assert isinstance(d1, dd.DataFrame)
    assert (d1.compute().values == x.compute()).all()
    tm.assert_index_equal(d1.columns, pd.Index(['name']))

    d2 = from_array(x.compute(), columns=['name'])  # numpy
    assert isinstance(d2, dd.DataFrame)
    assert (d2.compute().values == x.compute()).all()
    tm.assert_index_equal(d2.columns, pd.Index(['name']))
def test_Series_from_dask_array():
    """Build a Series from a 1-D dask array and check name, divisions, values."""
    x = da.ones(10, chunks=4)

    ser = from_dask_array(x, 'a')
    assert ser.name == 'a'
    # Chunks of 4, 4 and 2 rows: the final division is the last index
    # label (9) of the 10-row array, not the row count.
    assert list(ser.divisions) == [0, 4, 8, 9]
    assert (ser.compute(get=get_sync).values == x.compute(get=get_sync)).all()

    # Without an explicit name the resulting Series is unnamed.
    ser = from_dask_array(x)
    assert ser.name is None
def test_Series_from_dask_array():
    """Build a Series from a 1-D dask array and check name, divisions, values.

    Also checks that ``dd.from_array`` given a dask array yields a result
    equal to the one produced by ``from_dask_array``.
    """
    arr = da.ones(10, chunks=4)

    named = from_dask_array(arr, 'a')
    assert named.name == 'a'
    assert list(named.divisions) == [0, 4, 8, 9]
    series_values = named.compute(get=get_sync).values
    array_values = arr.compute(get=get_sync)
    assert (series_values == array_values).all()

    unnamed = from_dask_array(arr)
    assert unnamed.name is None

    # dd.from_array should re-route to from_dask_array
    rerouted = dd.from_array(arr)
    assert eq(unnamed, rerouted)
def test_Series_from_dask_array():
    """Build a Series from a 1-D dask array and check name, divisions, values.

    Also checks that ``dd.from_array`` given a dask array yields a result
    equal to the one produced by ``from_dask_array``.
    """
    arr = da.ones(10, chunks=4)

    named = from_dask_array(arr, "a")
    assert named.name == "a"
    assert list(named.divisions) == [0, 4, 8, 9]
    series_values = named.compute(get=get_sync).values
    array_values = arr.compute(get=get_sync)
    assert (series_values == array_values).all()

    unnamed = from_dask_array(arr)
    assert unnamed.name is None

    # dd.from_array should re-route to from_dask_array
    rerouted = dd.from_array(arr)
    assert eq(unnamed, rerouted)
def test_from_dask_array_raises():
    """Check that from_dask_array rejects unsupported shapes and bad columns."""
    # 3-D input is rejected.
    x = da.ones((3, 3, 3), chunks=2)
    pytest.raises(ValueError, lambda: from_dask_array(x))

    x = da.ones((10, 3), chunks=(3, 3))
    pytest.raises(ValueError, lambda: from_dask_array(x))  # no columns

    # Not enough columns
    pytest.raises(ValueError, lambda: from_dask_array(x, columns=['a']))

    # The error message should mention both the offending column name and the
    # array's column count.  Using pytest.raises (instead of a bare
    # try/except) makes the test fail if no exception is raised at all.
    with pytest.raises(ValueError) as excinfo:
        from_dask_array(x, columns=['hello'])
    message = str(excinfo.value)
    assert 'hello' in message
    assert '3' in message
def test_from_dask_array_raises():
    """Check that from_dask_array rejects unsupported shapes and bad columns."""
    # 3-D input is rejected.
    x = da.ones((3, 3, 3), chunks=2)
    pytest.raises(ValueError, lambda: from_dask_array(x))

    x = da.ones((10, 3), chunks=(3, 3))
    pytest.raises(ValueError, lambda: from_dask_array(x))  # no columns

    # Not enough columns
    pytest.raises(ValueError, lambda: from_dask_array(x, columns=["a"]))

    # The error message should mention both the offending column name and the
    # array's column count.  Using pytest.raises (instead of a bare
    # try/except) makes the test fail if no exception is raised at all.
    with pytest.raises(ValueError) as excinfo:
        from_dask_array(x, columns=["hello"])
    message = str(excinfo.value)
    assert "hello" in message
    assert "3" in message
def test_DataFrame_from_dask_array():
    """Build a DataFrame from a 2-D dask array; check columns, divisions, values."""
    x = da.ones((10, 3), chunks=(4, 2))

    df = from_dask_array(x, ['a', 'b', 'c'])
    assert list(df.columns) == ['a', 'b', 'c']
    # Row chunks of 4, 4 and 2: the final division is the last index
    # label (9) of the 10-row array, not the row count.
    assert list(df.divisions) == [0, 4, 8, 9]
    assert (df.compute(get=get_sync).values == x.compute(get=get_sync)).all()
def test_DataFrame_from_dask_array():
    """Build a DataFrame from a 2-D dask array; check columns, divisions, values.

    Also checks that ``dd.from_array`` given a dask array produces the same
    columns and divisions as ``from_dask_array``.
    """
    x = da.ones((10, 3), chunks=(4, 2))

    df = from_dask_array(x, ['a', 'b', 'c'])
    assert list(df.columns) == ['a', 'b', 'c']
    assert list(df.divisions) == [0, 4, 8, 9]
    assert (df.compute(get=get_sync).values == x.compute(get=get_sync)).all()

    # dd.from_array should re-route to from_dask_array
    df2 = dd.from_array(x, columns=['a', 'b', 'c'])
    # Compare as lists: ``==`` on index-like columns is element-wise, and the
    # truth value of the resulting array is ambiguous inside ``assert``.
    assert list(df2.columns) == list(df.columns)
    assert df2.divisions == df.divisions
def test_DataFrame_from_dask_array():
    """Build a DataFrame from a 2-D dask array; check columns, divisions, values.

    Also checks that ``dd.from_array`` given a dask array produces the same
    columns and divisions as ``from_dask_array``.
    """
    x = da.ones((10, 3), chunks=(4, 2))

    df = from_dask_array(x, ["a", "b", "c"])
    assert list(df.columns) == ["a", "b", "c"]
    assert list(df.divisions) == [0, 4, 8, 9]
    assert (df.compute(get=get_sync).values == x.compute(get=get_sync)).all()

    # dd.from_array should re-route to from_dask_array
    df2 = dd.from_array(x, columns=["a", "b", "c"])
    # Compare as lists: ``==`` on index-like columns is element-wise, and the
    # truth value of the resulting array is ambiguous inside ``assert``.
    assert list(df2.columns) == list(df.columns)
    assert df2.divisions == df.divisions
def test_from_dask_array_compat_numpy_array():
    """Check that 2-D dask and numpy inputs yield equivalent DataFrames.

    ``from_dask_array`` (dask input) and ``from_array`` (numpy input) must
    both reject 3-D input and mismatched ``columns``, and otherwise produce
    DataFrames with the same values and column labels.
    """
    x = da.ones((3, 3, 3), chunks=2)

    with pytest.raises(ValueError):
        from_dask_array(x)  # dask

    with pytest.raises(ValueError):
        from_array(x.compute())  # numpy

    x = da.ones((10, 3), chunks=(3, 3))
    d1 = from_dask_array(x)  # dask
    assert isinstance(d1, dd.DataFrame)
    assert (d1.compute().values == x.compute()).all()
    tm.assert_index_equal(d1.columns, pd.Index([0, 1, 2]))

    d2 = from_array(x.compute())  # numpy
    # Verify the numpy-backed result itself rather than re-checking ``d1``.
    assert isinstance(d2, dd.DataFrame)
    assert (d2.compute().values == x.compute()).all()
    tm.assert_index_equal(d2.columns, pd.Index([0, 1, 2]))

    # A single column label does not match the three array columns.
    with pytest.raises(ValueError):
        from_dask_array(x, columns=['a'])  # dask

    with pytest.raises(ValueError):
        from_array(x.compute(), columns=['a'])  # numpy

    d1 = from_dask_array(x, columns=['a', 'b', 'c'])  # dask
    assert isinstance(d1, dd.DataFrame)
    assert (d1.compute().values == x.compute()).all()
    tm.assert_index_equal(d1.columns, pd.Index(['a', 'b', 'c']))

    d2 = from_array(x.compute(), columns=['a', 'b', 'c'])  # numpy
    assert isinstance(d2, dd.DataFrame)
    assert (d2.compute().values == x.compute()).all()
    tm.assert_index_equal(d2.columns, pd.Index(['a', 'b', 'c']))
def test_DataFrame_from_dask_array():
    """Build a DataFrame from a 2-D dask array; check type, columns, divisions.

    Also checks that ``dd.from_array`` given a dask array produces a
    DataFrame with the same columns and divisions as ``from_dask_array``.
    """
    x = da.ones((10, 3), chunks=(4, 2))

    df = from_dask_array(x, ['a', 'b', 'c'])
    assert isinstance(df, dd.DataFrame)
    tm.assert_index_equal(df.columns, pd.Index(['a', 'b', 'c']))
    assert list(df.divisions) == [0, 4, 8, 9]
    assert (df.compute(get=get_sync).values == x.compute(get=get_sync)).all()

    # dd.from_array should re-route to from_dask_array
    df2 = dd.from_array(x, columns=['a', 'b', 'c'])
    # Verify the re-routed result itself rather than re-checking ``df``.
    assert isinstance(df2, dd.DataFrame)
    tm.assert_index_equal(df2.columns, df.columns)
    assert df2.divisions == df.divisions
def test_from_dask_array_compat_numpy_array():
    """Check that 2-D dask and numpy inputs yield equivalent DataFrames.

    ``from_dask_array`` (dask input) and ``from_array`` (numpy input) must
    raise the same ``ValueError`` messages for 3-D input and for mismatched
    ``columns``, and otherwise produce DataFrames with the same values and
    column labels.
    """
    x = da.ones((3, 3, 3), chunks=2)

    msg = r"from_array does not input more than 2D array, got array with shape \(3, 3, 3\)"
    with tm.assertRaisesRegexp(ValueError, msg):
        from_dask_array(x)  # dask

    with tm.assertRaisesRegexp(ValueError, msg):
        from_array(x.compute())  # numpy

    x = da.ones((10, 3), chunks=(3, 3))
    d1 = from_dask_array(x)  # dask
    assert isinstance(d1, dd.DataFrame)
    assert (d1.compute().values == x.compute()).all()
    tm.assert_index_equal(d1.columns, pd.Index([0, 1, 2]))

    d2 = from_array(x.compute())  # numpy
    # Verify the numpy-backed result itself rather than re-checking ``d1``.
    assert isinstance(d2, dd.DataFrame)
    assert (d2.compute().values == x.compute()).all()
    tm.assert_index_equal(d2.columns, pd.Index([0, 1, 2]))

    # A single column label does not match the three array columns.
    msg = r"""Length mismatch: Expected axis has 3 elements, new values have 1 elements"""
    with tm.assertRaisesRegexp(ValueError, msg):
        from_dask_array(x, columns=['a'])  # dask

    with tm.assertRaisesRegexp(ValueError, msg):
        from_array(x.compute(), columns=['a'])  # numpy

    d1 = from_dask_array(x, columns=['a', 'b', 'c'])  # dask
    assert isinstance(d1, dd.DataFrame)
    assert (d1.compute().values == x.compute()).all()
    tm.assert_index_equal(d1.columns, pd.Index(['a', 'b', 'c']))

    d2 = from_array(x.compute(), columns=['a', 'b', 'c'])  # numpy
    assert isinstance(d2, dd.DataFrame)
    assert (d2.compute().values == x.compute()).all()
    tm.assert_index_equal(d2.columns, pd.Index(['a', 'b', 'c']))