def test_from_dask_dataframe():
    """Round-trip a pandas frame through ``dgd.from_dask_dataframe``.

    A 20-row pandas frame is split into a two-partition dask dataframe,
    converted with ``dgd.from_dask_dataframe``, computed and turned back into
    pandas.  The index and both columns must match the original frame.
    """
    np.random.seed(0)
    # Draw the columns in this order so the seeded stream stays reproducible.
    x_values = np.random.randint(0, 5, size=20)
    y_values = np.random.normal(size=20)
    expect = pd.DataFrame({'x': x_values, 'y': y_values})

    dask_frame = dd.from_pandas(expect, npartitions=2)
    converted = dgd.from_dask_dataframe(dask_frame)
    got = converted.compute().to_pandas()

    np.testing.assert_array_equal(got.index.values, expect.index.values)
    for column in ('x', 'y'):
        np.testing.assert_array_equal(got[column].values,
                                      expect[column].values)
def test_set_index(nelem):
    """Check that ``set_index('x')`` on the ``dgd`` frame matches dask.

    Builds a pandas frame with ``nelem`` rows, wraps it in a two-partition
    dask dataframe and converts it with ``dgd.from_dask_dataframe``.  The
    result of ``set_index('x')`` is then compared against the dask result:
    index values, ``y`` values and column labels must all agree.

    Parameters
    ----------
    nelem : int
        Number of rows in the generated frame.
    """
    np.random.seed(0)
    # Use a shuffled permutation so every index value is unique; the sort
    # may not be stable-ordering, so duplicate keys could reorder rows.
    x = np.arange(nelem)
    np.random.shuffle(x)
    df = pd.DataFrame({'x': x,
                       'y': np.random.randint(0, nelem, size=nelem)})
    ddf = dd.from_pandas(df, npartitions=2)
    dgdf = dgd.from_dask_dataframe(ddf)

    expect = ddf.set_index('x').compute()
    got = dgdf.set_index('x').compute().to_pandas()

    np.testing.assert_array_equal(got.index.values, expect.index.values)
    np.testing.assert_array_equal(got.y.values, expect.y.values)
    # Compare the labels as plain lists: `Index == Index` is element-wise and
    # yields an array, whose truth value is ambiguous for more than one
    # column and which raises instead of failing when the lengths differ.
    assert list(got.columns) == list(expect.columns)
def test_take(nelem, nparts):
    """Check ``take`` on the ``dgd`` frame against ``pandas.DataFrame.take``.

    A pandas frame with ``nelem`` rows is split into ``nparts`` dask
    partitions and converted with ``dgd.from_dask_dataframe``.  Half as many
    random row positions are taken with ``npartitions=5``; the output must
    have between 2 and 5 partitions, a fresh ``0..len-1`` index, and the same
    ``x``/``y`` values as the pandas result.

    Parameters
    ----------
    nelem : int
        Number of rows in the generated frame.
    nparts : int
        Number of partitions of the source dask dataframe.
    """
    np.random.seed(0)
    # Draw x, y and the positions in this order to keep the seeded stream
    # identical between runs.
    x_values = np.random.randint(0, nelem, size=nelem)
    y_values = np.random.random(nelem)
    # NOTE(review): randint's upper bound is exclusive, so position
    # ``nelem - 1`` is never selected -- confirm this is intended.
    positions = np.random.randint(0, nelem - 1, size=nelem // 2)

    frame = pd.DataFrame({'x': x_values, 'y': y_values})
    dask_frame = dd.from_pandas(frame, npartitions=nparts)
    converted = dgd.from_dask_dataframe(dask_frame)

    out = converted.take(gd.Series(positions), npartitions=5)
    got = out.compute().to_pandas()
    expect = frame.take(positions)

    out_parts = out.npartitions
    assert 1 < out_parts <= 5
    # The taken frame is expected to carry a fresh positional index.
    np.testing.assert_array_equal(got.index, np.arange(len(got)))
    for column in ('x', 'y'):
        np.testing.assert_array_equal(got[column], expect[column])