Esempio n. 1
0
def test_from_dask_dataframe():
    """Round-trip a pandas frame through dask and ``dgd`` and compare.

    A 20-row frame is split into two dask partitions, converted with
    ``dgd.from_dask_dataframe``, computed and brought back to pandas; the
    index and both columns must match the frame we started from.
    """
    np.random.seed(0)
    n_rows = 20
    # Keep the draw order (integers, then normals) so the seeded data
    # stays reproducible.
    x_vals = np.random.randint(0, 5, size=n_rows)
    y_vals = np.random.normal(size=n_rows)
    expect = pd.DataFrame({'x': x_vals, 'y': y_vals})

    partitioned = dd.from_pandas(expect, npartitions=2)
    got = dgd.from_dask_dataframe(partitioned).compute().to_pandas()

    np.testing.assert_array_equal(got.index.values, expect.index.values)
    for column in ('x', 'y'):
        np.testing.assert_array_equal(got[column].values,
                                      expect[column].values)
Esempio n. 2
0
def test_set_index(nelem):
    """Check that ``set_index('x')`` on the ``dgd`` frame agrees with dask.

    Parameters
    ----------
    nelem : int
        Number of rows in the generated frame (presumably supplied by a
        pytest parametrization that is not visible in this chunk).
    """
    np.random.seed(0)
    # Use unique index range as the sort may not be stable-ordering
    x = np.arange(nelem)
    np.random.shuffle(x)
    df = pd.DataFrame({'x': x,
                       'y': np.random.randint(0, nelem, size=nelem)})
    ddf = dd.from_pandas(df, npartitions=2)
    dgdf = dgd.from_dask_dataframe(ddf)

    expect = ddf.set_index('x').compute()
    got = dgdf.set_index('x').compute().to_pandas()

    np.testing.assert_array_equal(got.index.values, expect.index.values)
    np.testing.assert_array_equal(got.y.values, expect.y.values)
    # Compare the labels as plain lists: ``Index == Index`` is elementwise,
    # so asserting on it only works for a single column and raises instead
    # of failing cleanly when the column counts differ.
    assert list(got.columns) == list(expect.columns)
Esempio n. 3
0
def test_take(nelem, nparts):
    """Check ``take`` on the ``dgd`` frame against ``pandas.DataFrame.take``.

    Parameters
    ----------
    nelem : int
        Number of rows in the generated frame.
    nparts : int
        Number of dask partitions the source frame is split into.
    """
    np.random.seed(0)

    # Draw order (x, y, then the positions to take) is kept fixed so the
    # seeded data stays reproducible.
    x = np.random.randint(0, nelem, size=nelem)
    y = np.random.random(nelem)
    # NOTE: randint's upper bound is exclusive, so the last row
    # (position nelem - 1) is never selected.
    selected = np.random.randint(0, nelem - 1, size=nelem // 2)

    df = pd.DataFrame({'x': x, 'y': y})
    dgdf = dgd.from_dask_dataframe(dd.from_pandas(df, npartitions=nparts))

    out = dgdf.take(gd.Series(selected), npartitions=5)
    got = out.compute().to_pandas()
    expect = df.take(selected)

    n_out_parts = out.npartitions
    assert 1 < n_out_parts <= 5
    # The result is expected to carry a fresh 0..len-1 index.
    np.testing.assert_array_equal(got.index, np.arange(len(got)))
    for column in ('x', 'y'):
        np.testing.assert_array_equal(got[column], expect[column])