Beispiel #1
0
def test_categorize():
    """Exercise ``categorize`` on a hand-built two-partition dask DataFrame.

    Checks that categorizing column ``'a'`` alone gives it the ``category``
    dtype while ``'b'`` stays object, that the values of ``'a'`` are
    unchanged, that the first partition carries the same dtypes as the fully
    computed frame, and that ``categorize()`` without arguments makes every
    column categorical.
    """
    left_part = pd.DataFrame(
        {'a': ['Alice', 'Bob', 'Alice'], 'b': ['C', 'D', 'E']},
        index=[0, 1, 2])
    right_part = pd.DataFrame(
        {'a': ['Bob', 'Charlie', 'Charlie'], 'b': ['A', 'A', 'B']},
        index=[3, 4, 5])
    graph = {('x', 0): left_part, ('x', 1): right_part}
    # NOTE(review): the trailing [3] is presumably the division between the
    # two partitions (indices 0-2 and 3-5) -- confirm against dd.DataFrame.
    frame = dd.DataFrame(graph, 'x', ['a', 'b'], [3])
    plain = frame.compute()

    # Categorize a single column and materialize the result.
    by_a = frame.categorize('a')
    computed = by_a.compute()
    assert computed.dtypes['a'] == 'category'
    assert computed.dtypes['b'] == 'O'

    # Casting back to object must reproduce the original values in order.
    assert list(computed.a.astype('O')) == list(plain.a)

    # The first partition alone must agree with the full result on dtypes.
    first_partition = get(by_a.dask, by_a._keys()[:1])[0]
    assert (first_partition.dtypes == computed.dtypes).all()

    # With no column given, every column ends up categorical.
    every_column = frame.categorize().compute()
    assert (every_column.dtypes == 'category').all()
Beispiel #2
0
def test_repartition():
    """Repartition a two-partition frame onto explicit divisions.

    Verifies that the new divisions are reported as given, that the
    repartitioned collection still equals the original one, and that the
    first new partition equals the first row of the source frame.
    """
    source = pd.DataFrame(
        {'x': [1, 2, 3, 4, 5, 6], 'y': list('abdabd')},
        index=[10, 20, 30, 40, 50, 60])
    original = dd.from_pandas(source, 2)

    repartitioned = original.repartition(divisions=[10, 20, 50, 60])
    assert repartitioned.divisions == (10, 20, 50, 60)
    assert eq(original, repartitioned)

    # Partition 0 spans divisions 10..20, compared against the first row.
    first_partition = get(repartitioned.dask, (repartitioned._name, 0))
    assert eq(first_partition, source.iloc[:1])
Beispiel #3
0
def test_repartition():
    """Check ``repartition`` with an explicit list of divisions.

    The six-row pandas frame is loaded into two partitions, repartitioned
    onto the divisions 10, 20, 50, 60, and the result is compared against
    both the original collection and the first row of the pandas frame.
    """
    data = {'x': [1, 2, 3, 4, 5, 6], 'y': list('abdabd')}
    pdf = pd.DataFrame(data, index=[10, 20, 30, 40, 50, 60])
    ddf = dd.from_pandas(pdf, 2)

    result = ddf.repartition(divisions=[10, 20, 50, 60])

    assert result.divisions == (10, 20, 50, 60)
    assert eq(ddf, result)

    # Pull the first output partition straight from the task graph.
    head_key = (result._name, 0)
    assert eq(get(result.dask, head_key), pdf.iloc[:1])
Beispiel #4
0
def test_categorize():
    """Verify dtype handling of ``categorize`` on a two-partition frame.

    Categorizing only ``'a'`` must yield a ``category`` column next to an
    untouched object column ``'b'`` with the same values as before; the
    first partition must share the computed frame's dtypes; and calling
    ``categorize()`` with no arguments must make all columns categorical.
    """
    partitions = {}
    partitions[('x', 0)] = pd.DataFrame(
        {'a': ['Alice', 'Bob', 'Alice'], 'b': ['C', 'D', 'E']},
        index=[0, 1, 2])
    partitions[('x', 1)] = pd.DataFrame(
        {'a': ['Bob', 'Charlie', 'Charlie'], 'b': ['A', 'A', 'B']},
        index=[3, 4, 5])
    # NOTE(review): [3] looks like the boundary between the two partitions'
    # index ranges -- confirm against the dd.DataFrame constructor.
    ddf = dd.DataFrame(partitions, 'x', ['a', 'b'], [3])
    uncategorized = ddf.compute()

    categorized = ddf.categorize('a')
    materialized = categorized.compute()
    assert materialized.dtypes['a'] == 'category'
    assert materialized.dtypes['b'] == 'O'

    # Values of 'a' survive the round trip through the categorical dtype.
    restored_values = list(materialized.a.astype('O'))
    assert restored_values == list(uncategorized.a)

    # Compute just the first key and compare its dtypes to the full frame.
    leading_blocks = get(categorized.dask, categorized._keys()[:1])
    assert (leading_blocks[0].dtypes == materialized.dtypes).all()

    all_dtypes = ddf.categorize().compute().dtypes
    assert (all_dtypes == 'category').all()