Beispiel #1
0
def test_indexed_concat():
    """Compare ``concat_indexed_dataframes`` with ``pd.concat`` along axis 0.

    Two pandas frames with partially overlapping index labels and columns are
    repartitioned and concatenated using both the ``'inner'`` and ``'outer'``
    joins. For each join the computed result must have the same columns and
    the same (row values, index label) pairs as the pandas result.

    The trailing assertions check that the sorted contents of the result's
    ``.dask`` attribute are identical for two calls with the same join and
    differ between ``'inner'`` and ``'outer'``.
    """
    A = pd.DataFrame(
        {'x': [1, 2, 3, 4, 6, 7], 'y': list('abcdef')},
        index=[1, 2, 3, 4, 6, 7],
    )
    a = dd.repartition(A, [1, 4, 7])

    # B lacks column 'y' and has index labels 5 and 8 that A does not have,
    # so the two join modes produce different column sets.
    B = pd.DataFrame({'x': [10, 20, 40, 50, 60, 80]}, index=[1, 2, 4, 5, 6, 8])
    b = dd.repartition(B, [1, 2, 5, 8])

    for how in ['inner', 'outer']:
        c = concat_indexed_dataframes([a, b], join=how)

        result = c.compute()
        # axis/join must be passed by keyword: pandas deprecated positional
        # arguments other than ``objs`` and pandas 2.0 rejects them.
        expected = pd.concat([A, B], axis=0, join=how)

        assert list(result.columns) == list(expected.columns)

        # Sorting the (row values, index label) pairs makes the comparison
        # independent of row order.
        result_rows = sorted(zip(result.values.tolist(),
                                 result.index.values.tolist()))
        expected_rows = sorted(zip(expected.values.tolist(),
                                   expected.index.values.tolist()))
        assert result_rows == expected_rows

    # Same inputs and join -> same sorted ``.dask`` contents on repeated calls.
    assert sorted(concat_indexed_dataframes([a, b], join='inner').dask) == \
           sorted(concat_indexed_dataframes([a, b], join='inner').dask)
    # A different join must not produce the same sorted ``.dask`` contents.
    assert sorted(concat_indexed_dataframes([a, b], join='inner').dask) != \
           sorted(concat_indexed_dataframes([a, b], join='outer').dask)
Beispiel #2
0
def test_indexed_concat(join):
    """Check ``concat_indexed_dataframes`` against ``pd.concat`` for ``join``.

    ``join`` is forwarded unchanged to both ``concat_indexed_dataframes`` and
    ``pd.concat``; the two results are compared with ``eq``. Afterwards the
    sorted contents of the result's ``.dask`` attribute are required to be
    equal across two calls with the same ``join`` and unequal between the
    ``'inner'`` and ``'outer'`` joins.
    """
    left_pd = pd.DataFrame(
        {'x': [1, 2, 3, 4, 6, 7], 'y': list('abcdef')},
        index=[1, 2, 3, 4, 6, 7],
    )
    left_dd = dd.repartition(left_pd, [1, 4, 7])

    right_pd = pd.DataFrame(
        {'x': [10, 20, 40, 50, 60, 80]},
        index=[1, 2, 4, 5, 6, 8],
    )
    right_dd = dd.repartition(right_pd, [1, 2, 5, 8])

    def sorted_dask(how):
        # Build a fresh concatenation on every call so that repeated calls
        # can be compared with each other.
        combined = concat_indexed_dataframes([left_dd, right_dd], join=how)
        return sorted(combined.dask)

    actual = concat_indexed_dataframes([left_dd, right_dd], join=join)
    wanted = pd.concat([left_pd, right_pd], axis=0, join=join)
    assert eq(actual, wanted)

    assert sorted_dask(join) == sorted_dask(join)
    assert sorted_dask('inner') != sorted_dask('outer')
Beispiel #3
0
def test_indexed_concat():
    """Verify ``concat_indexed_dataframes`` for the inner and outer joins.

    Builds two pandas frames whose index labels and columns only partly
    overlap, repartitions them, and for each join mode checks that the
    computed concatenation has the same columns and the same
    (row values, index label) pairs as ``pd.concat`` along axis 0.

    Finally asserts that the sorted contents of the result's ``.dask``
    attribute are equal for two ``'inner'`` calls and differ between the
    ``'inner'`` and ``'outer'`` joins.
    """
    A = pd.DataFrame({'x': [1, 2, 3, 4, 6, 7], 'y': list('abcdef')},
                     index=[1, 2, 3, 4, 6, 7])
    a = dd.repartition(A, [1, 4, 7])

    # B has no 'y' column and carries index labels (5, 8) absent from A.
    B = pd.DataFrame({'x': [10, 20, 40, 50, 60, 80]},
                     index=[1, 2, 4, 5, 6, 8])
    b = dd.repartition(B, [1, 2, 5, 8])

    for how in ['inner', 'outer']:
        c = concat_indexed_dataframes([a, b], join=how)

        result = c.compute()
        # Keyword arguments are required here: positional ``axis``/``join``
        # were deprecated by pandas and are rejected from pandas 2.0 on.
        expected = pd.concat([A, B], axis=0, join=how)

        assert list(result.columns) == list(expected.columns)

        # Compare rows as sorted (values, index label) pairs so that row
        # order does not affect the outcome.
        assert sorted(zip(result.values.tolist(), result.index.values.tolist())) == \
               sorted(zip(expected.values.tolist(), expected.index.values.tolist()))

    # Repeating the same call must give the same sorted ``.dask`` contents.
    assert sorted(concat_indexed_dataframes([a, b], join='inner').dask) == \
           sorted(concat_indexed_dataframes([a, b], join='inner').dask)
    # Different join modes must give different sorted ``.dask`` contents.
    assert sorted(concat_indexed_dataframes([a, b], join='inner').dask) != \
           sorted(concat_indexed_dataframes([a, b], join='outer').dask)