コード例 #1
0
def test_join_indexed_dataframe_to_indexed_dataframe():
    """Exercise ``join_indexed_dataframes`` for every ``how`` mode.

    Two small pandas frames are repartitioned with ``dd.repartition`` and
    joined.  For each mode the first and last entries of the result's
    ``divisions`` are asserted, and the joined data is compared with
    ``A.join(B, ...)`` computed by pandas.  The closing assertions compare
    the sorted ``.dask`` entries of two separately built inner joins (must
    match) and of an inner versus an outer join (must differ).
    """
    left_pdf = pd.DataFrame({'x': [1, 2, 3, 4, 5, 6]},
                            index=[1, 2, 3, 4, 6, 7])
    right_pdf = pd.DataFrame({'y': list('abcdef')},
                             index=[1, 2, 4, 5, 6, 8])
    a = dd.repartition(left_pdf, [1, 4, 7])
    b = dd.repartition(right_pdf, [1, 2, 5, 8])

    def sorted_graph(how):
        # A fresh join is built on every call so that independently
        # constructed graphs are the ones being compared.
        return sorted(join_indexed_dataframes(a, b, how=how).dask)

    # left join
    joined = join_indexed_dataframes(a, b, how='left')
    assert joined.divisions[0] == a.divisions[0]
    assert joined.divisions[-1] == max(a.divisions + b.divisions)
    assert eq(joined, left_pdf.join(right_pdf))

    # right join
    joined = join_indexed_dataframes(a, b, how='right')
    assert joined.divisions[0] == b.divisions[0]
    assert joined.divisions[-1] == b.divisions[-1]
    assert eq(joined, left_pdf.join(right_pdf, how='right'))

    # inner join
    joined = join_indexed_dataframes(a, b, how='inner')
    assert joined.divisions[0] == 1
    assert joined.divisions[-1] == max(a.divisions + b.divisions)
    assert eq(joined.compute(), left_pdf.join(right_pdf, how='inner'))

    # outer join
    joined = join_indexed_dataframes(a, b, how='outer')
    assert joined.divisions[0] == 1
    assert joined.divisions[-1] == 8
    assert eq(joined.compute(), left_pdf.join(right_pdf, how='outer'))

    assert sorted_graph('inner') == sorted_graph('inner')
    assert sorted_graph('inner') != sorted_graph('outer')
コード例 #2
0
def test_concat(join):
    """Compare ``dd.concat`` with ``pd.concat`` on frames and series.

    Parameters
    ----------
    join
        Forwarded unchanged as the ``join`` keyword of both ``dd.concat``
        and ``pd.concat`` in the first loop.  Where the value comes from is
        not visible in this function (presumably a fixture or test
        parametrization -- confirm against the surrounding module).
    """
    pdf1 = pd.DataFrame({'x': [1, 2, 3, 4, 6, 7],
                         'y': list('abcdef')},
                        index=[1, 2, 3, 4, 6, 7])
    ddf1 = dd.from_pandas(pdf1, 2)
    pdf2 = pd.DataFrame({'x': [1, 2, 3, 4, 6, 7],
                         'y': list('abcdef')},
                        index=[8, 9, 10, 11, 12, 13])
    ddf2 = dd.from_pandas(pdf2, 2)

    # different columns
    pdf3 = pd.DataFrame({'x': [1, 2, 3, 4, 6, 7],
                         'z': list('abcdef')},
                        index=[8, 9, 10, 11, 12, 13])
    ddf3 = dd.from_pandas(pdf3, 2)

    frame_cases = [(ddf1, ddf2, pdf1, pdf2),
                   (ddf1, ddf3, pdf1, pdf3)]
    # Each series pairing is listed exactly once; repeating a tuple would
    # only rerun an identical check.
    series_cases = [(ddf1.x, ddf2.x, pdf1.x, pdf2.x),
                    (ddf1.x, ddf3.z, pdf1.x, pdf3.z)]

    for (dd1, dd2, pd1, pd2) in frame_cases:
        result = dd.concat([dd1, dd2], join=join)
        expected = pd.concat([pd1, pd2], join=join)
        assert eq(result, expected)

    # test outer only, inner has a problem on pandas side
    for (dd1, dd2, pd1, pd2) in frame_cases + series_cases:
        result = dd.concat([dd1, dd2])
        expected = pd.concat([pd1, pd2])
        assert eq(result, expected)
コード例 #3
0
ファイル: test_multi.py プロジェクト: PedroMDuarte/dask
def test_concat(join):
    """Compare ``dd.concat`` with ``pd.concat`` on frames and series.

    Parameters
    ----------
    join
        Forwarded unchanged as the ``join`` keyword of both ``dd.concat``
        and ``pd.concat`` in the first loop.  Where the value comes from is
        not visible in this function (presumably a fixture or test
        parametrization -- confirm against the surrounding module).
    """
    pdf1 = pd.DataFrame({'x': [1, 2, 3, 4, 6, 7],
                         'y': list('abcdef')},
                        index=[1, 2, 3, 4, 6, 7])
    ddf1 = dd.from_pandas(pdf1, 2)
    pdf2 = pd.DataFrame({'x': [1, 2, 3, 4, 6, 7],
                         'y': list('abcdef')},
                        index=[8, 9, 10, 11, 12, 13])
    ddf2 = dd.from_pandas(pdf2, 2)

    # different columns
    pdf3 = pd.DataFrame({'x': [1, 2, 3, 4, 6, 7],
                         'z': list('abcdef')},
                        index=[8, 9, 10, 11, 12, 13])
    ddf3 = dd.from_pandas(pdf3, 2)

    frame_cases = [(ddf1, ddf2, pdf1, pdf2),
                   (ddf1, ddf3, pdf1, pdf3)]
    # Each series pairing is listed exactly once; repeating a tuple would
    # only rerun an identical check.
    series_cases = [(ddf1.x, ddf2.x, pdf1.x, pdf2.x),
                    (ddf1.x, ddf3.z, pdf1.x, pdf3.z)]

    for (dd1, dd2, pd1, pd2) in frame_cases:
        result = dd.concat([dd1, dd2], join=join)
        expected = pd.concat([pd1, pd2], join=join)
        assert eq(result, expected)

    # test outer only, inner has a problem on pandas side
    for (dd1, dd2, pd1, pd2) in frame_cases + series_cases:
        result = dd.concat([dd1, dd2])
        expected = pd.concat([pd1, pd2])
        assert eq(result, expected)
コード例 #4
0
ファイル: test_multi.py プロジェクト: PedroMDuarte/dask
def test_join_indexed_dataframe_to_indexed_dataframe():
    """Exercise ``join_indexed_dataframes`` for every ``how`` mode.

    Two small pandas frames are repartitioned with ``dd.repartition`` and
    joined.  For each mode the first and last entries of the result's
    ``divisions`` are asserted, and the joined data is compared with
    ``A.join(B, ...)`` computed by pandas.  The closing assertions compare
    the sorted ``.dask`` entries of two separately built inner joins (must
    match) and of an inner versus an outer join (must differ).
    """
    left_pdf = pd.DataFrame({'x': [1, 2, 3, 4, 5, 6]},
                            index=[1, 2, 3, 4, 6, 7])
    right_pdf = pd.DataFrame({'y': list('abcdef')},
                             index=[1, 2, 4, 5, 6, 8])
    a = dd.repartition(left_pdf, [1, 4, 7])
    b = dd.repartition(right_pdf, [1, 2, 5, 8])

    def sorted_graph(how):
        # A fresh join is built on every call so that independently
        # constructed graphs are the ones being compared.
        return sorted(join_indexed_dataframes(a, b, how=how).dask)

    # left join
    joined = join_indexed_dataframes(a, b, how='left')
    assert joined.divisions[0] == a.divisions[0]
    assert joined.divisions[-1] == max(a.divisions + b.divisions)
    assert eq(joined, left_pdf.join(right_pdf))

    # right join
    joined = join_indexed_dataframes(a, b, how='right')
    assert joined.divisions[0] == b.divisions[0]
    assert joined.divisions[-1] == b.divisions[-1]
    assert eq(joined, left_pdf.join(right_pdf, how='right'))

    # inner join
    joined = join_indexed_dataframes(a, b, how='inner')
    assert joined.divisions[0] == 1
    assert joined.divisions[-1] == max(a.divisions + b.divisions)
    assert eq(joined.compute(), left_pdf.join(right_pdf, how='inner'))

    # outer join
    joined = join_indexed_dataframes(a, b, how='outer')
    assert joined.divisions[0] == 1
    assert joined.divisions[-1] == 8
    assert eq(joined.compute(), left_pdf.join(right_pdf, how='outer'))

    assert sorted_graph('inner') == sorted_graph('inner')
    assert sorted_graph('inner') != sorted_graph('outer')
コード例 #5
0
ファイル: test_multi.py プロジェクト: qwshy/dask
def test_merge(how):
    """Compare ``dd.merge`` with ``pd.merge`` for a single ``how`` mode.

    Covers merging on a shared column, on differently named columns (with
    and without suffixes), with default keys for every dask/pandas operand
    combination, on both indexes, and on a left column against the right
    index.  For the merge on column ``y`` it additionally asserts that
    every entry of ``result.divisions`` is ``None``.

    Parameters
    ----------
    how
        Forwarded as the ``how`` keyword to the merge calls.  Its source is
        not visible in this function (presumably a fixture or test
        parametrization -- confirm against the surrounding module).
    """
    A = pd.DataFrame({'x': [1, 2, 3, 4, 5, 6], 'y': [1, 1, 2, 2, 3, 4]})
    a = dd.repartition(A, [0, 4, 5])

    B = pd.DataFrame({'y': [1, 3, 4, 4, 5, 6], 'z': [6, 5, 4, 3, 2, 1]})
    b = dd.repartition(B, [0, 2, 5])

    # Assert the comparison result so a falsy return cannot pass silently.
    assert eq(dd.merge(a, b, left_index=True, right_index=True),
              pd.merge(A, B, left_index=True, right_index=True))

    result = dd.merge(a, b, on='y', how=how)
    list_eq(result, pd.merge(A, B, on='y', how=how))
    assert all(d is None for d in result.divisions)

    list_eq(dd.merge(a, b, left_on='x', right_on='z', how=how),
            pd.merge(A, B, left_on='x', right_on='z', how=how))
    list_eq(dd.merge(a, b, left_on='x', right_on='z', how=how,
                     suffixes=('1', '2')),
            pd.merge(A, B, left_on='x', right_on='z', how=how,
                     suffixes=('1', '2')))

    # Every combination of dask / pandas operands must give the same answer.
    list_eq(dd.merge(a, b, how=how), pd.merge(A, B, how=how))
    list_eq(dd.merge(a, B, how=how), pd.merge(A, B, how=how))
    list_eq(dd.merge(A, b, how=how), pd.merge(A, B, how=how))
    list_eq(dd.merge(A, B, how=how), pd.merge(A, B, how=how))

    list_eq(dd.merge(a, b, left_index=True, right_index=True, how=how),
            pd.merge(A, B, left_index=True, right_index=True, how=how))
    list_eq(dd.merge(a, b, left_index=True, right_index=True, how=how,
                     suffixes=('1', '2')),
            pd.merge(A, B, left_index=True, right_index=True, how=how,
                     suffixes=('1', '2')))

    list_eq(dd.merge(a, b, left_on='x', right_index=True, how=how),
            pd.merge(A, B, left_on='x', right_index=True, how=how))
    list_eq(dd.merge(a, b, left_on='x', right_index=True, how=how,
                     suffixes=('1', '2')),
            pd.merge(A, B, left_on='x', right_index=True, how=how,
                     suffixes=('1', '2')))
コード例 #6
0
ファイル: test_multi.py プロジェクト: PedroMDuarte/dask
 def _check(a, b, aa, bb):
     """Assert type and equality invariants for two before/after pairs.

     All four arguments must be ``dd.DataFrame`` instances, ``a`` must
     compare equal to ``aa`` and ``b`` to ``bb`` under ``eq``.  ``divisions``
     and ``L`` are not parameters; they are read from a surrounding scope
     that is not visible here.
     """
     for frame in (a, b, aa, bb):
         assert isinstance(frame, dd.DataFrame)
     for before, after in ((a, aa), (b, bb)):
         assert eq(before, after)
     assert divisions == (10, 30, 40, 60, 80, 100)
     assert isinstance(L, list)
     assert len(divisions) == 1 + len(L)
コード例 #7
0
 def _check(a, b, aa, bb):
     """Assert type and equality invariants for two before/after pairs.

     All four arguments must be ``dd.DataFrame`` instances, ``a`` must
     compare equal to ``aa`` and ``b`` to ``bb`` under ``eq``.  ``divisions``
     and ``L`` are not parameters; they are read from a surrounding scope
     that is not visible here.
     """
     for frame in (a, b, aa, bb):
         assert isinstance(frame, dd.DataFrame)
     for before, after in ((a, aa), (b, bb)):
         assert eq(before, after)
     assert divisions == (10, 30, 40, 60, 80, 100)
     assert isinstance(L, list)
     assert len(divisions) == 1 + len(L)
コード例 #8
0
ファイル: test_multi.py プロジェクト: roxyboy/dask
def test_align_partitions():
    """Check ``align_partitions`` on frames with different divisions.

    The first part aligns two frames repartitioned on integer divisions and
    asserts the returned divisions and the returned list ``L``.  The second
    part aligns ``dd.from_pandas`` frames built with several partition
    counts -- first with the default index, then with differing string
    indexes -- and asserts that each aligned result still equals its
    pandas source under ``eq``.
    """
    A = pd.DataFrame({'x': [1, 2, 3, 4, 5, 6], 'y': list('abdabd')},
                     index=[10, 20, 30, 40, 50, 60])
    a = dd.repartition(A, [10, 40, 60])

    B = pd.DataFrame({'x': [1, 2, 3, 4], 'y': list('abda')},
                     index=[30, 70, 80, 100])
    b = dd.repartition(B, [30, 80, 100])

    (aa, bb), divisions, L = align_partitions(a, b)
    assert isinstance(a, dd.DataFrame)
    assert isinstance(b, dd.DataFrame)
    # Type-check the aligned outputs as well; checking only the inputs says
    # nothing about what align_partitions returned.
    assert isinstance(aa, dd.DataFrame)
    assert isinstance(bb, dd.DataFrame)
    assert divisions == (10, 30, 40, 60, 80, 100)
    assert isinstance(L, list)
    assert len(divisions) == 1 + len(L)
    assert L == [[(aa._name, i), (bb._name, i)] for i in range(5)]

    columns_l = {'a': [1, 2, 3, 4, 5, 6, 7],
                 'b': [7, 6, 5, 4, 3, 2, 1]}
    columns_r = {'c': [1, 2, 3, 4, 5, 6, 7],
                 'd': [7, 6, 5, 4, 3, 2, 1]}

    frame_pairs = [
        # default index on both sides
        (pd.DataFrame(columns_l), pd.DataFrame(columns_r)),
        # different index
        (pd.DataFrame(columns_l, index=list('abcdefg')),
         pd.DataFrame(columns_r, index=list('fghijkl'))),
    ]

    for ldf, rdf in frame_pairs:
        for lparts, rparts in [(1, 1), (2, 2), (2, 3), (3, 2)]:
            lhs = dd.from_pandas(ldf, lparts)
            rhs = dd.from_pandas(rdf, rparts)
            # Unpacking also checks that a 3-tuple is returned.
            (lresult, rresult), _div, _parts = \
                dd.multi.align_partitions(lhs, rhs)
            assert eq(lresult, ldf)
            assert eq(rresult, rdf)
コード例 #9
0
ファイル: test_multi.py プロジェクト: bj-wangjia/dask
def test_merge_by_multiple_columns():
    """Compare dask joins/merges with pandas over several frame pairs.

    For three left/right frame pairs, three partition-count combinations
    and all four ``how`` modes, the test checks index joins via
    ``DataFrame.join`` and ``merge(left_index=True, right_index=True)``
    in both operand orders, then column merges on single columns and on
    the two-column key ``['a', 'b']`` / ``['d', 'e']``.
    """
    pdf1l = pd.DataFrame({'a': list('abcdefghij'),
                          'b': list('abcdefghij'),
                          'c': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]},
                         index=list('abcdefghij'))
    pdf1r = pd.DataFrame({'d': list('abcdefghij'),
                          'e': list('abcdefghij'),
                          'f': [10, 9, 8, 7, 6, 5, 4, 3, 2, 1]},
                         index=list('abcdefghij'))

    pdf2l = pd.DataFrame({'a': list('abcdeabcde'),
                          'b': list('abcabcabca'),
                          'c': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]},
                         index=list('abcdefghij'))
    pdf2r = pd.DataFrame({'d': list('edcbaedcba'),
                          'e': list('aaabbbcccd'),
                          'f': [10, 9, 8, 7, 6, 5, 4, 3, 2, 1]},
                         index=list('fghijklmno'))

    pdf3l = pd.DataFrame({'a': list('aaaaaaaaaa'),
                          'b': list('aaaaaaaaaa'),
                          'c': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]},
                         index=list('abcdefghij'))
    pdf3r = pd.DataFrame({'d': list('aaabbbccaa'),
                          'e': list('abbbbbbbbb'),
                          'f': [10, 9, 8, 7, 6, 5, 4, 3, 2, 1]},
                         index=list('ABCDEFGHIJ'))

    for pdl, pdr in [(pdf1l, pdf1r), (pdf2l, pdf2r), (pdf3l, pdf3r)]:

        for lpart, rpart in [(2, 2), (3, 2), (2, 3)]:

            ddl = dd.from_pandas(pdl, lpart)
            ddr = dd.from_pandas(pdr, rpart)

            for how in ['inner', 'outer', 'left', 'right']:
                # ``eq`` results are asserted so a falsy return cannot
                # pass silently.
                assert eq(ddl.join(ddr, how=how), pdl.join(pdr, how=how))
                assert eq(ddr.join(ddl, how=how), pdr.join(pdl, how=how))

                assert eq(dd.merge(ddl, ddr, how=how,
                                   left_index=True, right_index=True),
                          pd.merge(pdl, pdr, how=how,
                                   left_index=True, right_index=True))
                assert eq(dd.merge(ddr, ddl, how=how,
                                   left_index=True, right_index=True),
                          pd.merge(pdr, pdl, how=how,
                                   left_index=True, right_index=True))

                # hash join
                list_eq(dd.merge(ddl, ddr, how=how, left_on='a', right_on='d'),
                        pd.merge(pdl, pdr, how=how, left_on='a', right_on='d'))
                list_eq(dd.merge(ddl, ddr, how=how, left_on='b', right_on='e'),
                        pd.merge(pdl, pdr, how=how, left_on='b', right_on='e'))

                list_eq(dd.merge(ddr, ddl, how=how, left_on='d', right_on='a'),
                        pd.merge(pdr, pdl, how=how, left_on='d', right_on='a'))
                list_eq(dd.merge(ddr, ddl, how=how, left_on='e', right_on='b'),
                        pd.merge(pdr, pdl, how=how, left_on='e', right_on='b'))

                list_eq(dd.merge(ddl, ddr, how=how,
                                 left_on=['a', 'b'], right_on=['d', 'e']),
                        pd.merge(pdl, pdr, how=how,
                                 left_on=['a', 'b'], right_on=['d', 'e']))
コード例 #10
0
def test_merge_by_multiple_columns(how):
    """Compare dask joins/merges with pandas for a single ``how`` mode.

    For three left/right frame pairs and three partition-count
    combinations, the test checks index joins via ``DataFrame.join`` and
    ``merge(left_index=True, right_index=True)`` in both operand orders,
    then column merges on single columns and on the two-column key
    ``['a', 'b']`` / ``['d', 'e']``.

    Parameters
    ----------
    how
        Forwarded as the ``how`` keyword to every join/merge call.  Its
        source is not visible in this function (presumably a fixture or
        test parametrization -- confirm against the surrounding module).
    """
    pdf1l = pd.DataFrame({'a': list('abcdefghij'),
                          'b': list('abcdefghij'),
                          'c': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]},
                         index=list('abcdefghij'))
    pdf1r = pd.DataFrame({'d': list('abcdefghij'),
                          'e': list('abcdefghij'),
                          'f': [10, 9, 8, 7, 6, 5, 4, 3, 2, 1]},
                         index=list('abcdefghij'))

    pdf2l = pd.DataFrame({'a': list('abcdeabcde'),
                          'b': list('abcabcabca'),
                          'c': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]},
                         index=list('abcdefghij'))
    pdf2r = pd.DataFrame({'d': list('edcbaedcba'),
                          'e': list('aaabbbcccd'),
                          'f': [10, 9, 8, 7, 6, 5, 4, 3, 2, 1]},
                         index=list('fghijklmno'))

    pdf3l = pd.DataFrame({'a': list('aaaaaaaaaa'),
                          'b': list('aaaaaaaaaa'),
                          'c': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]},
                         index=list('abcdefghij'))
    pdf3r = pd.DataFrame({'d': list('aaabbbccaa'),
                          'e': list('abbbbbbbbb'),
                          'f': [10, 9, 8, 7, 6, 5, 4, 3, 2, 1]},
                         index=list('ABCDEFGHIJ'))

    for pdl, pdr in [(pdf1l, pdf1r), (pdf2l, pdf2r), (pdf3l, pdf3r)]:

        for lpart, rpart in [(2, 2), (3, 2), (2, 3)]:

            ddl = dd.from_pandas(pdl, lpart)
            ddr = dd.from_pandas(pdr, rpart)

            # ``eq`` results are asserted so a falsy return cannot pass
            # silently.
            assert eq(ddl.join(ddr, how=how), pdl.join(pdr, how=how))
            assert eq(ddr.join(ddl, how=how), pdr.join(pdl, how=how))

            assert eq(dd.merge(ddl, ddr, how=how,
                               left_index=True, right_index=True),
                      pd.merge(pdl, pdr, how=how,
                               left_index=True, right_index=True))
            assert eq(dd.merge(ddr, ddl, how=how,
                               left_index=True, right_index=True),
                      pd.merge(pdr, pdl, how=how,
                               left_index=True, right_index=True))

            # hash join
            list_eq(dd.merge(ddl, ddr, how=how, left_on='a', right_on='d'),
                    pd.merge(pdl, pdr, how=how, left_on='a', right_on='d'))
            list_eq(dd.merge(ddl, ddr, how=how, left_on='b', right_on='e'),
                    pd.merge(pdl, pdr, how=how, left_on='b', right_on='e'))

            list_eq(dd.merge(ddr, ddl, how=how, left_on='d', right_on='a'),
                    pd.merge(pdr, pdl, how=how, left_on='d', right_on='a'))
            list_eq(dd.merge(ddr, ddl, how=how, left_on='e', right_on='b'),
                    pd.merge(pdr, pdl, how=how, left_on='e', right_on='b'))

            list_eq(dd.merge(ddl, ddr, how=how,
                             left_on=['a', 'b'], right_on=['d', 'e']),
                    pd.merge(pdl, pdr, how=how,
                             left_on=['a', 'b'], right_on=['d', 'e']))
コード例 #11
0
ファイル: test_multi.py プロジェクト: bj-wangjia/dask
def test_merge():
    """Compare ``dd.merge`` with ``pd.merge`` for all four ``how`` modes.

    Covers merging on a shared column, on differently named columns (with
    and without suffixes), with default keys for every dask/pandas operand
    combination, on both indexes, and on a left column against the right
    index.  For the merge on column ``y`` it additionally asserts that
    every entry of ``result.divisions`` is ``None``.
    """
    A = pd.DataFrame({'x': [1, 2, 3, 4, 5, 6], 'y': [1, 1, 2, 2, 3, 4]})
    a = dd.repartition(A, [0, 4, 5])

    B = pd.DataFrame({'y': [1, 3, 4, 4, 5, 6], 'z': [6, 5, 4, 3, 2, 1]})
    b = dd.repartition(B, [0, 2, 5])

    # Assert the comparison result so a falsy return cannot pass silently.
    assert eq(dd.merge(a, b, left_index=True, right_index=True),
              pd.merge(A, B, left_index=True, right_index=True))

    for how in ['inner', 'outer', 'left', 'right']:

        result = dd.merge(a, b, on='y', how=how)
        list_eq(result, pd.merge(A, B, on='y', how=how))
        assert all(d is None for d in result.divisions)

        list_eq(dd.merge(a, b, left_on='x', right_on='z', how=how),
                pd.merge(A, B, left_on='x', right_on='z', how=how))
        list_eq(dd.merge(a, b, left_on='x', right_on='z', how=how,
                         suffixes=('1', '2')),
                pd.merge(A, B, left_on='x', right_on='z', how=how,
                         suffixes=('1', '2')))

        # Every combination of dask / pandas operands must agree.
        list_eq(dd.merge(a, b, how=how), pd.merge(A, B, how=how))
        list_eq(dd.merge(a, B, how=how), pd.merge(A, B, how=how))
        list_eq(dd.merge(A, b, how=how), pd.merge(A, B, how=how))
        list_eq(dd.merge(A, B, how=how), pd.merge(A, B, how=how))

        list_eq(dd.merge(a, b, left_index=True, right_index=True, how=how),
                pd.merge(A, B, left_index=True, right_index=True, how=how))
        list_eq(dd.merge(a, b, left_index=True, right_index=True, how=how,
                         suffixes=('1', '2')),
                pd.merge(A, B, left_index=True, right_index=True, how=how,
                         suffixes=('1', '2')))

        list_eq(dd.merge(a, b, left_on='x', right_index=True, how=how),
                pd.merge(A, B, left_on='x', right_index=True, how=how))
        list_eq(dd.merge(a, b, left_on='x', right_index=True, how=how,
                         suffixes=('1', '2')),
                pd.merge(A, B, left_on='x', right_index=True, how=how,
                         suffixes=('1', '2')))
コード例 #12
0
ファイル: test_multi.py プロジェクト: PedroMDuarte/dask
def test_indexed_concat(join):
    """Check ``concat_indexed_dataframes`` against ``pd.concat(axis=0)``.

    Parameters
    ----------
    join
        Forwarded as the ``join`` keyword to ``concat_indexed_dataframes``
        and ``pd.concat``.  Its source is not visible in this function
        (presumably a fixture or test parametrization -- confirm).

    The trailing assertions compare the sorted ``.dask`` entries of two
    separately built concatenations with the same ``join`` (must match)
    and of an ``'inner'`` versus an ``'outer'`` concatenation (must
    differ).
    """
    first_pdf = pd.DataFrame({'x': [1, 2, 3, 4, 6, 7], 'y': list('abcdef')},
                             index=[1, 2, 3, 4, 6, 7])
    second_pdf = pd.DataFrame({'x': [10, 20, 40, 50, 60, 80]},
                              index=[1, 2, 4, 5, 6, 8])
    a = dd.repartition(first_pdf, [1, 4, 7])
    b = dd.repartition(second_pdf, [1, 2, 5, 8])

    def sorted_graph(mode):
        # A fresh concatenation is built on every call.
        return sorted(concat_indexed_dataframes([a, b], join=mode).dask)

    result = concat_indexed_dataframes([a, b], join=join)
    expected = pd.concat([first_pdf, second_pdf], axis=0, join=join)
    assert eq(result, expected)

    assert sorted_graph(join) == sorted_graph(join)
    assert sorted_graph('inner') != sorted_graph('outer')
コード例 #13
0
def test_indexed_concat(join):
    """Check ``concat_indexed_dataframes`` against ``pd.concat(axis=0)``.

    Parameters
    ----------
    join
        Forwarded as the ``join`` keyword to ``concat_indexed_dataframes``
        and ``pd.concat``.  Its source is not visible in this function
        (presumably a fixture or test parametrization -- confirm).

    The trailing assertions compare the sorted ``.dask`` entries of two
    separately built concatenations with the same ``join`` (must match)
    and of an ``'inner'`` versus an ``'outer'`` concatenation (must
    differ).
    """
    first_pdf = pd.DataFrame({'x': [1, 2, 3, 4, 6, 7], 'y': list('abcdef')},
                             index=[1, 2, 3, 4, 6, 7])
    second_pdf = pd.DataFrame({'x': [10, 20, 40, 50, 60, 80]},
                              index=[1, 2, 4, 5, 6, 8])
    a = dd.repartition(first_pdf, [1, 4, 7])
    b = dd.repartition(second_pdf, [1, 2, 5, 8])

    def sorted_graph(mode):
        # A fresh concatenation is built on every call.
        return sorted(concat_indexed_dataframes([a, b], join=mode).dask)

    result = concat_indexed_dataframes([a, b], join=join)
    expected = pd.concat([first_pdf, second_pdf], axis=0, join=join)
    assert eq(result, expected)

    assert sorted_graph(join) == sorted_graph(join)
    assert sorted_graph('inner') != sorted_graph('outer')
コード例 #14
0
ファイル: test_multi.py プロジェクト: PedroMDuarte/dask
def test_join_by_index_patterns(how):

    # Similar test cases as test_merge_by_index_patterns,
    # but columns / index for join have same dtype

    pdf1l = pd.DataFrame({'a': list('abcdefg'),
                          'b': [7, 6, 5, 4, 3, 2, 1]},
                         index=list('abcdefg'))
    pdf1r = pd.DataFrame({'c': list('abcdefg'),
                          'd': [7, 6, 5, 4, 3, 2, 1]},
                         index=list('abcdefg'))

    pdf2l = pdf1l
    pdf2r = pd.DataFrame({'c': list('gfedcba'),
                          'd': [7, 6, 5, 4, 3, 2, 1]},
                          index=list('abcdefg'))

    pdf3l = pdf1l
    pdf3r = pd.DataFrame({'c': list('abdg'),
                          'd': [5, 4, 3, 2]},
                          index=list('abdg'))

    pdf4l = pd.DataFrame({'a': list('abcabce'),
                          'b': [7, 6, 5, 4, 3, 2, 1]},
                          index=list('abcdefg'))
    pdf4r = pd.DataFrame({'c': list('abda'),
                          'd': [5, 4, 3, 2]},
                          index=list('abdg'))

    # completely different index
    pdf5l = pd.DataFrame({'a': list('lmnopqr'),
                          'b': [7, 6, 5, 4, 3, 2, 1]},
                          index=list('lmnopqr'))
    pdf5r = pd.DataFrame({'c': list('abcd'),
                          'd': [5, 4, 3, 2]},
                          index=list('abcd'))

    pdf6l = pd.DataFrame({'a': list('cdefghi'),
                          'b': [7, 6, 5, 4, 3, 2, 1]},
                          index=list('cdefghi'))
    pdf6r = pd.DataFrame({'c': list('abab'),
                          'd': [5, 4, 3, 2]},
                          index=list('abcd'))

    pdf7l = pd.DataFrame({'a': list('aabbccd'),
                          'b': [7, 6, 5, 4, 3, 2, 1]},
                          index=list('abcdefg'))
    pdf7r = pd.DataFrame({'c': list('aabb'),
                          'd': [5, 4, 3, 2]},
                          index=list('fghi'))

    for pdl, pdr in [(pdf1l, pdf1r), (pdf2l, pdf2r), (pdf3l, pdf3r),
                     (pdf4l, pdf4r), (pdf5l, pdf5r), (pdf6l, pdf6r),
                     (pdf7l, pdf7r)]:

        for lpart, rpart in [(2, 2), (3, 2), (2, 3)]:

            ddl = dd.from_pandas(pdl, lpart)
            ddr = dd.from_pandas(pdr, rpart)

            eq(ddl.join(ddr, how=how), pdl.join(pdr, how=how))
            eq(ddr.join(ddl, how=how), pdr.join(pdl, how=how))

            eq(ddl.join(ddr, how=how, lsuffix='l', rsuffix='r'),
               pdl.join(pdr, how=how, lsuffix='l', rsuffix='r'))
            eq(ddr.join(ddl, how=how, lsuffix='l', rsuffix='r'),
               pdr.join(pdl, how=how, lsuffix='l', rsuffix='r'))

            """
コード例 #15
0
ファイル: test_multi.py プロジェクト: PedroMDuarte/dask
def test_merge_by_index_patterns(how):
    """Compare dask and pandas merges over several index layouts.

    Seven left/right frame pairs (default index, identical string index,
    partially overlapping and fully disjoint indexes) are each split into
    three partition-count combinations.  For every combination the test
    checks index-on-index merges through both ``dd.merge`` and the
    ``DataFrame.merge`` method, then column merges ("hash join") in both
    operand orders.

    Parameters
    ----------
    how
        Forwarded as the ``how`` keyword to every merge call.  Its source
        is not visible in this function (presumably a fixture or test
        parametrization -- confirm against the surrounding module).
    """
    pdf1l = pd.DataFrame({'a': [1, 2, 3, 4, 5, 6, 7],
                          'b': [7, 6, 5, 4, 3, 2, 1]})
    pdf1r = pd.DataFrame({'c': [1, 2, 3, 4, 5, 6, 7],
                          'd': [7, 6, 5, 4, 3, 2, 1]})

    pdf2l = pd.DataFrame({'a': [1, 2, 3, 4, 5, 6, 7],
                          'b': [7, 6, 5, 4, 3, 2, 1]},
                         index=list('abcdefg'))
    pdf2r = pd.DataFrame({'c': [7, 6, 5, 4, 3, 2, 1],
                          'd': [7, 6, 5, 4, 3, 2, 1]},
                         index=list('abcdefg'))

    pdf3l = pdf2l
    pdf3r = pd.DataFrame({'c': [6, 7, 8, 9],
                          'd': [5, 4, 3, 2]},
                         index=list('abdg'))

    pdf4l = pdf2l
    pdf4r = pd.DataFrame({'c': [9, 10, 11, 12],
                          'd': [5, 4, 3, 2]},
                         index=list('abdg'))

    # completely different index
    pdf5l = pd.DataFrame({'a': [1, 1, 2, 2, 3, 3, 4],
                          'b': [7, 6, 5, 4, 3, 2, 1]},
                         index=list('lmnopqr'))
    pdf5r = pd.DataFrame({'c': [1, 1, 1, 1],
                          'd': [5, 4, 3, 2]},
                         index=list('abcd'))

    pdf6l = pd.DataFrame({'a': [1, 1, 2, 2, 3, 3, 4],
                          'b': [7, 6, 5, 4, 3, 2, 1]},
                         index=list('cdefghi'))
    pdf6r = pd.DataFrame({'c': [1, 2, 1, 2],
                          'd': [5, 4, 3, 2]},
                         index=list('abcd'))

    pdf7l = pd.DataFrame({'a': [1, 1, 2, 2, 3, 3, 4],
                          'b': [7, 6, 5, 4, 3, 2, 1]},
                         index=list('abcdefg'))
    pdf7r = pd.DataFrame({'c': [5, 6, 7, 8],
                          'd': [5, 4, 3, 2]},
                         index=list('fghi'))

    for pdl, pdr in [(pdf1l, pdf1r), (pdf2l, pdf2r), (pdf3l, pdf3r),
                     (pdf4l, pdf4r), (pdf5l, pdf5r), (pdf6l, pdf6r),
                     (pdf7l, pdf7r)]:

        for lpart, rpart in [(2, 2),   # same partition
                             (3, 2),   # left npartition > right npartition
                             (2, 3)]:  # left npartition < right npartition

            ddl = dd.from_pandas(pdl, lpart)
            ddr = dd.from_pandas(pdr, rpart)

            # ``eq`` results are asserted so a falsy return cannot pass
            # silently.
            assert eq(dd.merge(ddl, ddr, how=how,
                               left_index=True, right_index=True),
                      pd.merge(pdl, pdr, how=how,
                               left_index=True, right_index=True))
            assert eq(dd.merge(ddr, ddl, how=how,
                               left_index=True, right_index=True),
                      pd.merge(pdr, pdl, how=how,
                               left_index=True, right_index=True))

            assert eq(ddr.merge(ddl, how=how,
                                left_index=True, right_index=True),
                      pdr.merge(pdl, how=how,
                                left_index=True, right_index=True))
            assert eq(ddl.merge(ddr, how=how,
                                left_index=True, right_index=True),
                      pdl.merge(pdr, how=how,
                                left_index=True, right_index=True))

            # hash join
            list_eq(dd.merge(ddl, ddr, how=how, left_on='a', right_on='c'),
                    pd.merge(pdl, pdr, how=how, left_on='a', right_on='c'))
            list_eq(dd.merge(ddl, ddr, how=how, left_on='b', right_on='d'),
                    pd.merge(pdl, pdr, how=how, left_on='b', right_on='d'))

            list_eq(dd.merge(ddr, ddl, how=how, left_on='c', right_on='a'),
                    pd.merge(pdr, pdl, how=how, left_on='c', right_on='a'))
            list_eq(dd.merge(ddr, ddl, how=how, left_on='d', right_on='b'),
                    pd.merge(pdr, pdl, how=how, left_on='d', right_on='b'))

            list_eq(ddl.merge(ddr, how=how, left_on='a', right_on='c'),
                    pdl.merge(pdr, how=how, left_on='a', right_on='c'))
            list_eq(ddl.merge(ddr, how=how, left_on='b', right_on='d'),
                    pdl.merge(pdr, how=how, left_on='b', right_on='d'))

            list_eq(ddr.merge(ddl, how=how, left_on='c', right_on='a'),
                    pdr.merge(pdl, how=how, left_on='c', right_on='a'))
            list_eq(ddr.merge(ddl, how=how, left_on='d', right_on='b'),
                    pdr.merge(pdl, how=how, left_on='d', right_on='b'))
コード例 #16
0
ファイル: test_multi.py プロジェクト: roxyboy/dask
def test_merge_by_index_patterns():
    """Compare index-on-index ``dd.merge`` with ``pd.merge``.

    Seven frame pairs (default index, identical string index, partially
    overlapping and fully disjoint indexes) are each converted with three
    partition-count combinations and merged with all four ``how`` modes;
    every dask result is asserted equal to the pandas result via ``eq``.
    """
    def seven_rows(first, second, index=None):
        # Seven-row frame with an ascending and a descending column.
        return pd.DataFrame({first: [1, 2, 3, 4, 5, 6, 7],
                             second: [7, 6, 5, 4, 3, 2, 1]},
                            index=index)

    def four_rows(index):
        # Four-row frame with columns 'c' and 'd'.
        return pd.DataFrame({'c': [1, 2, 3, 4],
                             'd': [5, 4, 3, 2]},
                            index=index)

    pdf1l = seven_rows('a', 'b')
    pdf1r = seven_rows('c', 'd')

    pdf2l = seven_rows('a', 'b', index=list('abcdefg'))
    pdf2r = seven_rows('c', 'd', index=list('abcdefg'))

    pdf3l = seven_rows('a', 'b', index=list('abcdefg'))
    pdf3r = four_rows(list('abdg'))

    pdf4r = four_rows(list('abdg'))
    pdf4l = seven_rows('a', 'b', index=list('abcdefg'))

    # completely different index
    pdf5r = four_rows(list('abcd'))
    pdf5l = seven_rows('a', 'b', index=list('lmnopqr'))

    pdf6r = four_rows(list('abcd'))
    pdf6l = seven_rows('a', 'b', index=list('cdefghi'))

    pdf7r = four_rows(list('fghi'))
    pdf7l = seven_rows('a', 'b', index=list('abcdefg'))

    # NOTE(review): for pairs 5-7 the "r" frame is passed as the left
    # operand.  The order is kept as found -- confirm whether intentional.
    for pdl, pdr in [(pdf1l, pdf1r), (pdf2l, pdf2r), (pdf3l, pdf3r),
                     (pdf4l, pdf4r), (pdf5r, pdf5l), (pdf6r, pdf6l),
                     (pdf7r, pdf7l)]:

        for lpart, rpart in [(2, 2),   # same partition
                             (3, 2),   # left npartition > right npartition
                             (2, 3)]:  # left npartition < right npartition
            ddl = dd.from_pandas(pdl, lpart)
            ddr = dd.from_pandas(pdr, rpart)

            for how in ['inner', 'outer', 'left', 'right']:
                # Asserted so a falsy ``eq`` return cannot pass silently.
                assert eq(dd.merge(ddl, ddr, how=how,
                                   left_index=True, right_index=True),
                          pd.merge(pdl, pdr, how=how,
                                   left_index=True, right_index=True))
コード例 #17
0
def test_merge_by_index_patterns():
    """Compare index-on-index ``dd.merge`` with ``pd.merge``.

    Seven frame pairs (default index, identical string index, partially
    overlapping and fully disjoint indexes) are each converted with three
    partition-count combinations and merged with all four ``how`` modes;
    every dask result is asserted equal to the pandas result via ``eq``.
    """
    def seven_rows(first, second, index=None):
        # Seven-row frame with an ascending and a descending column.
        return pd.DataFrame({first: [1, 2, 3, 4, 5, 6, 7],
                             second: [7, 6, 5, 4, 3, 2, 1]},
                            index=index)

    def four_rows(index):
        # Four-row frame with columns 'c' and 'd'.
        return pd.DataFrame({'c': [1, 2, 3, 4],
                             'd': [5, 4, 3, 2]},
                            index=index)

    pdf1l = seven_rows('a', 'b')
    pdf1r = seven_rows('c', 'd')

    pdf2l = seven_rows('a', 'b', index=list('abcdefg'))
    pdf2r = seven_rows('c', 'd', index=list('abcdefg'))

    pdf3l = seven_rows('a', 'b', index=list('abcdefg'))
    pdf3r = four_rows(list('abdg'))

    pdf4r = four_rows(list('abdg'))
    pdf4l = seven_rows('a', 'b', index=list('abcdefg'))

    # completely different index
    pdf5r = four_rows(list('abcd'))
    pdf5l = seven_rows('a', 'b', index=list('lmnopqr'))

    pdf6r = four_rows(list('abcd'))
    pdf6l = seven_rows('a', 'b', index=list('cdefghi'))

    pdf7r = four_rows(list('fghi'))
    pdf7l = seven_rows('a', 'b', index=list('abcdefg'))

    # NOTE(review): for pairs 5-7 the "r" frame is passed as the left
    # operand.  The order is kept as found -- confirm whether intentional.
    for pdl, pdr in [(pdf1l, pdf1r), (pdf2l, pdf2r), (pdf3l, pdf3r),
                     (pdf4l, pdf4r), (pdf5r, pdf5l), (pdf6r, pdf6l),
                     (pdf7r, pdf7l)]:

        for lpart, rpart in [(2, 2),   # same partition
                             (3, 2),   # left npartition > right npartition
                             (2, 3)]:  # left npartition < right npartition
            ddl = dd.from_pandas(pdl, lpart)
            ddr = dd.from_pandas(pdr, rpart)

            for how in ['inner', 'outer', 'left', 'right']:
                # Asserted so a falsy ``eq`` return cannot pass silently.
                assert eq(dd.merge(ddl, ddr, how=how,
                                   left_index=True, right_index=True),
                          pd.merge(pdl, pdr, how=how,
                                   left_index=True, right_index=True))
コード例 #18
0
def test_join_by_index_patterns(how):
    """Check index-based ``DataFrame.join`` on dask frames against pandas.

    Similar test cases to ``test_merge_by_index_patterns``, but the columns
    and the index used for the join have the same dtype.

    Every (left, right) pandas pair is converted to dask frames with several
    partition counts, joined in both operand orders (with and without
    suffixes), and compared with the equivalent pandas join.

    Parameters
    ----------
    how : str
        Join type forwarded unchanged to both the dask and pandas ``join``.
        NOTE(review): presumably supplied by a parametrize decorator that is
        not visible in this snippet -- confirm.
    """
    # Identical index on both sides.
    pdf1l = pd.DataFrame({'a': list('abcdefg'),
                          'b': [7, 6, 5, 4, 3, 2, 1]},
                         index=list('abcdefg'))
    pdf1r = pd.DataFrame({'c': list('abcdefg'),
                          'd': [7, 6, 5, 4, 3, 2, 1]},
                         index=list('abcdefg'))

    # Same index, right-hand 'c' values reversed.
    pdf2l = pdf1l
    pdf2r = pd.DataFrame({'c': list('gfedcba'),
                          'd': [7, 6, 5, 4, 3, 2, 1]},
                         index=list('abcdefg'))

    # Right-hand index is a subset of the left-hand index.
    pdf3l = pdf1l
    pdf3r = pd.DataFrame({'c': list('abdg'),
                          'd': [5, 4, 3, 2]},
                         index=list('abdg'))

    # Subset index again, with repeated values in the 'a' / 'c' columns.
    pdf4l = pd.DataFrame({'a': list('abcabce'),
                          'b': [7, 6, 5, 4, 3, 2, 1]},
                         index=list('abcdefg'))
    pdf4r = pd.DataFrame({'c': list('abda'),
                          'd': [5, 4, 3, 2]},
                         index=list('abdg'))

    # Completely different index: no label is shared.
    pdf5l = pd.DataFrame({'a': list('lmnopqr'),
                          'b': [7, 6, 5, 4, 3, 2, 1]},
                         index=list('lmnopqr'))
    pdf5r = pd.DataFrame({'c': list('abcd'),
                          'd': [5, 4, 3, 2]},
                         index=list('abcd'))

    # Partially overlapping index: only 'c' and 'd' are shared.
    pdf6l = pd.DataFrame({'a': list('cdefghi'),
                          'b': [7, 6, 5, 4, 3, 2, 1]},
                         index=list('cdefghi'))
    pdf6r = pd.DataFrame({'c': list('abab'),
                          'd': [5, 4, 3, 2]},
                         index=list('abcd'))

    # Partially overlapping index: only 'f' and 'g' are shared.
    pdf7l = pd.DataFrame({'a': list('aabbccd'),
                          'b': [7, 6, 5, 4, 3, 2, 1]},
                         index=list('abcdefg'))
    pdf7r = pd.DataFrame({'c': list('aabb'),
                          'd': [5, 4, 3, 2]},
                         index=list('fghi'))

    frame_pairs = [(pdf1l, pdf1r), (pdf2l, pdf2r), (pdf3l, pdf3r),
                   (pdf4l, pdf4r), (pdf5l, pdf5r), (pdf6l, pdf6r),
                   (pdf7l, pdf7r)]

    # (left npartitions, right npartitions): equal, left > right, left < right.
    partitionings = [(2, 2), (3, 2), (2, 3)]

    for pdl, pdr in frame_pairs:
        for lpart, rpart in partitionings:
            ddl = dd.from_pandas(pdl, lpart)
            ddr = dd.from_pandas(pdr, rpart)

            # Assert the comparison result, as the sibling tests do, so a
            # falsy return from ``eq`` cannot pass silently.
            assert eq(ddl.join(ddr, how=how), pdl.join(pdr, how=how))
            assert eq(ddr.join(ddl, how=how), pdr.join(pdl, how=how))

            assert eq(ddl.join(ddr, how=how, lsuffix='l', rsuffix='r'),
                      pdl.join(pdr, how=how, lsuffix='l', rsuffix='r'))
            assert eq(ddr.join(ddl, how=how, lsuffix='l', rsuffix='r'),
                      pdr.join(pdl, how=how, lsuffix='l', rsuffix='r'))

    # NOTE(review): the snippet originally ended with a lone, unterminated
    # triple-quote here (apparently a truncated block of disabled checks).
    # It is dropped so the function parses; restore the disabled checks from
    # the upstream source if they are still wanted.
コード例 #19
0
def test_merge_by_index_patterns(how):
    """Check dask ``merge`` against pandas for a range of index layouts.

    Every (left, right) pandas pair is converted to dask frames with several
    partition counts and merged two ways, each in both operand orders and
    through both the function (``dd.merge``) and method (``.merge``) APIs:

    * on the index (``left_index=True, right_index=True``), compared with
      ``eq``;
    * on columns (``left_on`` / ``right_on``), compared with ``list_eq``.

    Parameters
    ----------
    how : str
        Merge type forwarded unchanged to both the dask and pandas ``merge``.
        NOTE(review): presumably supplied by a parametrize decorator that is
        not visible in this snippet -- confirm.
    """
    # No explicit index on either side (pandas default index).
    pdf1l = pd.DataFrame({'a': [1, 2, 3, 4, 5, 6, 7],
                          'b': [7, 6, 5, 4, 3, 2, 1]})
    pdf1r = pd.DataFrame({'c': [1, 2, 3, 4, 5, 6, 7],
                          'd': [7, 6, 5, 4, 3, 2, 1]})

    # Identical label index on both sides.
    pdf2l = pd.DataFrame({'a': [1, 2, 3, 4, 5, 6, 7],
                          'b': [7, 6, 5, 4, 3, 2, 1]},
                         index=list('abcdefg'))
    pdf2r = pd.DataFrame({'c': [7, 6, 5, 4, 3, 2, 1],
                          'd': [7, 6, 5, 4, 3, 2, 1]},
                         index=list('abcdefg'))

    # Right-hand index is a subset of the left-hand index.
    pdf3l = pdf2l
    pdf3r = pd.DataFrame({'c': [6, 7, 8, 9],
                          'd': [5, 4, 3, 2]},
                         index=list('abdg'))

    # Same subset index; 'c' holds values that never occur in 'a'.
    pdf4l = pdf2l
    pdf4r = pd.DataFrame({'c': [9, 10, 11, 12],
                          'd': [5, 4, 3, 2]},
                         index=list('abdg'))

    # Completely different index: no label is shared.
    pdf5l = pd.DataFrame({'a': [1, 1, 2, 2, 3, 3, 4],
                          'b': [7, 6, 5, 4, 3, 2, 1]},
                         index=list('lmnopqr'))
    pdf5r = pd.DataFrame({'c': [1, 1, 1, 1],
                          'd': [5, 4, 3, 2]},
                         index=list('abcd'))

    # Partially overlapping index: only 'c' and 'd' are shared.
    pdf6l = pd.DataFrame({'a': [1, 1, 2, 2, 3, 3, 4],
                          'b': [7, 6, 5, 4, 3, 2, 1]},
                         index=list('cdefghi'))
    pdf6r = pd.DataFrame({'c': [1, 2, 1, 2],
                          'd': [5, 4, 3, 2]},
                         index=list('abcd'))

    # Partially overlapping index: only 'f' and 'g' are shared.
    pdf7l = pd.DataFrame({'a': [1, 1, 2, 2, 3, 3, 4],
                          'b': [7, 6, 5, 4, 3, 2, 1]},
                         index=list('abcdefg'))
    pdf7r = pd.DataFrame({'c': [5, 6, 7, 8],
                          'd': [5, 4, 3, 2]},
                         index=list('fghi'))

    frame_pairs = [(pdf1l, pdf1r), (pdf2l, pdf2r), (pdf3l, pdf3r),
                   (pdf4l, pdf4r), (pdf5l, pdf5r), (pdf6l, pdf6r),
                   (pdf7l, pdf7r)]

    partitionings = [(2, 2),   # same partition
                     (3, 2),   # left npartition > right npartition
                     (2, 3)]   # left npartition < right npartition

    # Keyword arguments shared by every index-on-index merge.
    on_index = dict(how=how, left_index=True, right_index=True)

    # (left column, right column) pairs used for the column-based merges.
    key_pairs = [('a', 'c'), ('b', 'd')]

    for pdl, pdr in frame_pairs:
        for lpart, rpart in partitionings:
            ddl = dd.from_pandas(pdl, lpart)
            ddr = dd.from_pandas(pdr, rpart)

            # Index merges.  Assert the comparison result, as the sibling
            # tests do, so a falsy return from ``eq`` cannot pass silently.
            assert eq(dd.merge(ddl, ddr, **on_index),
                      pd.merge(pdl, pdr, **on_index))
            assert eq(dd.merge(ddr, ddl, **on_index),
                      pd.merge(pdr, pdl, **on_index))

            assert eq(ddr.merge(ddl, **on_index),
                      pdr.merge(pdl, **on_index))
            assert eq(ddl.merge(ddr, **on_index),
                      pdl.merge(pdr, **on_index))

            # hash join
            for lkey, rkey in key_pairs:
                forward = dict(how=how, left_on=lkey, right_on=rkey)
                backward = dict(how=how, left_on=rkey, right_on=lkey)

                list_eq(dd.merge(ddl, ddr, **forward),
                        pd.merge(pdl, pdr, **forward))
                list_eq(dd.merge(ddr, ddl, **backward),
                        pd.merge(pdr, pdl, **backward))

                list_eq(ddl.merge(ddr, **forward),
                        pdl.merge(pdr, **forward))
                list_eq(ddr.merge(ddl, **backward),
                        pdr.merge(pdl, **backward))
コード例 #20
0
def test_align_partitions():
    """Exercise ``align_partitions`` on frames with differing divisions.

    Covers two dask frames, two frames with a scalar in between, and
    round-trips of frames built with various partition counts (default
    index and a letter index).
    """
    left_pdf = pd.DataFrame({'x': [1, 2, 3, 4, 5, 6], 'y': list('abdabd')},
                            index=[10, 20, 30, 40, 50, 60])
    right_pdf = pd.DataFrame({'x': [1, 2, 3, 4], 'y': list('abda')},
                             index=[30, 70, 80, 100])

    a = dd.repartition(left_pdf, [10, 40, 60])
    b = dd.repartition(right_pdf, [30, 80, 100])
    s = dd.core.Scalar({('s', 0): 10}, 's')

    def _verify(orig_a, orig_b, new_a, new_b, divs, parts):
        # Inputs and outputs are dask frames and the aligned frames still
        # compare equal to the originals.
        for frame in (orig_a, orig_b, new_a, new_b):
            assert isinstance(frame, dd.DataFrame)
        assert eq(orig_a, new_a)
        assert eq(orig_b, new_b)
        # The merged divisions, with one entry in ``parts`` per interval.
        assert divs == (10, 30, 40, 60, 80, 100)
        assert isinstance(parts, list)
        assert len(divs) == 1 + len(parts)

    # Two frames only.
    (aa, bb), divisions, L = align_partitions(a, b)
    _verify(a, b, aa, bb, divisions, L)
    assert L == [[(aa._name, i), (bb._name, i)] for i in range(5)]

    # A scalar between the frames shows up as ``None`` in each entry.
    (aa, ss, bb), divisions, L = align_partitions(a, s, b)
    _verify(a, b, aa, bb, divisions, L)
    assert L == [[(aa._name, i), None, (bb._name, i)] for i in range(5)]
    assert eq(ss, 10)

    # Frames with the default index, then frames with a different
    # (letter) index on each side.
    frame_pairs = [
        (pd.DataFrame({'a': [1, 2, 3, 4, 5, 6, 7],
                       'b': [7, 6, 5, 4, 3, 2, 1]}),
         pd.DataFrame({'c': [1, 2, 3, 4, 5, 6, 7],
                       'd': [7, 6, 5, 4, 3, 2, 1]})),
        (pd.DataFrame({'a': [1, 2, 3, 4, 5, 6, 7],
                       'b': [7, 6, 5, 4, 3, 2, 1]},
                      index=list('abcdefg')),
         pd.DataFrame({'c': [1, 2, 3, 4, 5, 6, 7],
                       'd': [7, 6, 5, 4, 3, 2, 1]},
                      index=list('fghijkl'))),
    ]
    partition_counts = [(1, 1), (2, 2), (2, 3), (3, 2)]

    for ldf, rdf in frame_pairs:
        for n_left, n_right in partition_counts:
            lhs = dd.from_pandas(ldf, n_left)
            rhs = dd.from_pandas(rdf, n_right)
            (lresult, rresult), _div, _parts = \
                dd.multi.align_partitions(lhs, rhs)
            assert eq(lresult, ldf)
            assert eq(rresult, rdf)