def test_join_indexed_dataframe_to_indexed_dataframe():
    """Check ``join_indexed_dataframes`` against ``DataFrame.join`` per mode.

    Two small pandas frames are passed through ``dd.repartition`` and then
    joined with each of ``how='left'``, ``'right'``, ``'inner'`` and
    ``'outer'``.  For every mode the test verifies the first and last
    entries of the result's ``divisions`` and that the computed result
    equals the pandas join of the original frames.
    """
    left_pdf = pd.DataFrame({'x': [1, 2, 3, 4, 5, 6]},
                            index=[1, 2, 3, 4, 6, 7])
    right_pdf = pd.DataFrame({'y': list('abcdef')},
                             index=[1, 2, 4, 5, 6, 8])
    left_ddf = dd.repartition(left_pdf, [1, 4, 7])
    right_ddf = dd.repartition(right_pdf, [1, 2, 5, 8])

    # One row per join mode:
    # (how, expected first division, expected last division, pandas reference)
    cases = [
        ('left',
         left_ddf.divisions[0],
         left_ddf.divisions[-1],
         left_pdf.join(right_pdf)),
        ('right',
         right_ddf.divisions[0],
         right_ddf.divisions[-1],
         left_pdf.join(right_pdf, how='right')),
        ('inner', 1, 7, left_pdf.join(right_pdf, how='inner')),
        ('outer', 1, 8, left_pdf.join(right_pdf, how='outer')),
    ]

    for how, first_division, last_division, expected in cases:
        joined = join_indexed_dataframes(left_ddf, right_ddf, how=how)
        assert joined.divisions[0] == first_division
        assert joined.divisions[-1] == last_division
        tm.assert_frame_equal(joined.compute(), expected)
def test_join_indexed_dataframe_to_indexed_dataframe():
    """Check ``join_indexed_dataframes`` for all four join modes.

    Two small pandas frames are passed through ``dd.repartition`` and
    joined with ``how`` set to ``'left'``, ``'right'``, ``'inner'`` and
    ``'outer'``.  Each result is checked for the first and last entries of
    its ``divisions`` and compared, via ``eq``, with the pandas join of the
    original frames.  Finally the sorted ``.dask`` attribute of the result
    is required to be equal for two identical calls and different for
    calls that use a different ``how``.
    """
    pdf_x = pd.DataFrame({'x': [1, 2, 3, 4, 5, 6]}, index=[1, 2, 3, 4, 6, 7])
    pdf_y = pd.DataFrame({'y': list('abcdef')}, index=[1, 2, 4, 5, 6, 8])
    ddf_x = dd.repartition(pdf_x, [1, 4, 7])
    ddf_y = dd.repartition(pdf_y, [1, 2, 5, 8])

    def graph_keys(how):
        # Sorted ``.dask`` of a freshly constructed join for the given mode.
        return sorted(join_indexed_dataframes(ddf_x, ddf_y, how=how).dask)

    # Left join.
    left_joined = join_indexed_dataframes(ddf_x, ddf_y, how='left')
    assert left_joined.divisions[0] == ddf_x.divisions[0]
    assert left_joined.divisions[-1] == max(ddf_x.divisions + ddf_y.divisions)
    assert eq(left_joined, pdf_x.join(pdf_y))

    # Right join.
    right_joined = join_indexed_dataframes(ddf_x, ddf_y, how='right')
    assert right_joined.divisions[0] == ddf_y.divisions[0]
    assert right_joined.divisions[-1] == ddf_y.divisions[-1]
    assert eq(right_joined, pdf_x.join(pdf_y, how='right'))

    # Inner join: the result is computed before being handed to ``eq``.
    inner_joined = join_indexed_dataframes(ddf_x, ddf_y, how='inner')
    assert inner_joined.divisions[0] == 1
    assert inner_joined.divisions[-1] == max(ddf_x.divisions + ddf_y.divisions)
    assert eq(inner_joined.compute(), pdf_x.join(pdf_y, how='inner'))

    # Outer join: likewise compared after ``compute()``.
    outer_joined = join_indexed_dataframes(ddf_x, ddf_y, how='outer')
    assert outer_joined.divisions[0] == 1
    assert outer_joined.divisions[-1] == 8
    assert eq(outer_joined.compute(), pdf_x.join(pdf_y, how='outer'))

    # Same arguments give the same ``.dask``; a different ``how`` does not.
    assert graph_keys('inner') == graph_keys('inner')
    assert graph_keys('inner') != graph_keys('outer')
def test_join_indexed_dataframe_to_indexed_dataframe():
    """Exercise ``join_indexed_dataframes`` with every ``how`` option.

    The inputs are two pandas frames passed through ``dd.repartition``.
    For the left, right, inner and outer joins the test checks the first
    and last entries of the result's ``divisions`` and uses ``eq`` to
    compare the result with ``DataFrame.join`` on the pandas originals.
    The closing assertions compare the sorted ``.dask`` attribute of the
    results: equal for repeated identical calls, unequal when ``how``
    differs.
    """
    lhs_frame = pd.DataFrame({"x": [1, 2, 3, 4, 5, 6]}, index=[1, 2, 3, 4, 6, 7])
    lhs = dd.repartition(lhs_frame, [1, 4, 7])

    rhs_frame = pd.DataFrame({"y": list("abcdef")}, index=[1, 2, 4, 5, 6, 8])
    rhs = dd.repartition(rhs_frame, [1, 2, 5, 8])

    def join(how):
        # Every call builds a new join of the same two partitioned frames.
        return join_indexed_dataframes(lhs, rhs, how=how)

    result = join("left")
    assert result.divisions[0] == lhs.divisions[0]
    assert result.divisions[-1] == max(lhs.divisions + rhs.divisions)
    assert eq(result, lhs_frame.join(rhs_frame))

    result = join("right")
    assert result.divisions[0] == rhs.divisions[0]
    assert result.divisions[-1] == rhs.divisions[-1]
    assert eq(result, lhs_frame.join(rhs_frame, how="right"))

    result = join("inner")
    assert result.divisions[0] == 1
    assert result.divisions[-1] == max(lhs.divisions + rhs.divisions)
    # Inner and outer results are computed before being passed to ``eq``.
    assert eq(result.compute(), lhs_frame.join(rhs_frame, how="inner"))

    result = join("outer")
    assert result.divisions[0] == 1
    assert result.divisions[-1] == 8
    assert eq(result.compute(), lhs_frame.join(rhs_frame, how="outer"))

    # Two identical calls must agree on the sorted ``.dask`` contents ...
    inner_graph = sorted(join("inner").dask)
    assert inner_graph == sorted(join("inner").dask)
    # ... while a different join mode must not.
    assert sorted(join("inner").dask) != sorted(join("outer").dask)