def test_join_by_none():
    """Inner join with no explicit ``by`` argument.

    With ``keep=True`` the result is expected to carry both key columns as
    ``x_x`` and ``x_y``; with ``keep=False`` a single ``x`` column.
    """
    left = tibble(x=[1, 2, 3], y=[3, 4, 5])
    right = tibble(x=[2, 3, 4], z=[5, 6, 7])

    # Both copies of the key are retained, disambiguated by suffix.
    expected_kept = tibble(x_x=[2, 3], y=[4, 5], x_y=[2, 3], z=[5, 6])
    kept = left >> inner_join(right, keep=True)
    assert_frame_equal(kept, expected_kept)

    # Only one copy of the key is retained, under its original name.
    expected_merged = tibble(x=[2, 3], y=[4, 5], z=[5, 6])
    merged = left >> inner_join(right, keep=False)
    assert_frame_equal(merged, expected_merged)
def test_keys_are_coerced_to_symmetric_type():
    """Check the dtype of the key column when factor keys are joined.

    A factor key joined with a non-factor key (in either order) must not
    yield a ``category`` key; two factor keys must yield a ``category`` key.
    """
    factor_keyed = tibble(id=factor(c("a", "b")), var1="foo")
    plain_keyed = tibble(id=c("a", "b"), var2="bar")

    # Factor key on the left, plain key on the right.
    joined = inner_join(factor_keyed, plain_keyed, by="id")
    assert joined.id.dtype.name != "category"

    # Same inputs with the sides swapped.
    joined = inner_join(plain_keyed, factor_keyed, by="id")
    assert joined.id.dtype.name != "category"

    # Factor keys on both sides.
    left = tibble(x=1, y=factor("a"))
    right = tibble(x=2, y=factor("b"))
    combined = full_join(left, right, by=["x", "y"])
    assert combined.y.dtype.name == "category"
def test_group_column_names_reflect_renamed_duplicate_columns():
    """Group variables after joining frames that share a non-key column.

    Upstream case:
    test_that("group column names reflect renamed duplicate columns (#2330)")
    """
    base = tibble(x=range(1, 6), y=range(1, 6))
    grouped = base >> group_by(f.x, f.y)
    ungrouped = tibble(x=range(1, 6), y=range(1, 6))

    joined = inner_join(grouped, ungrouped, by="x")

    # `y` is present on both sides but is not a join key; only `x` is
    # expected to be reported as a grouping variable afterwards.
    assert group_vars(joined) == ["x"]
def test_joins_matches_nas_by_default():
    """Missing key values on both sides are expected to match each other.

    Upstream case: test_that("joins matches NAs by default (#892, #2033)")
    """
    lhs = tibble(x=c(None, 1))
    rhs = tibble(x=c(None, 2))

    # The only key value the two frames have in common is the missing one,
    # so exactly one row should come back from each join.
    assert nrow(inner_join(lhs, rhs, by=f.x)) == 1
    assert nrow(semi_join(lhs, rhs, by=f.x)) == 1
def test_when_keep_eqs_true_inner_join_preserves_both_sets_of_keys():
    """``keep=True`` retains the key columns from both inputs.

    Upstream case:
    test_that("when keep = True, inner_join() preserves both sets of keys
    (#5581)")
    """
    # Keys named differently on each side: both names appear in the result.
    left = tibble(a=c(2, 3), b=c(1, 2))
    right = tibble(x=c(3, 4), y=c(3, 4))
    joined = inner_join(left, right, by={"a": "x"}, keep=True)
    assert joined.a.tolist() == [3]
    assert joined.x.tolist() == [3]

    # Keys sharing one name: the result holds suffixed copies of the key.
    left = tibble(a=c(2, 3), b=c(1, 2))
    right = tibble(a=c(3, 4), y=c(3, 4))
    joined = inner_join(left, right, by=c("a"), keep=True)
    assert joined["a_x"].tolist() == [3]
    assert joined["a_y"].tolist() == [3]
def test_joins_preserve_groups():
    """The result of a join is grouped by the left input's group variables.

    The left frame is grouped by ``a`` and the right frame by ``b``; each
    join below is expected to report ``["a"]`` as its group variables.
    """
    left_grouped = tibble(a=[1, 2, 3]) >> group_by(f.a)
    right_grouped = tibble(a=rep([1, 2, 3, 4], 2), b=1) >> group_by(f.b)

    via_inner = inner_join(left_grouped, right_grouped, by="a")
    assert group_vars(via_inner) == ["a"]

    via_semi = semi_join(left_grouped, right_grouped, by="a")
    assert group_vars(via_semi) == ["a"]

    # See comment in nest_join
    via_nest = nest_join(left_grouped, right_grouped, by="a")
    assert group_vars(via_nest) == ["a"]
def test_join_preserve_grouping(df):
    """Self-joining a frame grouped by ``x`` keeps ``x`` as the group variable.

    Args:
        df: Input frame supplied by the caller (presumably a pytest fixture
            -- confirm). It is grouped by ``x`` and joined on ``x`` and
            ``y``, so it needs both columns.
    """
    grouped = df >> group_by(f.x)

    # Each join verb is paired with a label so a failing assertion says
    # which join lost the grouping.
    labelled_joins = (
        ("inner_join", inner_join),
        ("left_join", left_join),
        ("semi_join", semi_join),
        ("anti_join", anti_join),
    )
    for label, join in labelled_joins:
        joined = grouped >> join(grouped, by=["x", "y"])
        gvars = joined >> group_vars()
        assert gvars == ["x"], label
def test_mutating_joins_preserve_row_and_column_order():
    """Check column order and key row order for the four mutating joins.

    Every join is expected to return columns ``a, b, c``. The expected key
    order is ``[1, 2, 3]`` for inner/left, ``[4, 3, 2, 1]`` for right and
    ``[1, 2, 3, 4]`` for full.
    """
    left = tibble(a=[1, 2, 3])
    right = tibble(b=1, c=2, a=[4, 3, 2, 1])
    expected_columns = ["a", "b", "c"]

    joined = inner_join(left, right, by="a")
    assert joined.columns.tolist() == expected_columns
    assert joined.a.tolist() == [1, 2, 3]

    joined = left_join(left, right, by="a")
    assert joined.columns.tolist() == expected_columns
    assert joined.a.tolist() == [1, 2, 3]

    joined = right_join(left, right, by="a")
    assert joined.columns.tolist() == expected_columns
    # order preserved based on the right-hand frame
    assert joined.a.tolist() == [4, 3, 2, 1]

    joined = full_join(left, right, by="a")
    assert joined.columns.tolist() == expected_columns
    assert joined.a.tolist() == [1, 2, 3, 4]
def test_even_when_column_names_change():
    """Column order of an inner join when a non-key column needs suffixes.

    ``z`` exists in both inputs but the join is on ``x`` only; the expected
    result columns are ``x, z_x, a, z_y, b``.
    """
    left = tibble(x=[1, 1, 2, 3], z=[1, 2, 3, 4], a=1)
    right = tibble(z=[1, 2, 3, 4], b=1, x=[1, 2, 2, 4])

    joined = inner_join(left, right, by="x")

    expected_columns = ["x", "z_x", "a", "z_y", "b"]
    assert joined.columns.tolist() == expected_columns
def test_lose_group_when_by_renamed():
    """A grouped left frame joined with ``keep=True`` comes back ungrouped.

    The left frame is grouped by ``x`` and the join is given no ``by``.
    Per the test name, the grouping is expected to be lost because the key
    column gets renamed by the join.
    """
    grouped = tibble(x=[1, 2, 3], y=[3, 4, 5]) >> group_by(f.x)
    other = tibble(x=[2, 3, 4], z=[5, 6, 7])

    joined = grouped >> inner_join(other, keep=True)

    assert not isinstance(joined, TibbleGrouped)