Esempio n. 1
0
def test_join_by_none():
    """Inner-join two frames without passing `by`, with and without `keep`."""
    left = tibble(x=[1, 2, 3], y=[3, 4, 5])
    right = tibble(x=[2, 3, 4], z=[5, 6, 7])

    # keep=True: the expected frame carries both key columns (x_x and x_y).
    kept = left >> inner_join(right, keep=True)
    expected_kept = tibble(x_x=[2, 3], y=[4, 5], x_y=[2, 3], z=[5, 6])
    assert_frame_equal(kept, expected_kept)

    # keep=False: the expected frame has a single x column.
    merged = left >> inner_join(right, keep=False)
    expected_merged = tibble(x=[2, 3], y=[4, 5], z=[5, 6])
    assert_frame_equal(merged, expected_merged)
Esempio n. 2
0
def test_keys_are_coerced_to_symmetric_type():
    """Check the dtype of join keys when factor keys are involved."""
    factor_keyed = tibble(id=factor(c("a", "b")), var1="foo")
    plain_keyed = tibble(id=c("a", "b"), var2="bar")

    # Only one side has a factor key: the joined `id` must not be
    # categorical, whichever side the factor is on.
    joined = inner_join(factor_keyed, plain_keyed, by="id")
    assert joined.id.dtype.name != "category"
    joined = inner_join(plain_keyed, factor_keyed, by="id")
    assert joined.id.dtype.name != "category"

    # Both sides have a factor `y`: the full join keeps it categorical.
    left = tibble(x=1, y=factor("a"))
    right = tibble(x=2, y=factor("b"))
    joined = full_join(left, right, by=["x", "y"])
    assert joined.y.dtype.name == "category"
Esempio n. 3
0
def test_group_column_names_reflect_renamed_duplicate_columns():
    """Group vars after joining a frame grouped by x and y on "x" only.

    Python counterpart of the R test case
    "group column names reflect renamed duplicate columns (#2330)".
    """
    grouped = tibble(x=range(1, 6), y=range(1, 6)) >> group_by(f.x, f.y)
    ungrouped = tibble(x=range(1, 6), y=range(1, 6))

    joined = inner_join(grouped, ungrouped, by="x")
    remaining_groups = group_vars(joined)
    assert remaining_groups == ["x"]
Esempio n. 4
0
def test_joins_matches_nas_by_default():
    """Missing keys on both sides yield exactly one row for inner/semi join.

    Python counterpart of the R test case
    "joins matches NAs by default (#892, #2033)".
    """
    left = tibble(x=c(None, 1))
    right = tibble(x=c(None, 2))

    # The only key the two frames share is the missing value.
    inner_result = inner_join(left, right, by=f.x)
    assert nrow(inner_result) == 1

    semi_result = semi_join(left, right, by=f.x)
    assert nrow(semi_result) == 1
Esempio n. 5
0
def test_when_keep_eqs_true_inner_join_preserves_both_sets_of_keys():
    """With keep=True, inner_join() exposes the key columns of both inputs.

    Python counterpart of the R test case
    "when keep = True, inner_join() preserves both sets of keys (#5581)".
    """
    # Case 1: the key columns have different names (a on the left, x on
    # the right); both appear under their own names in the result.
    left = tibble(a=c(2, 3), b=c(1, 2))
    right = tibble(x=c(3, 4), y=c(3, 4))
    joined = inner_join(left, right, by={"a": "x"}, keep=True)
    assert joined.a.tolist() == [3]
    assert joined.x.tolist() == [3]

    # Case 2: the key columns share a name; the result holds them as
    # a_x and a_y.
    left = tibble(a=c(2, 3), b=c(1, 2))
    right = tibble(a=c(3, 4), y=c(3, 4))
    joined = inner_join(left, right, by=c("a"), keep=True)
    for key_column in ("a_x", "a_y"):
        assert joined[key_column].tolist() == [3]
Esempio n. 6
0
def test_joins_preserve_groups():
    """inner/semi/nest joins report the left frame's group variable."""
    left_grouped = tibble(a=[1, 2, 3]) >> group_by(f.a)
    right_grouped = tibble(a=rep([1, 2, 3, 4], 2), b=1) >> group_by(f.b)

    # The right frame is grouped by b, yet every result is grouped by a.
    # For nest_join: see comment in nest_join.
    for join in (inner_join, semi_join, nest_join):
        joined = join(left_grouped, right_grouped, by="a")
        assert group_vars(joined) == ["a"]
Esempio n. 7
0
def test_join_preserve_grouping(df):
    """Self-joining a frame grouped by x keeps ["x"] as the group variables.

    `df` is supplied from outside this function (presumably a pytest
    fixture with at least columns x and y -- confirm against its definition).
    """
    grouped = df >> group_by(f.x)

    for join in (inner_join, left_join, semi_join, anti_join):
        joined = grouped >> join(grouped, by=["x", "y"])
        groups_after = joined >> group_vars()
        assert groups_after == ["x"]
Esempio n. 8
0
def test_mutating_joins_preserve_row_and_column_order():
    """Check column order and the order of `a` for each mutating join."""
    left = tibble(a=[1, 2, 3])
    right = tibble(b=1, c=2, a=[4, 3, 2, 1])

    expected_columns = ["a", "b", "c"]
    # Each entry pairs a join function with the expected values of `a`.
    cases = [
        (inner_join, [1, 2, 3]),
        (left_join, [1, 2, 3]),
        # order preserved based on the right-hand frame
        (right_join, [4, 3, 2, 1]),
        (full_join, [1, 2, 3, 4]),
    ]

    for join, expected_a in cases:
        joined = join(left, right, by="a")
        assert joined.columns.tolist() == expected_columns
        assert joined.a.tolist() == expected_a
Esempio n. 9
0
def test_even_when_column_names_change():
    """Column order of an inner join when the shared non-key `z` is suffixed."""
    left = tibble(x=[1, 1, 2, 3], z=[1, 2, 3, 4], a=1)
    right = tibble(z=[1, 2, 3, 4], b=1, x=[1, 2, 2, 4])

    joined = inner_join(left, right, by="x")
    # z exists on both sides, so it shows up as z_x and z_y in the result.
    column_names = joined.columns.tolist()
    assert column_names == ["x", "z_x", "a", "z_y", "b"]
Esempio n. 10
0
def test_lose_group_when_by_renamed():
    """A keep=True join of a frame grouped by x is not a TibbleGrouped."""
    grouped = tibble(x=[1, 2, 3], y=[3, 4, 5]) >> group_by(f.x)
    other = tibble(x=[2, 3, 4], z=[5, 6, 7])

    joined = grouped >> inner_join(other, keep=True)
    still_grouped = isinstance(joined, TibbleGrouped)
    assert not still_grouped