Esempio n. 1
0
def test_compound_ungroup():
    """Check ``ungroup`` on a scalar, a pandas groupby and grouped/rowwise tibbles."""
    # A plain scalar comes back as-is.
    assert ungroup(1) == 1

    # For a pandas groupby the result is the groupby's own ``obj``.
    series_groups = Series([1, 2, 3]).groupby([1, 1, 2])
    assert ungroup(series_groups) is series_groups.obj

    # Naming a column to drop alongside a pandas groupby is an error.
    with pytest.raises(ValueError):
        ungroup(series_groups, "abc")

    grouped = tibble(x=1, y=2) >> group_by(f.x, f.y)

    # Without column arguments every grouping variable is removed.
    fully_ungrouped = ungroup(grouped)
    assert group_vars(fully_ungrouped) == []

    # Naming one column removes only that grouping variable.
    for dropped, remaining in ((f.x, ["y"]), (f.y, ["x"])):
        partially_ungrouped = ungroup(grouped, dropped)
        assert group_vars(partially_ungrouped) == remaining

    # Adding a variable that is already grouped on leaves the set unchanged.
    regrouped = group_by(grouped, f.y, _add=True)
    assert group_vars(regrouped) == ["x", "y"]

    # Selective ungrouping of a rowwise frame raises ValueError.
    rowwise_frame = grouped >> rowwise()
    with pytest.raises(ValueError):
        ungroup(rowwise_frame, f.x)

    # Grouping by a column the frame does not have raises KeyError.
    with pytest.raises(KeyError):
        group_by(grouped, f.w)
Esempio n. 2
0
def test_can_re_rowwise():
    """Applying ``rowwise`` to a rowwise frame replaces its grouping variables."""
    base = tibble(x=range(1, 6), y=range(1, 6))
    by_x = rowwise(base, "x")

    # Re-rowwise on ``y``: only ``y`` is reported afterwards.
    by_y = rowwise(by_x, f.y)
    assert group_vars(by_y) == ["y"]

    # Re-rowwise without columns: no grouping variables are left.
    bare = rowwise(by_y)
    assert group_vars(bare) == []
Esempio n. 3
0
def test__groups_is_defused_with_context():
    """Mirror of the testthat case ".groups is defused with context".

    Supplying the groups argument through a local function call must give the
    same frame and the same grouping variables as supplying the literal value.
    """
    local_fn = identity

    expected = with_groups(mtcars, 2, mutate, disp=f.disp / sd(f.disp))
    result = with_groups(mtcars, local_fn(2), mutate, disp=f.disp / sd(f.disp))

    assert result.equals(expected)
    assert group_vars(result) == group_vars(expected)
Esempio n. 4
0
def test_add(df):
    """``group_by(..., _add=True)`` yields ``x`` and ``y`` as grouping variables."""
    wanted = ["x", "y"]

    # Both columns supplied in one call.
    at_once = df >> group_by(f.x, f.y, _add=True)
    at_once_vars = group_vars(at_once)
    assert at_once_vars == wanted

    # One column per call, the second call adding to the first.
    by_x = df >> group_by(f.x, _add=True)
    by_x_then_y = by_x >> group_by(f.y, _add=True)
    stepwise_vars = group_vars(by_x_then_y)
    assert stepwise_vars == wanted
Esempio n. 5
0
def test_peels_off_a_single_layer_of_grouping():
    """Each ``summarise`` call drops the last grouping variable."""
    frame = tibble(
        x=rep([1, 2, 3, 4], each=4),
        y=rep([1, 2], each=8),
        z=runif(16),
    )
    grouped = frame >> group_by(f.x, f.y)

    # One summarise: ``y`` is peeled off, ``x`` remains.
    summarised_once = summarise(grouped)
    assert group_vars(summarised_once) == ["x"]

    # Two nested summarises: nothing remains.
    inner = summarise(grouped)
    summarised_twice = summarise(inner)
    assert group_vars(summarised_twice) == []
Esempio n. 6
0
def test_empty_returns_self():
    """``arrange`` without sort keys returns a frame equal to its input."""
    plain = tibble(x=range(1, 11), y=range(1, 11))
    grouped = plain >> group_by(f.x)

    # Ungrouped input.
    arranged_plain = arrange(plain)
    assert arranged_plain.equals(plain)

    # Grouped input: same data and the same grouping variables.
    arranged_grouped = arrange(grouped)
    assert arranged_grouped.equals(grouped)
    assert group_vars(arranged_grouped) == group_vars(grouped)
Esempio n. 7
0
def test_preserves_grouping():
    """``filter`` keeps the grouping variable and recomputes the group rows."""
    frame = tibble(g=[1, 1, 1, 2, 2], x=[1, 2, 3, 4, 5])
    grouped = frame >> group_by(f.g)

    # x == 3 sits in g == 1 and x == 4 in g == 2: one surviving row per group.
    straddling = grouped >> filter(is_element(f.x, [3, 4]))
    assert group_vars(straddling) == ["g"]
    assert group_rows(straddling) == [[0], [1]]

    # x < 3 only matches rows of g == 1: a single group with two rows.
    first_group_only = grouped >> filter(f.x < 3)
    assert group_vars(first_group_only) == ["g"]
    assert group_rows(first_group_only) == [[0, 1]]
Esempio n. 8
0
def test_preserves_grouping():
    """``distinct`` on a frame grouped by ``x`` keeps ``x`` as grouping variable."""
    frame = tibble(x=c(1, 1, 2, 2), y=f.x)
    grouped = frame >> group_by(f.x)

    # No arguments.
    deduplicated = grouped >> distinct()
    plain_vars = group_vars(deduplicated)
    assert plain_vars == ["x"]

    # With an expression that overwrites the grouping column.
    shifted = grouped >> distinct(x=f.x + 2)
    shifted_vars = group_vars(shifted)
    assert shifted_vars == ["x"]
Esempio n. 9
0
def test_deals_with_0_groups():
    """``mutate`` works on a grouped frame that has no rows."""
    empty_grouped = tibble(x=[]) >> group_by(f.x)

    # Element-wise expression: result matches an empty grouped two-column frame.
    expected = tibble(x=[], y=[]) >> group_by(f.x)
    mutated = mutate(empty_grouped, y=f.x + 1)
    assert_iterable_equal(mutated, expected)
    assert group_vars(mutated) == group_vars(expected)

    # Aggregating expression: still zero rows, two columns, grouped by ``x``.
    aggregated = mutate(empty_grouped, y=max(f.x))
    assert aggregated.shape == (0, 2)
    assert group_vars(aggregated) == ["x"]
Esempio n. 10
0
def test_preserve_grouping():
    """``count`` gives the same tallies for explicit and pre-grouped input."""
    raw = tibble(g=c(1, 2, 2, 2))
    tallies = tibble(g=c(1, 2), n=c(1, 3))

    # Counting by an explicit column on an ungrouped frame.
    counted = raw >> count(f.g)
    assert counted.equals(tallies)

    # Counting a frame already grouped by ``g`` keeps that grouping.
    grouped_raw = raw >> group_by(f.g)
    counted_grouped = grouped_raw >> count()
    grouped_tallies = tallies >> group_by(f.g)
    assert counted_grouped.equals(grouped_tallies)
    assert group_vars(counted_grouped) == group_vars(grouped_tallies)
Esempio n. 11
0
def test_output_preserves_grouping():
    """``add_count`` appends ``n`` and keeps the grouping of a grouped input."""
    raw = tibble(g=c(1, 2, 2, 2))
    with_counts = tibble(g=c(1, 2, 2, 2), n=c(1, 3, 3, 3))

    # Ungrouped input, counting by an explicit column.
    counted = raw >> add_count(f.g)
    assert counted.equals(with_counts)

    # Grouped input, no explicit column.
    grouped_raw = raw >> group_by(f.g)
    counted_grouped = grouped_raw >> add_count()
    grouped_with_counts = with_counts >> group_by(f.g)
    assert counted_grouped.equals(grouped_with_counts)
    assert group_vars(counted_grouped) == group_vars(grouped_with_counts)
Esempio n. 12
0
def test_can_add_tallies_of_a_variable():
    """``add_tally`` on a grouped frame adds per-group counts, optionally sorted."""
    raw = tibble(a=c(2, 1, 1))

    # Default: original row order is kept.
    tallied = raw >> group_by(f.a) >> add_tally()
    expected = tibble(a=c(2, 1, 1), n=c(1, 2, 2)) >> group_by(f.a)
    assert_frame_equal(tallied, expected)
    assert group_vars(tallied) == group_vars(expected)

    # sort=True: rows ordered by descending count.
    tallied_sorted = raw >> group_by(f.a) >> add_tally(sort=True)
    expected_sorted = tibble(a=c(1, 1, 2), n=c(2, 2, 1)) >> group_by(f.a)
    assert tallied_sorted.equals(expected_sorted)
    # assert_frame_equal(out, exp)
    assert group_vars(tallied_sorted) == group_vars(expected_sorted)
Esempio n. 13
0
def test_joins_preserve_groups():
    """Joins report the left frame's grouping variable (``a``), not the right's."""
    left = tibble(a=[1, 2, 3]) >> group_by(f.a)
    right = tibble(a=rep([1, 2, 3, 4], 2), b=1) >> group_by(f.b)

    # For nest_join, see the comment in nest_join.
    for join in (inner_join, semi_join, nest_join):
        joined = join(left, right, by="a")
        assert group_vars(joined) == ["a"]
Esempio n. 14
0
def test_rowwise_preserved_by_subsetting():
    """``mutate`` and ``rename_with`` return a rowwise frame with its group var."""

    def assert_rowwise_by(frame, expected_vars):
        # Shared check: still a TibbleRowwise with the given grouping variables.
        assert isinstance(frame, TibbleRowwise)
        assert group_vars(frame) == expected_vars

    base = tibble(x=range(1, 6), y=range(5, 0, -1))
    rf = rowwise(base, f.x)

    # out = get(rf, [1])
    # assert isinstance(out, TibbleRowwise)
    # assert group_vars(out) == ["x"]

    with_copy = mutate(rf, z=f.y)
    assert_rowwise_by(with_copy, ["x"])

    # Upper-casing the column names also renames the grouping variable.
    upper_cased = rename_with(rf, str.upper)
    assert_rowwise_by(upper_cased, ["X"])
Esempio n. 15
0
def test_group_column_names_reflect_renamed_duplicate_columns():
    """Mirror of "group column names reflect renamed duplicate columns (#2330)".

    Both frames carry ``x`` and ``y``; joining by ``x`` alone must leave
    ``["x"]`` as the grouping variables of the result.
    """
    columns_left = tibble(x=range(1, 6), y=range(1, 6))
    grouped_left = columns_left >> group_by(f.x, f.y)
    plain_right = tibble(x=range(1, 6), y=range(1, 6))

    joined = inner_join(grouped_left, plain_right, by="x")
    assert group_vars(joined) == ["x"]
Esempio n. 16
0
def test_preserves_grouping():
    """``mutate`` keeps the frame grouped and refreshes the group data."""
    base = tibble(x=[1, 2], y=2)
    grouped = group_by(base, f.x)

    # Overwriting the grouping column with a constant leaves a single group.
    collapsed = mutate(grouped, x=1)
    assert group_vars(collapsed) == ["x"]
    assert nrow(group_data(collapsed)) == 1

    # Adding an unrelated column leaves the group data equal to the input's.
    widened = mutate(grouped, z=1)
    assert group_data(widened).equals(group_data(grouped))
Esempio n. 17
0
def test_rowwise_captures_group_vars():
    """``rowwise`` on a grouped frame takes over its grouping variables."""
    base = tibble(g=[1, 2], x=[1, 2])
    grouped = group_by(base, f.g)

    # Without explicit columns the existing group var ``g`` is carried over.
    as_rowwise = rowwise(grouped)
    assert group_vars(as_rowwise) == ["g"]

    # Explicit columns on an already grouped frame raise ValueError.
    with pytest.raises(ValueError):
        rowwise(grouped, f.x)
Esempio n. 18
0
def test_rowwise_preserved_by_major_verbs():
    """Major verbs return a rowwise frame; ``summarise`` returns a grouped one."""

    def assert_kind_and_vars(frame, kind, expected_vars):
        # Shared check: frame class and reported grouping variables.
        assert isinstance(frame, kind)
        assert group_vars(frame) == expected_vars

    base = tibble(x=range(1, 6), y=range(5, 0, -1))
    rf = rowwise(base, f.x)

    arranged = arrange(rf, f.y)
    assert_kind_and_vars(arranged, TibbleRowwise, ["x"])

    filtered = filter(rf, f.x < 3)
    assert_kind_and_vars(filtered, TibbleRowwise, ["x"])

    mutated = mutate(rf, x=f.x + 1)
    assert_kind_and_vars(mutated, TibbleRowwise, ["x"])

    # Renaming the grouping column renames the grouping variable too.
    renamed = rename(rf, X=f.x)
    assert_kind_and_vars(renamed, TibbleRowwise, ["X"])

    selected = select(rf, "x")
    assert_kind_and_vars(selected, TibbleRowwise, ["x"])

    sliced = slice(rf, c(0, 0))
    assert_kind_and_vars(sliced, TibbleRowwise, ["x"])

    # Except for summarise
    summarised = summarise(rf, z=mean(f.x, f.y))
    assert_kind_and_vars(summarised, TibbleGrouped, ["x"])
Esempio n. 19
0
def test_join_preserve_grouping(df):
    """Self-joins of a frame grouped by ``x`` stay grouped by ``x``."""
    g = df >> group_by(f.x)

    # Same expectation for every join flavour, checked in the original order.
    for join in (inner_join, left_join, semi_join, anti_join):
        joined = g >> join(g, by=["x", "y"])
        joined_vars = joined >> group_vars()
        assert joined_vars == ["x"]
Esempio n. 20
0
def test_no_expressions():
    """``summarise`` with no expressions, or with an empty tibble as expression."""
    plain = tibble(x=[1, 2], y=[1, 2])
    grouped = group_by(plain, f.x)

    # Ungrouped, nothing to compute: one row, zero columns.
    bare_plain = summarise(plain)
    assert dim(bare_plain) == (1, 0)

    # Grouped, nothing to compute: just the group keys, no grouping left.
    bare_grouped = summarise(grouped)
    assert group_vars(bare_grouped) == []
    keys_only = tibble(x=[1, 2])
    assert_tibble_equal(bare_grouped, keys_only)

    # An empty tibble as the only expression behaves the same way.
    empty_expr_plain = summarise(plain, tibble())
    assert dim(empty_expr_plain) == (1, 0)

    empty_expr_grouped = summarise(grouped, tibble())
    assert group_vars(empty_expr_grouped) == []
    keys_again = tibble(x=[1, 2])
    assert empty_expr_grouped.equals(keys_again)
Esempio n. 21
0
def test_works_with_grouped_empty_data_frames():
    """``summarise`` on empty grouped/rowwise frames yields a 0x2 ``x, y`` frame."""
    empty = tibble(x=[])

    # Grouped by ``x``.
    grouped_empty = empty >> group_by(f.x)
    from_grouped = grouped_empty >> summarise(y=1)
    assert dim(from_grouped) == (0, 2)
    assert from_grouped.columns.tolist() == ["x", "y"]

    # Rowwise on ``x``: the grouping variable is additionally kept.
    rowwise_empty = empty >> rowwise(f.x)
    from_rowwise = rowwise_empty >> summarise(y=1)
    assert group_vars(from_rowwise) == ["x"]
    assert dim(from_rowwise) == (0, 2)
    assert from_rowwise.columns.tolist() == ["x", "y"]
Esempio n. 22
0
def test_empty_mutate_returns_input():
    """``mutate`` with no expressions returns a frame equal to its input.

    Checked for an ungrouped tibble and for one grouped by ``x``; in the
    grouped case the result must itself be a ``TibbleGrouped`` that still
    reports ``x`` as its grouping variable.
    """
    df = tibble(x=1)
    gf = group_by(df, f.x)

    out = mutate(df)
    assert out.equals(df)

    out = mutate(gf)
    assert_tibble_equal(out, gf)
    # Check the result, not the input: ``gf`` is grouped by construction, so
    # asserting on it would say nothing about what ``mutate`` returned.
    assert isinstance(out, TibbleGrouped)
    assert group_vars(out) == ["x"]
Esempio n. 23
0
def test_zero_row_dfs():
    """Verbs on a zero-row frame grouped by ``g`` keep shape and grouping sane."""

    def assert_empty_grouped(frame, n_cols):
        # Shared check: no rows, ``n_cols`` columns, grouped by ``g``, no groups.
        assert frame.shape == (0, n_cols)
        assert group_vars(frame) == ["g"]
        assert group_size(frame) == []

    empty = tibble(a=[], b=[], g=[])
    dfg = group_by(empty, f.g, _drop=False)
    assert_empty_grouped(dfg, 3)

    # summarise drops the only grouping variable; group sizes are not checked.
    summarised = summarise(dfg, n=n())
    assert summarised.shape == (0, 2)
    assert group_vars(summarised) == []

    mutated = mutate(dfg, c=f.b + 1)
    assert_empty_grouped(mutated, 4)

    filtered = filter(dfg, f.a == 100)
    assert_empty_grouped(filtered, 3)

    arranged = arrange(dfg, f.a, f.g)
    assert_empty_grouped(arranged, 3)

    # Selecting only ``a`` still yields two columns and grouping by ``g``.
    selected = select(dfg, f.a)
    assert_empty_grouped(selected, 2)
Esempio n. 24
0
def test_groups_arg(caplog):
    """Exercise the ``_groups`` argument of ``summarise`` and its log message."""
    flat = tibble(x=1, y=2)

    # Default on a two-level grouping: data equals the input and a message
    # about the remaining grouping is logged.
    grouped_both = flat >> group_by(f.x, f.y)
    peeled = grouped_both >> summarise()
    assert peeled.equals(flat)
    assert "has grouped output by ['x']" in caplog.text
    caplog.clear()

    # Rowwise input with no expressions: only executed, the result is unused.
    rowwise_both = flat >> rowwise(f.x, f.y)
    out = rowwise_both >> summarise()
    # assert "[Groups: x, y (n=1)]" in out.attrs["_str_footer"]

    # _groups="rowwise" on an ungrouped frame gives a rowwise result.
    flat = tibble(x=1, y=2)
    from_summarise = flat >> summarise(z=3, _groups="rowwise")
    from_rowwise = rowwise(tibble(z=3))
    assert isinstance(from_summarise, TibbleRowwise)
    assert isinstance(from_rowwise, TibbleRowwise)
    assert from_summarise.equals(from_rowwise)
    assert group_vars(from_summarise) == group_vars(from_rowwise)

    # Grouped input: default first, then each explicit mode.
    gf = flat >> group_by(f.x, f.y)
    default_vars = gf >> summarise() >> group_vars()
    assert default_vars == ["x"]
    grouped_cases = (
        ("drop_last", ["x"]),
        ("drop", []),
        ("keep", ["x", "y"]),
        ("rowwise", ["x", "y"]),  # disabled alternative: assert gvars == []
    )
    for mode, wanted in grouped_cases:
        mode_vars = gf >> summarise(_groups=mode) >> group_vars()
        assert mode_vars == wanted

    # Rowwise input.
    rf = flat >> rowwise(f.x, f.y)
    for mode, wanted in (("drop", []), ("keep", ["x", "y"])):
        mode_vars = rf >> summarise(_groups=mode) >> group_vars()
        assert mode_vars == wanted
Esempio n. 25
0
def test_keep_always_retains_grouping_variables():
    """Every ``_keep`` mode of ``mutate`` leaves the grouping column ``z`` in place."""
    grouped = tibble(x=1, y=2, z=3) >> group_by(f.z)

    # (_keep mode, expected columns before regrouping by ``z``)
    cases = (
        ("none", tibble(z=3, a=2)),
        ("all", tibble(x=1, y=2, z=3, a=2)),
        ("used", tibble(x=1, z=3, a=2)),
        ("unused", tibble(y=2, z=3, a=2)),
    )
    for keep_mode, expected_columns in cases:
        result = grouped >> mutate(a=f.x + 1, _keep=keep_mode)
        expected = expected_columns >> group_by(f.z)
        assert result.equals(expected)
        assert group_vars(result) == group_vars(expected)
Esempio n. 26
0
def test_removes_vars_with_None():
    """Assigning ``None`` in ``mutate`` removes the column."""
    frame = tibble(x=range(1, 4), y=range(1, 4))
    grouped = group_by(frame, f.x)

    # Ungrouped: ``y`` disappears.
    without_y = frame >> mutate(y=None)
    assert without_y.columns.tolist() == ["x"]

    # Grouped: ``y`` disappears, grouping and group rows are intact.
    grouped_without_y = grouped >> mutate(y=None)
    assert grouped_without_y.columns.tolist() == ["x"]
    assert isinstance(grouped_without_y, TibbleGrouped)
    assert group_vars(grouped_without_y) == ["x"]
    assert group_rows(grouped_without_y) == [[0], [1], [2]]

    # even if it doesn't exist
    unchanged = frame >> mutate(z=None)
    assert unchanged.equals(frame)

    # A column introduced positionally and set to None in the same call.
    z = Series(1, name="z")
    added_then_removed = frame >> mutate(z, z=None)
    assert added_then_removed.equals(frame)

    # Several removals next to an addition: only the new column is left.
    pair = tibble(x=1, y=1)
    only_z = mutate(pair, z=1, x=None, y=None)
    assert only_z.equals(tibble(z=1))
Esempio n. 27
0
def test_arguments_to_select_dont_match_vars_select_arguments():
    """``var``/``exclude``/``include`` keywords are treated as plain renames."""

    def assert_same_grouped(actual, wanted):
        # Shared check: equal data and equal grouping variables.
        assert actual.equals(wanted)
        assert group_vars(actual) == group_vars(wanted)

    df = tibble(a=1)

    # ``var`` on the ungrouped and on the grouped frame.
    renamed = select(df, var=f.a)
    assert renamed.equals(tibble(var=1))

    renamed_grouped = select(group_by(df, f.a), var=f.a)
    wanted = group_by(tibble(var=1), f.var)
    assert_same_grouped(renamed_grouped, wanted)

    # ``exclude`` and ``include`` on the ungrouped frame.
    renamed = select(df, exclude=f.a)
    assert renamed.equals(tibble(exclude=1))
    renamed = select(df, include=f.a)
    assert renamed.equals(tibble(include=1))

    # ``exclude`` and ``include`` on the grouped frame.
    renamed_grouped = select(group_by(df, f.a), exclude=f.a)
    wanted = group_by(tibble(exclude=1), f.exclude)
    assert_same_grouped(renamed_grouped, wanted)

    renamed_grouped = select(group_by(df, f.a), include=f.a)
    wanted = group_by(tibble(include=1), f.include)
    assert_same_grouped(renamed_grouped, wanted)
Esempio n. 28
0
def test_tibble_keep_grouping(df):
    """Wrapping a frame grouped by ``x`` in ``tibble`` keeps it grouped by ``x``."""
    g = df >> group_by(f.x)

    rebuilt = tibble(g)

    # with pytest.raises(NotImplementedError):
    assert group_vars(rebuilt) == ["x"]
Esempio n. 29
0
def test_preserves_grouping():
    """``select`` that renames the grouping column keeps the frame grouped.

    The frame is grouped by ``g``; selecting it as ``h`` must report ``h`` as
    the only grouping variable.
    """
    gf = group_by(tibble(g=[1, 2, 3], x=[3, 2, 1]), f.g)

    out = select(gf, h=f.g)
    # Compare with ``==``: the ``assert expr, msg`` form only checks that
    # ``expr`` is truthy, so it would accept any non-empty list of names.
    assert group_vars(out) == ["h"]
Esempio n. 30
0
def test_rename_preserve_grouping():
    """``rename`` of the grouping column updates the reported grouping variable."""
    base = tibble(g=[1, 2, 3], x=[3, 2, 1])
    grouped = group_by(base, f.g)

    renamed = rename(grouped, h=f.g)
    assert group_vars(renamed) == ["h"]