def test_compound_ungroup():
    """Exercise `ungroup` on a scalar, a pandas groupby, grouped and rowwise frames."""
    # The scalar 1 is expected back unchanged.
    assert ungroup(1) == 1

    # A Series groupby is expected to unwrap to the object it wraps, and to
    # reject an extra argument.
    g = Series([1, 2, 3]).groupby([1, 1, 2])
    assert ungroup(g) is g.obj
    with pytest.raises(ValueError):
        ungroup(g, "abc")

    grouped = tibble(x=1, y=2) >> group_by(f.x, f.y)

    # No column given: all grouping variables are removed.
    fully_ungrouped = ungroup(grouped)
    assert group_vars(fully_ungrouped) == []

    # One column given: only that grouping variable is removed.
    without_x = ungroup(grouped, f.x)
    assert group_vars(without_x) == ["y"]

    without_y = ungroup(grouped, f.y)
    assert group_vars(without_y) == ["x"]

    # Adding an existing grouping column with _add=True keeps both.
    regrouped = group_by(grouped, f.y, _add=True)
    assert group_vars(regrouped) == ["x", "y"]

    # Rowwise frames reject a column argument to ungroup.
    rowwise_frame = grouped >> rowwise()
    with pytest.raises(ValueError):
        ungroup(rowwise_frame, f.x)

    # Grouping by a column that is not in the frame raises KeyError.
    with pytest.raises(KeyError):
        group_by(grouped, f.w)
def test_can_re_rowwise():
    """Calling `rowwise` on a rowwise frame replaces its grouping variables."""
    by_x = rowwise(tibble(x=range(1, 6), y=range(1, 6)), "x")

    # Re-applying with a different column switches the grouping variable.
    by_y = rowwise(by_x, f.y)
    assert group_vars(by_y) == ["y"]

    # Re-applying with no column leaves no grouping variables.
    by_nothing = rowwise(by_y)
    assert group_vars(by_nothing) == []
def test__groups_is_defused_with_context():
    """`with_groups` gives the same result for `local_fn(2)` and a literal 2."""
    # Ported from: test_that(".groups is defused with context", {
    local_fn = identity

    via_local_fn = with_groups(
        mtcars,
        local_fn(2),
        mutate,
        disp=f.disp / sd(f.disp),
    )
    via_literal = with_groups(
        mtcars,
        2,
        mutate,
        disp=f.disp / sd(f.disp),
    )

    assert via_local_fn.equals(via_literal)
    assert group_vars(via_local_fn) == group_vars(via_literal)
def test_add(df):
    """`group_by(..., _add=True)` yields x, y whether added in one call or two."""
    # Both columns in a single call.
    one_step = df >> group_by(f.x, f.y, _add=True)
    one_step_vars = group_vars(one_step)
    assert one_step_vars == ["x", "y"]

    # One column per call, chained.
    two_steps = df >> group_by(f.x, _add=True) >> group_by(f.y, _add=True)
    two_steps_vars = group_vars(two_steps)
    assert two_steps_vars == ["x", "y"]
def test_peels_off_a_single_layer_of_grouping():
    """Each `summarise` call drops the last grouping variable."""
    frame = tibble(
        x=rep([1, 2, 3, 4], each=4),
        y=rep([1, 2], each=8),
        z=runif(16),
    )
    grouped = frame >> group_by(f.x, f.y)

    # One summarise: grouped by x only.
    summarised_once = summarise(grouped)
    assert group_vars(summarised_once) == ["x"]

    # Two summarises: no grouping left.
    summarised_twice = summarise(summarise(grouped))
    assert group_vars(summarised_twice) == []
def test_empty_returns_self():
    """`arrange` without sort keys returns a frame equal to its input."""
    plain = tibble(x=range(1, 11), y=range(1, 11))
    grouped = plain >> group_by(f.x)

    # Ungrouped input.
    assert arrange(plain).equals(plain)

    # Grouped input: same data and same grouping variables.
    arranged = arrange(grouped)
    assert arranged.equals(grouped)
    assert group_vars(arranged) == group_vars(grouped)
def test_preserves_grouping():
    """`filter` on a grouped frame keeps the grouping variable and group rows."""
    # NOTE(review): this function name is defined more than once in the
    # visible source; if they share a module, only the last definition is
    # collected by pytest.
    grouped = tibble(g=[1, 1, 1, 2, 2], x=[1, 2, 3, 4, 5]) >> group_by(f.g)

    # Keeping x in {3, 4} leaves one row in each of the two groups.
    picked = grouped >> filter(is_element(f.x, [3, 4]))
    assert group_vars(picked) == ["g"]
    assert group_rows(picked) == [[0], [1]]

    # Keeping x < 3 leaves a single group holding two rows.
    small = grouped >> filter(f.x < 3)
    assert group_vars(small) == ["g"]
    assert group_rows(small) == [[0, 1]]
def test_preserves_grouping():
    """`distinct` on a grouped frame keeps `x` as the grouping variable."""
    # NOTE(review): this function name is defined more than once in the
    # visible source; if they share a module, only the last definition is
    # collected by pytest.
    grouped = tibble(x=c(1, 1, 2, 2), y=f.x) >> group_by(f.x)

    # No arguments.
    deduped = grouped >> distinct()
    deduped_vars = group_vars(deduped)
    assert deduped_vars == ["x"]

    # With an expression that redefines the grouping column.
    shifted = grouped >> distinct(x=f.x + 2)
    shifted_vars = group_vars(shifted)
    assert shifted_vars == ["x"]
def test_deals_with_0_groups():
    """`mutate` works on a grouped frame built from an empty column."""
    empty_grouped = tibble(x=[]) >> group_by(f.x)

    # Element-wise expression.
    added = mutate(empty_grouped, y=f.x + 1)
    expected = tibble(x=[], y=[]) >> group_by(f.x)
    assert_iterable_equal(added, expected)
    assert group_vars(added) == group_vars(expected)

    # Aggregating expression.
    aggregated = mutate(empty_grouped, y=max(f.x))
    assert aggregated.shape == (0, 2)
    assert group_vars(aggregated) == ["x"]
def test_preserve_grouping():
    """`count` gives the same tallies with a column argument or on grouped input."""
    source = tibble(g=c(1, 2, 2, 2))
    expected = tibble(g=c(1, 2), n=c(1, 3))

    # Counting by an explicit column on the ungrouped frame.
    counted = source >> count(f.g)
    assert counted.equals(expected)

    # Counting a frame already grouped by g: result stays grouped by g.
    counted_grouped = source >> group_by(f.g) >> count()
    expected_grouped = expected >> group_by(f.g)
    assert counted_grouped.equals(expected_grouped)
    assert group_vars(counted_grouped) == group_vars(expected_grouped)
def test_output_preserves_grouping():
    """`add_count` appends `n` and keeps the grouping of its input."""
    source = tibble(g=c(1, 2, 2, 2))
    expected = tibble(g=c(1, 2, 2, 2), n=c(1, 3, 3, 3))

    # Ungrouped input, counting by an explicit column.
    with_counts = source >> add_count(f.g)
    assert with_counts.equals(expected)

    # Grouped input, no column argument; compare with the grouped expectation.
    with_counts = source >> group_by(f.g) >> add_count()
    expected >>= group_by(f.g)
    assert with_counts.equals(expected)
    assert group_vars(with_counts) == group_vars(expected)
def test_can_add_tallies_of_a_variable():
    """`add_tally` on a grouped frame adds per-group counts, optionally sorted."""
    source = tibble(a=c(2, 1, 1))

    # Default: rows stay in input order.
    tallied = source >> group_by(f.a) >> add_tally()
    expected = tibble(a=c(2, 1, 1), n=c(1, 2, 2)) >> group_by(f.a)
    assert_frame_equal(tallied, expected)
    assert group_vars(tallied) == group_vars(expected)

    # sort=True: the larger tally comes first.
    tallied_sorted = source >> group_by(f.a) >> add_tally(sort=True)
    expected_sorted = tibble(a=c(1, 1, 2), n=c(2, 2, 1)) >> group_by(f.a)
    assert tallied_sorted.equals(expected_sorted)
    # Disabled in the original:
    # assert_frame_equal(out, exp)
    assert group_vars(tallied_sorted) == group_vars(expected_sorted)
def test_joins_preserve_groups():
    """Joins keep the grouping variables of the left-hand frame."""
    left = tibble(a=[1, 2, 3]) >> group_by(f.a)
    right = tibble(a=rep([1, 2, 3, 4], 2), b=1) >> group_by(f.b)

    inner = inner_join(left, right, by="a")
    assert group_vars(inner) == ["a"]

    semi = semi_join(left, right, by="a")
    assert group_vars(semi) == ["a"]

    # See comment in nest_join
    nested = nest_join(left, right, by="a")
    assert group_vars(nested) == ["a"]
def test_rowwise_preserved_by_subsetting():
    """`mutate` and `rename_with` return rowwise frames with updated group names."""
    rowwise_frame = rowwise(tibble(x=range(1, 6), y=range(5, 0, -1)), f.x)

    # Disabled in the original:
    # out = get(rf, [1])
    # assert isinstance(out, TibbleRowwise)
    # assert group_vars(out) == ["x"]

    # Adding a column keeps the rowwise type and the grouping variable.
    mutated = mutate(rowwise_frame, z=f.y)
    assert isinstance(mutated, TibbleRowwise)
    assert group_vars(mutated) == ["x"]

    # Upper-casing all column names renames the grouping variable too.
    renamed = rename_with(rowwise_frame, str.upper)
    assert isinstance(renamed, TibbleRowwise)
    assert group_vars(renamed) == ["X"]
def test_group_column_names_reflect_renamed_duplicate_columns():
    """Joining on `x` with a frame that also has `y` leaves `x` as the only group var."""
    # Ported from:
    # test_that("group column names reflect renamed duplicate columns (#2330)", {
    grouped_left = tibble(x=range(1, 6), y=range(1, 6)) >> group_by(f.x, f.y)
    plain_right = tibble(x=range(1, 6), y=range(1, 6))

    joined = inner_join(grouped_left, plain_right, by="x")
    assert group_vars(joined) == ["x"]
def test_preserves_grouping():
    """`mutate` keeps grouping and recomputes group data when the key changes."""
    # NOTE(review): this function name is defined more than once in the
    # visible source; if they share a module, only the last definition is
    # collected by pytest.
    grouped = group_by(tibble(x=[1, 2], y=2), f.x)

    # Overwriting the grouping column with a constant leaves a single group.
    constant_key = mutate(grouped, x=1)
    assert group_vars(constant_key) == ["x"]
    assert nrow(group_data(constant_key)) == 1

    # Adding an unrelated column leaves the group data unchanged.
    extra_column = mutate(grouped, z=1)
    assert group_data(extra_column).equals(group_data(grouped))
def test_rowwise_captures_group_vars():
    """`rowwise` on a grouped frame inherits its group vars and rejects new ones."""
    grouped = group_by(tibble(g=[1, 2], x=[1, 2]), f.g)

    # No column argument: the existing grouping variable carries over.
    as_rowwise = rowwise(grouped)
    assert group_vars(as_rowwise) == ["g"]

    # Passing a column for an already-grouped frame raises ValueError.
    with pytest.raises(ValueError):
        rowwise(grouped, f.x)
def test_rowwise_preserved_by_major_verbs():
    """Most verbs return a rowwise frame; `summarise` returns a grouped one."""
    rowwise_frame = rowwise(tibble(x=range(1, 6), y=range(5, 0, -1)), f.x)

    arranged = arrange(rowwise_frame, f.y)
    assert isinstance(arranged, TibbleRowwise)
    assert group_vars(arranged) == ["x"]

    filtered = filter(rowwise_frame, f.x < 3)
    assert isinstance(filtered, TibbleRowwise)
    assert group_vars(filtered) == ["x"]

    mutated = mutate(rowwise_frame, x=f.x + 1)
    assert isinstance(mutated, TibbleRowwise)
    assert group_vars(mutated) == ["x"]

    # Renaming the grouping column is reflected in the group vars.
    renamed = rename(rowwise_frame, X=f.x)
    assert isinstance(renamed, TibbleRowwise)
    assert group_vars(renamed) == ["X"]

    selected = select(rowwise_frame, "x")
    assert isinstance(selected, TibbleRowwise)
    assert group_vars(selected) == ["x"]

    sliced = slice(rowwise_frame, c(0, 0))
    assert isinstance(sliced, TibbleRowwise)
    assert group_vars(sliced) == ["x"]

    # Except for summarise
    summarised = summarise(rowwise_frame, z=mean(f.x, f.y))
    assert isinstance(summarised, TibbleGrouped)
    assert group_vars(summarised) == ["x"]
def test_join_preserve_grouping(df):
    """Self-joins of a frame grouped by `x` stay grouped by `x`."""
    grouped = df >> group_by(f.x)

    joined = grouped >> inner_join(grouped, by=["x", "y"])
    inner_vars = joined >> group_vars()
    assert inner_vars == ["x"]

    joined = grouped >> left_join(grouped, by=["x", "y"])
    left_vars = joined >> group_vars()
    assert left_vars == ["x"]

    joined = grouped >> semi_join(grouped, by=["x", "y"])
    semi_vars = joined >> group_vars()
    assert semi_vars == ["x"]

    joined = grouped >> anti_join(grouped, by=["x", "y"])
    anti_vars = joined >> group_vars()
    assert anti_vars == ["x"]
def test_no_expressions():
    """`summarise` with no expressions, or with an empty tibble as the expression."""
    plain = tibble(x=[1, 2], y=[1, 2])
    grouped = group_by(plain, f.x)

    # No arguments at all: ungrouped input gives one row and no columns.
    result = summarise(plain)
    assert dim(result) == (1, 0)

    # Grouped input gives the group keys, with no grouping left.
    result = summarise(grouped)
    assert group_vars(result) == []
    keys_only = tibble(x=[1, 2])
    assert_tibble_equal(result, keys_only)

    # An empty tibble as the only expression gives the same results.
    result = summarise(plain, tibble())
    assert dim(result) == (1, 0)

    result = summarise(grouped, tibble())
    assert group_vars(result) == []
    keys_only = tibble(x=[1, 2])
    assert result.equals(keys_only)
def test_works_with_grouped_empty_data_frames():
    """`summarise` on empty grouped and rowwise frames gives a 0 x 2 result."""
    empty = tibble(x=[])

    # Grouped by x.
    from_grouped = empty >> group_by(f.x) >> summarise(y=1)
    assert dim(from_grouped) == (0, 2)
    assert from_grouped.columns.tolist() == ["x", "y"]

    # Rowwise on x: the result is still grouped by x.
    from_rowwise = empty >> rowwise(f.x) >> summarise(y=1)
    assert group_vars(from_rowwise) == ["x"]
    assert dim(from_rowwise) == (0, 2)
    assert from_rowwise.columns.tolist() == ["x", "y"]
def test_empty_mutate_returns_input():
    """`mutate` with no expressions returns a frame equal to its input.

    Covers both an ungrouped tibble and one grouped by `x`; for the grouped
    case the result must also be a `TibbleGrouped` with the same group vars.
    """
    df = tibble(x=1)
    gf = group_by(df, f.x)

    out = mutate(df)
    assert out.equals(df)

    out = mutate(gf)
    assert_tibble_equal(out, gf)
    # Check the type of the *result*. The original asserted on `gf`, the
    # input built by `group_by` above, which says nothing about what
    # `mutate` returned.
    assert isinstance(out, TibbleGrouped)
    assert group_vars(out) == ["x"]
def test_zero_row_dfs():
    """Verbs on a zero-row grouped frame keep the expected shape and grouping."""
    empty = tibble(a=[], b=[], g=[])
    grouped = group_by(empty, f.g, _drop=False)
    assert grouped.shape == (0, 3)
    assert group_vars(grouped) == ["g"]
    assert group_size(grouped) == []

    # summarise: key column plus n, no grouping left.
    result = summarise(grouped, n=n())
    assert result.shape == (0, 2)
    assert group_vars(result) == []

    # mutate: one extra column, grouping kept.
    result = mutate(grouped, c=f.b + 1)
    assert result.shape == (0, 4)
    assert group_vars(result) == ["g"]
    assert group_size(result) == []

    # filter
    result = filter(grouped, f.a == 100)
    assert result.shape == (0, 3)
    assert group_vars(result) == ["g"]
    assert group_size(result) == []

    # arrange
    result = arrange(grouped, f.a, f.g)
    assert result.shape == (0, 3)
    assert group_vars(result) == ["g"]
    assert group_size(result) == []

    # select: the result has two columns even though only `a` was selected.
    result = select(grouped, f.a)
    assert result.shape == (0, 2)
    assert group_vars(result) == ["g"]
    assert group_size(result) == []
def test_groups_arg(caplog):
    """Check how `summarise(_groups=...)` shapes the grouping of its result."""
    frame = tibble(x=1, y=2)

    # Default on a grouped frame: the data round-trips and a message about
    # the resulting grouping is logged.
    summarised = frame >> group_by(f.x, f.y) >> summarise()
    assert summarised.equals(frame)
    assert "has grouped output by ['x']" in caplog.text
    caplog.clear()

    # Default on a rowwise frame; the footer check is disabled in the original.
    summarised = frame >> rowwise(f.x, f.y) >> summarise()
    # assert "[Groups: x, y (n=1)]" in out.attrs["_str_footer"]

    # _groups="rowwise" on an ungrouped frame gives a rowwise result.
    frame = tibble(x=1, y=2)
    from_summarise = frame >> summarise(z=3, _groups="rowwise")
    from_rowwise = rowwise(tibble(z=3))
    assert isinstance(from_summarise, TibbleRowwise)
    assert isinstance(from_rowwise, TibbleRowwise)
    assert from_summarise.equals(from_rowwise)
    assert group_vars(from_summarise) == group_vars(from_rowwise)

    # Each _groups option on a frame grouped by x and y.
    grouped = frame >> group_by(f.x, f.y)

    resulting_vars = grouped >> summarise() >> group_vars()
    assert resulting_vars == ["x"]

    resulting_vars = grouped >> summarise(_groups="drop_last") >> group_vars()
    assert resulting_vars == ["x"]

    resulting_vars = grouped >> summarise(_groups="drop") >> group_vars()
    assert resulting_vars == []

    resulting_vars = grouped >> summarise(_groups="keep") >> group_vars()
    assert resulting_vars == ["x", "y"]

    resulting_vars = grouped >> summarise(_groups="rowwise") >> group_vars()
    assert resulting_vars == ["x", "y"]
    # Alternative expectation, disabled in the original:
    # assert gvars == []

    # _groups options on a rowwise frame.
    rowwise_frame = frame >> rowwise(f.x, f.y)

    resulting_vars = rowwise_frame >> summarise(_groups="drop") >> group_vars()
    assert resulting_vars == []

    resulting_vars = rowwise_frame >> summarise(_groups="keep") >> group_vars()
    assert resulting_vars == ["x", "y"]
def test_keep_always_retains_grouping_variables():
    """The grouping column `z` is kept by `mutate` for every `_keep` option."""
    grouped = tibble(x=1, y=2, z=3) >> group_by(f.z)

    # "none": only the grouping column and the new column remain.
    result = grouped >> mutate(a=f.x + 1, _keep="none")
    expected = tibble(z=3, a=2) >> group_by(f.z)
    assert result.equals(expected)
    assert group_vars(result) == group_vars(expected)

    # "all": every input column plus the new one.
    result = grouped >> mutate(a=f.x + 1, _keep="all")
    expected = tibble(x=1, y=2, z=3, a=2) >> group_by(f.z)
    assert result.equals(expected)
    assert group_vars(result) == group_vars(expected)

    # "used": the column read by the expression, the group column, the new one.
    result = grouped >> mutate(a=f.x + 1, _keep="used")
    expected = tibble(x=1, z=3, a=2) >> group_by(f.z)
    assert result.equals(expected)
    assert group_vars(result) == group_vars(expected)

    # "unused": the column not read by the expression, the group column, the new one.
    result = grouped >> mutate(a=f.x + 1, _keep="unused")
    expected = tibble(y=2, z=3, a=2) >> group_by(f.z)
    assert result.equals(expected)
    assert group_vars(result) == group_vars(expected)
def test_removes_vars_with_None():
    """Assigning `None` in `mutate` removes the named column."""
    plain = tibble(x=range(1, 4), y=range(1, 4))
    grouped = group_by(plain, f.x)

    # Ungrouped frame.
    result = plain >> mutate(y=None)
    assert result.columns.tolist() == ["x"]

    # Grouped frame: the column goes, the grouping stays.
    result = grouped >> mutate(y=None)
    assert result.columns.tolist() == ["x"]
    assert isinstance(result, TibbleGrouped)
    assert group_vars(result) == ["x"]
    assert group_rows(result) == [[0], [1], [2]]

    # even if it doesn't exist
    result = plain >> mutate(z=None)
    assert result.equals(plain)

    # A positional Series named z followed by z=None leaves the frame as it was.
    z = Series(1, name="z")
    result = plain >> mutate(z, z=None)
    assert result.equals(plain)

    # Removing every original column leaves only the new one.
    plain = tibble(x=1, y=1)
    result = mutate(plain, z=1, x=None, y=None)
    assert result.equals(tibble(z=1))
def test_arguments_to_select_dont_match_vars_select_arguments():
    """`var`, `exclude` and `include` keywords in `select` act as plain renames."""
    plain = tibble(a=1)

    # var=
    result = select(plain, var=f.a)
    assert result.equals(tibble(var=1))

    result = select(group_by(plain, f.a), var=f.a)
    expected = group_by(tibble(var=1), f.var)
    assert result.equals(expected)
    assert group_vars(result) == group_vars(expected)

    # exclude= / include= on the ungrouped frame
    result = select(plain, exclude=f.a)
    assert result.equals(tibble(exclude=1))

    result = select(plain, include=f.a)
    assert result.equals(tibble(include=1))

    # exclude= / include= on the grouped frame: the group var is renamed too
    result = select(group_by(plain, f.a), exclude=f.a)
    expected = group_by(tibble(exclude=1), f.exclude)
    assert result.equals(expected)
    assert group_vars(result) == group_vars(expected)

    result = select(group_by(plain, f.a), include=f.a)
    expected = group_by(tibble(include=1), f.include)
    assert result.equals(expected)
    assert group_vars(result) == group_vars(expected)
def test_tibble_keep_grouping(df):
    """Passing a grouped frame to `tibble` keeps `x` as the grouping variable."""
    g = df >> group_by(f.x)
    rebuilt = tibble(g)

    # Disabled in the original:
    # with pytest.raises(NotImplementedError):
    assert group_vars(rebuilt) == ["x"]
def test_preserves_grouping():
    """`select` with a rename keeps the grouping and renames the group var.

    The frame is grouped by `g`; selecting it as `h` must leave `h` as the
    only grouping variable.
    """
    # NOTE(review): this function name is defined more than once in the
    # visible source; if they share a module, only the last definition is
    # collected by pytest.
    gf = group_by(tibble(g=[1, 2, 3], x=[3, 2, 1]), f.g)
    out = select(gf, h=f.g)
    # The original read `assert group_vars(out), ["h"]`, which uses the list
    # as the assertion message and passes for any non-empty result. Compare
    # the values instead.
    assert group_vars(out) == ["h"]
def test_rename_preserve_grouping():
    """Renaming the grouping column with `rename` updates the group vars."""
    grouped = group_by(tibble(g=[1, 2, 3], x=[3, 2, 1]), f.g)
    renamed = rename(grouped, h=f.g)
    assert group_vars(renamed) == ["h"]