def test_handles_simple_symbols():
    """Check filter() with a bare column, a local mask and nested helpers.

    The same scenarios are exercised on a plain tibble and on that tibble
    grouped by ``x``.
    """
    frame = tibble(x=range(1, 5), test=rep(c(True, False), each=2))
    # The next two results are never inspected; the calls only have to run.
    res = filter(frame, f.test)

    by_x = group_by(frame, f.x)
    res = filter(by_x, f.test)

    def keep_by_local_mask(data):
        # The mask is a plain local sequence, not a column reference.
        mask = c(True, True, False, False)
        return filter(data, mask)

    out = keep_by_local_mask(frame)
    assert out.equals(frame.iloc[:2, :])

    def inner(data, *args):
        # ``one`` is a local of this helper, used inside the condition.
        one = 1
        return filter(data, f.test, f.x > one, *args)

    def outer(data, *args):
        # ``four`` lives one call frame above ``one``.
        four = 4
        return inner(data, f.x < four, *args)

    res = outer(frame)
    assert res.x.tolist() == [2]
    assert res.test.tolist() == [True]

    res = outer(by_x)
    assert res.x.obj.tolist() == [2]
    assert res.test.obj.tolist() == [True]
def test_complex_vec():
    """filter() keeps complex-valued cells, filtering on ``x`` or ``re(y)``."""
    source = tibble(x=range(1, 11), y=[i + 2j for i in range(1, 11)])
    first_three = [i + 2j for i in range(1, 4)]

    # Condition on the integer column.
    kept = source >> filter(f.x < 4)
    assert kept.y.tolist() == first_three

    # Condition on the real part of the complex column.
    kept = source >> filter(re(f.y) < 4)
    assert kept.y.tolist() == first_three
def test_slice_works_with_grouped_data():
    """Exercise slice() on grouped frames.

    Covers positive and negated slices compared against row_number()
    filters, group keys after slicing, invalid arguments, and slicing with
    a grouped Series of indices.
    """
    by_cyl = mtcars >> arrange(f.cyl) >> group_by(f.cyl)

    # First two rows per group.
    sliced = slice(by_cyl, f[:2])
    wanted = filter(by_cyl, row_number() < 3)
    assert_frame_equal(sliced, wanted)

    # Everything except the first two rows per group.
    sliced = slice(by_cyl, ~f[:2])
    wanted = filter(by_cyl, row_number() >= 3)
    assert_tibble_equal(sliced, wanted)

    by_x = group_by(tibble(x=c(1, 1, 2, 2, 2)), f.x)
    # Disabled check, kept for reference:
    # out = group_keys(slice(g, 3, _preserve=True))
    # assert out.x.tolist() == [1, 2]
    out = group_keys(slice(by_x, 2, _preserve=False))
    assert out.x.tolist() == [2]

    # Grouping by a categorical that declares a level (3) with no rows.
    gf = tibble(x=f[1:4]) >> group_by(
        g=Categorical([1, 1, 2], categories=[1, 2, 3]),
        _drop=False,
    )

    with pytest.raises(TypeError):
        gf >> slice("a")

    with pytest.raises(ValueError):
        gf >> slice(~f[:2], 1)

    out = gf >> slice(0)
    assert out.shape[0] == 2

    # Indices supplied as a Series grouped by the frame's own group index.
    group_index = gf._datar["grouped"].grouper.result_index
    out = gf >> slice(Series([1, 0, 0]).groupby(group_index))
    assert_iterable_equal(out.x.obj, [2, 3])
def test_if_any_if_all_single_arg():
    """if_any()/if_all() accept only a column selection, without a function."""
    flags = tibble(x=[True, False], y=[True, True])

    # Every row has at least one True, so nothing is removed.
    kept = flags >> filter(if_any(c(f.x, f.y)))
    assert_frame_equal(kept, flags)

    # Only the first row is True in both columns.
    kept = flags >> filter(if_all(c(f.x, f.y)))
    assert_frame_equal(kept, flags.iloc[[0], :])
def test_handles_tuple_columns():
    """filter() keeps tuple-valued cells intact, ungrouped and grouped."""
    long_cell = tuple(range(1, 11))

    # Ungrouped: a fresh tibble is built for each scenario.
    plain = tibble(a=[1, 2], x=[tuple(range(1, 11)), tuple(range(1, 6))])
    res = plain >> filter(f.a == 1)
    assert res.x.tolist() == [long_cell]

    # Grouped by ``a`` before filtering.
    fresh = tibble(a=[1, 2], x=[tuple(range(1, 11)), tuple(range(1, 6))])
    res = fresh >> group_by(f.a) >> filter(f.a == 1)
    assert res.x.obj.tolist() == [long_cell]
def test_handles_named_logical():
    """filter() works on a boolean column that was built from a dict."""
    truthy = tibble(a={"a": True})
    kept = truthy >> filter(f.a)
    assert kept.equals(truthy)

    falsy = tibble(a={"a": False})
    kept = falsy >> filter(f.a)
    assert kept.shape[0] == 0
def test_preserves_grouping():
    """Grouping variables and group rows are kept in step after filter()."""
    grouped = tibble(g=[1, 1, 1, 2, 2], x=[1, 2, 3, 4, 5]) >> group_by(f.g)

    # One surviving row in each of the two groups.
    out = grouped >> filter(is_element(f.x, [3, 4]))
    assert group_vars(out) == ["g"]
    assert group_rows(out) == [[0], [1]]

    # Both surviving rows belong to a single group.
    out = grouped >> filter(f.x < 3)
    assert group_vars(out) == ["g"]
    assert group_rows(out) == [[0, 1]]
def test_works_with_if_any_if_all():
    """if_all()/if_any() over ``x*`` columns match explicit ``&`` / ``|``."""
    source = tibble(x1=range(1, 11), x2=c(range(1, 6), 10, 9, 8, 7, 6))

    # if_all is compared with the conjunction of the per-column conditions.
    via_helper = source >> filter(if_all(starts_with("x"), lambda x: x > 6))
    explicit = source >> filter((f.x1 > 6) & (f.x2 > 6))
    assert via_helper.equals(explicit)

    # if_any is compared with the disjunction.
    via_helper = source >> filter(if_any(starts_with("x"), lambda x: x > 6))
    explicit = source >> filter((f.x1 > 6) | (f.x2 > 6))
    assert via_helper.equals(explicit)
def test_handles_scalar_results():
    """A condition that reduces to a single value is accepted by filter()."""
    # Ungrouped: min(mpg) > 0 holds, so the frame comes back unchanged.
    unfiltered = mtcars >> filter(min(f.mpg) > 0)
    assert unfiltered.equals(mtcars)

    filtered_sorted = (
        mtcars
        >> group_by(f.cyl)
        >> filter(min(f.mpg) > 0)
        >> arrange(f.cyl, f.mpg)
    )
    # See TibbleGrouped's Known issues
    plain_sorted = mtcars >> group_by(f.cyl) >> arrange(f.cyl, f.mpg)
    assert_frame_equal(filtered_sorted, plain_sorted)
def test_slice_handles_numeric_input():
    """slice(0) matches ``row_number() == 1``, grouped and ungrouped."""
    by_cyl = mtcars >> arrange(f.cyl) >> group_by(f.cyl)

    # Grouped: three rows come back.
    res = by_cyl >> slice(0)
    assert nrow(res) == 3

    exp = by_cyl >> filter(row_number() == 1)
    assert_frame_equal(res, exp)

    # Ungrouped: the sliced frame is converted with as_tibble() first.
    sliced = mtcars >> slice(0) >> as_tibble()
    filtered = mtcars >> filter(row_number() == 1)
    assert_frame_equal(sliced, filtered)
def test_filter_false_handles_indices(caplog):
    """filter(False) on grouped data, with ``_preserve`` on and off.

    Args:
        caplog: pytest fixture capturing log output.
    """
    # With _preserve=True the captured log must mention "support".
    preserved = mtcars >> group_by(f.cyl) >> filter(False, _preserve=True)
    assert "support" in caplog.text
    # Disabled check, kept for reference:
    # out = group_rows(out)
    # assert out == [[], [], []]

    # With _preserve=False no group rows are left.
    dropped = mtcars >> group_by(f.cyl) >> filter(False, _preserve=False)
    remaining = group_rows(dropped)
    assert remaining == []
def test_contains():
    """is_element() against ``letters`` keeps two of three rows either way."""
    source = tibble(a=c("a", "b", "ab"), g=c(1, 1, 2))

    # Ungrouped.
    kept = source >> filter(is_element(f.a, letters))
    kept_count = nrow(kept)
    assert kept_count == 2

    # Grouped by ``g``.
    kept = source >> group_by(f.g) >> filter(is_element(f.a, letters))
    kept_count = nrow(kept)
    assert kept_count == 2
def test_if_any_all_na_handling():
    """if_all()/if_any() agree with ``&`` / ``|`` on a grid containing NA."""
    grid = expandgrid(x=c(True, False, NA), y=c(True, False, NA))

    got = grid >> filter(if_all(c(f.x, f.y), identity))
    wanted = grid >> filter(f.x & f.y)
    assert_frame_equal(got, wanted)

    got = grid >> filter(if_any(c(f.x, f.y), identity))
    wanted = grid >> filter(f.x | f.y)
    assert_frame_equal(got, wanted)
def test_remember_drop_True():
    """``_drop=True`` stays in effect through filter() and a re-group."""
    grouped = iris >> group_by(f.Species, _drop=True)
    assert group_by_drop_default(grouped)

    # Plain filter.
    filtered = grouped >> filter(f.Sepal_Length > 5)
    assert group_by_drop_default(filtered)

    # Filter with _preserve=False.
    unpreserved = grouped >> filter(f.Sepal_Length > 5, _preserve=False)
    assert group_by_drop_default(unpreserved)

    # Grouping again without passing _drop.
    regrouped = unpreserved >> group_by(f.Species)
    assert group_by_drop_default(regrouped)
def test_if_any_all_enforce_bool():
    """if_all()/if_any() yield booleans even when the columns are numeric."""
    single_row = tibble(x=10, y=10)

    # In filter(): the only row is kept.
    kept = single_row >> filter(if_all(f[f.x:f.y], identity))
    assert_frame_equal(kept, single_row)

    kept = single_row >> filter(if_any(f[f.x:f.y], identity))
    assert_frame_equal(kept, single_row)

    # In mutate(): the new column equals True rather than the value 10.
    flagged = single_row >> mutate(ok=if_all(f[f.x:f.y], identity))
    assert_frame_equal(flagged, mutate(single_row, ok=True))

    flagged = single_row >> mutate(ok=if_any(f[f.x:f.y], identity))
    assert_frame_equal(flagged, mutate(single_row, ok=True))
def test_group_map_respects_empty_groups():
    """Count the pieces group_map() produces before and after filter()."""
    # Three results for mtcars grouped by cyl.
    mapped = group_by(mtcars, f.cyl) >> group_map(lambda part: head(part, 2))
    assert len(list(mapped)) == 3

    # Only the "setosa" group remains after the filter.
    mapped = (
        iris
        >> group_by(f.Species)
        >> filter(f.Species == "setosa")
        >> group_map(tally)
    )
    assert len(list(mapped)) == 1

    mapped = (
        iris
        >> group_by(f.Species, _drop=False)
        >> filter(f.Species == "setosa")
        >> group_map.list(tally)
    )
    # filter unable to keep the structure
    # assert len(res) == 3
    assert len(mapped) == 1
def test_handles_df_cols():
    """filter() works when one column is itself a data frame."""
    nested = tibble(x=[1, 2], z=tibble(A=[1, 2], B=[3, 4]))
    first_row = nested >> slice(0)

    # Condition on a regular column.
    out = nested >> filter(f.x == 1)
    assert out.equals(first_row)

    # Condition on a column of the nested frame, addressed as "z$A".
    out = nested >> filter(f["z$A"] == 1)
    assert out.equals(first_row)

    by_x = nested >> group_by(f.x)
    out = by_x >> filter(f["z$A"] == 1)
    assert out.equals(first_row)

    # NOTE(review): this repeats the previous grouped check verbatim;
    # presumably one of the two was meant to use a different condition.
    out = by_x >> filter(f["z$A"] == 1)
    assert out.equals(first_row)
def test_preserve_order_across_groups():
    """Three orderings of group/arrange/filter give the same final frame."""
    source = tibble(g=c(1, 2, 1, 2, 1), time=[5, 4, 3, 2, 1], x=f.time)

    # Group, then filter.
    group_then_filter = (
        source
        >> group_by(f.g)
        >> filter(f.x <= 4)
        >> ungroup()
        >> arrange(f.g, f.time)
    )
    # Arrange first, then group and filter.
    arrange_first = (
        source
        >> arrange(f.g)
        >> group_by(f.g)
        >> filter(f.x <= 4)
        >> ungroup()
        >> arrange(f.g, f.time)
    )
    # Filter before grouping.
    filter_first = (
        source
        >> filter(f.x <= 4)
        >> group_by(f.g)
        >> ungroup()
        >> arrange(f.g, f.time)
    )

    # Replace each index with a fresh default one before comparing.
    for result in (group_then_filter, arrange_first, filter_first):
        result.reset_index(drop=True, inplace=True)

    assert group_then_filter.equals(arrange_first)
    assert group_then_filter.equals(filter_first)
def test_group_modify_makes_a_grouped_df():
    """group_modify() output has the expected row count and group rows."""
    # Two rows from each of the three cyl groups.
    modified = group_by(mtcars, f.cyl) >> group_modify(
        lambda part: head(part, 2)
    )
    assert nrow(modified) == 6
    assert group_rows(modified) == [[0, 1], [2, 3], [4, 5]]

    # Only "setosa" is left after the filter.
    modified = (
        iris
        >> group_by(f.Species)
        >> filter(f.Species == "setosa")
        >> group_modify(lambda part: tally(part))
    )
    assert nrow(modified) == 1
    assert group_rows(modified) == [[0]]

    # Same pipeline with _drop=False; the disabled expectations are kept
    # for reference next to the ones currently asserted.
    modified = (
        iris
        >> group_by(f.Species, _drop=False)
        >> filter(f.Species == "setosa")
        >> group_modify(lambda part: tally(part))
    )
    # assert nrow(res) == 3
    assert nrow(modified) == 1
    # assert group_rows(res) == [[0], [1], [2]]
    assert group_rows(modified) == [[0]]
def test_rowwise_preserved_by_major_verbs():
    """Most verbs return a TibbleRowwise; summarise() returns TibbleGrouped."""
    row_frame = rowwise(tibble(x=range(1, 6), y=range(5, 0, -1)), f.x)

    result = arrange(row_frame, f.y)
    assert isinstance(result, TibbleRowwise)
    assert group_vars(result) == ["x"]

    result = filter(row_frame, f.x < 3)
    assert isinstance(result, TibbleRowwise)
    assert group_vars(result) == ["x"]

    result = mutate(row_frame, x=f.x + 1)
    assert isinstance(result, TibbleRowwise)
    assert group_vars(result) == ["x"]

    # Renaming the grouping column is reflected in the group variables.
    result = rename(row_frame, X=f.x)
    assert isinstance(result, TibbleRowwise)
    assert group_vars(result) == ["X"]

    result = select(row_frame, "x")
    assert isinstance(result, TibbleRowwise)
    assert group_vars(result) == ["x"]

    result = slice(row_frame, c(0, 0))
    assert isinstance(result, TibbleRowwise)
    assert group_vars(result) == ["x"]

    # Except for summarise
    result = summarise(row_frame, z=mean(f.x, f.y))
    assert isinstance(result, TibbleGrouped)
    assert group_vars(result) == ["x"]
def test_zero_row_dfs():
    """Verbs applied to a zero-row grouped frame keep shape and grouping."""
    empty = tibble(a=[], b=[], g=[])
    empty_grouped = group_by(empty, f.g, _drop=False)
    assert empty_grouped.shape == (0, 3)
    assert group_vars(empty_grouped) == ["g"]
    assert group_size(empty_grouped) == []

    # summarise: two columns, and no grouping variables afterwards.
    result = summarise(empty_grouped, n=n())
    assert result.shape == (0, 2)
    assert group_vars(result) == []

    # mutate: one more column, grouping unchanged.
    result = mutate(empty_grouped, c=f.b + 1)
    assert result.shape == (0, 4)
    assert group_vars(result) == ["g"]
    assert group_size(result) == []

    result = filter(empty_grouped, f.a == 100)
    assert result.shape == (0, 3)
    assert group_vars(result) == ["g"]
    assert group_size(result) == []

    result = arrange(empty_grouped, f.a, f.g)
    assert result.shape == (0, 3)
    assert group_vars(result) == ["g"]
    assert group_size(result) == []

    # select: selecting only ``a`` still yields two columns.
    result = select(empty_grouped, f.a)
    assert result.shape == (0, 2)
    assert group_vars(result) == ["g"]
    assert group_size(result) == []
def test_0_groups_filter():
    """filter() on a zero-row grouped frame keeps dimensions and columns."""
    empty_grouped = tibble(x=1).loc[[], :] >> group_by(f.x)
    filtered = empty_grouped >> filter(f.x > 3)

    dims_before = empty_grouped >> dim()
    dims_after = filtered >> dim()
    assert dims_before == dims_after

    assert empty_grouped.columns.tolist() == filtered.columns.tolist()
def test_drop():
    """Filter to one species, group with ``_drop=True``, expect one count row."""
    setosa_grouped = (
        iris
        >> filter(f.Species == "setosa")
        >> group_by(f.Species, _drop=True)
    )
    count_rows = setosa_grouped >> count() >> nrow()
    assert count_rows == 1
def test_grouped_filter_handles_indices():
    """Group bookkeeping after a grouped filter() survives a later mutate()."""
    filtered = iris >> group_by(f.Species) >> filter(f.Sepal_Length > 5)
    mutated = filtered >> mutate(Petal=f.Petal_Width * f.Petal_Length)

    assert nrow(filtered) == nrow(mutated)

    rows_before = group_rows(filtered)
    rows_after = group_rows(mutated)
    assert rows_before == rows_after

    # NOTE(review): if ``all`` here is the builtin, iterating a data frame
    # yields its column labels, so this check could pass regardless of the
    # compared values; confirm which ``all`` this module imports.
    assert all(group_keys(filtered) == group_keys(mutated))
def test_remember_drop_False():
    """``_drop=False`` stays in effect when grouping again without ``_drop``."""
    setosa_grouped = (
        iris
        >> filter(f.Species == "setosa")
        >> group_by(f.Species, _drop=False)
    )
    assert not group_by_drop_default(setosa_grouped)

    regrouped = setosa_grouped >> group_by(f.Species)
    assert not group_by_drop_default(regrouped)
def test_group_split_bind_rows_round_trip():
    """group_split() pieces of a one-species frame bind back to that frame.

    With the default drop behaviour a single chunk is produced; with
    ``_drop=False`` the categorical ``Species`` column yields three chunks,
    the first of which is the "setosa" rows.
    """
    # Work on a private copy. Assigning the categorical column into the
    # module-level ``iris`` object would change it for every other test
    # that uses ``iris`` and make results depend on test execution order.
    iris_categorical = iris.copy()
    iris_categorical["Species"] = iris_categorical["Species"].astype(
        "category"
    )

    setosa = iris_categorical >> filter(f.Species == "setosa")

    chunks = setosa >> group_split.list(f.Species)
    assert len(chunks) == 1
    assert bind_rows(chunks).equals(setosa)

    chunks = setosa >> group_split.list(f.Species, _drop=False)
    assert len(chunks) == 3
    assert_frame_equal(chunks[0], setosa)
def test_use_env_var():
    """A local named like a column can still be passed to all_of()."""
    # not a problem, since we use f.y
    source = tibble(x=1.0, y=2.4)
    # Deliberately named ``y`` (also a column name) while holding "x".
    y = "x"

    summarised = source >> summarise(across(all_of(y), mean))
    wanted = tibble(x=1.0)
    assert summarised.equals(wanted)

    # The mean of a single value leaves the frame unchanged.
    mutated = source >> mutate(across(all_of(y), mean))
    assert mutated.equals(source)

    kept = source >> filter(if_all(all_of(y), lambda col: col < 2))
    assert kept.equals(source)
def test_rowwise():
    """filter() on a rowwise frame with a custom registered function."""

    @register_func(None)
    def grepl(a, b):
        # Element-wise substring test, indexed like the first argument.
        hits = [needle in text for needle, text in zip(a.obj, b.obj)]
        return Series(hits, index=a.obj.index)

    source = tibble(
        First=c("string1", "string2"),
        Second=c("Sentence with string1", "something"),
    )

    matched = source >> rowwise() >> filter(grepl(f.First, f.Second))
    assert nrow(matched) == 1

    # The surviving row is the first row of the source frame.
    first_row = source >> slice(0)
    ungrouped = matched >> ungroup()
    assert first_row.equals(ungrouped)
def test_returns_input_with_no_args():
    """filter() without any condition returns a frame equal to its input."""
    unfiltered = filter(mtcars)
    assert unfiltered.equals(mtcars)
def test_discards_na():
    """Only the two rows equal to 1 are kept; the NA row is not."""
    with_missing = tibble(i=range(1, 6), x=c(NA, 1, 1, 0, 0))
    kept = filter(with_missing, f.x == 1)
    kept_count = nrow(kept)
    assert kept_count == 2