def test_pull_nest_df_col():
    """Check ``pull`` on a column that itself holds a nested tibble."""
    nested = tibble(x=1, y=tibble(a=2))

    # With to="list" the pulled values come back as a list of lists.
    as_list = pull(nested, 1, to="list")
    assert as_list == [[2]]

    # Without ``to`` the pulled column compares equal to the inner tibble.
    as_frame = pull(nested, 1)
    expected = tibble(a=2)
    assert_tibble_equal(as_frame, expected)
def test_slice_works_with_grouped_data():
    """Exercise ``slice`` on grouped frames.

    Covers range selection, negated ranges, group keys after slicing,
    invalid arguments, and slicing with a grouped ``Series``.
    """
    by_cyl = mtcars >> arrange(f.cyl) >> group_by(f.cyl)

    # f[:2] within each group matches filtering on row_number() < 3.
    sliced = slice(by_cyl, f[:2])
    expected = filter(by_cyl, row_number() < 3)
    assert_frame_equal(sliced, expected)

    # The negated range matches the complementary filter.
    sliced = slice(by_cyl, ~f[:2])
    expected = filter(by_cyl, row_number() >= 3)
    assert_tibble_equal(sliced, expected)

    by_x = group_by(tibble(x=c(1, 1, 2, 2, 2)), f.x)
    # Disabled case carried over from the original test:
    # out = group_keys(slice(by_x, 3, _preserve=True))
    # assert out.x.tolist() == [1, 2]
    keys = group_keys(slice(by_x, 2, _preserve=False))
    assert keys.x.tolist() == [2]

    cat_grouped = tibble(x=f[1:4]) >> group_by(
        g=Categorical([1, 1, 2], categories=[1, 2, 3]),
        _drop=False,
    )

    # A string index is rejected with TypeError.
    with pytest.raises(TypeError):
        cat_grouped >> slice("a")

    # A negated range combined with a plain index is rejected with ValueError.
    with pytest.raises(ValueError):
        cat_grouped >> slice(~f[:2], 1)

    first_rows = cat_grouped >> slice(0)
    assert first_rows.shape[0] == 2

    # Slice using a Series grouped by the frame's own group index.
    group_index = cat_grouped._datar["grouped"].grouper.result_index
    picker = Series([1, 0, 0]).groupby(group_index)
    picked = cat_grouped >> slice(picker)
    assert_iterable_equal(picked.x.obj, [2, 3])
def test_transmute_without_args_returns_grouping_vars():
    """``transmute()`` with no expressions keeps only grouping columns."""
    ungrouped = tibble(x=1, y=2)
    grouped = group_by(ungrouped, f.x)

    # Ungrouped input: one row, no columns.
    result = ungrouped >> transmute()
    assert result.shape == (1, 0)

    # Grouped input: only the grouping column remains, still grouped by it.
    result = grouped >> transmute()
    expected = tibble(x=1).group_by("x")
    assert_tibble_equal(result, expected)
def test_before_after_relocate_individual_cols():
    """``relocate`` with ``_after`` / ``_before`` moves a single column."""
    frame = tibble(x=1, y=2)
    swapped_order = ["y", "x"]

    moved = relocate(frame, f.x, _after=f.y)
    assert moved.columns.tolist() == swapped_order

    moved = relocate(frame, f.y, _before=f.x)
    assert moved.columns.tolist() == swapped_order

    # The input frame still has its original layout.
    untouched = tibble(x=1, y=2)
    assert_tibble_equal(frame, untouched)
def test_empty_mutate_returns_input():
    """``mutate()`` with no expressions returns a frame equal to its input.

    Checked for both a plain tibble and a grouped one; for the grouped
    input the result must still be grouped by the same variable.
    """
    df = tibble(x=1)
    gf = group_by(df, f.x)

    out = mutate(df)
    assert out.equals(df)

    out = mutate(gf)
    assert_tibble_equal(out, gf)
    # Check the *result* of mutate, not the input: the previous assertion on
    # ``gf`` only verified ``group_by`` and could never fail because of
    # ``mutate``.
    assert isinstance(out, TibbleGrouped)
    assert group_vars(out) == ["x"]
def test_handles_data_frame_columns():
    """``mutate`` accepts a tibble as the value of a new column."""
    base = tibble(a=c(1, 2, 3), b=c(2, 3, 4), base_col=c(3, 4, 5))

    # Plain frame: the new column compares equal to the tibble passed in.
    result = mutate(base, new_col=tibble(x=[1, 2, 3]))
    expected = tibble(x=[1, 2, 3])
    assert_tibble_equal(result["new_col"], expected)

    # Grouped frame: the nested tibble is built from column ``a``.
    grouped = group_by(base, f.a)
    result = mutate(grouped, new_col=tibble(x=f.a))
    assert_iterable_equal(result["new_col"].x.obj, [1, 2, 3])

    # Rowwise frame: the nested column compares equal to a rowwise tibble.
    by_row = rowwise(base, f.a)
    result = mutate(by_row, new_col=tibble(x=f.a))
    expected = tibble(x=[1, 2, 3]) >> rowwise()
    assert_tibble_equal(result["new_col"], expected)
def test_input_recycled():
    """``summarise`` output matches tibbles built from the same inputs."""
    # Ungrouped: same result as constructing the tibble directly.
    summarised = tibble() >> summarise(x=1, y=[1, 2, 3], z=1)
    expected = tibble(x=1, y=[1, 2, 3], z=1)
    assert summarised.equals(expected)

    grouped = group_by(tibble(a=[1, 2]), f.a)

    # Grouped: each of the two groups contributes three rows.
    summarised = grouped >> summarise(x=1, y=[1, 2, 3], z=1)
    expected = tibble(
        a=rep([1, 2], each=3),
        x=1,
        y=rep([1, 2, 3], 2),
        z=1,
    ) >> group_by(f.a)
    assert_tibble_equal(summarised, expected)

    # Group-dependent length: seq_len(f.a) yields 1 row for a=1, 2 for a=2.
    summarised = grouped >> summarise(x=seq_len(f.a), y=1)
    expected = tibble(a=c(1, 2, 2), x=c(1, 1, 2), y=1) >> group_by(f.a)
    # Disabled alternative check carried over from the original test:
    # assert summarised.equals(expected)
    assert_tibble_equal(summarised, expected)
def test_summarise_with_multiple_acrosses():
    """Summarise ``mtcars`` by ``cyl`` using two ``across`` calls at once.

    https://stackoverflow.com/questions/63200530/python-pandas-equivalent-to-dplyr-1-0-0-summarizeacross
    """
    by_cyl = mtcars >> group_by(f.cyl)
    # Columns ending in "p" are summed; columns ending in "t" are averaged.
    result = by_cyl >> summarize(
        across(ends_with("p"), sum),
        across(ends_with("t"), mean),
    )

    expected = tibble(
        cyl=[6, 4, 8],
        disp=[1283.2, 1156.5, 4943.4],
        hp=[856, 909, 2929],
        drat=[3.585714, 4.070909, 3.229286],
        wt=[3.117143, 2.285727, 3.999214],
    )
    assert_tibble_equal(result, expected)
def test_no_expressions():
    """``summarise`` with nothing to compute, bare or given an empty tibble."""
    plain = tibble(x=[1, 2], y=[1, 2])
    grouped = group_by(plain, f.x)

    # No arguments, ungrouped: one row and no columns.
    result = summarise(plain)
    assert dim(result) == (1, 0)

    # No arguments, grouped: group keys only, with grouping dropped.
    result = summarise(grouped)
    assert group_vars(result) == []
    keys_only = tibble(x=[1, 2])
    assert_tibble_equal(result, keys_only)

    # An empty tibble as the only argument, ungrouped.
    result = summarise(plain, tibble())
    assert dim(result) == (1, 0)

    # An empty tibble as the only argument, grouped.
    result = summarise(grouped, tibble())
    assert group_vars(result) == []
    keys_only = tibble(x=[1, 2])
    assert result.equals(keys_only)
def test_applied_progressively():
    """Later ``mutate`` expressions see columns set by earlier ones."""
    single = tibble(x=1)

    # z is computed from the y created in the same call.
    result = single >> mutate(y=f['x'] + 1, z=f.y + 1)
    assert_tibble_equal(result, tibble(x=1, y=2, z=3))

    # x is overwritten using the freshly created y.
    result = single >> mutate(y=f.x + 1, x=f.y + 1)
    assert_tibble_equal(result, tibble(x=3, y=2))

    # y picks up the new value of x, not the original one.
    result = single >> mutate(x=2, y=f.x)
    assert_tibble_equal(result, tibble(x=2, y=2))

    # Copying a column directly or via ``+ 0`` gives the same result.
    pair = tibble(x=1, y=2)
    direct = pair >> mutate(x2=f.x, x3=f.x2 + 1)
    via_expr = pair >> mutate(x2=f.x + 0, x3=f.x2 + 1)
    assert_tibble_equal(direct, via_expr)
def test_unnamed_data_frames_are_automatically_unspliced():
    """Positional tibbles passed to ``mutate`` contribute their columns."""
    # A single unnamed tibble adds its column.
    result = tibble(a=1) >> mutate(tibble(b=2))
    expected = tibble(a=1, b=2)
    assert_tibble_equal(result, expected)

    # With two unnamed tibbles sharing a column name, the later value wins.
    result = tibble(a=1) >> mutate(tibble(b=2), tibble(b=3))
    expected = tibble(a=1, b=3)
    assert_tibble_equal(result, expected)

    # Columns from an unnamed tibble are usable by later expressions.
    result = tibble(a=1) >> mutate(tibble(b=2), c=f.b)
    expected = tibble(a=1, b=2, c=2)
    assert_tibble_equal(result, expected)
def test_sort_empty_df():
    """``arrange()`` on an empty tibble returns an equal tibble."""
    empty = tibble()
    arranged = empty >> arrange()
    assert_tibble_equal(arranged, empty)
def test_mutate_none():
    """``mutate(None)`` leaves the frame equal to its input."""
    original = tibble(x=1, y=2)
    result = original >> mutate(None)
    assert_tibble_equal(original, result)
def test_dup_keyword_args():
    """Underscore-prefixed keyword names in ``mutate`` (``_b``, ``__b``, ``b``).

    NOTE(review): another function named ``test_dup_keyword_args`` is defined
    later in this file. A later module-level definition with the same name
    replaces this one, so this body would not be collected by pytest —
    confirm and rename one of them.
    """
    base = tibble(a=1)

    # ``_b`` feeds ``b`` and does not appear in the result.
    result = base >> mutate(_b=f.a + 1, b=f._b * 2)
    assert_tibble_equal(result, tibble(a=1, b=4))

    # With ``b`` given first, both ``b`` and ``_b`` are kept as columns.
    result = base >> mutate(b=f.a + 1, _b=f.b * 2)
    assert_tibble_equal(result, tibble(a=1, b=2, _b=4))

    # Two levels of prefixed names (``__b`` -> ``_b`` -> ``b``) are supported.
    result = base >> mutate(__b=f.a + 1, _b=f.__b * 2, b=f._b / 4.0)
    assert_tibble_equal(result, tibble(a=1, b=1.0))

    # Originally labelled "has to be consecutive".
    # NOTE(review): this case is identical to the previous one — confirm what
    # it was meant to cover.
    result = base >> mutate(__b=f.a + 1, _b=f.__b * 2, b=f._b / 4.0)
    assert_tibble_equal(result, tibble(a=1, b=1.0))

    # Without a final ``b``, ``_b`` is kept and ``__b`` is not.
    result = base >> mutate(__b=f.a + 1, _b=f.__b * 2)
    assert_tibble_equal(result, tibble(a=1, _b=4))

    # A lone ``_b`` is kept as an ordinary column.
    result = base >> mutate(_b=f.a + 1)
    assert_tibble_equal(result, tibble(a=1, _b=2))
def test_mutate_None_preserves_correct_all_vars():
    """After ``x=None`` in ``mutate``, ``cur_data_all()`` no longer has ``x``."""
    # Set x to None and capture the current data in the same mutate call.
    mutated = tibble(x=1, y=2) >> mutate(x=None, vars=cur_data_all())
    captured = mutated >> pull(f.vars)

    # The first captured element holds only the remaining column ``y``.
    expected = tibble(y=2)
    assert_tibble_equal(captured[0], expected)
def test_dup_keyword_args():
    """``summarise`` lets ``b`` use ``_b``; ``_b`` is absent from the result.

    NOTE(review): an earlier function in this file is also named
    ``test_dup_keyword_args``; this later definition replaces it at module
    level, so only this body would be collected by pytest — confirm and
    rename one of them.
    """
    grouped = tibble(g=[1, 1], a=[1.0, 2.0]) >> group_by(f.g)

    # The mean of ``a`` is 1.5, doubled into ``b``.
    result = grouped >> summarise(_b=mean(f.a), b=f._b * 2)
    expected = tibble(g=1, b=3.0)
    assert_tibble_equal(result, expected)