def test_can_nest_multiple_columns():
    """`nest()` accepts several name=columns pairs in a single call.

    The result keeps `x` and gains columns `a` and `b`, whose first cells
    hold the corresponding source columns as data frames.
    """
    source = tibble(x=1, a1=1, a2=2, b1=1, b2=2)
    nested = source >> nest(a=c(f.a1, f.a2), b=c(f.b1, f.b2))

    column_names = nested.columns.tolist()
    assert column_names == ['x', 'a', 'b']

    # Each nested cell should equal the matching slice of the source frame.
    first_a = nested.a.values[0]
    assert_frame_equal(first_a, source[['a1', 'a2']])
    first_b = nested.b.values[0]
    assert_frame_equal(first_b, source[['b1', 'b2']])
def test_set_operations_reconstruct_grouping_metadata():
    """Set operations on a grouped frame rebuild the grouping metadata.

    Mirrors the R testthat case
    "set operations reconstruct grouping metadata (#3587)".
    """
    grouped = tibble(x=seq(1, 4), g=rep([1, 2], each=2)) >> group_by(f.g)
    plain = tibble(x=seq(3, 6), g=rep([2, 3], each=2))

    # --- resulting frames -------------------------------------------------
    diff_out = setdiff(grouped, plain)
    diff_exp = filter(grouped, f.x < 3)
    assert diff_out.equals(diff_exp)

    inter_out = intersect(grouped, plain)
    inter_exp = filter(grouped, f.x >= 3).reset_index(drop=True)
    assert_frame_equal(inter_out, inter_exp)

    union_out = union(grouped, plain)
    union_exp = (
        tibble(x=seq(1, 6), g=rep([1, 2, 3], each=2)) >> group_by(f.g)
    )
    assert union_out.equals(union_exp)
    assert group_vars(union_out) == group_vars(union_exp)

    # --- row indices per group --------------------------------------------
    diff_rows = setdiff(grouped, plain) >> group_rows()
    assert diff_rows == [[0, 1]]

    inter_rows = intersect(grouped, plain) >> group_rows()
    assert inter_rows == [[0, 1]]

    union_rows = union(grouped, plain) >> group_rows()
    assert union_rows == [[0, 1], [2, 3], [4, 5]]
def test_rows_insert(data):
    """`rows_insert()` appends a row with a new key and rejects a duplicate.

    `data` is supplied by a fixture defined outside this block.
    """
    new_row = tibble(a=4, b="z")
    inserted = rows_insert(data, new_row, by="a")
    expected = tibble(
        a=seq(1, 4),
        b=c("a", "b", NA, "z"),
        c=c(0.5, 1.5, 2.5, NA),
    )
    assert_frame_equal(inserted, expected)

    # Key a=3 is already present (see the expected frame above), so the
    # insert must be refused.
    clashing_row = tibble(a=3, b="z")
    with pytest.raises(ValueError, match="insert duplicate"):
        rows_insert(data, clashing_row, by="a")
def test_set_operations_keep_the_ordering_of_the_data():
    """Set operations preserve the row order of their inputs.

    Mirrors the R testthat case
    "set operations keep the ordering of the data (#3839)".
    """

    def rev_df(df):
        # Select the rows of `df` in reverse order (0-based positions).
        return df >> get(rev(seq_len(nrow(df))) - 1)

    left = tibble(x=seq(1, 4), g=rep([1, 2], each=2))
    right = tibble(x=seq(3, 6), g=rep([2, 3], each=2))

    # setdiff: natural order, then with the left side reversed.
    result = setdiff(left, right)
    wanted = filter(left, f.x < 3)
    assert result.equals(wanted)

    result = setdiff(rev_df(left), right)
    wanted = filter(rev_df(left), f.x < 3).reset_index(drop=True)
    assert result.equals(wanted)

    # intersect: natural order, then with the left side reversed.
    result = intersect(left, right)
    wanted = filter(left, f.x >= 3).reset_index(drop=True)
    assert result.equals(wanted)

    result = intersect(rev_df(left), right)
    wanted = filter(rev_df(left), f.x >= 3).reset_index(drop=True)
    assert result.equals(wanted)

    # union: natural order, reversed left side, reversed right side.
    result = union(left, right)
    wanted = tibble(x=seq(1, 6), g=rep([1, 2, 3], each=2))
    assert result.equals(wanted)

    result = union(rev_df(left), right)
    wanted = tibble(x=c(seq(4, 1), [5, 6]), g=rep([2, 1, 3], each=2))
    # This case is compared with assert_frame_equal; the `.equals` form of
    # the check is disabled in this test.
    assert_frame_equal(result, wanted)

    result = union(left, rev_df(right))
    wanted = tibble(x=c(seq(1, 4), [6, 5]), g=rep([1, 2, 3], each=2))
    assert result.equals(wanted)
def test_slice_works_with_grouped_data():
    """`slice()` is applied per group on grouped data.

    Covers positive and negated slice expressions, `_preserve=False`,
    invalid arguments, a grouping with an unused category, and a grouped
    Series used as the index.
    """
    by_cyl = mtcars >> arrange(f.cyl) >> group_by(f.cyl)

    # First two rows of each group.
    sliced = slice(by_cyl, f[:2])
    wanted = filter(by_cyl, row_number() < 3)
    assert_frame_equal(sliced, wanted)

    # Everything except the first two rows of each group.
    sliced = slice(by_cyl, ~f[:2])
    wanted = filter(by_cyl, row_number() >= 3)
    assert_tibble_equal(sliced, wanted)

    by_x = group_by(tibble(x=c(1, 1, 2, 2, 2)), f.x)
    # NOTE: the `_preserve=True` variant (slice(g, 3, _preserve=True),
    # expected keys [1, 2]) is disabled in this test.
    keys = group_keys(slice(by_x, 2, _preserve=False))
    assert keys.x.tolist() == [2]

    # Grouping variable with an unused category (3), kept via _drop=False.
    group_col = Categorical([1, 1, 2], categories=[1, 2, 3])
    gf = tibble(x=f[1:4]) >> group_by(
        g=group_col,
        _drop=False,
    )

    with pytest.raises(TypeError):
        gf >> slice("a")
    with pytest.raises(ValueError):
        gf >> slice(~f[:2], 1)

    first_rows = gf >> slice(0)
    assert first_rows.shape[0] == 2

    # A Series grouped by the frame's own group index supplies the
    # positions to take.
    grouped_index = Series([1, 0, 0]).groupby(
        gf._datar["grouped"].grouper.result_index)
    picked = gf >> slice(grouped_index)
    assert_iterable_equal(picked.x.obj, [2, 3])
def test_unchopping_null_inputs_are_dropped():
    """Rows whose unchopped cells are all NULL do not appear in the output.

    Rows x=1 has NULL in both `y` and `z` and is absent from the expected
    frame; rows with a single NULL are kept and padded with NA.
    """
    chopped = tibble(
        x=f[1:5],
        y=[NULL, [1, 2], 4, NULL],
        z=[NULL, [1, 2], NULL, 5],
    )
    unchopped = chopped >> unchop(c(f.y, f.z), dtypes=float)

    expected = tibble(
        x=[2, 2, 3, 4],
        y=[1, 2, 4, NA],
        z=[1, 2, NA, 5],
        _dtypes=float,
    )
    assert_frame_equal(unchopped, expected)
def test_can_pack_multiple_columns():
    """`pack()` accepts several name=columns pairs in a single call.

    The packed frame has columns `a` and `b`; pulling either gives back the
    matching source columns as a data frame.
    """
    flat = tibble(a1=1, a2=2, b1=1, b2=2)
    packed = flat >> pack(a=c(f.a1, f.a2), b=c(f.b1, f.b2))

    assert_iterable_equal(colnames(packed), ['a', 'b'])

    packed_a = packed >> pull(f.a)
    assert_frame_equal(packed_a, flat[['a1', 'a2']])
    packed_b = packed >> pull(f.b)
    assert_frame_equal(packed_b, flat[['b1', 'b2']])
def test_scale():
    """`scale()` centers/scales vectors and frames and validates its inputs.

    NOTE: the "scaled:center" / "scaled:scale" attrs of the results are not
    asserted; those checks are disabled in this test.
    """
    # Default centering and scaling (disabled attrs check: center [2],
    # scale [1]).
    scaled = [1, 2, 3] >> scale()
    assert_iterable_equal(scaled, [-1.0, 0.0, 1.0])

    # Explicit center (disabled attrs check: center [1],
    # scale approx. [1.581139]).
    scaled = scale([1, 2, 3], center=1)
    assert_iterable_equal(
        scaled,
        [0.0, 0.6324555, 1.2649111],
        approx=True,
    )

    # Explicit scale (disabled attrs check: center [2], scale [1]).
    scaled = [1, 2, 3] >> scale(scale=1)
    assert_iterable_equal(scaled, [-1.0, 0.0, 1.0])

    # center / scale of the wrong length are rejected.
    with pytest.raises(ValueError):
        scale([1, 2, 3], center=[1, 2])
    with pytest.raises(ValueError):
        [1, 2, 3] >> scale(scale=[1, 2])

    # With centering and scaling both off, a frame comes back unchanged.
    numeric_df = tibble(x=[1, 2, 3], y=[4, 5, 6])
    assert_frame_equal(scale(numeric_df, False, False), numeric_df)

    # A string column cannot be scaled.
    text_df = tibble(x=["a", "b"])
    with pytest.raises(ValueError):
        scale(text_df)
def test_named_dfs_are_not_flattened():
    """A data frame passed under a name stays a single frame-valued column.

    Checked for both `expand(..., x=nesting(...))` and `crossing(x=df)`:
    pulling `x` gives back the original frame.
    """
    base = tibble(x=f[1:3], y=f[1:3])

    via_expand = expand(base, x=nesting(f.x, f.y)) >> pull(f.x)
    assert_frame_equal(via_expand, base)

    via_crossing = crossing(x=base) >> pull(f.x)
    assert_frame_equal(via_crossing, base)
def test_unchop_can_specify_dtypes():
    """`unchop()` accepts a per-column `dtypes` mapping."""
    chopped = tibble(x=1, y=[[1, 2]])
    # `z` is named here but is not a column of the input.
    wanted_dtypes = {'y': int, 'z': int}

    # No extra column is added for `z`, i.e. the expectation is NOT
    # tibble(x=[1,1], y=[1,2], z=[NA,NA]).
    expected = tibble(x=[1, 1], y=[1, 2])

    unchopped = unchop(chopped, f.y, dtypes=wanted_dtypes)
    assert_frame_equal(unchopped, expected)
def test_filter_slice_retain_zero_group_labels(df):
    """`filter()` and `slice()` keep labels of groups left with zero rows.

    `df` is supplied by a fixture defined outside this block. Counts are
    compared after `ungroup()` because count loses `_drop=False`.
    """
    levels = [1, 2, 3]

    filtered_counts = df >> filter(f.f == 1) >> count() >> ungroup()
    filtered_expect = tibble(
        f=factor([1, 2, 3], levels=levels),
        n=[2, 0, 0],
    )
    assert_frame_equal(filtered_counts, filtered_expect)

    sliced_counts = df >> slice(1) >> count() >> ungroup()
    sliced_expect = tibble(
        f=factor([1, 2, 3], levels=levels),
        n=[1, 1, 0],
    )
    assert_frame_equal(sliced_counts, sliced_expect)
def test_handles_scalar_results():
    """`filter()` accepts a condition that reduces to a single value.

    `min(f.mpg) > 0` is used as the condition, on both the ungrouped and
    the grouped `mtcars`.
    """
    ungrouped = mtcars >> filter(min(f.mpg) > 0)
    assert ungrouped.equals(mtcars)

    grouped_filtered = (
        mtcars
        >> group_by(f.cyl)
        >> filter(min(f.mpg) > 0)
        >> arrange(f.cyl, f.mpg)
    )

    # See TibbleGrouped's Known issues
    grouped_reference = mtcars >> group_by(f.cyl) >> arrange(f.cyl, f.mpg)
    assert_frame_equal(grouped_filtered, grouped_reference)
def test_slice_silently_ignores_out_of_range_values():
    """An out-of-range position passed to `slice()` is dropped, not an error.

    Slicing with `c(2, 100)` must give the same result as slicing with `2`,
    for both the plain and the grouped `mtcars`.
    """
    # Ungrouped frame.
    with_extra = slice(mtcars, c(2, 100))
    without_extra = slice(mtcars, 2)
    assert_frame_equal(with_extra, without_extra)

    # Grouped frame.
    by_cyl = group_by(mtcars, f.cyl)
    with_extra = slice(by_cyl, c(2, 100))
    without_extra = slice(by_cyl, 2)
    assert_frame_equal(with_extra, without_extra)
def test_unchop_empty_list():
    """`unchop()` copes with empty inputs, including an empty nested frame."""
    empty = tibble(x=[], y=[])
    y_values = unchop(empty, f.y).y.to_list()
    assert y_values == []

    empty_nested = tibble(x=[], y=tibble(z=[]))
    # support nested df?
    nested_y = unchop(empty_nested, f['y$z']) >> pull(f.y)
    assert_frame_equal(nested_y >> drop_index(), tibble(z=[]))
def test_slice_handles_numeric_input():
    """`slice(0)` picks the first row, per group when the data is grouped."""
    by_cyl = mtcars >> arrange(f.cyl) >> group_by(f.cyl)

    first_per_group = by_cyl >> slice(0)
    assert nrow(first_per_group) == 3
    same_via_filter = by_cyl >> filter(row_number() == 1)
    assert_frame_equal(first_per_group, same_via_filter)

    # Ungrouped: position 0 corresponds to row_number() == 1.
    first_row = mtcars >> slice(0) >> as_tibble()
    first_via_filter = mtcars >> filter(row_number() == 1)
    assert_frame_equal(first_row, first_via_filter)
def test_unchop_optionally_keep_empty_rows():
    """With `keep_empty=True`, a row whose cell is NULL survives `unchop()`.

    The NULL cell for x=1 shows up as None in an object-dtype `y` column.
    """
    chopped = tibble(
        x=[1, 2],
        y=[NULL, [1, 2]],
        # A frame-valued column, z = [tibble(x=[]), tibble(x=[1,2])], is
        # left out: unchopping y means x, z will be keys and they have to
        # be hashable.
    )
    unchopped = chopped >> unchop(f.y, keep_empty=True)

    expected = tibble(x=[1, 2, 2], y=[None, 1, 2], _dtypes={'y': object})
    assert_frame_equal(unchopped, expected)