def test_slice_silently_ignores_out_of_range_values():
    """Slicing at c(2, 100) must match slicing at 2, plain and grouped."""
    with_overflow = slice(mtcars, c(2, 100))
    in_range_only = slice(mtcars, 2)
    assert_frame_equal(with_overflow, in_range_only)

    # Same comparison on a frame grouped by cyl.
    by_cyl = group_by(mtcars, f.cyl)
    with_overflow = slice(by_cyl, c(2, 100))
    in_range_only = slice(by_cyl, 2)
    assert_frame_equal(with_overflow, in_range_only)
def test_slice_handles_numeric_input():
    """slice(0) must agree with filter(row_number() == 1)."""
    by_cyl = mtcars >> arrange(f.cyl) >> group_by(f.cyl)
    first_rows = by_cyl >> slice(0)
    assert nrow(first_rows) == 3
    expected = by_cyl >> filter(row_number() == 1)
    assert_frame_equal(first_rows, expected)

    # Ungrouped variant of the same comparison.
    sliced = mtcars >> slice(0) >> as_tibble()
    filtered = mtcars >> filter(row_number() == 1)
    assert_frame_equal(sliced, filtered)
def test_slice_handles_na():
    """Check row counts when NA appears among the slice indices."""
    frame = tibble(x=[1, 2, 3])
    assert nrow(slice(frame, NA)) == 0
    assert nrow(slice(frame, c(1, NA))) == 1
    # The piped expression is evaluated outside the assert statement.
    n_kept = frame >> slice(c(~c(1), NA)) >> nrow()
    assert n_kept == 2

    # Grouped frame: four rows split into two groups by g.
    grouped = tibble(x=[1, 2, 3, 4], g=rep([1, 2], 2)) >> group_by(f.g)
    assert nrow(slice(grouped, c(1, NA))) == 2
    n_kept = grouped >> slice(c(~c(1), NA)) >> nrow()
    assert n_kept == 2
def test_slice_handles_df_columns():
    """slice() on a tibble whose y and z columns are themselves tibbles."""
    nested = tibble(
        x=[1, 2],
        y=tibble(a=[1, 2], b=[3, 4]),
        z=tibble(A=[1, 2], B=[3, 4]),
    )
    first_row = slice(nested, 0)
    assert first_row.equals(nested.iloc[[0], :])

    grouped = group_by(nested, f.x)
    assert slice(grouped, 0).equals(grouped)

    # TODO: group_by a stacked df is not supported yet
    grouped = group_by(nested, f["y$a"], f["y$b"])
    assert slice(grouped, 0).equals(grouped)

    grouped = group_by(nested, f["z$A"], f["z$B"])
    assert slice(grouped, 0).equals(grouped)
def test_slice_gives_correct_rows():
    """Check which labelled rows come back for given slice positions."""
    ungrouped = tibble(value=[f"row{k}" for k in range(1, 11)])

    picked = slice(ungrouped, c(0, 1, 2))
    assert picked.value.tolist() == ["row1", "row2", "row3"]

    picked = slice(ungrouped, c(3, 5, 8))
    assert picked.value.tolist() == ["row4", "row6", "row9"]

    # Ten labelled rows with a group column built by rep([1, 2], each=5).
    grouped = tibble(
        value=[f"row{k}" for k in range(1, 11)],
        group=rep([1, 2], each=5),
    ) >> group_by(f.group)

    picked = slice(grouped, f[:3])
    expected = [f"row{k}" for k in [1, 2, 3, 6, 7, 8]]
    assert picked.value.obj.tolist() == expected

    picked = slice(grouped, c(1, 3))
    expected = [f"row{k}" for k in [2, 4, 7, 9]]
    assert picked.value.obj.tolist() == expected
def test_slice_family_on_rowwise_df():
    """Run slice and the slice_* verbs on a rowwise tibble.

    Apart from the initial ``slice_head(prop=0.1)`` check (zero rows), each
    result is expected to remain a ``TibbleRowwise`` with five rows.
    """
    rowwise_df = tibble(x=f[1:6]) >> rowwise()

    result = rowwise_df >> slice_head(prop=0.1)
    assert result.shape[0] == 0

    result = rowwise_df >> slice([0, 1, 2])
    assert isinstance(result, TibbleRowwise)
    assert nrow(result) == 5

    result = rowwise_df >> slice_head(n=3)
    assert isinstance(result, TibbleRowwise)
    assert nrow(result) == 5

    result = rowwise_df >> slice_tail(n=3)
    assert isinstance(result, TibbleRowwise)
    assert nrow(result) == 5

    result = rowwise_df >> slice_min(f.x, n=3)
    assert isinstance(result, TibbleRowwise)
    assert nrow(result) == 5

    result = rowwise_df >> slice_max(f.x, n=3)
    assert isinstance(result, TibbleRowwise)
    assert nrow(result) == 5

    result = rowwise_df >> slice_sample(n=3)
    assert isinstance(result, TibbleRowwise)
    assert nrow(result) == 5
def test_rowwise_preserved_by_major_verbs():
    """Each verb below should return a TibbleRowwise with its group var kept.

    ``summarise`` is the exception checked at the end: it is expected to
    return a ``TibbleGrouped``.
    """
    rowwise_tbl = rowwise(tibble(x=range(1, 6), y=range(5, 0, -1)), f.x)

    result = arrange(rowwise_tbl, f.y)
    assert isinstance(result, TibbleRowwise)
    assert group_vars(result) == ["x"]

    result = filter(rowwise_tbl, f.x < 3)
    assert isinstance(result, TibbleRowwise)
    assert group_vars(result) == ["x"]

    result = mutate(rowwise_tbl, x=f.x + 1)
    assert isinstance(result, TibbleRowwise)
    assert group_vars(result) == ["x"]

    # Renaming the grouping column is expected to rename the group var too.
    result = rename(rowwise_tbl, X=f.x)
    assert isinstance(result, TibbleRowwise)
    assert group_vars(result) == ["X"]

    result = select(rowwise_tbl, "x")
    assert isinstance(result, TibbleRowwise)
    assert group_vars(result) == ["x"]

    result = slice(rowwise_tbl, c(0, 0))
    assert isinstance(result, TibbleRowwise)
    assert group_vars(result) == ["x"]

    # Except for summarise
    result = summarise(rowwise_tbl, z=mean(f.x, f.y))
    assert isinstance(result, TibbleGrouped)
    assert group_vars(result) == ["x"]
def test_slice_works_with_grouped_data():
    """Exercise slice() on grouped frames, including error cases."""
    by_cyl = mtcars >> arrange(f.cyl) >> group_by(f.cyl)

    sliced = slice(by_cyl, f[:2])
    expected = filter(by_cyl, row_number() < 3)
    assert_frame_equal(sliced, expected)

    sliced = slice(by_cyl, ~f[:2])
    expected = filter(by_cyl, row_number() >= 3)
    assert_tibble_equal(sliced, expected)

    by_x = group_by(tibble(x=c(1, 1, 2, 2, 2)), f.x)
    # out = group_keys(slice(g, 3, _preserve=True))
    # assert out.x.tolist() == [1, 2]
    keys = group_keys(slice(by_x, 2, _preserve=False))
    assert keys.x.tolist() == [2]

    # Grouping key is a categorical declaring a category (3) with no rows;
    # _drop=False is passed to group_by.
    by_factor = tibble(x=f[1:4]) >> group_by(
        g=Categorical([1, 1, 2], categories=[1, 2, 3]),
        _drop=False,
    )

    with pytest.raises(TypeError):
        by_factor >> slice("a")

    with pytest.raises(ValueError):
        by_factor >> slice(~f[:2], 1)

    picked = by_factor >> slice(0)
    assert picked.shape[0] == 2

    # Pass a grouped Series of positions, keyed by the frame's own group index.
    group_index = by_factor._datar["grouped"].grouper.result_index
    grouped_positions = Series([1, 0, 0]).groupby(group_index)
    picked = by_factor >> slice(grouped_positions)
    assert_iterable_equal(picked.x.obj, [2, 3])
def test_rename_errors_with_invalid_grouped_df():
    """Check the errors raised by slice() and _n_from_prop() on bad input.

    NOTE(review): despite the name, nothing here calls ``rename``; the body
    only exercises ``slice`` and ``_n_from_prop``.
    """
    frame = tibble(x=[1, 2, 3])

    # Incompatible type
    with pytest.raises(TypeError):
        slice(frame, object())
    with pytest.raises(TypeError):
        slice(frame, {"a": 1})

    # Mix of positive and negative integers
    with pytest.raises(ValueError):
        mtcars >> slice(c(~c(1), 2))
    with pytest.raises(ValueError):
        mtcars >> slice(c(f[2:4], ~c(1)))

    # n and prop are carefully validated
    # with pytest.raises(ValueError):
    #     _n_from_prop(10, n=1, prop=1)
    with pytest.raises(TypeError):
        _n_from_prop(10, n="a")
    with pytest.raises(TypeError):
        _n_from_prop(10, prop="a")
    with pytest.raises(ValueError):
        _n_from_prop(10, n=-1)
    with pytest.raises(ValueError):
        _n_from_prop(10, prop=-1)
    with pytest.raises(TypeError):
        _n_from_prop(10, n=n())
    with pytest.raises(TypeError):
        _n_from_prop(10, prop=n())
def test_preserve_prop_not_support(caplog):
    """_preserve=True should be logged; prop= on grouped data should raise."""
    grouped = tibble(x=f[:5]) >> group_by(f.x)

    # The captured log text is expected to mention "_preserve".
    grouped >> slice(f.x == 2, _preserve=True)
    assert "_preserve" in caplog.text

    with pytest.raises(ValueError):
        grouped >> slice_min(f.x, prop=0.5)

    with pytest.raises(ValueError):
        grouped >> slice_max(f.x, prop=0.5)

    with pytest.raises(ValueError):
        grouped >> slice_sample(f.x, prop=0.5)
def test_handles_df_cols():
    """filter() on plain and nested columns must match slice(0)."""
    nested = tibble(x=[1, 2], z=tibble(A=[1, 2], B=[3, 4]))
    first_row = nested >> slice(0)

    filtered = nested >> filter(f.x == 1)
    assert filtered.equals(first_row)

    filtered = nested >> filter(f["z$A"] == 1)
    assert filtered.equals(first_row)

    by_x = nested >> group_by(f.x)
    filtered = by_x >> filter(f["z$A"] == 1)
    assert filtered.equals(first_row)

    # NOTE(review): this repeats the previous check verbatim; confirm
    # whether a different column was intended.
    filtered = by_x >> filter(f["z$A"] == 1)
    assert filtered.equals(first_row)
def test_mixed_rows():
    """Check row order when several index vectors are combined with c()."""
    frame = tibble(x=range(5))

    # order kept
    # 0   1   2   3   4
    #        -3      -1
    #             3
    picked = slice(frame, c(-c(3, 1), 3))
    assert picked.x.tolist() == [2, 4, 3]

    # 0   1   2   3   4
    #            -2  -1
    #             3
    picked = slice(frame, c(-f[1:3], 3))
    assert picked.x.tolist() == [4, 3, 3]

    # 0   1   2   3   4
    # 0       2
    #                -1
    picked = slice(frame, c(~c(0, 2), ~c(-1)))
    assert picked.x.tolist() == [1, 3]

    picked = frame >> slice(c(~f[3:], ~c(1)))
    assert picked.x.tolist() == [0, 2]
def test_rowwise():
    """Rowwise filter with a locally registered function keeps one row."""

    @register_func(None)
    def grepl(a, b):
        # Pairwise substring test: is each element of a inside the one in b?
        hits = [
            needle in haystack
            for needle, haystack in zip(a.obj, b.obj)
        ]
        return Series(hits, index=a.obj.index)

    frame = tibble(
        First=c("string1", "string2"),
        Second=c("Sentence with string1", "something"),
    )
    matched = frame >> rowwise() >> filter(grepl(f.First, f.Second))
    assert nrow(matched) == 1

    # The surviving row should be the first row of the input.
    first_row = frame >> slice(0)
    ungrouped = matched >> ungroup()
    assert first_row.equals(ungrouped)
def test_slice_accepts_star_args():
    """Separate positional indices must match one combined index vector."""
    from_args = slice(mtcars, 1, 2)
    from_list = slice(mtcars, [1, 2])
    assert from_args.equals(from_list)

    # n() - 1 as a separate argument vs. an explicit nrow()-based position.
    from_args = slice(mtcars, 0, n() - 1)
    from_vector = slice(mtcars, c(0, nrow(mtcars) - 1))
    assert from_args.equals(from_vector)

    by_cyl = mtcars >> group_by(f.cyl)
    from_args = slice(by_cyl, 0, n() - 1)
    from_vector = slice(by_cyl, c(0, n() - 1))
    assert from_args.equals(from_vector)
def test_slice_handles_logical_NA():
    """Slicing with a bare NA is expected to return zero rows."""
    frame = tibble(x=[1, 2, 3])
    assert nrow(slice(frame, NA)) == 0
def test_slice_works_with_negative_indices():
    """slice(~f[:2]) must equal mtcars.tail(-2)."""
    sliced = slice(mtcars, ~f[:2])
    expected = mtcars.tail(-2)
    assert_frame_equal(sliced, expected)
def test_slice_handles_empty_df():
    """Slicing an empty tibble gives zero rows and keeps column x."""
    empty = tibble(x=[])
    sliced = empty >> slice(f[:3])
    assert nrow(sliced) == 0
    assert names(sliced) == ["x"]
def test_wrong_indices():
    """A string index passed to slice() is expected to raise TypeError."""
    frame = tibble(x=f[:3])
    with pytest.raises(TypeError):
        frame >> slice("a")
def test_slice_works_fine_if_n_gt_nrow():
    """slice(7) must agree with filter(row_number() == 8) per group."""
    by_cyl = mtcars >> arrange(f.cyl) >> group_by(f.cyl)
    via_slice = by_cyl >> slice(7)
    via_filter = by_cyl >> filter(row_number() == 8)
    assert via_slice.equals(via_filter)
def test_slice_strips_grouped_indices():
    """After a grouped slice + mutate, group rows should be [[0], [1], [2]]."""
    result = (
        mtcars
        >> group_by(f.cyl)
        >> slice(1)
        >> mutate(mpgplus=f.mpg + 1)
    )
    assert nrow(result) == 3
    assert group_rows(result) == [[0], [1], [2]]
def test_empty_slice_returns_input():
    """slice() with no indices must return a frame equal to its input."""
    frame = tibble(x=[1, 2, 3])
    assert slice(frame).equals(frame)
def test_slice_works_with_0col_dfs():
    """Slicing after deselecting the only column still reports one row."""
    n_rows = tibble(a=[1, 2, 3]) >> select(~f.a) >> slice(1) >> nrow()
    assert n_rows == 1
def test_slice_correctly_computes_positive_indices_from_negative_indices():
    """Excluding f[9:30] from a ten-row tibble should leave y = 1..9."""
    ten_rows = tibble(y=range(1, 11))
    # negative in dplyr meaning exclusive
    assert slice(ten_rows, ~f[9:30]).equals(tibble(y=range(1, 10)))
def test_slice_does_not_evaluate_the_expression_in_empty_groups():
    """Group by cyl, keep only cyl == 6, then slice f[:2]: two rows expected."""
    result = (
        mtcars
        >> group_by(f.cyl)
        >> filter(f.cyl == 6)
        >> slice(f[:2])
    )
    assert nrow(result) == 2