def test_slice_any_checks_for_constant_n_and_prop():
    """Each slice_* helper rejects a column expression as ``n`` or ``prop``.

    Every verb is called directly on the frame (not piped), first with
    ``n=f.x`` and then with ``prop=f.x``; each call must raise ``TypeError``.
    """
    df = tibble(x=range(1, 11))

    # NOTE(review): the original annotated the ``slice_head(df, n=f.x)`` case
    # with "ok with n()" -- presumably ``n()`` is accepted where a column
    # reference is not; confirm against the implementation.

    # (verb, whether ``f.x`` is also passed positionally before n/prop)
    verbs = (
        (slice_head, False),
        (slice_tail, False),
        (slice_min, True),
        (slice_max, True),
        (slice_sample, False),
    )
    for verb, takes_order_by in verbs:
        for size_arg in ("n", "prop"):
            # Build fresh expressions for every call, exactly as separately
            # written calls would.
            positional = (f.x,) if takes_order_by else ()
            with pytest.raises(TypeError):
                verb(df, *positional, **{size_arg: f.x})
def test_arguments_to_sample_are_passed_along():
    """``weight_by`` and ``replace`` reach the sampler used by slice_sample.

    Only the first row carries a non-zero weight, so every draw must pick
    the row whose ``x`` is 1, including both draws when sampling with
    replacement.
    """
    weights = c(1, rep(0, 99))
    df = tibble(x=range(1, 101), wt=weights)

    # Single weighted draw.
    picked = (df >> slice_sample(n=1, weight_by=f.wt)).x.tolist()
    assert picked == [1]

    # Two weighted draws with replacement.
    picked = (
        df >> slice_sample(n=2, weight_by=f.wt, replace=True)
    ).x.tolist()
    assert picked == [1, 1]
def test_slice_family_on_rowwise_df():
    """Exercise the whole slice family on a rowwise frame.

    ``slice_head(prop=0.1)`` is expected to return no rows, while every
    other call below is expected to return a ``TibbleRowwise`` with 5 rows.
    """

    def _expect_rowwise_with_five_rows(result):
        # Shared expectation for every n-based / index-based slice below.
        assert isinstance(result, TibbleRowwise)
        assert nrow(result) == 5

    rowwise_df = tibble(x=f[1:6]) >> rowwise()

    # Proportion-based head: the result must be empty.
    empty = rowwise_df >> slice_head(prop=0.1)
    assert empty.shape[0] == 0

    _expect_rowwise_with_five_rows(rowwise_df >> slice([0, 1, 2]))
    _expect_rowwise_with_five_rows(rowwise_df >> slice_head(n=3))
    _expect_rowwise_with_five_rows(rowwise_df >> slice_tail(n=3))
    _expect_rowwise_with_five_rows(rowwise_df >> slice_min(f.x, n=3))
    _expect_rowwise_with_five_rows(rowwise_df >> slice_max(f.x, n=3))
    _expect_rowwise_with_five_rows(rowwise_df >> slice_sample(n=3))
def test_slicex_on_grouped_data():
    """slice_min / slice_max / slice_sample on a frame grouped by ``g``.

    With default arguments each verb is expected to yield one row per
    group (two groups in total).
    """
    grouped = tibble(g=rep([1, 2], each=3), x=seq(1, 6)) >> group_by(f.g)

    # Smallest x within each group.
    smallest = grouped >> slice_min(f.x)
    expected_min = tibble(g=[1, 2], x=[1, 4])
    assert smallest.equals(expected_min)

    # Largest x within each group.
    largest = grouped >> slice_max(f.x)
    expected_max = tibble(g=[1, 2], x=[3, 6])
    assert largest.equals(expected_max)

    # The sampled rows are random, so only the shape is checked.
    sampled = grouped >> slice_sample()
    assert dim(sampled) == (2, 2)
def test_functions_silently_truncate_results():
    """Asking for 6 rows of a 5-row frame yields 5 rows for every helper."""
    df = tibble(x=range(1, 6))

    row_count = df >> slice_head(n=6) >> nrow()
    assert row_count == 5

    row_count = df >> slice_tail(n=6) >> nrow()
    assert row_count == 5

    row_count = df >> slice_sample(n=6) >> nrow()
    assert row_count == 5

    row_count = df >> slice_min(f.x, n=6) >> nrow()
    assert row_count == 5

    row_count = df >> slice_max(f.x, n=6) >> nrow()
    assert row_count == 5
def test_preserve_prop_not_support(caplog):
    """Unsupported options on grouped data are logged or rejected.

    Passing ``_preserve=True`` to ``slice`` must leave a log record that
    mentions ``_preserve``; passing ``prop`` to slice_min / slice_max /
    slice_sample on the grouped frame must raise ``ValueError``.
    """
    grouped = tibble(x=f[:5]) >> group_by(f.x)

    # Only the captured log output is inspected; the result is discarded.
    grouped >> slice(f.x == 2, _preserve=True)
    assert "_preserve" in caplog.text

    # NOTE(review): slice_sample is handed ``f.x`` positionally here, unlike
    # its keyword-only calls elsewhere in this file -- confirm which check
    # actually produces the ValueError for it.
    for verb in (slice_min, slice_max, slice_sample):
        with pytest.raises(ValueError):
            grouped >> verb(f.x, prop=0.5)
def test_proportion_computed_correctly():
    """``prop=0.11`` of a 10-row frame selects exactly one row everywhere."""
    df = tibble(x=range(1, 11))

    # Position-based and random helpers.
    row_count = df >> slice_head(prop=0.11) >> nrow()
    assert row_count == 1

    row_count = df >> slice_tail(prop=0.11) >> nrow()
    assert row_count == 1

    row_count = df >> slice_sample(prop=0.11) >> nrow()
    assert row_count == 1

    # Order-based helpers with their default tie handling.
    row_count = df >> slice_min(f.x, prop=0.11) >> nrow()
    assert row_count == 1

    row_count = df >> slice_max(f.x, prop=0.11) >> nrow()
    assert row_count == 1

    # Order-based helpers with ties explicitly disabled.
    row_count = df >> slice_max(f.x, prop=0.11, with_ties=False) >> nrow()
    assert row_count == 1

    row_count = df >> slice_min(f.x, prop=0.11, with_ties=False) >> nrow()
    assert row_count == 1
def test_slice_sample_n_defaults_to_1():
    """``slice_sample(n=None)`` on an ungrouped frame returns a single row."""
    frame = tibble(g=rep([1, 2], each=3), x=seq(1, 6))

    sampled = frame >> slice_sample(n=None)

    # One row, both columns kept.
    assert dim(sampled) == (1, 2)
def test_slice_sample_dose_not_error_on_0rows():
    """Weighted, proportion-based sampling of an empty frame stays empty."""
    empty = tibble(dummy=[], weight=[])

    # Called directly (not piped), with the weight column as an expression.
    sampled = slice_sample(empty, prop=0.5, weight_by=f.weight)

    assert nrow(sampled) == 0