def test_pull_nest_df_col():
    """Pull a nested data-frame column by position.

    With ``to="list"`` the result must compare equal to ``[[2]]``; with the
    default target it must equal the nested tibble ``tibble(a=2)``.
    """
    frame = tibble(x=1, y=tibble(a=2))

    as_list = pull(frame, 1, to="list")
    assert as_list == [[2]]

    as_frame = pull(frame, 1)
    expected = tibble(a=2)
    assert_tibble_equal(as_frame, expected)
def test_add_tally_can_be_given_a_weighting_variable():
    """add_tally() accepts a column or an expression as ``wt``.

    For groups ``a == 1`` (weights 1, 1) and ``a == 2`` (weights 2, 3, 4)
    the ``n`` column is expected to be 2 / 9 with ``wt=f.w`` and 4 / 12
    with ``wt=f.w + 1``, repeated on every row of the group.
    """
    data = tibble(a=c(1, 1, 2, 2, 2), w=c(1, 1, 2, 3, 4))

    # Plain column as the weight.
    tallied = data >> group_by(f.a) >> add_tally(wt=f.w)
    counts = tallied >> pull(f.n, to="list")
    assert counts == [2, 2, 9, 9, 9]

    # Expression as the weight.
    tallied = data >> group_by(f.a) >> add_tally(wt=f.w + 1)
    counts = tallied >> pull(f.n, to="list")
    assert counts == [4, 4, 12, 12, 12]
def test_pull_a_flat_dict():
    """pull() with a name column gives a mapping; a bad ``name`` length raises.

    Pulling ``y`` with ``x`` as the second argument must compare equal to
    ``{1: 3, 2: 4}``. Passing three names for a two-row column with
    ``to="dict"`` must raise ``ValueError``.
    """
    data = tibble(x=[1, 2], y=[3, 4])

    mapping = data >> pull(f.y, f.x)
    expected = {1: 3, 2: 4}
    assert mapping == expected

    with pytest.raises(ValueError):
        # length mismatches
        data >> pull(f.y, name=[3, 4, 5], to="dict")
def test_pull_series_when_to_equals_series():
    """pull(..., to="series") on a frame whose only column is ``"x$a"``.

    Pulling ``f.x`` must give a length-1 result holding ``[1]``; supplying
    ``name="a"`` must set the result's ``name`` attribute.
    """
    columns = {"x$a": 1}
    data = tibble(**columns)

    pulled = data >> pull(f.x, to="series")
    assert len(pulled) == 1
    assert pulled.values.tolist() == [1]

    # with name
    named = data >> pull(f.x, to="series", name="a")
    assert named.name == "a"
def test_cur_group_rows():
    """cur_group_rows() inside summarise(), grouped and ungrouped.

    With sorted groups on ``x`` (values "b", "a", "b") the expected row
    indices are ``[1]`` and ``[0, 2]``; without grouping, all rows
    ``[0, 1, 2]`` are expected in a single entry.
    """
    data = tibble(x=c("b", "a", "b"), y=[1, 2, 3])
    grouped = data >> group_by(f.x, _sort=True)

    # grouped frame
    per_group = grouped >> summarise(x=cur_group_rows()) >> pull()
    assert per_group.values.tolist() == [[1], [0, 2]]

    # data frame
    whole = data >> summarise(x=cur_group_rows()) >> pull()
    assert whole.values.tolist() == [[0, 1, 2]]
def test_cur_group():
    """cur_group() inside summarise(), ungrouped and grouped by ``g``.

    Ungrouped, the single pulled value must match ``np.nan`` under
    ``assert_iterable_equal``. Grouped, the single pulled element must
    equal ``tibble(g=1)``.
    """
    data = tibble(g=1, x=1)
    grouped = data >> group_by(f.g)

    # ungrouped
    keys = data >> summarise(key=cur_group()) >> pull(f.key)
    assert len(keys) == 1
    assert_iterable_equal(keys, [np.nan])

    # grouped
    keys = grouped >> summarise(key=cur_group()) >> pull(f.key, to="list")
    assert len(keys) == 1
    first_key = keys[0]
    assert first_key.equals(tibble(g=1))
def test_pull_series_with_name():
    """pull() to a frame with a custom name, and to an array.

    ``to="frame", name="a"`` must give a 1x1 frame whose only column is
    ``"a"``; two names for one column must raise ``ValueError``;
    ``to="array"`` must return a ``numpy.ndarray``.
    """
    data = tibble(x=1)

    as_frame = data >> pull(f.x, to="frame", name="a")
    assert dim(as_frame) == (1, 1)
    assert as_frame.columns.tolist() == ["a"]

    with pytest.raises(ValueError):
        data >> pull(f.x, to="frame", name=["a", "b"])

    # pull array
    as_array = data >> pull(f.x, to="array")
    assert isinstance(as_array, np.ndarray)
def test_pull_df():
    """pull(..., to="series") on the two columns ``"x$a"`` and ``"x$b"``.

    The result must have length 2 and be indexable by ``"a"`` / ``"b"``.
    One name for two columns must raise ``ValueError``; two names
    (``"c"``, ``"d"``) must replace the keys.
    """
    data = tibble(**{"x$a": 1, "x$b": 2})

    pulled = data >> pull(f.x, to="series")
    assert len(pulled) == 2
    for key, expected in (("a", [1]), ("b", [2])):
        assert pulled[key].values.tolist() == expected

    with pytest.raises(ValueError):
        data >> pull(f.x, to="series", name=["a"])

    renamed = data >> pull(f.x, to="series", name=["c", "d"])
    assert len(renamed) == 2
    for key, expected in (("c", [1]), ("d", [2])):
        assert renamed[key].values.tolist() == expected
def test_n_distinct_handles_in_na_rm():
    """n_distinct() with ``na_rm`` given as a literal, a variable, an expression.

    The column is built from 1, 2, 3, 4 and NA. The expected count is 4
    whenever ``na_rm`` is truthy and 5 whenever it is falsy.
    """
    data = tibble(x=c([1, 2, 3, 4], NA))
    yes = True
    no = False

    # Literal flags.
    summary = data >> summarise(n=n_distinct(f.x, na_rm=True))
    counts = summary >> pull(to="list")
    assert counts == [4]

    summary = data >> summarise(n=n_distinct(f.x, na_rm=False))
    counts = summary >> pull(to="list")
    assert counts == [5]

    # Flags held in local variables.
    summary = data >> summarise(n=n_distinct(f.x, na_rm=yes))
    counts = summary >> pull(to="list")
    assert counts == [4]

    summary = data >> summarise(n=n_distinct(f.x, na_rm=no))
    counts = summary >> pull(to="list")
    assert counts == [5]

    # Flag given as a boolean expression.
    summary = data >> summarise(n=n_distinct(f.x, na_rm=True or True))
    counts = summary >> pull(to="list")
    assert counts == [4]
def test_cur_data_all():
    """Compare cur_data() and cur_data_all() inside summarise().

    Ungrouped, both must give back a frame equal to the input. Grouped by
    ``x`` (sorted), the expected flattened values show cur_data() without
    the grouping column and cur_data_all() with it.
    """
    data = tibble(x=c("b", "a", "b"), y=[1, 2, 3])
    grouped = data >> group_by(f.x, _sort=True)

    # Ungrouped: both helpers give the full frame.
    frames = data >> summarise(x=cur_data()) >> pull(f.x, to="list")
    assert frames[0].equals(data)

    frames = data >> summarise(x=cur_data_all()) >> pull(f.x, to="list")
    assert frames[0].equals(data)

    # Grouped: cur_data() -> only the `y` values are expected.
    nested = grouped >> summarise(x=cur_data()) >> pull(f.x)
    cells = nested.values
    assert cells[0].values.flatten().tolist() == [2]
    assert cells[1].values.flatten().tolist() == [1, 3]

    # Grouped: cur_data_all() -> `x` and `y` values are expected.
    nested = grouped >> summarise(x=cur_data_all()) >> pull(f.x)
    cells = nested.values
    assert cells[0].values.flatten().tolist() == ["a", 2]
    assert cells[1].values.flatten().tolist() == ["b", 1, "b", 3]
def test_group_rows_group_keys_partition_group_data():
    """group_keys() and group_rows() must agree with group_data().

    For an ungrouped frame, group_rows() must be ``[[0, 1]]``. For a frame
    grouped by ``x`` and ``y``, the first two columns of group_data() must
    equal group_keys(), and pulling group_data() as a list must equal
    group_rows().
    """
    data = tibble(x=[1, 2], y=[1, 2])

    ungrouped_rows = group_rows(data)
    assert ungrouped_rows == [[0, 1]]

    grouped = group_by(data, f.x, f.y)
    gdata = group_data(grouped)

    key_columns = gdata.iloc[:, [0, 1]]
    assert group_keys(grouped).equals(key_columns)
    assert pull(gdata, to="list") == group_rows(grouped)
def test_inside_mutate():
    """case_when() used inside mutate() on the first four rows of mtcars.

    The conditions are ``cyl == 4`` -> 1, ``am == 1`` -> 2, otherwise 0.
    The pulled values must be ``[2, 2, 1, 0]``. pull() is called without a
    column; presumably it picks the newly added ``out`` column.
    """
    first_four = mtcars >> get(f[:4])
    labelled = first_four >> mutate(
        out=case_when(f.cyl == 4, 1, f["am"] == 1, 2, True, 0)
    )
    values = labelled >> pull(to="list")
    assert values == [2, 2, 1, 0]
def test_mutate_None_preserves_correct_all_vars():
    """mutate(x=None, ...) followed by cur_data_all() in the same call.

    The ``vars`` column captured by cur_data_all() is pulled, and its first
    element must equal ``tibble(y=2)``, i.e. a frame without ``x``.
    """
    mutated = tibble(x=1, y=2) >> mutate(x=None, vars=cur_data_all())
    captured = mutated >> pull(f.vars)

    expected = tibble(y=2)
    assert_tibble_equal(captured[0], expected)
def test_pull_grouped():
    """pull() called directly on a grouped frame.

    The result must be a ``Series`` instance whose values are ``[2]``.
    """
    grouped = tibble(x=1, y=2).group_by('x')

    pulled = pull(grouped, f.y)

    assert isinstance(pulled, Series)
    assert pulled.tolist() == [2]
def test_ntile_does_not_overflow():
    """ntile() with as many buckets as rows, then count() per bucket.

    With ``m = 100`` rows and ``n=m`` buckets, the per-bucket counts must
    sum to 100.
    """
    m = int(1e2)

    bucketed = tibble(a=range(1, m + 1)) >> mutate(b=ntile(f.a, n=m))
    res = bucketed >> count(f.b) >> pull(to='list')

    assert sum(res) == 100
def test_weighted_tally_drops_nas():
    """tally() weighted by a column that contains NA.

    With ``x`` holding 1, 1 and NA, and ``f.x`` passed as the second
    argument to tally(), the pulled result must be ``[2]``.
    """
    data = tibble(x=c(1, 1, NA))

    totals = tally(data, f.x) >> pull(to="list")

    expected = [2]
    assert totals == expected