Example #1
0
def test_pull_nest_df_col():
    """Pull column `1` of a tibble whose `y` column is itself a tibble.

    Checked twice: once converted with ``to="list"`` and once with the
    default output, which is compared against ``tibble(a=2)``.
    """
    nested = tibble(x=1, y=tibble(a=2))

    # explicit list conversion
    listed = pull(nested, 1, to="list")
    assert listed == [[2]]

    # default output
    pulled = pull(nested, 1)
    expected = tibble(a=2)
    assert_tibble_equal(pulled, expected)
Example #2
0
def test_add_tally_can_be_given_a_weighting_variable():
    """add_tally() on grouped data takes a column or an expression as ``wt``."""
    data = tibble(a=c(1, 1, 2, 2, 2), w=c(1, 1, 2, 3, 4))

    # weight given as a plain column reference
    by_a = data >> group_by(f.a)
    tallied = by_a >> add_tally(wt=f.w)
    counts = tallied >> pull(f.n, to="list")
    assert counts == [2, 2, 9, 9, 9]

    # weight given as an expression on that column
    by_a = data >> group_by(f.a)
    tallied = by_a >> add_tally(wt=f.w + 1)
    counts = tallied >> pull(f.n, to="list")
    assert counts == [4, 4, 12, 12, 12]
Example #3
0
def test_pull_a_flat_dict():
    """pull() with a second column argument yields a dict; bad names raise."""
    frame = tibble(x=[1, 2], y=[3, 4])

    mapping = frame >> pull(f.y, f.x)
    expected = {1: 3, 2: 4}
    assert mapping == expected

    # three names for a two-row column: length mismatch
    too_many_names = [3, 4, 5]
    with pytest.raises(ValueError):
        frame >> pull(f.y, name=too_many_names, to="dict")
Example #4
0
def test_pull_series_when_to_equals_series():
    """pull(..., to="series") on a frame built from a single ``x$a`` column."""
    columns = {"x$a": 1}
    frame = tibble(**columns)

    # without an explicit name
    unnamed = frame >> pull(f.x, to="series")
    assert len(unnamed) == 1
    content = unnamed.values.tolist()
    assert content == [1]

    # with an explicit name
    named = frame >> pull(f.x, to="series", name="a")
    assert named.name == "a"
Example #5
0
def test_cur_group_rows():
    """cur_group_rows() inside summarise() for grouped and ungrouped frames."""
    frame = tibble(x=c("b", "a", "b"), y=[1, 2, 3])
    grouped = frame >> group_by(f.x, _sort=True)

    # grouped frame
    per_group = grouped >> summarise(x=cur_group_rows())
    group_rows_col = per_group >> pull()
    assert group_rows_col.values.tolist() == [[1], [0, 2]]

    # plain data frame
    overall = frame >> summarise(x=cur_group_rows())
    all_rows_col = overall >> pull()
    assert all_rows_col.values.tolist() == [[0, 1, 2]]
Example #6
0
def test_cur_group():
    """cur_group() inside summarise() for ungrouped and grouped frames."""
    frame = tibble(g=1, x=1)
    grouped = frame >> group_by(f.g)

    # ungrouped: the single pulled key is compared against NaN
    flat_summary = frame >> summarise(key=cur_group())
    flat_keys = flat_summary >> pull(f.key)
    assert len(flat_keys) == 1
    assert_iterable_equal(flat_keys, [np.nan])

    # grouped: the single pulled key equals a one-column tibble of `g`
    grouped_summary = grouped >> summarise(key=cur_group())
    grouped_keys = grouped_summary >> pull(f.key, to="list")
    assert len(grouped_keys) == 1
    first_key = grouped_keys[0]
    assert first_key.equals(tibble(g=1))
Example #7
0
def test_pull_series_with_name():
    """pull() to a frame with a name, with too many names, and to an array."""
    frame = tibble(x=1)

    # a single name becomes the only column of a 1x1 frame
    renamed = frame >> pull(f.x, to="frame", name="a")
    assert dim(renamed) == (1, 1)
    column_names = renamed.columns.tolist()
    assert column_names == ["a"]

    # two names for one column are rejected
    two_names = ["a", "b"]
    with pytest.raises(ValueError):
        frame >> pull(f.x, to="frame", name=two_names)

    # pull array
    as_array = frame >> pull(f.x, to="array")
    assert isinstance(as_array, np.ndarray)
Example #8
0
def test_pull_df():
    """pull(f.x, to="series") on a frame built from ``x$a`` and ``x$b``."""
    columns = {"x$a": 1, "x$b": 2}
    frame = tibble(**columns)

    # default names: entries are reachable under "a" and "b"
    parts = frame >> pull(f.x, to="series")
    assert len(parts) == 2
    part_a, part_b = parts["a"], parts["b"]
    assert part_a.values.tolist() == [1]
    assert part_b.values.tolist() == [2]

    # a single name for two entries is rejected
    with pytest.raises(ValueError):
        frame >> pull(f.x, to="series", name=["a"])

    # explicit names replace the defaults
    renamed = frame >> pull(f.x, to="series", name=["c", "d"])
    assert len(renamed) == 2
    part_c, part_d = renamed["c"], renamed["d"]
    assert part_c.values.tolist() == [1]
    assert part_d.values.tolist() == [2]
Example #9
0
def test_n_distinct_handles_in_na_rm():
    """n_distinct() with ``na_rm`` as a literal, a variable, and an expression."""
    data = tibble(x=c([1, 2, 3, 4], NA))
    yes = True
    no = False

    # literal flags
    summary = data >> summarise(n=n_distinct(f.x, na_rm=True))
    counted = summary >> pull(to="list")
    assert counted == [4]

    summary = data >> summarise(n=n_distinct(f.x, na_rm=False))
    counted = summary >> pull(to="list")
    assert counted == [5]

    # flags held in local variables
    summary = data >> summarise(n=n_distinct(f.x, na_rm=yes))
    counted = summary >> pull(to="list")
    assert counted == [4]

    summary = data >> summarise(n=n_distinct(f.x, na_rm=no))
    counted = summary >> pull(to="list")
    assert counted == [5]

    # flag produced by a boolean expression
    summary = data >> summarise(n=n_distinct(f.x, na_rm=True or True))
    counted = summary >> pull(to="list")
    assert counted == [4]
Example #10
0
def test_cur_data_all():
    """cur_data() and cur_data_all() inside summarise(), ungrouped and grouped."""
    frame = tibble(x=c("b", "a", "b"), y=[1, 2, 3])
    grouped = frame >> group_by(f.x, _sort=True)

    # ungrouped: the first pulled cell equals the input frame for both helpers
    summary = frame >> summarise(x=cur_data())
    cells = summary >> pull(f.x, to="list")
    assert cells[0].equals(frame)

    summary = frame >> summarise(x=cur_data_all())
    cells = summary >> pull(f.x, to="list")
    assert cells[0].equals(frame)

    # grouped cur_data(): flattened cell contents per group
    summary = grouped >> summarise(x=cur_data())
    column = summary >> pull(f.x)
    first_cell, second_cell = column.values[0], column.values[1]
    assert first_cell.values.flatten().tolist() == [2]
    assert second_cell.values.flatten().tolist() == [1, 3]

    # grouped cur_data_all(): flattened cell contents per group
    summary = grouped >> summarise(x=cur_data_all())
    column = summary >> pull(f.x)
    first_cell, second_cell = column.values[0], column.values[1]
    assert first_cell.values.flatten().tolist() == ["a", 2]
    assert second_cell.values.flatten().tolist() == ["b", 1, "b", 3]
Example #11
0
def test_group_rows_group_keys_partition_group_data():
    """group_rows() and group_keys() agree with the pieces of group_data()."""
    frame = tibble(x=[1, 2], y=[1, 2])

    # ungrouped frame: one group holding both row indices
    ungrouped_rows = group_rows(frame)
    assert ungrouped_rows == [[0, 1]]

    grouped = group_by(frame, f.x, f.y)
    data = group_data(grouped)

    # the first two columns of the group data are compared with the keys
    key_columns = data.iloc[:, [0, 1]]
    assert group_keys(grouped).equals(key_columns)
    # pulling the group data as a list is compared with the group rows
    assert pull(data, to="list") == group_rows(grouped)
Example #12
0
def test_inside_mutate():
    """case_when() used as a mutate() expression on a slice of mtcars."""
    subset = mtcars >> get(f[:4])
    labelled = subset >> mutate(
        out=case_when(f.cyl == 4, 1, f["am"] == 1, 2, True, 0)
    )
    labels = labelled >> pull(to="list")
    assert labels == [2, 2, 1, 0]
Example #13
0
def test_mutate_None_preserves_correct_all_vars():
    """cur_data_all() evaluated in the same mutate() that sets ``x=None``."""
    source = tibble(x=1, y=2)
    mutated = source >> mutate(x=None, vars=cur_data_all())
    captured = mutated >> pull(f.vars)

    # the first captured entry is compared with a frame holding only `y`
    expected = tibble(y=2)
    assert_tibble_equal(captured[0], expected)
Example #14
0
def test_pull_grouped():
    """pull() on a frame grouped by ``x`` gives a Series holding ``y``."""
    base = tibble(x=1, y=2)
    grouped = base.group_by('x')

    pulled = pull(grouped, f.y)
    assert isinstance(pulled, Series)
    values = pulled.tolist()
    assert values == [2]
Example #15
0
def test_ntile_does_not_overflow():
    """ntile() with as many buckets as rows; the bucket counts sum to 100."""
    m = int(1e2)
    frame = tibble(a=range(1, m + 1))

    bucketed = frame >> mutate(b=ntile(f.a, n=m))
    per_bucket = bucketed >> count(f.b)
    sizes = per_bucket >> pull(to='list')

    total = sum(sizes)
    assert total == 100
Example #16
0
def test_weighted_tally_drops_nas():
    """tally() weighted by a column that contains an NA value."""
    frame = tibble(x=c(1, 1, NA))

    weighted = tally(frame, f.x)
    totals = weighted >> pull(to="list")
    assert totals == [2]