コード例 #1
0
def test_rep_df():
    """Check ``rep`` on a data frame: ``each`` raises, ``times``/``length`` work."""
    frame = tibble(x=f[:3])

    # Supplying `each` together with a data frame must be rejected.
    with pytest.raises(ValueError):
        rep(frame, each=2)

    # Repeating twice and truncating to five rows.
    repeated = rep(frame, times=2, length=5)
    expected = tibble(x=[0, 1, 2, 0, 1])
    assert_frame_equal(repeated, expected)
コード例 #2
0
def test_peels_off_a_single_layer_of_grouping():
    """Assert that every ``summarise`` call removes one grouping variable."""
    data = tibble(
        x=rep([1, 2, 3, 4], each=4),
        y=rep([1, 2], each=8),
        z=runif(16),
    )
    grouped = data >> group_by(f.x, f.y)

    # After one summarise only `x` remains as a grouping variable.
    assert group_vars(summarise(grouped)) == ["x"]
    # After a second summarise no grouping variables are left.
    assert group_vars(summarise(summarise(grouped))) == []
コード例 #3
0
def test_dense_rank_with_groups():
    """Compare ``dense_rank`` on a plain column and on a column grouped by ``y``."""
    data = tibble(x=rep(f[1:5], each=2), y=rep([1, 2], each=4))

    # Ungrouped: ranks run over the whole column.
    whole_expected = [1, 1, 2, 2, 3, 3, 4, 4]
    ranks = dense_rank(data.x)
    assert ranks.tolist() == whole_expected

    # Grouped by `y`: the expected ranks restart within each group.
    per_group_expected = [1, 1, 2, 2, 1, 1, 2, 2]
    grouped = data.groupby("y")
    ranks = dense_rank(grouped.x)
    assert ranks.tolist() == per_group_expected
コード例 #4
0
def test_min_rank_with_groups():
    """Compare ``min_rank`` on a plain column and on a column grouped by ``y``."""
    data = tibble(x=rep(f[1:5], each=2), y=rep([1, 2], each=4))

    # Ungrouped: tied values share the lowest rank and the next rank is skipped.
    whole_expected = [1, 1, 3, 3, 5, 5, 7, 7]
    ranks = min_rank(data.x)
    assert ranks.tolist() == whole_expected

    # Grouped by `y`: the expected ranks restart within each group.
    per_group_expected = [1, 1, 3, 3, 1, 1, 3, 3]
    grouped = data.groupby("y")
    ranks = min_rank(grouped.x)
    assert ranks.tolist() == per_group_expected
コード例 #5
0
ファイル: test_group_by.py プロジェクト: pwwang/datar
def test_mutate_does_not_loose_variables():
    """Regrouping by a computed column must keep all existing columns."""
    data = tibble(
        a=rep([1, 2, 3, 4], 2),
        b=rep([1, 2, 3, 4], each=2),
        x=runif(8),
    )
    grouped_ab = data >> group_by(f.a, f.b)
    # Summarise with the last grouping level dropped.
    grouped_a = grouped_ab >> summarise(x=sum(f.x), _groups="drop_last")
    # Regroup by a new, computed `quantile` column.
    regrouped = grouped_a >> group_by(quantile=ntile(f.x, 4))

    expected_columns = ["a", "b", "x", "quantile"]
    assert regrouped.columns.tolist() == expected_columns
コード例 #6
0
def test_recycling():
    """Check recycling of ``summarise`` results of different lengths.

    With ``x=1, y=2`` the summarised frame must equal two rows of 42 in
    both columns; with ``x=2, y=3`` a ``ValueError`` is expected.
    """
    compatible = tibble(x=1, y=2)
    out = compatible >> summarise(
        across(everything(), lambda col: rep(42, col))
    )
    expect = tibble(x=rep(42, 2), y=rep(42, 2))
    assert out.equals(expect)

    # Result lengths 2 and 3 cannot be combined.
    incompatible = tibble(x=2, y=3)
    with pytest.raises(ValueError):
        incompatible >> summarise(
            across(everything(), lambda col: rep(42, col))
        )
コード例 #7
0
def test_complex_expression_as_value():
    """Mutate grouped data with a ``sample`` expression and check the row count.

    Scenario taken from:
    https://stackoverflow.com/questions/30714810/
    pandas-group-by-and-aggregate-column-1-with-condition-from-column-2
    """
    grouped = (
        tibble(
            user=rep(c("1", 2, 3, 4), each=5),
            cancel_date=rep(c(12, 5, 10, 11), each=5),
        )
        >> group_by(f.user)
    )
    # Sampling with size=n() (multiple values) is not supported yet,
    # so size=1 is used here.
    out = grouped >> mutate(login=sample(f[1:], size=1, replace=True))
    assert nrow(out) == 20
コード例 #8
0
def test_cume_dist_with_groups():
    """Compare ``cume_dist`` on a plain column and on a column grouped by ``y``."""
    data = tibble(x=rep(f[1:5], each=2), y=rep([1, 2], each=4))

    # Ungrouped: compared approximately.
    whole_expected = [0.25, 0.25, 0.5, 0.5, 0.75, 0.75, 1.0, 1.0]
    dist = cume_dist(data.x)
    assert_iterable_equal(dist, whole_expected, approx=1e-3)

    # Grouped by `y`: compared without the `approx` argument.
    per_group_expected = [0.5, 0.5, 1.0, 1.0, 0.5, 0.5, 1.0, 1.0]
    grouped = data.groupby("y")
    dist = cume_dist(grouped.x)
    assert_iterable_equal(dist, per_group_expected)
コード例 #9
0
def test_percent_rank_with_groups():
    """Compare ``percent_rank`` on a plain column and on a column grouped by ``y``."""
    data = tibble(x=rep(f[1:5], each=2), y=rep([1, 2], each=4))

    # Ungrouped: thirds are compared approximately.
    whole_expected = [0.0, 0.0, 0.333, 0.333, 0.666, 0.666, 1.0, 1.0]
    pct = percent_rank(data.x)
    assert_iterable_equal(pct, whole_expected, approx=1e-3)

    # Grouped by `y`: compared without the `approx` argument.
    per_group_expected = [0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0]
    grouped = data.groupby("y")
    pct = percent_rank(grouped.x)
    assert_iterable_equal(pct, per_group_expected)
コード例 #10
0
def test_arguments_to_sample_are_passed_along():
    """``slice_sample`` must honour ``weight_by`` and ``replace``.

    Only the first row has a non-zero weight, so every draw must be row 1.
    """
    weighted = tibble(x=range(1, 101), wt=c(1, rep(0, 99)))

    single = weighted >> slice_sample(n=1, weight_by=f.wt)
    assert single.x.tolist() == [1]

    # Two draws with replacement still pick the only weighted row.
    double = weighted >> slice_sample(n=2, weight_by=f.wt, replace=True)
    assert double.x.tolist() == [1, 1]
コード例 #11
0
def test_input_recycled():
    """Length-1 values given to ``summarise`` are recycled to the longest input."""
    # Ungrouped: summarise should match building the tibble directly.
    result = tibble() >> summarise(x=1, y=[1, 2, 3], z=1)
    expected = tibble(x=1, y=[1, 2, 3], z=1)
    assert result.equals(expected)

    # Grouped: three rows are expected for each of the two groups.
    gf = group_by(tibble(a=[1, 2]), f.a)
    result = gf >> summarise(x=1, y=[1, 2, 3], z=1)
    expected = tibble(
        a=rep([1, 2], each=3),
        x=1,
        y=rep([1, 2, 3], 2),
        z=1,
    ) >> group_by(f.a)
    assert_tibble_equal(result, expected)

    # Per-group result length taken from the group value itself.
    result = gf >> summarise(x=seq_len(f.a), y=1)
    expected = tibble(a=c(1, 2, 2), x=c(1, 1, 2), y=1) >> group_by(f.a)
    assert_tibble_equal(result, expected)
コード例 #12
0
def test_handles_simple_symbols():
    """Check ``filter`` with symbols, local variables and forwarded conditions.

    Runs the same conditions against a plain and a grouped frame, passing
    them through nested helper functions.
    """
    df = tibble(x=range(1, 5), test=rep(c(True, False), each=2))
    # Result is not asserted; this call only has to run.
    res = filter(df, f.test)

    gdf = group_by(df, f.x)
    # Same call on the grouped frame; the result is not asserted either.
    res = filter(gdf, f.test)

    def h(data):
        # Filter by a mask held in a local variable instead of a column.
        test2 = c(True, True, False, False)
        return filter(data, test2)

    out = h(df)
    assert out.equals(df.iloc[:2, :])

    def ff(data, *args):
        # Combine fixed conditions with extra ones forwarded by the caller.
        one = 1
        return filter(data, f.test, f.x > one, *args)

    def g(data, *args):
        # Add one more condition and delegate to `ff`.
        four = 4
        return ff(data, f.x < four, *args)

    res = g(df)
    assert res.x.tolist() == [2]
    assert res.test.tolist() == [True]

    # For the grouped input the values are read through `.obj`.
    res = g(gdf)
    assert res.x.obj.tolist() == [2]
    assert res.test.obj.tolist() == [True]
コード例 #13
0
def test_list_output_columns():
    """``summarise`` may produce a column whose cells are lists."""
    data = tibble(x=range(1, 11), g=rep([1, 2], each=5))
    # Collect the `x` values of each group into a list.
    summary = data >> group_by(f.g) >> summarise(y=f.x.apply(list))

    first_group_values = [1, 2, 3, 4, 5]
    assert_iterable_equal(summary.y[0], first_group_values)
コード例 #14
0
def test_correctly_reconstructs_groups():
    """After ``summarise`` the group row indices must be rebuilt correctly."""
    summarised = (
        tibble(
            x=[1, 2, 3, 4],
            g1=rep([1, 2], 2),
            g2=[1, 2, 3, 4],
        )
        >> group_by(f.g1, f.g2)
        >> summarise(x=f.x + 1)
    )
    # Unlike dplyr, the original frame is not reordered.
    expected_rows = [[0, 2], [1, 3]]
    assert group_rows(summarised) == expected_rows
コード例 #15
0
def test_group_split_keeps_group_variables_by_default():
    """``group_split`` pieces must equal the matching rows of the input, all columns included."""
    source = tibble(x=[1, 2, 3, 4], g=factor(rep(["a", "b"], each=2)))
    split = group_split(source, f.g)
    pieces = list(split)

    assert len(pieces) == 2
    # First piece: rows 0-1 of the source.
    assert pieces[0].equals(source.iloc[[0, 1], :])
    # Second piece: rows 2-3, compared after resetting the index.
    assert pieces[1].equals(source.iloc[[2, 3], :].reset_index(drop=True))
コード例 #16
0
def test_slicex_on_grouped_data():
    """``slice_min``/``slice_max``/``slice_sample`` should work per group."""
    grouped = tibble(g=rep([1, 2], each=3), x=seq(1, 6)) >> group_by(f.g)

    # Smallest `x` of each group.
    lowest = grouped >> slice_min(f.x)
    assert lowest.equals(tibble(g=[1, 2], x=[1, 4]))

    # Largest `x` of each group.
    highest = grouped >> slice_max(f.x)
    assert highest.equals(tibble(g=[1, 2], x=[3, 6]))

    # Default sampling is expected to give a 2x2 result.
    sampled = grouped >> slice_sample()
    assert dim(sampled) == (2, 2)
コード例 #17
0
def test_cache_key():
    """Two ``across`` calls differing only in the function must not be mixed up.

    The result built from ``across(..., mean)`` and ``across(..., max)``
    has to equal the one from direct ``mean``/``max`` calls.
    """
    grouped = (
        tibble(g=rep([1, 2], each=2), a=range(1, 5)) >> group_by(f.g)
    )

    out = grouped >> mutate(
        tibble(
            x=across(where(is_numeric), mean).a,
            y=across(where(is_numeric), max).a,
        )
    )
    expect = grouped >> mutate(x=mean(f.a), y=max(f.a))
    assert_frame_equal(out, expect)
コード例 #18
0
def test_slice_handles_na():
    """Check the row counts ``slice`` returns when ``NA`` is among the indices."""
    plain = tibble(x=[1, 2, 3])
    # NA alone gives no rows; NA next to a valid index gives that one row.
    assert nrow(slice(plain, NA)) == 0
    assert nrow(slice(plain, c(1, NA))) == 1
    # An inverted index combined with NA.
    count = plain >> slice(c(~c(1), NA)) >> nrow()
    assert count == 2

    # The same checks on data grouped by `g`.
    grouped = tibble(x=[1, 2, 3, 4], g=rep([1, 2], 2)) >> group_by(f.g)
    assert nrow(slice(grouped, c(1, NA))) == 2
    count = grouped >> slice(c(~c(1), NA)) >> nrow()
    assert count == 2
コード例 #19
0
def test_group_modify_works_with_additional_arguments():
    """Extra keyword arguments to ``group_modify`` must reach the callback."""

    def myfun(x, y, foo):
        # Return a copy of the group data with a constant column named `foo`.
        modified = x.copy()
        modified[foo] = 1
        return modified

    source = tibble(A=rep([1, 2], each=3)) >> group_by(f.A)

    # Expected result: the source plus a constant "bar" column.
    target = source.copy()
    target["bar"] = 1

    out = group_modify(source, _f=myfun, foo="bar")
    assert_frame_equal(out.reset_index(drop=True), target)
コード例 #20
0
def test_rep_sgb_param(caplog):
    """Exercise ``rep`` when its parameters are grouped columns."""
    grouped = tibble(
        x=[1, 1, 2, 2],
        times=[1, 2, 1, 2],
        length=[3, 4, 4, 3],
        each=[1, 1, 1, 1],
    ).group_by("x")

    # `times` taken from a grouped column.
    repeated = rep([1, 2], grouped.times)
    assert_iterable_equal(repeated.obj, [1, 2, 2, 1, 2, 2])

    # Grouped `times` and `length`; a message containing "first element"
    # is expected in the captured log.
    repeated = rep(
        [1, 2], times=grouped.times, each=1, length=grouped.length
    )
    assert "first element" in caplog.text

    assert_iterable_equal(repeated.obj, [1, 2, 2, 1, 2, 2, 1])
    assert_iterable_equal(repeated.grouper.size(), [3, 4])

    # Grouped input values combined with grouped parameters.
    small = tibble(x=[1, 2], each=[1, 1]).group_by("x")
    repeated = rep(small.x, each=small.each)
    assert_iterable_equal(repeated.obj, [1, 2])
    repeated = rep(
        small.x, times=small.each, length=small.each, each=small.each
    )
    assert_iterable_equal(repeated.obj, [1, 2])

    # Scalar input with a grouped `each`.
    repeated = rep(3, each=small.each)
    assert_iterable_equal(repeated.obj, [3, 3])

    # The underlying (ungrouped) object with a plain `times`.
    repeated = rep(small.x.obj, 2)
    assert_iterable_equal(repeated, [1, 2, 1, 2])
コード例 #21
0
ファイル: test_arrange.py プロジェクト: pwwang/datar
def test_errors():
    """Check the exceptions ``arrange`` raises for invalid input."""
    x = Series(1, name="x")
    # NOTE(review): the two positional columns presumably both end up named
    # "x" (from the Series or variable name) - confirm before renaming `x`.
    df = tibble(x, x, _name_repair="minimal")

    # Arranging by a non-unique column name must fail.
    with pytest.raises(NameNonUniqueError):
        df >> arrange(f.x)

    # Arranging by a column that does not exist must fail.
    df = tibble(x=x)
    with pytest.raises(KeyError):
        df >> arrange(f.y)

    # Arranging by an expression of the wrong length must fail.
    with pytest.raises(ValueError, match="Length of values"):
        df >> arrange(rep(f.x, 2))
コード例 #22
0
def test_joins_preserve_groups():
    """Joins must keep the grouping of the left-hand table."""
    left = tibble(a=[1, 2, 3]) >> group_by(f.a)
    right = tibble(a=rep([1, 2, 3, 4], 2), b=1) >> group_by(f.b)

    joined = inner_join(left, right, by="a")
    assert group_vars(joined) == ["a"]

    joined = semi_join(left, right, by="a")
    assert group_vars(joined) == ["a"]

    # See comment in nest_join
    joined = nest_join(left, right, by="a")
    assert group_vars(joined) == ["a"]
コード例 #23
0
def test_group_list_respects_empty_groups():
    """``group_split.list`` with ``_drop=False`` yields a piece for an unused level."""
    source = tibble(
        x=[1, 2, 3, 4],
        # Level "c" is declared but never used.
        g=factor(rep(["a", "b"], each=2), levels=["a", "b", "c"]),
    )

    # Default call: the pieces for "a" and "b" are checked.
    pieces = group_split.list(source, f.g)

    assert pieces[0].equals(source.iloc[:2, :])
    assert pieces[1].equals(source.iloc[[2, 3], :].reset_index(drop=True))

    # With `_drop=False` a third, empty piece is expected as well.
    pieces = group_split.list(source, f.g, _drop=False)
    assert pieces[0].equals(source.iloc[:2, :])
    assert pieces[1].equals(source.iloc[[2, 3], :].reset_index(drop=True))
    assert pieces[2].equals(source.iloc[[], :])
コード例 #24
0
def test_slice_gives_correct_rows():
    """``slice`` must pick rows by 0-based position, per group when grouped."""
    table = tibble(value=[f"row{i}" for i in range(1, 11)])

    picked = slice(table, c(0, 1, 2))
    assert picked.value.tolist() == ["row1", "row2", "row3"]

    picked = slice(table, c(3, 5, 8))
    assert picked.value.tolist() == ["row4", "row6", "row9"]

    # Two groups of five rows each.
    table = tibble(
        value=[f"row{i}" for i in range(1, 11)],
        group=rep([1, 2], each=5),
    ) >> group_by(f.group)

    # First three rows of every group.
    picked = slice(table, f[:3])
    assert picked.value.obj.tolist() == [
        f"row{i}" for i in [1, 2, 3, 6, 7, 8]
    ]

    # Positions 1 and 3 within every group.
    picked = slice(table, c(1, 3))
    assert picked.value.obj.tolist() == [f"row{i}" for i in [2, 4, 7, 9]]
コード例 #25
0
ファイル: test_verbs.py プロジェクト: pwwang/datar
def test_can_recycle_when_add_multiple_columns_of_len1():
    """Several scalar columns added at once are each recycled to full length."""
    base = tibble(a=[1, 2, 3])
    extended = add_column(base, b=4, c=5)
    expected = tibble(a=[1, 2, 3], b=rep(4, 3), c=rep(5, 3))
    assert_frame_equal(extended, expected)
コード例 #26
0
ファイル: test_verbs.py プロジェクト: pwwang/datar
def test_can_recycle_when_adding_a_column_of_len1():
    """A single scalar column is recycled to the length of the frame."""
    base = tibble(a=[1, 2, 3])
    extended = add_column(base, b=4)
    expected = tibble(a=[1, 2, 3], b=rep(4, 3))
    assert_frame_equal(extended, expected)
コード例 #27
0
ファイル: test_verbs.py プロジェクト: pwwang/datar
def test_can_recycle_when_adding_columns():
    """A scalar column is recycled when added next to a full-length column."""
    base = tibble(a=[1, 2, 3])
    extended = add_column(base, b=4, c=[3, 2, 1])
    expected = tibble(a=[1, 2, 3], b=rep(4, 3), c=[3, 2, 1])
    assert_frame_equal(extended, expected)
コード例 #28
0
def test_rep_error():
    """``rep`` must raise ``ValueError`` for invalid ``times`` arguments."""
    # A `times` vector whose length differs from the input's.
    with pytest.raises(ValueError):
        rep(c(1, 2, 3), c(1, 2))

    # A `times` vector combined with `each`.
    with pytest.raises(ValueError):
        rep(c(1, 2, 3), c(1, 2, 3), each=2)
コード例 #29
0
def test_rep_grouped_df():
    """``rep`` on a grouped frame must return a grouped frame with groups kept."""
    grouped = tibble(x=f[:3], g=[1, 1, 2]).group_by("g")
    repeated = rep(grouped, 2, length=5)

    assert isinstance(repeated, TibbleGrouped)
    expected_x = [0, 1, 2, 0, 1]
    assert_iterable_equal(repeated.x.obj, expected_x)
    # Both groups must still be present after repeating.
    assert repeated._datar["grouped"].grouper.ngroups == 2
コード例 #30
0
def test_rep(x, times, length, each, expected):
    """Check ``rep`` output for one combination of arguments.

    All arguments are supplied by the caller; they are presumably
    provided through test parametrization defined outside this snippet.
    """
    actual = rep(x, times=times, length=length, each=each)
    assert_iterable_equal(actual, expected)