Exemple #1
0
def test_auto_splicing():
    """Check that ``group_by`` splices frame-like arguments into columns.

    Every section groups ``iris`` once with explicit column references and
    once with a tibble / ``across`` argument, then asserts that both
    results compare equal.
    """
    # A one-column tibble behaves like the bare column reference.
    by_column = iris >> group_by(f.Species)
    by_tibble = iris >> group_by(tibble(Species=iris.Species))
    assert by_column.equals(by_tibble)

    # across() over a single column behaves like the bare column reference.
    by_column = iris >> group_by(f.Species)
    by_across = iris >> group_by(across(f.Species))
    assert by_column.equals(by_across)

    # across() with a function equals mutating first, then grouping.
    explicit = (
        iris
        >> mutate(across(starts_with("Sepal"), round))
        >> group_by(f.Sepal_Length, f.Sepal_Width)
    )
    spliced = iris >> group_by(across(starts_with("Sepal"), round))
    assert explicit.equals(spliced)

    # across(character()), across(NULL) not supported

    # across() followed by a plain column.
    explicit = (
        iris
        >> mutate(across(starts_with("Sepal"), round))
        >> group_by(f.Sepal_Length, f.Sepal_Width, f.Species)
    )
    spliced = iris >> group_by(
        across(starts_with("Sepal"), round), f.Species
    )
    assert explicit.equals(spliced)

    # A plain column followed by across().
    explicit = (
        iris
        >> mutate(across(starts_with("Sepal"), round))
        >> group_by(f.Species, f.Sepal_Length, f.Sepal_Width)
    )
    spliced = iris >> group_by(
        f.Species, across(starts_with("Sepal"), round)
    )
    assert explicit.equals(spliced)
Exemple #2
0
def test_length1_vectors_are_recycled():
    """Check scalar recycling in ``mutate`` and the length-mismatch error.

    A scalar value fills all four rows; a two-element list on a four-row
    frame must raise ``ValueError`` matching "does not match length".
    """
    frame = tibble(x=range(1, 5))

    recycled = mutate(frame, y=1)
    expected_y = [1, 1, 1, 1]
    assert recycled.y.tolist() == expected_y

    with pytest.raises(ValueError, match="does not match length"):
        mutate(frame, y=[1, 2])
Exemple #3
0
def test_row_number_with_groups():
    """Check that ``row_number()`` restarts within each group of ``x``."""
    grouped = tibble(x=[3, 3, 4, 4]).group_by("x")

    # Plain row numbers: 1, 2 within each of the two groups.
    numbered = grouped >> mutate(n=row_number())
    assert_iterable_equal(numbered.n.obj, [1, 2, 1, 2])

    # The result of row_number() can take part in further arithmetic.
    shifted = grouped >> mutate(n=row_number() + 1)
    assert_iterable_equal(shifted.n.obj, [2, 3, 2, 3])
Exemple #4
0
def test_preserves_grouping():
    """Check that ``mutate`` keeps the grouping of a grouped frame.

    Overwriting the grouping column with a constant keeps ``x`` as the
    group variable and leaves one row of group data; adding an unrelated
    column leaves the group data equal to the input's.
    """
    grouped = group_by(tibble(x=[1, 2], y=2), f.x)

    # Overwrite the grouping variable itself.
    overwritten = mutate(grouped, x=1)
    assert group_vars(overwritten) == ["x"]
    assert nrow(group_data(overwritten)) == 1

    # Add a column that is not a grouping variable.
    extended = mutate(grouped, z=1)
    assert group_data(extended).equals(group_data(grouped))
Exemple #5
0
def test_preserves_names():
    """Check that a tibble assigned to a column keeps its inner names.

    The inner tibble is built from the first three ``letters``, once with
    scalar values and once with one-element lists.
    """
    frame = tibble(a=range(1, 4))
    names = letters[:3]
    scalar_cols = dict(zip(names, [0, 1, 2]))
    list_cols = dict(zip(names, [[0], [1], [2]]))

    # note it's treated as data frame
    from_scalars = frame >> mutate(b=tibble(**scalar_cols))
    from_lists = frame >> mutate(b=tibble(**list_cols))

    assert_iterable_equal(from_scalars["b"].columns, list("abc"))
    assert_iterable_equal(from_lists["b"].columns, list("abc"))
Exemple #6
0
def test_keep_none_only_keeps_grouping_variables():
    """Check ``_keep="none"`` on an ungrouped and on a grouped frame.

    The ungrouped result has only the new column; the grouped result also
    retains the grouping column ``x``.
    """
    plain = tibble(x=1, y=2)
    grouped = group_by(plain, f.x)

    from_plain = mutate(plain, z=1, _keep="none")
    assert from_plain.columns.tolist() == ["z"]

    from_grouped = mutate(grouped, z=1, _keep="none")
    assert from_grouped.columns.tolist() == ["x", "z"]
Exemple #7
0
def test_unnamed_data_frames_are_automatically_unspliced():
    """Check that positional tibble arguments to ``mutate`` become columns."""
    # A single unnamed tibble contributes its column.
    single = tibble(a=1) >> mutate(tibble(b=2))
    assert_tibble_equal(single, tibble(a=1, b=2))

    # When two unnamed tibbles define the same column, the later one wins.
    overridden = tibble(a=1) >> mutate(tibble(b=2), tibble(b=3))
    assert_tibble_equal(overridden, tibble(a=1, b=3))

    # Columns from an unnamed tibble are visible to later expressions.
    chained = tibble(a=1) >> mutate(tibble(b=2), c=f.b)
    assert_tibble_equal(chained, tibble(a=1, b=2, c=2))
Exemple #8
0
def test_return_one_row():
    """Check ``mutate`` with an ``across`` that selects no columns.

    Used positionally the frame is left unchanged; assigned to ``y`` the
    new column consists of missing values only.
    """
    # not actually one row, but returns a corresponding series
    frame = tibble(x=range(1, 43))

    unnamed = frame >> mutate(across(c(), as_factor))
    assert unnamed.equals(frame)

    named = frame >> mutate(y=across(c(), as_factor))
    # empty column in pandas will be NAs
    assert named.y.isna().all()
Exemple #9
0
def test_deals_with_0_groups():
    """Check ``mutate`` on a grouped frame that has no rows."""
    empty_grouped = tibble(x=[]) >> group_by(f.x)

    # Element-wise expression on the empty grouped frame.
    result = mutate(empty_grouped, y=f.x + 1)
    expected = tibble(x=[], y=[]) >> group_by(f.x)
    # NOTE(review): iterating a pandas DataFrame yields its column labels,
    # so this presumably compares column names only -- confirm.
    assert_iterable_equal(result, expected)
    assert group_vars(result) == group_vars(expected)

    # Aggregating expression on the empty grouped frame.
    result = mutate(empty_grouped, y=max(f.x))
    assert result.shape == (0, 2)
    assert group_vars(result) == ["x"]
Exemple #10
0
def test_cache_key():
    """Check two ``across`` calls that differ only in the function applied.

    ``across(..., mean).a`` and ``across(..., max).a`` inside one unnamed
    tibble must give the same frame as ``mean(f.a)`` / ``max(f.a)``.
    """
    grouped = tibble(g=rep([1, 2], each=2), a=range(1, 5)) >> group_by(f.g)

    via_across = grouped >> mutate(
        tibble(
            x=across(where(is_numeric), mean).a,
            y=across(where(is_numeric), max).a,
        )
    )
    direct = grouped >> mutate(x=mean(f.a), y=max(f.a))
    assert_frame_equal(via_across, direct)
Exemple #11
0
def test_group_by_keeps_the_right_order_of_subdfs():
    """Check that a grouped ``mutate`` keeps values in original row order.

    ``x`` is assigned 0..8, the frame is grouped by ``g1``/``g2`` and ``x``
    is re-assigned to itself; the values must still read 0..8.
    """
    frame = (
        tibble(
            g1=["a", "b", "c", "a", "b", "c", "a", "b", "c"],
            g2=["a", "b", "c", "a", "b", "c", "a", "b", "b"],
        )
        >> mutate(x=range(9))
    )
    regrouped = frame >> group_by(f.g1, f.g2) >> mutate(x=f.x)
    assert_iterable_equal(regrouped.x.obj, range(9))
Exemple #12
0
def test_works_on_empty_data_frames():
    """Check ``mutate`` on a tibble that has no rows and no columns."""
    empty = tibble()

    # No expressions: the result stays empty.
    untouched = empty >> mutate()
    assert nrow(untouched) == 0
    assert len(untouched) == 0

    # Adding an empty column yields a 0-row, 1-column frame.
    with_column = empty >> mutate(x=[])
    assert with_column.columns.tolist() == ["x"]
    assert nrow(with_column) == 0
    assert ncol(with_column) == 1
Exemple #13
0
def test_handles_data_frame_columns():
    """Check assigning a tibble as a column on plain, grouped, rowwise data."""
    frame = tibble(a=c(1, 2, 3), b=c(2, 3, 4), base_col=c(3, 4, 5))

    # Plain frame.
    plain_result = mutate(frame, new_col=tibble(x=[1, 2, 3]))
    assert_tibble_equal(plain_result["new_col"], tibble(x=[1, 2, 3]))

    # Grouped frame: the inner column is built from a column reference.
    grouped_result = mutate(group_by(frame, f.a), new_col=tibble(x=f.a))
    assert_iterable_equal(grouped_result["new_col"].x.obj, [1, 2, 3])

    # Rowwise frame: the nested frame is compared against a rowwise tibble.
    rowwise_frame = rowwise(frame, f.a)
    rowwise_result = mutate(rowwise_frame, new_col=tibble(x=f.a))
    assert_tibble_equal(
        rowwise_result["new_col"],
        tibble(x=[1, 2, 3]) >> rowwise(),
    )
Exemple #14
0
def test_lead_lag_inside_mutates_handles_expressions_as_value_for_default():
    """Check ``lead``/``lag`` defaults given as expressions or as lists.

    Results computed inside ``mutate`` are compared with the same calls
    made directly on the underlying column.
    """
    frame = tibble(x=[1, 2, 3])

    # Default supplied as a column expression.
    shifted = mutate(
        frame,
        leadn=lead(f.x, default=f.x[0]),
        lagn=lag(f.x, default=f.x[0]),
    )
    assert_iterable_equal(shifted.leadn, lead(frame.x, default=frame.x[0]))
    assert_iterable_equal(shifted.lagn, lag(frame.x, default=frame.x[0]))

    # Default supplied as a one-element list.
    shifted = mutate(
        frame,
        leadn=lead(f.x, default=[1]),
        lagn=lag(f.x, default=[1]),
    )
    assert_iterable_equal(shifted.leadn, lead(frame.x, default=[1]))
    assert_iterable_equal(shifted.lagn, lag(frame.x, default=[1]))
Exemple #15
0
def test_mutate_cols_inside_func():
    """Check column expressions passed through a locally registered function.

    ``data_frame`` is registered with ``register_func`` and simply builds a
    tibble from its keyword arguments; the result must equal dividing every
    column by ``y`` via ``across``.
    """
    df = tibble(x=2, y=4, z=8)

    @register_func(None, context=None)
    def data_frame(**kwargs):
        """Build a tibble from the given keyword arguments."""
        return tibble(**kwargs)

    via_func = df >> mutate(
        data_frame(x=f.x / f.y, y=f.y / f.y, z=f.z / f.y)
    )
    # df.y does not work on grouped data
    via_across = df >> mutate(across(everything(), lambda col: col / df.y))
    assert via_func.equals(via_across)
Exemple #16
0
def test_works_sequentially():
    """Check that later ``mutate`` expressions see earlier results.

    The second numeric-column count includes the column created by the
    first; after ``a`` is replaced by a string there are no numeric columns.
    """
    frame = tibble(a=1)

    counted = frame >> mutate(
        x=ncol(across(where(is_numeric))),
        y=ncol(across(where(is_numeric))),
    )
    wanted = tibble(a=1, x=1, y=2)
    assert counted.equals(wanted)

    counted = frame >> mutate(a="x", y=ncol(across(where(is_numeric))))
    wanted = tibble(a="x", y=0)
    assert counted.equals(wanted)
Exemple #17
0
def test_empty_mutate_returns_input():
    """Check that ``mutate`` with no expressions returns its input unchanged.

    For an ungrouped tibble the result must equal the input. For a grouped
    tibble the result must equal the input, still be a ``TibbleGrouped``
    and keep ``x`` as its grouping variable.
    """
    df = tibble(x=1)
    gf = group_by(df, f.x)

    out = mutate(df)
    assert out.equals(df)

    out = mutate(gf)
    assert_tibble_equal(out, gf)
    # Check the type of the result, not of the input fixture: asserting on
    # ``gf`` could never fail because of what ``mutate`` returned.
    assert isinstance(out, TibbleGrouped)
    assert group_vars(out) == ["x"]
Exemple #18
0
def test_can_use_before_and_after_to_control_column_position():
    """Check how ``_before`` / ``_after`` place columns in ``mutate``."""
    frame = tibble(x=1, y=2)

    # Default: the new column goes last.
    placed = mutate(frame, z=1)
    assert placed.columns.tolist() == ["x", "y", "z"]

    # Positional index given through _before.
    placed = mutate(frame, z=1, _before=1)
    assert placed.columns.tolist() == ["x", "z", "y"]

    # Positional index given through _after.
    placed = mutate(frame, z=1, _after=0)
    assert placed.columns.tolist() == ["x", "z", "y"]

    # Overwriting an existing column with _after leaves the order as is.
    frame = tibble(x=1, y=2)
    placed = mutate(frame, x=1, _after=f.y)
    assert placed.columns.tolist() == ["x", "y"]
Exemple #19
0
def test_if_any_all_enforce_bool():
    """Check ``if_all`` / ``if_any`` with ``identity`` on non-boolean columns.

    With both columns equal to 10, filtering keeps the row and mutating
    yields ``True``.
    """
    frame = tibble(x=10, y=10)

    # Used as filter predicates.
    kept = frame >> filter(if_all(f[f.x:f.y], identity))
    assert_frame_equal(kept, frame)

    kept = frame >> filter(if_any(f[f.x:f.y], identity))
    assert_frame_equal(kept, frame)

    # Used as mutate expressions.
    flagged = frame >> mutate(ok=if_all(f[f.x:f.y], identity))
    assert_frame_equal(flagged, mutate(frame, ok=True))

    flagged = frame >> mutate(ok=if_any(f[f.x:f.y], identity))
    assert_frame_equal(flagged, mutate(frame, ok=True))
Exemple #20
0
def test_cur_data_all_sequentially():
    """Check that ``cur_data`` / ``cur_data_all`` see columns added earlier.

    In both cases the second column count is one higher than the first.
    """
    # Ungrouped frame with cur_data().
    frame = tibble(a=1)
    counted = frame >> mutate(
        x=cur_data().transform(ncol),
        y=cur_data().transform(ncol),
    )
    wanted = tibble(a=1, x=1, y=2)
    assert counted.equals(wanted)

    # Grouped frame with cur_data_all().
    grouped = tibble(a=1, b=2) >> group_by(f.a)
    counted = grouped >> mutate(
        x=cur_data_all().transform(ncol),
        y=cur_data_all().transform(ncol),
    )
    wanted = tibble(a=1, b=2, x=2, y=3)
    assert counted.equals(wanted)
Exemple #21
0
def test_applied_progressively():
    """Check that ``mutate`` expressions are evaluated in order."""
    frame = tibble(x=1)

    # A later expression can use a column created earlier.
    result = frame >> mutate(y=f['x'] + 1, z=f.y + 1)
    assert_tibble_equal(result, tibble(x=1, y=2, z=3))

    # An existing column can be overwritten from a newly created one.
    result = frame >> mutate(y=f.x + 1, x=f.y + 1)
    assert_tibble_equal(result, tibble(x=3, y=2))

    # An overwritten column is seen with its new value.
    result = frame >> mutate(x=2, y=f.x)
    assert_tibble_equal(result, tibble(x=2, y=2))

    # A bare column reference and an equivalent expression agree.
    frame = tibble(x=1, y=2)
    from_reference = frame >> mutate(x2=f.x, x3=f.x2 + 1)
    from_expression = frame >> mutate(x2=f.x + 0, x3=f.x2 + 1)
    assert_tibble_equal(from_reference, from_expression)
Exemple #22
0
def test_attrgetter():
    """Check ``attrgetter(f.x, "str")`` and ``pd_str(f.x)`` inside ``mutate``.

    Both are used to upper-case a string column, on a plain frame and on a
    frame grouped by a constant ``g``.
    """
    frame = tibble(x=list("abc"))
    uppercased = ["A", "B", "C"]

    # Ungrouped frame.
    result = frame >> mutate(y=attrgetter(f.x, "str").upper())
    assert_iterable_equal(result.y, uppercased)

    result = frame >> mutate(y=pd_str(f.x).upper())
    assert_iterable_equal(result.y, uppercased)

    # Grouped frame.
    grouped = frame >> group_by(g=1)
    result = grouped >> mutate(y=attrgetter(f.x, "str").upper())
    assert_iterable_equal(result.y.obj, uppercased)

    result = grouped >> mutate(y=pd_str(f.x).upper())
    assert_iterable_equal(result.y.obj, uppercased)
Exemple #23
0
def test_rowwise_preserved_by_major_verbs():
    """Check which verbs return a ``TibbleRowwise`` for a rowwise input.

    ``arrange``, ``filter``, ``mutate``, ``rename``, ``select`` and
    ``slice`` must keep the rowwise class and group variable; ``summarise``
    must return a ``TibbleGrouped`` instead.
    """
    rowwise_frame = rowwise(tibble(x=range(1, 6), y=range(5, 0, -1)), f.x)

    arranged = arrange(rowwise_frame, f.y)
    assert isinstance(arranged, TibbleRowwise)
    assert group_vars(arranged) == ["x"]

    filtered = filter(rowwise_frame, f.x < 3)
    assert isinstance(filtered, TibbleRowwise)
    assert group_vars(filtered) == ["x"]

    mutated = mutate(rowwise_frame, x=f.x + 1)
    assert isinstance(mutated, TibbleRowwise)
    assert group_vars(mutated) == ["x"]

    # Renaming the group variable is reflected in the group vars.
    renamed = rename(rowwise_frame, X=f.x)
    assert isinstance(renamed, TibbleRowwise)
    assert group_vars(renamed) == ["X"]

    selected = select(rowwise_frame, "x")
    assert isinstance(selected, TibbleRowwise)
    assert group_vars(selected) == ["x"]

    sliced = slice(rowwise_frame, c(0, 0))
    assert isinstance(sliced, TibbleRowwise)
    assert group_vars(sliced) == ["x"]

    # Except for summarise
    summarised = summarise(rowwise_frame, z=mean(f.x, f.y))
    assert isinstance(summarised, TibbleGrouped)
    assert group_vars(summarised) == ["x"]
Exemple #24
0
def test_mutate_internally():
    """Check ``distinct`` with a named expression against mutate + distinct."""
    frame = tibble(g=c(1, 2), x=c(1, 2))

    one_step = frame >> distinct(aa=f.g * 2)
    two_steps = frame >> mutate(aa=f.g * 2) >> distinct(f.aa)

    assert one_step.equals(two_steps)
Exemple #25
0
def test_order_by():
    """Check ``order_by`` inside ``mutate`` and its error outside of one."""
    frame = tibble(x=f[1:6])

    # Cumulative sum evaluated in the order given by f[5:].
    ordered = frame >> mutate(y=order_by(f[5:], cumsum(f.x)))
    expected_y = [15, 14, 12, 9, 5]
    assert_iterable_equal(ordered.y, expected_y)

    # Called directly on plain sequences, order_by raises ValueError.
    with pytest.raises(ValueError):
        order_by(seq(5, 1), cumsum(seq(1, 5)))
Exemple #26
0
def test_0col_df_in_results_ignored():
    """Check that a zero-column tibble passed to ``summarise`` adds nothing."""
    keys_only = tibble(x=[1, 2])

    # One-column input, empty tibble only.
    summarised = keys_only >> group_by(f.x) >> summarise(tibble())
    assert summarised.equals(keys_only)

    # One-column input, empty tibble plus a named value.
    summarised = keys_only >> group_by(f.x) >> summarise(tibble(), y=65)
    reference = keys_only >> mutate(y=65)
    assert summarised.equals(reference)

    # Two-column input, empty tibble only.
    two_cols = tibble(x=[1, 2], y=[3, 4])
    summarised = two_cols >> group_by(f.x) >> summarise(tibble())
    assert summarised.equals(keys_only)

    # Two-column input, empty tibble plus a named value.
    summarised = two_cols >> group_by(f.x) >> summarise(tibble(), z=98)
    reference = keys_only >> mutate(z=98)
    assert summarised.equals(reference)
Exemple #27
0
def read_bed(bedfile, bedidx):
    """Read a BED file and return the path of a BED file that has scores.

    The file is read as header-less, tab-separated data and its columns are
    named with the leading entries of the BED field list. If a ``score``
    column is present and ``bedidx`` is not in ``ignore_scores``, the input
    path is returned as is. Otherwise ``score`` is set to ``end - start``
    and the frame is written (no header, no index) to
    ``_<stems[bedidx]>.bed`` next to ``outfile``; that path is returned.

    Relies on the module-level names ``_log``, ``outfile``, ``stems`` and
    ``ignore_scores``.

    Args:
        bedfile: Path of the BED file to read.
        bedidx: Index used to look up ``stems`` and ``ignore_scores``.

    Returns:
        ``bedfile`` when its scores are used, else the path of the newly
        written file.
    """
    _log("- Reading BED file:", bedfile)
    derived_path = outfile.parent / f"_{stems[bedidx]}.bed"
    bed = pandas.read_csv(bedfile, sep="\t", header=None)

    bed_fields = [
        "chrom", "start", "end",
        "name", "score", "strand",
        "thickStart", "thickEnd", "itemRgb",
        "blockCount", "blockSizes", "blockStarts",
    ]
    # Name only as many fields as the file actually has columns.
    n_columns = len(bed.columns)
    bed.columns = bed_fields[:n_columns]

    if "score" not in bed.columns or bedidx in ignore_scores:
        # No usable score: use the interval length and write a new file.
        bed = bed >> mutate(score=f.end - f.start)
        bed.to_csv(derived_path, sep="\t", index=False, header=False)
        return derived_path

    return bedfile
Exemple #28
0
def test_zero_row_dfs():
    """Check shape, group vars and group sizes of verbs on a 0-row frame.

    The frame is grouped by ``g`` with ``_drop=False``. ``summarise`` drops
    the grouping; ``mutate``, ``filter``, ``arrange`` and ``select`` keep
    ``g`` as group variable and report no group sizes.
    """
    empty = tibble(a=[], b=[], g=[])
    grouped = group_by(empty, f.g, _drop=False)
    assert grouped.shape == (0, 3)
    assert group_vars(grouped) == ["g"]
    assert group_size(grouped) == []

    summarised = summarise(grouped, n=n())
    assert summarised.shape == (0, 2)
    assert group_vars(summarised) == []

    mutated = mutate(grouped, c=f.b + 1)
    assert mutated.shape == (0, 4)
    assert group_vars(mutated) == ["g"]
    assert group_size(mutated) == []

    filtered = filter(grouped, f.a == 100)
    assert filtered.shape == (0, 3)
    assert group_vars(filtered) == ["g"]
    assert group_size(filtered) == []

    arranged = arrange(grouped, f.a, f.g)
    assert arranged.shape == (0, 3)
    assert group_vars(arranged) == ["g"]
    assert group_size(arranged) == []

    # Selecting only ``a`` still yields two columns.
    selected = select(grouped, f.a)
    assert selected.shape == (0, 2)
    assert group_vars(selected) == ["g"]
    assert group_size(selected) == []
Exemple #29
0
def test_nb_fail():
    """Check ``across`` with a combined selection on ``iris`` keeps 150 rows.

    The selection is ``where(is_double)`` minus the two ``Petal`` columns,
    with ``round`` applied.
    """
    from datar.datasets import iris

    rounded = iris >> mutate(
        across(
            where(is_double) & ~c(f["Petal_Length"], f["Petal_Width"]),
            round,
        )
    )
    row_count = rounded >> nrow()
    assert row_count == 150
Exemple #30
0
def test_if_any_all_in_mutate():
    """Check ``if_any`` / ``if_all`` as ``mutate`` expressions.

    ``any`` is computed over the columns from ``x`` onwards; ``all`` is
    computed over the range ``x`` to ``any``. Both use the predicate
    ``x > 8``.
    """
    frame = tibble(x=c(1, 5, 10, 10), y=c(0, 0, 0, 10), z=c(10, 5, 1, 10))
    flagged = frame >> mutate(
        any=if_any(f[f.x:], lambda x: x > 8),
        all=if_all(f[f.x:f.any], lambda x: x > 8),
    )

    expected_any = [True, False, True, True]
    expected_all = [False, False, False, True]
    assert_iterable_equal(flagged["any"], expected_any)
    assert_iterable_equal(flagged["all"], expected_all)