Esempio n. 1
0
def test_can_move_blocks_of_vars():
    """relocate() moves all columns matched by a predicate as one block."""
    frame = tibble(x=1, a="a", y=2, b="a")

    # No destination given: the string columns end up first.
    strings_first = frame >> relocate(where(is_string_dtype))
    assert strings_first.columns.tolist() == ["a", "b", "x", "y"]

    # With `_after`: the string columns end up behind the numeric ones.
    strings_last = frame >> relocate(
        where(is_string_dtype),
        _after=where(is_numeric_dtype),
    )
    assert strings_last.columns.tolist() == ["x", "y", "a", "b"]
Esempio n. 2
0
def test_cache_key():
    """Two across() calls differing only in the function give distinct results.

    The same column selection is summarised once with ``mean`` and once with
    ``max`` inside a grouped mutate(); the outcome must match computing both
    values directly from ``f.a``.
    NOTE(review): presumably guards against across() results being cached on
    the selection alone -- confirm against the implementation.
    """
    grouped = tibble(g=rep([1, 2], each=2), a=range(1, 5)) >> group_by(f.g)

    via_across = grouped >> mutate(
        tibble(
            x=across(where(is_numeric), mean).a,
            y=across(where(is_numeric), max).a,
        )
    )
    direct = grouped >> mutate(x=mean(f.a), y=max(f.a))

    assert_frame_equal(via_across, direct)
Esempio n. 3
0
def test_works_sequentially():
    """Each across() in one mutate() sees the columns produced before it."""
    frame = tibble(a=1)

    # `x` counts only `a`; `y` is evaluated afterwards and also counts `x`.
    counted = frame >> mutate(
        x=ncol(across(where(is_numeric))),
        y=ncol(across(where(is_numeric))),
    )
    expected = tibble(a=1, x=1, y=2)
    assert counted.equals(expected)

    # `a` is replaced by a string first, so `y` finds no numeric column.
    replaced = frame >> mutate(a="x", y=ncol(across(where(is_numeric))))
    expected = tibble(a="x", y=0)
    assert replaced.equals(expected)
Esempio n. 4
0
def test_error_messages():
    """across()/c_across() raise ValueError with the expected messages."""
    outside_verb = "must only be used inside verbs"

    # A non-callable `_fns` argument is rejected.
    with pytest.raises(ValueError, match="Argument `_fns` of across must be"):
        tibble(x=1) >> summarise(res=across(where(is_numeric), 42))

    # Both helpers refuse to run outside of a verb.
    with pytest.raises(ValueError, match=outside_verb):
        across()

    with pytest.raises(ValueError, match=outside_verb):
        c_across()
Esempio n. 5
0
def test_nb_fail():
    """Rounding a predicate-and-negation column selection keeps all 150 rows.

    The selection combines ``where(is_double)`` with the negation of two
    explicitly named columns of the iris dataset.
    """
    from datar.datasets import iris

    rounded = iris >> mutate(
        across(
            where(is_double) & ~c(f["Petal_Length"], f["Petal_Width"]),
            round,
        )
    )
    row_count = rounded >> nrow()
    assert row_count == 150
Esempio n. 6
0
def test_tidyselect_funs():
    """Exercise the tidyselect-style helpers used inside select()."""

    # tidyselect.where with a plain predicate on the column name
    def isupper(ser):
        return ser.name.isupper()

    frame = tibble(x=1, X=2, y=3, Y=4)
    picked = frame >> select(where(isupper))
    assert picked.columns.tolist() == ["X", "Y"]

    # where() with a predicate that has been registered as a verb
    @register_verb
    def islower(_data, series):
        return [series.name.islower(), True]

    picked = frame >> select(where(islower))
    assert picked.columns.tolist() == ["x", "y"]

    # A predicate that never matches leaves one row and no columns.
    picked = frame >> select(where(lambda x: False))
    assert picked.shape == (1, 0)

    # ends_with / contains match both "y" and "Y" here.
    picked = frame >> select(ends_with("y"))
    assert picked.columns.tolist() == ["y", "Y"]
    picked = frame >> select(contains("y"))
    assert picked.columns.tolist() == ["y", "Y"]

    # all_of raises on a missing name ...
    with pytest.raises(KeyError):
        frame >> select(all_of(["x", "a"]))

    # ... whereas any_of silently drops it.
    picked = frame >> select(any_of(["x", "y"]))
    assert picked.columns.tolist() == ["x", "y"]
    picked = frame >> select(any_of(["x", "a"]))
    assert picked.columns.tolist() == ["x"]

    # num_range builds zero-padded names.
    generated = num_range("a", 3, width=2)
    assert generated == ["a00", "a01", "a02"]

    # Duplicated column names (minimal name repair) are selected once.
    frame = tibble(tibble(X=1), X=2, _name_repair="minimal")
    picked = frame >> select(contains("X"))
    assert picked.columns.tolist() == ["X"]
Esempio n. 7
0
def test_names_output():
    """Check the column names across() produces for each `_fns`/`_names` form."""

    def column_names(frame):
        return frame.columns.tolist()

    grouped = tibble(x=1, y=2, z=3, s="") >> group_by(f.x)

    # No functions: columns pass through, optionally renamed via `_names`.
    result = grouped >> summarise(across())
    assert column_names(result) == ["x", "y", "z", "s"]

    result = grouped >> summarise(across(_names="id_{_col}"))
    assert column_names(result) == ["x", "id_y", "id_z", "id_s"]

    # A single function keeps the column names unless `_names` is given.
    result = grouped >> summarise(across(where(is_numeric), mean))
    assert column_names(result) == ["x", "y", "z"]

    result = grouped >> summarise(
        across(where(is_numeric), mean, _names="mean_{_col}")
    )
    assert column_names(result) == ["x", "mean_y", "mean_z"]

    # A dict of functions: the keys become suffixes.
    result = grouped >> summarise(
        across(where(is_numeric), {"mean": mean, "sum": sum})
    )
    assert column_names(result) == ["x", "y_mean", "y_sum", "z_mean", "z_sum"]

    # Different from R's list
    result = grouped >> summarise(
        across(where(is_numeric), {"mean": mean, 1: sum})
    )
    assert column_names(result) == ["x", "y_mean", "y_1", "z_mean", "z_1"]

    # Different from R's list
    result = grouped >> summarise(
        across(where(is_numeric), {0: mean, "sum": sum})
    )
    assert column_names(result) == ["x", "y_0", "y_sum", "z_0", "z_sum"]

    # A list of functions: 0-based positions become suffixes ...
    result = grouped >> summarise(across(where(is_numeric), [mean, sum]))
    assert column_names(result) == ["x", "y_0", "y_1", "z_0", "z_1"]

    # ... while `{_fn1}` yields 1-based positions.
    result = grouped >> summarise(
        across(where(is_numeric), [mean, sum], _names="{_col}_{_fn1}")
    )
    assert column_names(result) == ["x", "y_1", "y_2", "z_1", "z_2"]

    # `_names` can also put the function name in front of the column name.
    result = grouped >> summarise(
        across(
            where(is_numeric),
            {"mean": mean, "sum": sum},
            _names="{_fn}_{_col}",
        )
    )
    assert column_names(result) == ["x", "mean_y", "sum_y", "mean_z", "sum_z"]
Esempio n. 8
0
def test_reject_non_vectors():
    """across() raises ValueError when `_fns` is an arbitrary object."""
    expected_message = "Argument `_fns` of across must be"
    with pytest.raises(ValueError, match=expected_message):
        tibble(x=1) >> summarise(across(where(is_numeric), object()))
Esempio n. 9
0
def test_used_separately():
    """Two across() calls with different selections work in one mutate()."""
    frame = tibble(a=1, b=2)

    # `x` counts both numeric columns; `y` counts only the selected `a`.
    counted = frame >> mutate(
        x=ncol(across(where(is_numeric))),
        y=ncol(across(f.a)),
    )
    expected = tibble(a=1, b=2, x=2, y=1)

    assert counted.equals(expected)
Esempio n. 10
0
def test_used_twice():
    """Two across() calls can be combined inside a single expression."""
    frame = tibble(a=1, b=2)

    # 2 numeric columns + 1 explicitly selected column = 3
    summed = frame >> mutate(
        x=ncol(across(where(is_numeric))) + ncol(across(f.a))
    )
    expected = tibble(a=1, b=2, x=3)

    assert summed.equals(expected)
Esempio n. 11
0
def test_keep_used_not_affected_by_across():
    """mutate(across(...), _keep="unused") leaves the column set unchanged."""
    frame = tibble(x=1, y=2, z=3, a="a", b="b", c="c")

    mutated = frame >> mutate(
        across(where(is_numeric), identity),
        _keep="unused",
    )

    original_names = frame.columns.tolist()
    assert mutated.columns.tolist() == original_names