Exemple #1
0
def test_select_doesnot_fail_if_some_names_missing():
    """select() picks ``x`` whether or not another column has an empty name."""
    named = tibble(x=range(1, 11), y=range(1, 11), z=range(1, 11))
    # Same frame, but colnames() is given "" as the third name.
    partly_named = colnames(named, ["x", "y", ""])

    from_named = select(named, f.x)
    assert from_named.equals(tibble(x=range(1, 11)))
    from_partly_named = select(partly_named, f.x)
    assert from_partly_named.equals(tibble(x=range(1, 11)))
Exemple #2
0
def test_works_on_na_names():
    """select() can pick a named column when another column name is NA."""
    frame = tibble(x=1, y=2, z=3) >> colnames(c("x", "y", NA))
    picked = select(frame, f.x)
    assert_iterable_equal(picked.x, [1])

    # Repeat with NA passed as the first name instead of the last.
    frame >>= colnames(c(NA, "y", "z"))
    picked = select(frame, f.y)
    assert_iterable_equal(picked.y, [2])
Exemple #3
0
def test_with_no_args_returns_nothing():
    """select() without any selection yields 0 columns and 32 rows."""
    bare = select(mtcars)
    assert ncol(bare) == 0
    assert nrow(bare) == 32

    # Unpacking an empty mapping must give the same result as no arguments.
    no_kwargs = {}
    unpacked = select(mtcars, **no_kwargs)
    assert ncol(unpacked) == 0
    assert nrow(unpacked) == 32
Exemple #4
0
def test_select_rename_with_dup_names():
    """Renaming through select() on a frame with two ``x`` columns raises."""
    dup_frame = tibble(tibble(x=1), x=2, _name_repair="minimal")

    # Selecting by the duplicated name is expected to raise ValueError.
    unique_msg = 'Names must be unique. Name "x" found at locations'
    with pytest.raises(ValueError, match=unique_msg):
        dup_frame >> select(y=f.x)

    # Selecting position 3 is expected to raise KeyError.
    with pytest.raises(KeyError):
        dup_frame >> select(y=3)
Exemple #5
0
def test_excluding_all_vars_returns_nothing():
    """Selections that leave no column yield a (32, 0) frame."""
    # Negated open-ended slice starting at mpg.
    by_slice = select(mtcars, ~f[f.mpg:])
    assert by_slice.shape == (32, 0)

    # Prefix "x" selects nothing.
    by_prefix = mtcars >> select(starts_with("x"))
    assert by_prefix.shape == (32, 0)

    # Negated "." pattern.
    by_regex = mtcars >> select(~matches("."))
    assert by_regex.shape == (32, 0)
Exemple #6
0
def test_can_select_with_duplicate_columns():
    """Positional and unique-name selection work despite a duplicated name."""
    dup_frame = tibble(tibble(x=1), x=2, y=1, _name_repair="minimal")

    # Positions 0 and 2, in both orders.
    forward = select(dup_frame, 0, 2)
    assert forward.columns.tolist() == ["x", "y"]
    backward = select(dup_frame, 2, 0)
    assert backward.columns.tolist() == ["y", "x"]

    # The non-duplicated name can still be selected by reference.
    by_name = select(dup_frame, f.y)
    assert by_name.columns.tolist() == ["y"]
Exemple #7
0
def test_rowwise_preserved_by_major_verbs():
    """Rowwise frames stay rowwise through the major verbs, except summarise.

    arrange, filter, mutate, rename, select and slice must return a
    ``TibbleRowwise``; summarise must return a ``TibbleGrouped``.
    """
    rowwise_df = rowwise(tibble(x=range(1, 6), y=range(5, 0, -1)), f.x)

    arranged = arrange(rowwise_df, f.y)
    assert isinstance(arranged, TibbleRowwise)
    assert group_vars(arranged) == ["x"]

    filtered = filter(rowwise_df, f.x < 3)
    assert isinstance(filtered, TibbleRowwise)
    assert group_vars(filtered) == ["x"]

    mutated = mutate(rowwise_df, x=f.x + 1)
    assert isinstance(mutated, TibbleRowwise)
    assert group_vars(mutated) == ["x"]

    # Renaming the grouping column must carry over to the group variables.
    renamed = rename(rowwise_df, X=f.x)
    assert isinstance(renamed, TibbleRowwise)
    assert group_vars(renamed) == ["X"]

    selected = select(rowwise_df, "x")
    assert isinstance(selected, TibbleRowwise)
    assert group_vars(selected) == ["x"]

    sliced = slice(rowwise_df, c(0, 0))
    assert isinstance(sliced, TibbleRowwise)
    assert group_vars(sliced) == ["x"]

    # Except for summarise
    summarised = summarise(rowwise_df, z=mean(f.x, f.y))
    assert isinstance(summarised, TibbleGrouped)
    assert group_vars(summarised) == ["x"]
Exemple #8
0
def test_0_groups_select():
    """select() on a grouped frame with no rows keeps dims and columns."""
    empty_grouped = tibble(x=1).loc[[], :] >> group_by(f.x)
    selected = empty_grouped >> select(f.x)

    dim_before = empty_grouped >> dim()
    dim_after = selected >> dim()
    assert dim_before == dim_after
    assert empty_grouped.columns.tolist() == selected.columns.tolist()
Exemple #9
0
def test_zero_row_dfs():
    """Verbs applied to a zero-row grouped frame keep shape and grouping."""
    empty = tibble(a=[], b=[], g=[])
    grouped = group_by(empty, f.g, _drop=False)
    assert grouped.shape == (0, 3)
    assert group_vars(grouped) == ["g"]
    assert group_size(grouped) == []

    # summarise: group column plus the new count, no grouping left.
    summarised = summarise(grouped, n=n())
    assert summarised.shape == (0, 2)
    assert group_vars(summarised) == []

    mutated = mutate(grouped, c=f.b + 1)
    assert mutated.shape == (0, 4)
    assert group_vars(mutated) == ["g"]
    assert group_size(mutated) == []

    filtered = filter(grouped, f.a == 100)
    assert filtered.shape == (0, 3)
    assert group_vars(filtered) == ["g"]
    assert group_size(filtered) == []

    arranged = arrange(grouped, f.a, f.g)
    assert arranged.shape == (0, 3)
    assert group_vars(arranged) == ["g"]
    assert group_size(arranged) == []

    # select: only ``a`` is requested, yet two columns are expected back.
    selected = select(grouped, f.a)
    assert selected.shape == (0, 2)
    assert group_vars(selected) == ["g"]
    assert group_size(selected) == []
Exemple #10
0
def test_can_be_before_group_by():
    """select() can sit between group_by() and summarise() in a pipeline."""
    ids = c(1, 1, 2, 2, 2, 3, 3, 4, 4, 5)
    years = c(2013, 2013, 2012, 2013, 2013, 2013, 2012, 2012, 2013, 2013)
    frame = tibble(id=ids, year=years, var1=rnorm(10))

    aggregated = (
        frame
        >> group_by(f.id, f.year)
        >> select(f.id, f.year, f.var1)
        >> summarise(var1=mean(f.var1))
    )

    assert_iterable_equal(names(aggregated), ["id", "year", "var1"])
Exemple #11
0
def test_group_split_keep_false_does_not_tryto_remove_virtual_grouping_cols():
    """group_split(_keep=False) copes with a grouping column created on the fly.

    Ported from: test_that("group_split(keep=FALSE) does not try to
    remove virtual grouping columns (#4045)").
    """
    head4 = iris.head(4).copy()
    grouped = group_by(head4, _bootstrap=[0, 1, 0, 1])
    # Row positions expected in each of the two groups.
    first_rows, second_rows = [0, 2], [1, 3]

    pieces = group_split.list(grouped, _keep=False)
    head4 = select(head4, ~f._bootstrap)
    assert len(pieces) == 2
    assert_frame_equal(
        pieces[0], head4.iloc[first_rows, :].reset_index(drop=True)
    )
    assert_frame_equal(
        pieces[1], head4.iloc[second_rows, :].reset_index(drop=True)
    )
Exemple #12
0
def test_tidyselect_funs():
    """Exercise the tidyselect helpers: where, ends_with, contains,
    all_of, any_of and num_range."""
    # tidyselect.where with a plain predicate
    def isupper(ser):
        return ser.name.isupper()

    frame = tibble(x=1, X=2, y=3, Y=4)
    picked = frame >> select(where(isupper))
    assert picked.columns.tolist() == ["X", "Y"]

    # tidyselect.where with a registered verb as the predicate
    @register_verb
    def islower(_data, series):
        return [series.name.islower(), True]

    picked = frame >> select(where(islower))
    assert picked.columns.tolist() == ["x", "y"]

    # A predicate that is always False leaves no columns.
    picked = frame >> select(where(lambda x: False))
    assert picked.shape == (1, 0)

    # Both "y" and "Y" are expected from the suffix/substring helpers.
    picked = frame >> select(ends_with("y"))
    assert picked.columns.tolist() == ["y", "Y"]
    picked = frame >> select(contains("y"))
    assert picked.columns.tolist() == ["y", "Y"]

    # all_of raises on the missing name "a"; any_of skips it.
    with pytest.raises(KeyError):
        frame >> select(all_of(["x", "a"]))

    picked = frame >> select(any_of(["x", "y"]))
    assert picked.columns.tolist() == ["x", "y"]
    picked = frame >> select(any_of(["x", "a"]))
    assert picked.columns.tolist() == ["x"]

    generated = num_range("a", 3, width=2)
    assert generated == ["a00", "a01", "a02"]

    # With a duplicated column name, contains() yields a single "X".
    dup_frame = tibble(tibble(X=1), X=2, _name_repair="minimal")
    picked = dup_frame >> select(contains("X"))
    assert picked.columns.tolist() == ["X"]
Exemple #13
0
def avg_weights_and_filter(owfiles):
    """Average bin weights across files and write the filtered table.

    Each file in ``owfiles`` is read as a tab-separated table with a header
    row and stacked with ``bind_rows``. Rows are grouped by ``chrom1``,
    ``start1`` and ``end1``; within each group the names are pasted together
    with ":" and the weights are averaged into ``score``. Only rows with
    ``score >= cutoff`` are kept. ``cutoff`` and ``outfile`` are not
    parameters; they are read from an outer scope.

    Args:
        owfiles: Iterable of paths to the per-file weight tables.

    Returns:
        A tuple ``(ofile, ncols)``: the path written to and the number of
        columns in the written table.
    """
    _log("- Averaging bin weights")
    ofile = outfile.parent / "_avg_weights_filtered.bed"

    # NOTE(review): presumably bind_rows accepts None as the initial
    # left-hand side so the first file seeds the table -- confirm.
    stacked = None
    for owfile in owfiles:
        table = pandas.read_csv(owfile, sep="\t", header=0)
        stacked = stacked >> bind_rows(table)

    grouped = stacked >> group_by(f.chrom1, f.start1, f.end1)
    averaged = grouped >> summarise(
        chrom=f.chrom1,
        start=f.start1,
        end=f.end1,
        name=paste(f.name, collapse=":"),
        score=mean(f.weight),
        strand="+",
    )
    kept = averaged >> filter_(f.score >= cutoff) >> ungroup()
    # Drop the original grouping columns; chrom/start/end carry the values.
    result = kept >> select(~f.chrom1, ~f.start1, ~f.end1)

    result.to_csv(ofile, sep="\t", index=False, header=False)
    return ofile, len(result.columns)
Exemple #14
0
def test_arguments_to_select_dont_match_vars_select_arguments():
    """Keywords ``var``, ``exclude`` and ``include`` act as plain renames."""
    base = tibble(a=1)

    # ``var`` on ungrouped and grouped data.
    got = select(base, var=f.a)
    assert got.equals(tibble(var=1))

    got = select(group_by(base, f.a), var=f.a)
    want = group_by(tibble(var=1), f.var)
    assert got.equals(want)
    assert group_vars(got) == group_vars(want)

    # ``exclude`` / ``include`` on ungrouped data.
    got = select(base, exclude=f.a)
    assert got.equals(tibble(exclude=1))
    got = select(base, include=f.a)
    assert got.equals(tibble(include=1))

    # ``exclude`` / ``include`` on grouped data.
    got = select(group_by(base, f.a), exclude=f.a)
    want = group_by(tibble(exclude=1), f.exclude)
    assert got.equals(want)
    assert group_vars(got) == group_vars(want)

    got = select(group_by(base, f.a), include=f.a)
    want = group_by(tibble(include=1), f.include)
    assert got.equals(want)
    assert group_vars(got) == group_vars(want)
Exemple #15
0
def test_keeps_attributes():
    """Entries in ``attrs`` survive select()."""
    frame = tibble(x=1)
    frame.attrs["a"] = "b"

    selected = select(frame, f.x)
    assert selected.attrs["a"] == "b"
Exemple #16
0
def test_0_col_df():
    """distinct() on a frame with no columns still reports 0 columns."""
    no_cols = tibble(x=range(10)) >> select(~f.x)
    n_columns = no_cols >> distinct() >> ncol()
    assert n_columns == 0
Exemple #17
0
def test_transmute_can_handle_auto_splicing():
    """transmute() of an unnamed tibble equals selecting the same columns."""
    spliced = iris >> transmute(tibble(f.Sepal_Length, f.Sepal_Width))
    selected = iris >> select(f.Sepal_Length, f.Sepal_Width)
    assert spliced.equals(selected)
Exemple #18
0
def test_slice_works_with_0col_dfs():
    """slice() on a frame without columns still returns one row."""
    n_rows = (
        tibble(a=[1, 2, 3])
        >> select(~f.a)
        >> slice(1)
        >> nrow()
    )
    assert n_rows == 1
Exemple #19
0
def test_negating_empty_match_returns_everything():
    """Negating a helper that matches no column returns the whole frame."""
    frame = tibble(x=[1, 2, 3], y=[3, 2, 1])
    kept = frame >> select(~starts_with("xyz"))
    assert kept.equals(frame)
Exemple #20
0
def test_can_select_data_pronoun():
    """Passing ``mtcars.cyl`` selects the same as passing ``f.cyl``."""
    via_frame_attr = select(mtcars, mtcars.cyl)
    via_f = select(mtcars, f.cyl)
    assert via_frame_attr.equals(via_f)
Exemple #21
0
def test_non_syntactic_grouping_variable_is_preserved():
    """A grouping column named "a b" is kept by an empty select()."""
    # Grouping given as a plain string.
    by_string = DataFrame({"a b": [1]}) >> group_by("a b") >> select()
    assert by_string.columns.tolist() == ["a b"]

    # Grouping given through f[...] subscription.
    by_ref = DataFrame({"a b": [1]}) >> group_by(f["a b"]) >> select()
    assert by_ref.columns.tolist() == ["a b"]
Exemple #22
0
def test_can_select_with_list_of_strs():
    """Strings and a c() of strings can be mixed; repeats collapse."""
    selected = select(mtcars, "cyl", "disp", c("cyl", "am", "drat"))
    # https://github.com/pwwang/datar/issues/23
    # expected = mtcars[c("cyl", "disp", "am", "drat")]
    expected = mtcars[["cyl", "disp", "am", "drat"]]
    assert selected.equals(expected)
Exemple #23
0
def test_grouping_variables_preserved_with_a_message(caplog):
    """select() on grouped data re-adds the group column and logs about it."""
    grouped = tibble(g=[1, 2, 3], x=[3, 2, 1]) >> group_by(f.g)
    selected = select(grouped, f.x)

    assert "Adding missing grouping variables" in caplog.text
    assert selected.columns.tolist() == ["g", "x"]
Exemple #24
0
def test_preserves_grouping():
    """Renaming the grouping column via select() updates the group variables."""
    gf = group_by(tibble(g=[1, 2, 3], x=[3, 2, 1]), f.g)

    out = select(gf, h=f.g)
    # Compare with ``==``: the former ``assert group_vars(out), ["h"]`` used
    # ``["h"]`` as the assertion message, so it only checked truthiness.
    assert group_vars(out) == ["h"]
Exemple #25
0
def test_treats_null_inputs_as_empty():
    """``None`` arguments to select() are ignored."""
    with_nones = select(mtcars, None, f.cyl, None)
    without_nones = select(mtcars, f.cyl)
    assert with_nones.equals(without_nones)
Exemple #26
0
def test_can_select_with_strings():
    """Keyword renames accept column names given as strings."""
    renames = {"foo": "cyl", "bar": "am"}
    from_strings = select(mtcars, **renames)
    from_refs = select(mtcars, foo=f.cyl, bar=f.am)
    assert from_strings.equals(from_refs)
Exemple #27
0
def test_select_add_group_vars():
    """select() on grouped data puts the group column before the selection."""
    selected = (
        mtcars
        >> group_by(f.vs)
        >> select(f.mpg)
    )
    assert selected.columns.tolist() == ["vs", "mpg"]