Exemple #1
0
def test_ignores_null_empty():
    """bind_rows() copes with NULL, empty, zero-row and zero-column inputs."""
    base = tibble(a=1)

    # Binding NULL leaves the frame unchanged.
    bound = base >> bind_rows(NULL)
    assert bound.equals(base)

    # So does binding a completely empty tibble.
    empty = tibble()
    bound = base >> bind_rows(empty)
    assert bound.equals(base)

    # A frame that has the columns but no rows contributes nothing.
    zero_rows = base.iloc[[], :]
    bound = base >> bind_rows(zero_rows)
    assert bound.equals(base)

    # A frame with a row but no columns still adds that row; its cell in
    # column `a` is missing, which is made visible through fillna().
    zero_cols = base.iloc[:, []]
    bound = base >> bind_rows(zero_cols)
    n_rows = bound >> nrow()
    assert n_rows == 2

    filled = bound.fillna(1234) >> get(1, f.a)
    assert filled == 1234

    # Same check with the column-less frame on the left-hand side: the
    # missing cell is now in the first row.
    bound = zero_cols >> bind_rows(base)
    n_rows = bound >> nrow()
    assert n_rows == 2

    filled = bound.fillna(888) >> get(0, f.a)
    assert filled == 888
Exemple #2
0
def test_can_recycle_when_adding_rows():
    """add_row() recycles a scalar so it pairs with a length-2 column."""
    extended = add_row(iris, Sepal_Length=[-1, -2], Species="unknown")
    assert nrow(extended) == nrow(iris) + 2

    # The two new lengths are appended after the original values.
    expected_lengths = iris.Sepal_Length.tolist() + [-1, -2]
    assert_iterable_equal(extended.Sepal_Length, expected_lengths)

    # The single species value is repeated for both new rows.
    expected_species = iris.Species.tolist() + ["unknown"] * 2
    assert_iterable_equal(extended.Species, expected_species)
Exemple #3
0
def test_can_add_row():
    """add_row() appends one row, keeps the columns, and leaves `c` as NA."""
    appended = add_row(df_all, a=4, b=3)

    # Column set and order are unchanged; exactly one row is added.
    assert appended.columns.tolist() == df_all.columns.tolist()
    assert nrow(appended) == nrow(df_all) + 1

    # Supplied columns receive the new values ...
    assert_iterable_equal(appended.a, [1.0, 2.5, NA, 4])
    assert_iterable_equal(appended.b, [1.0, 2.0, NA, 3.0])
    # ... while the column that was not supplied gets NA in the new row.
    assert_iterable_equal(appended.c, [True, False, NA, NA])
Exemple #4
0
def test_joins_matches_nas_by_default():
    """Missing keys on both sides match each other in inner/semi joins."""
    # test_that("joins matches NAs by default (#892, #2033)", {
    left = tibble(x=c(None, 1))
    right = tibble(x=c(None, 2))

    # The only key shared by both frames is the missing one.
    assert nrow(inner_join(left, right, by=f.x)) == 1
    assert nrow(semi_join(left, right, by=f.x)) == 1
Exemple #5
0
def test_with_no_args_returns_nothing():
    """select() without selections keeps all 32 rows but no columns."""
    # No arguments at all.
    no_columns = select(mtcars)
    assert ncol(no_columns) == 0
    assert nrow(no_columns) == 32

    # An empty keyword expansion behaves the same way.
    no_columns = select(mtcars, **{})
    assert ncol(no_columns) == 0
    assert nrow(no_columns) == 32
Exemple #6
0
def test_grouped_filter_handles_indices():
    """A mutate() after a grouped filter() keeps the group structure."""
    filtered = iris >> group_by(f.Species) >> filter(f.Sepal_Length > 5)
    mutated = filtered >> mutate(Petal=f.Petal_Width * f.Petal_Length)

    assert nrow(filtered) == nrow(mutated)

    # Row indices per group and the group keys must be identical.
    rows_before = group_rows(filtered)
    rows_after = group_rows(mutated)
    assert rows_before == rows_after
    assert all(group_keys(filtered) == group_keys(mutated))
Exemple #7
0
def test_works_on_empty_data_frames():
    """mutate() works on an empty tibble, with or without new columns."""
    empty = tibble()

    # No expressions: the result is still empty.
    mutated = empty >> mutate()
    assert nrow(mutated) == 0
    assert len(mutated) == 0

    # An empty column can be added, giving a 0 x 1 frame.
    mutated = empty >> mutate(x=[])
    assert mutated.columns.tolist() == ["x"]
    assert nrow(mutated) == 0
    assert ncol(mutated) == 1
Exemple #8
0
def test_contains():
    """filter() with is_element() gives 2 rows, ungrouped and grouped."""
    data = tibble(a=c("a", "b", "ab"), g=c(1, 1, 2))

    # Ungrouped frame.
    kept = data >> filter(is_element(f.a, letters))
    n_kept = nrow(kept)
    assert n_kept == 2

    # Grouped frame: same number of surviving rows.
    kept = data >> group_by(f.g) >> filter(is_element(f.a, letters))
    n_kept = nrow(kept)
    assert n_kept == 2
Exemple #9
0
def test_min_and_max_ignore_nas():
    """slice_min()/slice_max() skip NA; an all-NA column yields no rows."""
    data = tibble(id=range(1, 5), x=c(2, NA, 1, 2), y=[NA] * 4)

    # The row whose x is NA (id 2) never shows up in the result.
    smallest = data >> slice_min(f.x, n=2)
    assert smallest.id.tolist() == [3, 1, 4]

    n_rows = data >> slice_min(f.y, n=2) >> nrow()
    assert n_rows == 0

    largest = data >> slice_max(f.x, n=2)
    assert largest.id.tolist() == [1, 4]

    n_rows = data >> slice_max(f.y, n=2) >> nrow()
    assert n_rows == 0
Exemple #10
0
def test_0_vars(df):
    """group_by() without variables gives one group holding every row.

    The ``df`` argument is not used inside the body.
    """
    # No grouping arguments at all.
    grouping = group_data(group_by(iris))
    assert names(grouping) == ["_rows"]
    assert_iterable_equal(grouping._rows[0], range(nrow(iris)))

    # An empty keyword expansion gives the same result.
    grouping = group_data(group_by(iris, **{}))
    assert names(grouping) == ["_rows"]
    assert_iterable_equal(grouping._rows[0], range(nrow(iris)))
Exemple #11
0
def test_slice_handles_na():
    """NA entries among slice() indices select no rows."""
    plain = tibble(x=[1, 2, 3])
    assert nrow(slice(plain, NA)) == 0
    assert nrow(slice(plain, c(1, NA))) == 1
    n_rows = plain >> slice(c(~c(1), NA)) >> nrow()
    assert n_rows == 2

    # Grouped frame with two groups of two rows each.
    grouped = tibble(x=[1, 2, 3, 4], g=rep([1, 2], 2)) >> group_by(f.g)
    assert nrow(slice(grouped, c(1, NA))) == 2
    n_rows = grouped >> slice(c(~c(1), NA)) >> nrow()
    assert n_rows == 2
Exemple #12
0
def test_min_and_max_return_ties_by_default():
    """slice_min()/slice_max() keep ties unless with_ties=False."""
    data = tibble(x=c(1, 1, 1, 2, 2))

    # Default: all tied rows are returned (three 1s, two 2s).
    n_rows = data >> slice_min(f.x) >> nrow()
    assert n_rows == 3
    n_rows = data >> slice_max(f.x) >> nrow()
    assert n_rows == 2

    # with_ties=False: exactly one row each.
    n_rows = data >> slice_min(f.x, with_ties=False) >> nrow()
    assert n_rows == 1
    n_rows = data >> slice_max(f.x, with_ties=False) >> nrow()
    assert n_rows == 1
Exemple #13
0
def test_functions_silently_truncate_results():
    """Asking for 6 rows of a 5-row frame returns all 5 rows."""
    data = tibble(x=range(1, 6))

    n_rows = data >> slice_head(n=6) >> nrow()
    assert n_rows == 5

    n_rows = data >> slice_tail(n=6) >> nrow()
    assert n_rows == 5

    n_rows = data >> slice_sample(n=6) >> nrow()
    assert n_rows == 5

    n_rows = data >> slice_min(f.x, n=6) >> nrow()
    assert n_rows == 5

    n_rows = data >> slice_max(f.x, n=6) >> nrow()
    assert n_rows == 5
Exemple #14
0
def test_rowid_to_column():
    """rowid_to_column() adds a 0-based id and rejects an existing name."""
    # test_that("rowid_to_column keeps the tbl classes", {
    # Default column name: `rowid`.
    with_rowid = rowid_to_column(mtcars)
    assert not has_rownames(with_rowid)
    assert_iterable_equal(with_rowid.rowid, seq_len(nrow(mtcars)) - 1)
    with pytest.raises(ValueError, match="duplicated"):
        rowid_to_column(mtcars, f.wt)

    # Custom column name.
    with_custom_id = rowid_to_column(mtcars, "row_id")
    assert not has_rownames(with_custom_id)
    assert_iterable_equal(
        with_custom_id.row_id,
        seq_len(nrow(mtcars)) - 1,
    )
    with pytest.raises(ValueError, match="duplicated"):
        rowid_to_column(with_custom_id, f.wt)
Exemple #15
0
def test_one_group_for_NA():
    """All NA values fall into one group: 10 distinct values + NA = 11."""
    values = c(NA, NA, NA, range(10, 0, -1), range(10, 0, -1))
    weights = numpy.array(c(20, 30, 40, range(1, 11), range(1, 11))) * 10

    assert n_distinct(values, na_rm=False) == 11

    counted = (
        tibble(x=values, w=weights)
        >> group_by(f.x)
        >> summarise(n=n())
    )
    assert nrow(counted) == 11
Exemple #16
0
def test_group_modify_makes_a_grouped_df():
    """group_modify() returns a grouped frame with consistent group rows."""
    # Two rows per `cyl` group, three groups.
    top_two = group_by(mtcars, f.cyl) >> group_modify(lambda df: head(df, 2))
    assert nrow(top_two) == 6
    assert group_rows(top_two) == [[0, 1], [2, 3], [4, 5]]

    # Only one species survives the filter, so tally() gives one row.
    tallied = (
        iris
        >> group_by(f.Species)
        >> filter(f.Species == "setosa")
        >> group_modify(lambda df: tally(df))
    )
    assert nrow(tallied) == 1
    assert group_rows(tallied) == [[0]]

    # Same pipeline with _drop=False.
    # NOTE(review): the commented-out expectations below (3 rows / 3 groups)
    # are presumably what keeping empty groups would produce - confirm.
    tallied = (
        iris
        >> group_by(f.Species, _drop=False)
        >> filter(f.Species == "setosa")
        >> group_modify(lambda df: tally(df))
    )
    # assert nrow(res) == 3
    assert nrow(tallied) == 1
    # assert group_rows(res) == [[0], [1], [2]]
    assert group_rows(tallied) == [[0]]
Exemple #17
0
def test_complex():
    """bind_rows() stacks complex-valued columns in order."""
    first = tibble(r=[1 + 1j, 2 - 1j])
    second = tibble(r=[1 - 1j, 2 + 1j])
    stacked = first >> bind_rows(second)

    n_rows = stacked >> nrow()
    assert n_rows == 4
    assert stacked.r.tolist() == first.r.tolist() + second.r.tolist()
Exemple #18
0
def test_hierachical_data():
    """bind_rows() accepts dicts, given as a list or piped one by one."""
    # A list of dicts piped into bind_rows().
    records = [dict(x=1, y="a"), dict(x=2, y="b")]
    bound = records >> bind_rows()
    n_rows = nrow(bound)
    assert n_rows == 2
    x_is_int = is_int(bound.x)
    assert x_is_int
    y_is_character = is_character(bound.y)
    assert y_is_character

    # A single dict piped in, with a second dict as argument.
    bound = dict(x=1, y="a") >> bind_rows(dict(x=2, y="b"))
    n_rows = nrow(bound)
    assert n_rows == 2
    x_is_int = is_int(bound.x)
    assert x_is_int
    y_is_character = is_character(bound.y)
    assert y_is_character
Exemple #19
0
def test_drop():
    """With _drop=True, counting a filtered grouped frame gives one row."""
    setosa_only = iris >> filter(f.Species == "setosa")
    grouped = setosa_only >> group_by(f.Species, _drop=True)

    n_groups = grouped >> count() >> nrow()
    assert n_groups == 1
Exemple #20
0
def test_nb_fail():
    """across() with a where()/negated-selection mix keeps all 150 rows."""
    from datar.datasets import iris

    # Round every double column except the two petal measurements.
    rounded = iris >> mutate(
        across(
            where(is_double) & ~c(f["Petal_Length"], f["Petal_Width"]),
            round,
        )
    )
    n_rows = rounded >> nrow()
    assert n_rows == 150
Exemple #21
0
def test_preserves_grouping():
    """mutate() keeps the grouping variables and refreshes group data."""
    grouped = group_by(tibble(x=[1, 2], y=2), f.x)

    # Overwriting the grouping column with a constant leaves one group.
    overwritten = mutate(grouped, x=1)
    assert group_vars(overwritten) == ["x"]
    assert nrow(group_data(overwritten)) == 1

    # Adding an unrelated column leaves the group data untouched.
    augmented = mutate(grouped, z=1)
    assert group_data(augmented).equals(group_data(grouped))
Exemple #22
0
def test_slice_handles_numeric_input():
    """slice(0) picks the same rows as filter(row_number() == 1)."""
    # Grouped: one row per `cyl` group.
    by_cyl = mtcars >> arrange(f.cyl) >> group_by(f.cyl)
    sliced = by_cyl >> slice(0)
    assert nrow(sliced) == 3
    filtered = by_cyl >> filter(row_number() == 1)
    assert_frame_equal(sliced, filtered)

    # Ungrouped: the first row of the whole frame.
    sliced_plain = mtcars >> slice(0) >> as_tibble()
    filtered_plain = mtcars >> filter(row_number() == 1)
    assert_frame_equal(sliced_plain, filtered_plain)
Exemple #23
0
def test_group_modify_map_want_functions_with_at_least_1_arg():
    """group_modify()/group_map() raise TypeError for a zero-arg callback."""

    def head1(df):
        # Keep only the first row of each group.
        return head(df, 1)

    def head_err():
        # Takes no argument, so it cannot receive the group frame.
        return 1

    by_species = iris >> group_by(f.Species)
    assert nrow(group_modify(by_species, head1)) == 3
    assert len(list(group_map(by_species, head1))) == 3

    with pytest.raises(TypeError):
        group_modify(by_species, head_err)
    with pytest.raises(TypeError):
        group_map.list(by_species, head_err)  # force function to execute
Exemple #24
0
def test_complex_expression_as_value():
    """A sample() call can be used as a mutate() value on a grouped frame."""
    # https://stackoverflow.com/questions/30714810/
    # pandas-group-by-and-aggregate-column-1-with-condition-from-column-2
    users = tibble(
        user=rep(c("1", 2, 3, 4), each=5),
        cancel_date=rep(c(12, 5, 10, 11), each=5),
    )
    by_user = users >> group_by(f.user)

    with_login = by_user >> mutate(
        # multiple size not supported yet
        # login=sample(f[1 : ], size=n(), replace=True)
        login=sample(f[1:], size=1, replace=True)
    )
    # 4 users x 5 rows each.
    assert nrow(with_login) == 20
Exemple #25
0
def test_slice_family_on_rowwise_df():
    """The slice family keeps the rowwise class on a rowwise frame.

    Apart from the ``prop=0.1`` case, which is empty, every result is
    expected to hold all 5 rows.
    """
    rowwise_df = tibble(x=f[1:6]) >> rowwise()

    sliced = rowwise_df >> slice_head(prop=0.1)
    assert sliced.shape[0] == 0

    sliced = rowwise_df >> slice([0, 1, 2])
    assert isinstance(sliced, TibbleRowwise)
    assert nrow(sliced) == 5

    sliced = rowwise_df >> slice_head(n=3)
    assert isinstance(sliced, TibbleRowwise)
    assert nrow(sliced) == 5

    sliced = rowwise_df >> slice_tail(n=3)
    assert isinstance(sliced, TibbleRowwise)
    assert nrow(sliced) == 5

    sliced = rowwise_df >> slice_min(f.x, n=3)
    assert isinstance(sliced, TibbleRowwise)
    assert nrow(sliced) == 5

    sliced = rowwise_df >> slice_max(f.x, n=3)
    assert isinstance(sliced, TibbleRowwise)
    assert nrow(sliced) == 5

    sliced = rowwise_df >> slice_sample(n=3)
    assert isinstance(sliced, TibbleRowwise)
    assert nrow(sliced) == 5
Exemple #26
0
def test_slice_accepts_star_args():
    """slice() treats several positional indices like one collection."""
    # Plain integers versus a list.
    from_args = slice(mtcars, 1, 2)
    from_list = slice(mtcars, [1, 2])
    assert from_args.equals(from_list)

    # First and last row, with n() resolved against the frame.
    from_args = slice(mtcars, 0, n() - 1)
    from_vector = slice(mtcars, c(0, nrow(mtcars) - 1))
    assert from_args.equals(from_vector)

    # Same on a grouped frame.
    by_cyl = mtcars >> group_by(f.cyl)
    from_args = slice(by_cyl, 0, n() - 1)
    from_vector = slice(by_cyl, c(0, n() - 1))
    assert from_args.equals(from_vector)
Exemple #27
0
def test_nb_fail_c_across():
    """c_across() inside a rowwise mutate() keeps class and row count."""
    data = tibble(
        id=[1, 2, 3, 4],
        k=["a", "b", "c", "d"],
        w=runif(4),
        x=runif(4),
        y=runif(4),
        z=runif(4),
    )

    # Per-row sum and standard deviation over the columns from `w` onwards.
    summarised = (
        data
        >> rowwise()
        >> mutate(
            sum=sum(c_across(f[f.w:])),
            sd=sd(c_across(f[f.w:])),
        )
    )

    assert isinstance(summarised, TibbleRowwise)
    assert nrow(summarised) == 4
Exemple #28
0
def test_rowwise():
    """A registered function can drive filter() on a rowwise frame."""

    @register_func(None)
    def grepl(a, b):
        # Element-wise substring test: is each value of `a` contained in
        # the value of `b` at the same position?
        hits = [needle in haystack for needle, haystack in zip(a.obj, b.obj)]
        return Series(hits, index=a.obj.index)

    data = tibble(
        First=c("string1", "string2"),
        Second=c("Sentence with string1", "something"),
    )
    matched = data >> rowwise() >> filter(grepl(f.First, f.Second))
    assert nrow(matched) == 1

    # Only the first row matches, so the result equals that row.
    first_row = data >> slice(0)
    ungrouped = matched >> ungroup()
    assert first_row.equals(ungrouped)
Exemple #29
0
def test_proportion_computed_correctly():
    """prop=0.11 of a 10-row frame yields exactly one row everywhere."""
    data = tibble(x=range(1, 11))

    n_rows = data >> slice_head(prop=0.11) >> nrow()
    assert n_rows == 1

    n_rows = data >> slice_tail(prop=0.11) >> nrow()
    assert n_rows == 1

    n_rows = data >> slice_sample(prop=0.11) >> nrow()
    assert n_rows == 1

    n_rows = data >> slice_min(f.x, prop=0.11) >> nrow()
    assert n_rows == 1

    n_rows = data >> slice_max(f.x, prop=0.11) >> nrow()
    assert n_rows == 1

    # Same expectation when ties are not kept.
    n_rows = data >> slice_max(f.x, prop=0.11, with_ties=False) >> nrow()
    assert n_rows == 1

    n_rows = data >> slice_min(f.x, prop=0.11, with_ties=False) >> nrow()
    assert n_rows == 1
Exemple #30
0
def test_column_to_rownames(caplog):
    """column_to_rownames() restores row names without logging anything."""
    var = "var"
    assert has_rownames(mtcars)

    # Round trip: row names -> column `var` -> row names again.
    as_column = rownames_to_column(mtcars, var)
    restored = column_to_rownames(as_column, var)
    assert caplog.text == ""
    assert has_rownames(restored)
    assert_iterable_equal(rownames(restored), rownames(mtcars))
    assert_frame_equal(restored, mtcars)
    # has_name is not a public API
    #   expect_false(has_name(res, var))

    # Use a numeric column (reversed 0-based row ids) as the row names.
    numbered = mtcars.copy()
    numbered["num"] = rev(seq_len(nrow(mtcars))) - 1
    as_column = rownames_to_column(numbered)
    restored = column_to_rownames(as_column, var="num")
    assert caplog.text == ""
    assert has_rownames(restored)
    assert_iterable_equal(rownames(restored), as_character(numbered.num))

    # Calling it again on the result must raise ...
    with pytest.raises(ValueError):
        column_to_rownames(restored)
    # ... and so must asking for a column that does not exist.
    with pytest.raises(KeyError):
        column_to_rownames(rownames_to_column(numbered, var), "num2")