Esempio n. 1
0
def test_can_nest_multiple_columns():
    """Check that nest() can build two nested columns in a single call.

    The result must have columns ``x``, ``a`` and ``b``, and the first cell
    of each nested column must equal the source columns it was built from.
    """
    source = tibble(x=1, a1=1, a2=2, b1=1, b2=2)
    nested = source >> nest(a=c(f.a1, f.a2), b=c(f.b1, f.b2))

    assert nested.columns.tolist() == ['x', 'a', 'b']

    first_a = nested.a.values[0]
    assert_frame_equal(first_a, source[['a1', 'a2']])

    first_b = nested.b.values[0]
    assert_frame_equal(first_b, source[['b1', 'b2']])
Esempio n. 2
0
def test_set_operations_reconstruct_grouping_metadata():
    """Check setdiff/intersect/union when the left operand is grouped.

    Counterpart of the R case
    ``test_that("set operations reconstruct grouping metadata (#3587)")``.
    The left frame is grouped by ``g``; the right frame is not.
    """
    grouped = tibble(x=seq(1, 4), g=rep([1, 2], each=2)) >> group_by(f.g)
    plain = tibble(x=seq(3, 6), g=rep([2, 3], each=2))

    difference = setdiff(grouped, plain)
    expected = filter(grouped, f.x < 3)
    assert difference.equals(expected)

    common = intersect(grouped, plain)
    expected = filter(grouped, f.x >= 3).reset_index(drop=True)
    assert_frame_equal(common, expected)

    combined = union(grouped, plain)
    expected = (
        tibble(x=seq(1, 6), g=rep([1, 2, 3], each=2)) >> group_by(f.g)
    )
    assert combined.equals(expected)
    assert group_vars(combined) == group_vars(expected)

    # Per-group row indices reported for each operation's result.
    rows = setdiff(grouped, plain) >> group_rows()
    assert rows == [[0, 1]]

    rows = intersect(grouped, plain) >> group_rows()
    assert rows == [[0, 1]]

    rows = union(grouped, plain) >> group_rows()
    assert rows == [[0, 1], [2, 3], [4, 5]]
Esempio n. 3
0
def test_rows_insert(data):
    """Check rows_insert() for a successful insert and a rejected one.

    Args:
        data: Frame supplied by the caller (presumably a pytest fixture
            defined outside this snippet -- confirm).

    Inserting ``a=4`` must yield the expected four-row frame; inserting
    ``a=3`` must raise ``ValueError`` matching "insert duplicate".
    """
    new_row = tibble(a=4, b="z")
    inserted = rows_insert(data, new_row, by="a")
    expected = tibble(
        a=seq(1, 4),
        b=c("a", "b", NA, "z"),
        c=c(0.5, 1.5, 2.5, NA),
    )
    assert_frame_equal(inserted, expected)

    with pytest.raises(ValueError, match="insert duplicate"):
        rows_insert(data, tibble(a=3, b="z"), by="a")
Esempio n. 4
0
def test_set_operations_keep_the_ordering_of_the_data():
    """Check that set operations follow the row order of their inputs.

    Counterpart of the R case
    ``test_that("set operations keep the ordering of the data (#3839)")``.
    Each operation is run on the frames as built and again with one of the
    operands row-reversed.
    """

    def reversed_rows(df):
        # Select the rows of `df` in reverse order (0-based positions).
        return df >> get(rev(seq_len(nrow(df))) - 1)

    left = tibble(x=seq(1, 4), g=rep([1, 2], each=2))
    right = tibble(x=seq(3, 6), g=rep([2, 3], each=2))

    # setdiff
    result = setdiff(left, right)
    expected = filter(left, f.x < 3)
    assert result.equals(expected)

    result = setdiff(reversed_rows(left), right)
    expected = filter(reversed_rows(left), f.x < 3).reset_index(drop=True)
    assert result.equals(expected)

    # intersect
    result = intersect(left, right)
    expected = filter(left, f.x >= 3).reset_index(drop=True)
    assert result.equals(expected)

    result = intersect(reversed_rows(left), right)
    expected = filter(reversed_rows(left), f.x >= 3).reset_index(drop=True)
    assert result.equals(expected)

    # union
    result = union(left, right)
    expected = tibble(x=seq(1, 6), g=rep([1, 2, 3], each=2))
    assert result.equals(expected)

    result = union(reversed_rows(left), right)
    expected = tibble(x=c(seq(4, 1), [5, 6]), g=rep([2, 1, 3], each=2))
    # This case is compared with assert_frame_equal instead of .equals().
    assert_frame_equal(result, expected)

    result = union(left, reversed_rows(right))
    expected = tibble(x=c(seq(1, 4), [6, 5]), g=rep([1, 2, 3], each=2))
    assert result.equals(expected)
Esempio n. 5
0
def test_slice_works_with_grouped_data():
    """Exercise slice() on grouped frames.

    Covers a range selector and its negation, a scalar index with
    ``_preserve=False``, the error types raised for a string index and for
    a negated range combined with a plain index, and a grouped Series
    passed as the index.
    """
    by_cyl = mtcars >> arrange(f.cyl) >> group_by(f.cyl)

    head_rows = slice(by_cyl, f[:2])
    expected = filter(by_cyl, row_number() < 3)
    assert_frame_equal(head_rows, expected)

    remaining_rows = slice(by_cyl, ~f[:2])
    expected = filter(by_cyl, row_number() >= 3)
    assert_tibble_equal(remaining_rows, expected)

    by_x = group_by(tibble(x=c(1, 1, 2, 2, 2)), f.x)
    # The `_preserve=True` variant of this check is currently disabled.
    keys = group_keys(slice(by_x, 2, _preserve=False))
    assert keys.x.tolist() == [2]

    by_factor = tibble(x=f[1:4]) >> group_by(
        g=Categorical([1, 1, 2], categories=[1, 2, 3]),
        _drop=False,
    )
    with pytest.raises(TypeError):
        by_factor >> slice("a")
    with pytest.raises(ValueError):
        by_factor >> slice(~f[:2], 1)

    first_rows = by_factor >> slice(0)
    assert first_rows.shape[0] == 2

    # Index supplied as a Series grouped by the frame's own result index.
    picked = by_factor >> slice(
        Series([1, 0, 0]).groupby(
            by_factor._datar["grouped"].grouper.result_index
        )
    )
    assert_iterable_equal(picked.x.obj, [2, 3])
Esempio n. 6
0
def test_unchopping_null_inputs_are_dropped():
    """Check unchop() over two list columns that contain NULL entries.

    The row where both ``y`` and ``z`` are NULL (``x == 1``) is absent from
    the expected output; rows where only one of them is NULL carry NA.
    """
    chopped = tibble(
        x=f[1:5],
        y=[NULL, [1, 2], 4, NULL],
        z=[NULL, [1, 2], NULL, 5],
    )
    unchopped = chopped >> unchop(c(f.y, f.z), dtypes=float)

    expected = tibble(
        x=[2, 2, 3, 4],
        y=[1, 2, 4, NA],
        z=[1, 2, NA, 5],
        _dtypes=float,
    )
    assert_frame_equal(unchopped, expected)
Esempio n. 7
0
def test_can_pack_multiple_columns():
    """Check that pack() can build two packed columns in a single call.

    The result's column names must be ``a`` and ``b``, and pulling each
    packed column must give back the source columns it was built from.
    """
    source = tibble(a1=1, a2=2, b1=1, b2=2)
    packed = source >> pack(a=c(f.a1, f.a2), b=c(f.b1, f.b2))

    assert_iterable_equal(colnames(packed), ['a', 'b'])

    packed_a = packed >> pull(f.a)
    assert_frame_equal(packed_a, source[['a1', 'a2']])

    packed_b = packed >> pull(f.b)
    assert_frame_equal(packed_b, source[['b1', 'b2']])
Esempio n. 8
0
def test_scale():
    """Exercise scale() on lists and frames.

    Covers the default call, an explicit ``center``, an explicit ``scale``,
    the ``ValueError`` raised for two-element ``center``/``scale`` against
    three values, a frame with centering and scaling both switched off, and
    the ``ValueError`` raised for a frame with a string column.

    Checks of the ``scaled:center`` / ``scaled:scale`` attrs are currently
    disabled and therefore not asserted here.
    """
    values = [1, 2, 3]

    scaled = [1, 2, 3] >> scale()
    assert_iterable_equal(scaled, [-1.0, 0.0, 1.0])

    scaled = scale(values, center=1)
    assert_iterable_equal(
        scaled,
        [0.0, 0.6324555, 1.2649111],
        approx=True,
    )

    scaled = [1, 2, 3] >> scale(scale=1)
    assert_iterable_equal(scaled, [-1.0, 0.0, 1.0])

    # Length-2 center / scale arguments are rejected for three values.
    with pytest.raises(ValueError):
        scale([1, 2, 3], center=[1, 2])
    with pytest.raises(ValueError):
        [1, 2, 3] >> scale(scale=[1, 2])

    numeric_frame = tibble(x=[1, 2, 3], y=[4, 5, 6])
    assert_frame_equal(scale(numeric_frame, False, False), numeric_frame)

    string_frame = tibble(x=["a", "b"])
    with pytest.raises(ValueError):
        scale(string_frame)
Esempio n. 9
0
def test_named_dfs_are_not_flattened():
    """Check that a frame passed under a name stays a single frame column.

    Both ``expand(..., x=nesting(...))`` and ``crossing(x=...)`` must give a
    column ``x`` that, when pulled, equals the original frame.
    """
    frame = tibble(x=f[1:3], y=f[1:3])

    pulled = expand(frame, x=nesting(f.x, f.y)) >> pull(f.x)
    assert_frame_equal(pulled, frame)

    pulled = crossing(x=frame) >> pull(f.x)
    assert_frame_equal(pulled, frame)
Esempio n. 10
0
def test_unchop_can_specify_dtypes():
    """Check unchop() with a ``dtypes`` mapping that names an absent column.

    The mapping mentions ``z``, which the input does not have; the expected
    output contains only ``x`` and ``y`` (no extra column is added).
    """
    chopped = tibble(x=1, y=[[1, 2]])
    requested_dtypes = {'y': int, 'z': int}

    unchopped = unchop(chopped, f.y, dtypes=requested_dtypes)

    expected = tibble(x=[1, 1], y=[1, 2])
    assert_frame_equal(unchopped, expected)
Esempio n. 11
0
def test_filter_slice_retain_zero_group_labels(df):
    """Check counts after filter()/slice() still list all three levels.

    Args:
        df: Frame supplied by the caller (presumably a grouped pytest
            fixture with a factor column ``f`` -- confirm against the
            fixture definition).

    Note from the original author: count loses ``_drop=False``.
    """
    counted = df >> filter(f.f == 1) >> count() >> ungroup()
    expected = tibble(f=factor([1, 2, 3], levels=[1, 2, 3]), n=[2, 0, 0])
    assert_frame_equal(counted, expected)

    counted = df >> slice(1) >> count() >> ungroup()
    expected = tibble(f=factor([1, 2, 3], levels=[1, 2, 3]), n=[1, 1, 0])
    assert_frame_equal(counted, expected)
Esempio n. 12
0
def test_handles_scalar_results():
    """Check filter() with the condition ``min(f.mpg) > 0``.

    Ungrouped, the result must equal ``mtcars``. Grouped by ``cyl`` and
    then arranged, it must equal the same pipeline without the filter step.
    """
    unfiltered = mtcars >> filter(min(f.mpg) > 0)
    assert unfiltered.equals(mtcars)

    with_filter = (
        mtcars
        >> group_by(f.cyl)
        >> filter(min(f.mpg) > 0)
        >> arrange(f.cyl, f.mpg)
    )
    # See TibbleGrouped's Known issues
    without_filter = mtcars >> group_by(f.cyl) >> arrange(f.cyl, f.mpg)
    assert_frame_equal(with_filter, without_filter)
Esempio n. 13
0
def test_slice_silently_ignores_out_of_range_values():
    """Check that slicing with ``c(2, 100)`` matches slicing with ``2``.

    Asserted for both plain ``mtcars`` and ``mtcars`` grouped by ``cyl``.
    """
    with_extra_index = slice(mtcars, c(2, 100))
    baseline = slice(mtcars, 2)
    assert_frame_equal(with_extra_index, baseline)

    by_cyl = group_by(mtcars, f.cyl)
    with_extra_index = slice(by_cyl, c(2, 100))
    baseline = slice(by_cyl, 2)
    assert_frame_equal(with_extra_index, baseline)
Esempio n. 14
0
def test_unchop_empty_list():
    """Check unchop() on frames whose columns are empty.

    First case: two empty plain columns. Second case: an empty plain column
    alongside an empty nested frame, unchopped through the ``y$z`` column.
    """
    empty = tibble(x=[], y=[])
    y_values = unchop(empty, f.y).y.to_list()
    assert y_values == []

    empty_nested = tibble(x=[], y=tibble(z=[]))
    # Open question from the original author: support nested df?
    nested_y = unchop(empty_nested, f['y$z']) >> pull(f.y)
    assert_frame_equal(nested_y >> drop_index(), tibble(z=[]))
Esempio n. 15
0
def test_slice_handles_numeric_input():
    """Check ``slice(0)`` against ``filter(row_number() == 1)``.

    On ``mtcars`` grouped by ``cyl`` the slice must have three rows and
    equal the filtered frame; on plain ``mtcars`` the slice (converted with
    ``as_tibble``) must equal the filtered frame.
    """
    by_cyl = mtcars >> arrange(f.cyl) >> group_by(f.cyl)

    sliced = by_cyl >> slice(0)
    assert nrow(sliced) == 3
    filtered = by_cyl >> filter(row_number() == 1)
    assert_frame_equal(sliced, filtered)

    sliced_plain = mtcars >> slice(0) >> as_tibble()
    filtered_plain = mtcars >> filter(row_number() == 1)
    assert_frame_equal(sliced_plain, filtered_plain)
Esempio n. 16
0
def test_unchop_optionally_keep_empty_rows():
    """Check unchop() with ``keep_empty=True`` on a NULL list entry.

    The row whose ``y`` is NULL must survive with ``y`` set to None, and
    the ``y`` column must have object dtype.
    """
    # Unchopping y makes x (and a would-be z) act as keys, which have to be
    # hashable; that is why the frame-valued z column is left out here.
    chopped = tibble(
        x=[1, 2],
        y=[NULL, [1, 2]],
    )
    unchopped = chopped >> unchop(f.y, keep_empty=True)

    expected = tibble(x=[1, 2, 2], y=[None, 1, 2], _dtypes={'y': object})
    assert_frame_equal(unchopped, expected)