Example #1
0
def test_ignores_group():
    """Check that arrange() only honours groups when `_by_group=True`."""
    data = tibble(g=[2, 1] * 2, x=[4, 3, 2, 1])
    grouped = data >> group_by(f.g)

    # Default behaviour: rows come back ordered by x alone.
    plain_sorted = grouped >> arrange(f.x)
    reversed_rows = data.iloc[[3, 2, 1, 0], :].reset_index(drop=True)
    assert plain_sorted.equals(reversed_rows)

    # With _by_group=True the expected rows are ordered by g first, then x.
    group_sorted = grouped >> arrange(f.x, _by_group=True)
    expected = data.iloc[[3, 1, 2, 0], :].reset_index(drop=True)
    assert_frame_equal(group_sorted, expected)
Example #2
0
def test_handles_scalar_results():
    """A filter condition built from `min(f.mpg) > 0` keeps all mtcars rows."""
    ungrouped = mtcars >> filter(min(f.mpg) > 0)
    assert ungrouped.equals(mtcars)

    by_cyl = mtcars >> group_by(f.cyl)
    filtered = by_cyl >> filter(min(f.mpg) > 0) >> arrange(f.cyl, f.mpg)
    # See TibbleGrouped's Known issues
    baseline = mtcars >> group_by(f.cyl) >> arrange(f.cyl, f.mpg)
    assert_frame_equal(filtered, baseline)
Example #3
0
def test_empty_returns_self():
    """arrange() called without sort keys gives back an equal frame."""
    plain = tibble(x=range(1, 11), y=range(1, 11))
    grouped = plain >> group_by(f.x)

    # Ungrouped input.
    assert arrange(plain).equals(plain)

    # Grouped input: both the data and the grouping variables must match.
    result = arrange(grouped)
    assert result.equals(grouped)
    assert group_vars(result) == group_vars(grouped)
Example #4
0
def test_errors():
    """arrange() raises for duplicate names, unknown columns and bad lengths."""
    col = Series(1, name="x")

    # Two columns sharing the name "x".
    dup_named = tibble(col, col, _name_repair="minimal")
    with pytest.raises(NameNonUniqueError):
        dup_named >> arrange(f.x)

    single = tibble(x=col)

    # Column "y" does not exist.
    with pytest.raises(KeyError):
        single >> arrange(f.y)

    # Sort key built with rep(f.x, 2) does not match the frame's length.
    with pytest.raises(ValueError, match="Length of values"):
        single >> arrange(rep(f.x, 2))
Example #5
0
def test_preserve_order_across_groups():
    """Three equivalent filter/group pipelines end up with the same rows."""
    data = tibble(g=c(1, 2, 1, 2, 1), time=[5, 4, 3, 2, 1], x=f.time)

    # Group, filter, ungroup, then sort.
    grouped_filter = (
        data
        >> group_by(f.g)
        >> filter(f.x <= 4)
        >> ungroup()
        >> arrange(f.g, f.time)
    )

    # Same as above, but pre-sorted by g before grouping.
    presorted = (
        data
        >> arrange(f.g)
        >> group_by(f.g)
        >> filter(f.x <= 4)
        >> ungroup()
        >> arrange(f.g, f.time)
    )

    # Filter before grouping.
    filter_first = (
        data
        >> filter(f.x <= 4)
        >> group_by(f.g)
        >> ungroup()
        >> arrange(f.g, f.time)
    )

    # Normalise the index in place so equals() compares the row content only.
    for frame in (grouped_filter, presorted, filter_first):
        frame.reset_index(drop=True, inplace=True)

    assert grouped_filter.equals(presorted)
    assert grouped_filter.equals(filter_first)
Example #6
0
def test_slice_works_with_grouped_data():
    """Exercise slice() on grouped tibbles: ranges, negation, errors, Series."""
    cars_by_cyl = mtcars >> arrange(f.cyl) >> group_by(f.cyl)

    # First two rows of every group.
    head_rows = slice(cars_by_cyl, f[:2])
    head_expected = filter(cars_by_cyl, row_number() < 3)
    assert_frame_equal(head_rows, head_expected)

    # Everything except the first two rows of every group.
    tail_rows = slice(cars_by_cyl, ~f[:2])
    tail_expected = filter(cars_by_cyl, row_number() >= 3)
    assert_tibble_equal(tail_rows, tail_expected)

    small_groups = group_by(tibble(x=c(1, 1, 2, 2, 2)), f.x)
    # out = group_keys(slice(g, 3, _preserve=True))
    # assert out.x.tolist() == [1, 2]
    keys = group_keys(slice(small_groups, 2, _preserve=False))
    assert keys.x.tolist() == [2]

    # Grouping by a categorical that has an unused level, with _drop=False.
    cat_grouped = tibble(x=f[1:4]) >> group_by(
        g=Categorical([1, 1, 2], categories=[1, 2, 3]),
        _drop=False,
    )
    with pytest.raises(TypeError):
        cat_grouped >> slice("a")
    with pytest.raises(ValueError):
        cat_grouped >> slice(~f[:2], 1)

    first_of_each = cat_grouped >> slice(0)
    assert first_of_each.shape[0] == 2

    # Row positions supplied as a Series grouped by the tibble's own group index.
    group_index = cat_grouped._datar["grouped"].grouper.result_index
    by_series = cat_grouped >> slice(Series([1, 0, 0]).groupby(group_index))
    assert_iterable_equal(by_series.x.obj, [2, 3])
Example #7
0
def test_rowwise_preserved_by_major_verbs():
    """Major verbs return a TibbleRowwise; summarise returns a TibbleGrouped."""

    def check(result, expected_vars, expected_type=TibbleRowwise):
        # Shared assertion pair: result class and its grouping variables.
        assert isinstance(result, expected_type)
        assert group_vars(result) == expected_vars

    rf = rowwise(tibble(x=range(1, 6), y=range(5, 0, -1)), f.x)

    arranged = arrange(rf, f.y)
    check(arranged, ["x"])

    filtered = filter(rf, f.x < 3)
    check(filtered, ["x"])

    mutated = mutate(rf, x=f.x + 1)
    check(mutated, ["x"])

    # Renaming the grouping column renames the group variable too.
    renamed = rename(rf, X=f.x)
    check(renamed, ["X"])

    selected = select(rf, "x")
    check(selected, ["x"])

    sliced = slice(rf, c(0, 0))
    check(sliced, ["x"])

    # Except for summarise
    summarised = summarise(rf, z=mean(f.x, f.y))
    check(summarised, ["x"], TibbleGrouped)
Example #8
0
def test_zero_row_dfs():
    """Verbs applied to a zero-row grouped tibble yield the expected shapes."""
    empty = tibble(a=[], b=[], g=[])
    grouped = group_by(empty, f.g, _drop=False)
    assert grouped.shape == (0, 3)
    assert group_vars(grouped) == ["g"]
    assert group_size(grouped) == []

    # summarise: one row per group (none here) and no grouping variables left.
    summarised = summarise(grouped, n=n())
    assert summarised.shape == (0, 2)
    assert group_vars(summarised) == []

    # mutate adds a column and keeps the grouping.
    mutated = mutate(grouped, c=f.b + 1)
    assert mutated.shape == (0, 4)
    assert group_vars(mutated) == ["g"]
    assert group_size(mutated) == []

    filtered = filter(grouped, f.a == 100)
    assert filtered.shape == (0, 3)
    assert group_vars(filtered) == ["g"]
    assert group_size(filtered) == []

    arranged = arrange(grouped, f.a, f.g)
    assert arranged.shape == (0, 3)
    assert group_vars(arranged) == ["g"]
    assert group_size(arranged) == []

    # select(f.a) yields two columns: the grouping column g is retained.
    selected = select(grouped, f.a)
    assert selected.shape == (0, 2)
    assert group_vars(selected) == ["g"]
    assert group_size(selected) == []
Example #9
0
def test_0_groups_arrange():
    """arrange() on a grouped tibble with no rows keeps dims and columns."""
    empty_grouped = tibble(x=1).loc[[], :] >> group_by(f.x)
    arranged = empty_grouped >> arrange(f.x)

    dims_before = empty_grouped >> dim()
    dims_after = arranged >> dim()
    assert dims_before == dims_after

    cols_before = empty_grouped.columns.tolist()
    cols_after = arranged.columns.tolist()
    assert cols_before == cols_after
Example #10
0
def test_slice_handles_numeric_input():
    """slice(0) matches filter(row_number() == 1), grouped and ungrouped."""
    by_cyl = mtcars >> arrange(f.cyl) >> group_by(f.cyl)

    # Grouped: one row per cyl group, three groups in total.
    first_rows = by_cyl >> slice(0)
    assert nrow(first_rows) == 3
    first_expected = by_cyl >> filter(row_number() == 1)
    assert_frame_equal(first_rows, first_expected)

    # Ungrouped.
    sliced = mtcars >> slice(0) >> as_tibble()
    filtered = mtcars >> filter(row_number() == 1)
    assert_frame_equal(sliced, filtered)
Example #11
0
def test_across():
    """arrange(across(...)) sorts like the equivalent explicit column list."""
    data = tibble(x=[1, 3, 2, 1], y=[4, 3, 2, 1])

    # across() with no arguments is compared against sorting by x then y.
    via_across = data >> arrange(across())
    explicit = data >> arrange(f.x, f.y)
    assert via_across.equals(explicit)

    # Applying desc to every column.
    via_across = data >> arrange(across(None, desc))
    explicit = data >> arrange(desc(f.x), desc(f.y))
    assert via_across.equals(explicit)

    # Single column: x.
    via_across = data >> arrange(across(f.x))
    explicit = data >> arrange(f.x)
    assert via_across.equals(explicit)

    # Single column: y.
    via_across = data >> arrange(across(f.y))
    explicit = data >> arrange(f.y)
    assert via_across.equals(explicit)
Example #12
0
def test_update_grouping():
    """arrange() on a grouped tibble stays grouped with updated group rows."""
    data = tibble(g=[2, 2, 1, 1], x=[1, 3, 2, 4])
    grouped = data >> group_by(f.g)
    arranged = grouped >> arrange(f.x)

    assert isinstance(arranged, TibbleGrouped)
    # Expected row positions per group after sorting by x.
    assert group_rows(arranged) == [[0, 2], [1, 3]]
Example #13
0
def test_slice_works_fine_if_n_gt_nrow():
    """slice(7) per group equals filter(row_number() == 8) per group."""
    cars_by_cyl = mtcars >> arrange(f.cyl) >> group_by(f.cyl)

    sliced = cars_by_cyl >> slice(7)
    filtered = cars_by_cyl >> filter(row_number() == 8)

    assert sliced.equals(filtered)
Example #14
0
def test_complex_cols():
    """arrange() can sort by a column holding complex numbers."""
    data = tibble(x=[1, 2, 3], y=[3 + 2j, 2 + 2j, 1 + 2j])
    by_complex = data >> arrange(f.y)
    # y is given in descending order, so sorting by it reverses x.
    assert_iterable_equal(by_complex.x, [3, 2, 1])
Example #15
0
def test_df_cols():
    """arrange() can sort by a column that is itself a tibble."""
    data = tibble(x=[1, 2, 3], y=tibble(z=[3, 2, 1]))
    by_nested = data >> arrange(f.y)

    wanted = tibble(x=[3, 2, 1], y=tibble(z=[1, 2, 3]))
    # Reset the index before comparing against the freshly built frame.
    renumbered = by_nested.reset_index(drop=True)
    assert renumbered.equals(wanted)
Example #16
0
def test_na_end():
    """Missing values are placed last for ascending and descending sorts."""
    data = tibble(x=c(4, 3, NA))  # NA makes it float

    ascending = data >> arrange(f.x)
    assert_iterable_equal(ascending.x, [3, 4, None])

    descending = data >> arrange(desc(f.x))
    assert_iterable_equal(descending.x, [4, 3, None])
Example #17
0
def test_sort_empty_df():
    """arrange() with no keys on an empty tibble returns an equal tibble."""
    empty = tibble()
    arranged = empty >> arrange()
    assert_tibble_equal(arranged, empty)
Example #18
0
def test_filter_restructures_group_data_correctly():
    """A mutate after a grouped filter still works and yields 29 rows."""
    by_cyl = mtcars >> arrange(f.gear) >> group_by(f.cyl)

    # Cumulative drat within each group, then keep rows where it reaches 5.
    with_cum = by_cyl >> mutate(cum=f.drat.cumsum())
    kept = with_cum >> filter(f.cum >= 5)

    # A second mutate on the filtered groups.
    ranked = kept >> mutate(ranking=f.cum.rank())
    assert nrow(ranked) == 29