Beispiel #1
0
def test_slice_silently_ignores_out_of_range_values():
    """Adding position 100 next to position 2 must not change slice()'s result.

    The comparison is made on plain ``mtcars`` first, then on ``mtcars``
    grouped by ``cyl``.
    """
    plain_with_extra = slice(mtcars, c(2, 100))
    plain_valid_only = slice(mtcars, 2)
    assert_frame_equal(plain_with_extra, plain_valid_only)

    by_cyl = group_by(mtcars, f.cyl)
    grouped_with_extra = slice(by_cyl, c(2, 100))
    grouped_valid_only = slice(by_cyl, 2)
    assert_frame_equal(grouped_with_extra, grouped_valid_only)
Beispiel #2
0
def test_slice_handles_numeric_input():
    """``slice(0)`` selects the same rows as ``filter(row_number() == 1)``."""
    ordered = mtcars >> arrange(f.cyl)
    by_cyl = ordered >> group_by(f.cyl)

    first_per_group = by_cyl >> slice(0)
    assert nrow(first_per_group) == 3
    expected_per_group = by_cyl >> filter(row_number() == 1)
    assert_frame_equal(first_per_group, expected_per_group)

    # Same equivalence on the ungrouped frame.
    sliced = mtcars >> slice(0) >> as_tibble()
    filtered = mtcars >> filter(row_number() == 1)
    assert_frame_equal(sliced, filtered)
Beispiel #3
0
def test_slice_handles_na():
    """NA positions contribute no rows, alone or mixed with real positions."""
    plain = tibble(x=[1, 2, 3])

    only_na = slice(plain, NA)
    assert nrow(only_na) == 0

    one_and_na = slice(plain, c(1, NA))
    assert nrow(one_and_na) == 1

    remaining = plain >> slice(c(~c(1), NA)) >> nrow()
    assert remaining == 2

    # Same checks on a frame grouped by `g`.
    grouped = tibble(x=[1, 2, 3, 4], g=rep([1, 2], 2)) >> group_by(f.g)

    grouped_one_and_na = slice(grouped, c(1, NA))
    assert nrow(grouped_one_and_na) == 2

    grouped_remaining = grouped >> slice(c(~c(1), NA)) >> nrow()
    assert grouped_remaining == 2
Beispiel #4
0
def test_slice_handles_df_columns():
    """slice() works on a frame whose columns are themselves data frames."""
    nested = tibble(
        x=[1, 2],
        y=tibble(a=[1, 2], b=[3, 4]),
        z=tibble(A=[1, 2], B=[3, 4]),
    )

    first_row = slice(nested, 0)
    assert first_row.equals(nested.iloc[[0], :])

    # Grouped by the plain column: position 0 of every group equals the input.
    by_x = group_by(nested, f.x)
    sliced_by_x = slice(by_x, 0)
    assert sliced_by_x.equals(by_x)

    # TODO: group_by a stacked df is not supported yet
    by_y = group_by(nested, f["y$a"], f["y$b"])
    sliced_by_y = slice(by_y, 0)
    assert sliced_by_y.equals(by_y)

    by_z = group_by(nested, f["z$A"], f["z$B"])
    sliced_by_z = slice(by_z, 0)
    assert sliced_by_z.equals(by_z)
Beispiel #5
0
def test_slice_gives_correct_rows():
    """slice() returns exactly the requested positions, per group when grouped."""
    labels = [f"row{i}" for i in range(1, 11)]

    flat = tibble(value=labels)

    leading = slice(flat, c(0, 1, 2))
    assert leading.value.tolist() == ["row1", "row2", "row3"]

    scattered = slice(flat, c(3, 5, 8))
    assert scattered.value.tolist() == ["row4", "row6", "row9"]

    # Same labels, now grouped by a two-valued `group` column.
    grouped = tibble(
        value=labels,
        group=rep([1, 2], each=5),
    ) >> group_by(f.group)

    head_of_each = slice(grouped, f[:3])
    expected_head = [f"row{i}" for i in [1, 2, 3, 6, 7, 8]]
    assert head_of_each.value.obj.tolist() == expected_head

    picked = slice(grouped, c(1, 3))
    expected_picked = [f"row{i}" for i in [2, 4, 7, 9]]
    assert picked.value.obj.tolist() == expected_picked
Beispiel #6
0
def test_slice_family_on_rowwise_df():
    """The slice family on a rowwise frame keeps the rowwise type and all rows.

    The one different expectation is ``slice_head(prop=0.1)``, which must
    produce an empty result.
    """

    def _expect_untouched(result):
        # Still rowwise, and still five rows long.
        assert isinstance(result, TibbleRowwise)
        assert nrow(result) == 5

    rw = tibble(x=f[1:6]) >> rowwise()

    empty = rw >> slice_head(prop=0.1)
    assert empty.shape[0] == 0

    by_position = rw >> slice([0, 1, 2])
    _expect_untouched(by_position)

    from_head = rw >> slice_head(n=3)
    _expect_untouched(from_head)

    from_tail = rw >> slice_tail(n=3)
    _expect_untouched(from_tail)

    smallest = rw >> slice_min(f.x, n=3)
    _expect_untouched(smallest)

    largest = rw >> slice_max(f.x, n=3)
    _expect_untouched(largest)

    sampled = rw >> slice_sample(n=3)
    _expect_untouched(sampled)
Beispiel #7
0
def test_rowwise_preserved_by_major_verbs():
    """Common verbs return a rowwise frame with its group variable intact.

    ``summarise`` is the exception: it is expected to return a grouped frame.
    """

    def _expect(result, frame_type, keys):
        # Check both the concrete frame class and the recorded group variables.
        assert isinstance(result, frame_type)
        assert group_vars(result) == keys

    rf = rowwise(tibble(x=range(1, 6), y=range(5, 0, -1)), f.x)

    arranged = arrange(rf, f.y)
    _expect(arranged, TibbleRowwise, ["x"])

    filtered = filter(rf, f.x < 3)
    _expect(filtered, TibbleRowwise, ["x"])

    mutated = mutate(rf, x=f.x + 1)
    _expect(mutated, TibbleRowwise, ["x"])

    # Renaming the group variable must be reflected in group_vars().
    renamed = rename(rf, X=f.x)
    _expect(renamed, TibbleRowwise, ["X"])

    selected = select(rf, "x")
    _expect(selected, TibbleRowwise, ["x"])

    sliced = slice(rf, c(0, 0))
    _expect(sliced, TibbleRowwise, ["x"])

    # Except for summarise
    summarised = summarise(rf, z=mean(f.x, f.y))
    _expect(summarised, TibbleGrouped, ["x"])
Beispiel #8
0
def test_slice_works_with_grouped_data():
    """slice() on grouped frames: ranges, exclusions, group keys and bad input."""
    by_cyl = mtcars >> arrange(f.cyl) >> group_by(f.cyl)

    first_two = slice(by_cyl, f[:2])
    expected_first_two = filter(by_cyl, row_number() < 3)
    assert_frame_equal(first_two, expected_first_two)

    all_but_first_two = slice(by_cyl, ~f[:2])
    expected_rest = filter(by_cyl, row_number() >= 3)
    assert_tibble_equal(all_but_first_two, expected_rest)

    by_x = group_by(tibble(x=c(1, 1, 2, 2, 2)), f.x)
    # Disabled check for the _preserve=True variant:
    # out = group_keys(slice(g, 3, _preserve=True))
    # assert out.x.tolist() == [1, 2]
    third_rows = slice(by_x, 2, _preserve=False)
    keys = group_keys(third_rows)
    assert keys.x.tolist() == [2]

    # Grouped by a categorical that declares a level (3) with no rows.
    base = tibble(x=f[1:4])
    gf = base >> group_by(
        g=Categorical([1, 1, 2], categories=[1, 2, 3]),
        _drop=False,
    )

    with pytest.raises(TypeError):
        gf >> slice("a")
    with pytest.raises(ValueError):
        gf >> slice(~f[:2], 1)

    first_of_each = gf >> slice(0)
    assert first_of_each.shape[0] == 2

    # Positions supplied as a grouped Series keyed on the frame's own groups.
    positions = Series([1, 0, 0])
    group_index = gf._datar["grouped"].grouper.result_index
    by_series = gf >> slice(positions.groupby(group_index))
    assert_iterable_equal(by_series.x.obj, [2, 3])
Beispiel #9
0
def test_rename_errors_with_invalid_grouped_df():
    """slice() and ``_n_from_prop`` reject invalid arguments.

    NOTE(review): despite the name, nothing here calls ``rename``; every
    check targets ``slice`` or ``_n_from_prop``.
    """
    frame = tibble(x=[1, 2, 3])

    # Incompatible type
    for unsupported in (object(), {"a": 1}):
        with pytest.raises(TypeError):
            slice(frame, unsupported)

    # Mix of positive and negative integers
    with pytest.raises(ValueError):
        mtcars >> slice(c(~c(1), 2))
    with pytest.raises(ValueError):
        mtcars >> slice(c(f[2:4], ~c(1)))

    # n and prop are carefully validated
    # with pytest.raises(ValueError):
    #     _n_from_prop(10, n=1, prop=1)
    for non_numeric in ({"n": "a"}, {"prop": "a"}):
        with pytest.raises(TypeError):
            _n_from_prop(10, **non_numeric)
    for negative in ({"n": -1}, {"prop": -1}):
        with pytest.raises(ValueError):
            _n_from_prop(10, **negative)

    # n() is evaluated inside the raises block, as in the original checks.
    with pytest.raises(TypeError):
        _n_from_prop(10, n=n())
    with pytest.raises(TypeError):
        _n_from_prop(10, prop=n())
Beispiel #10
0
def test_preserve_prop_not_support(caplog):
    """``_preserve=True`` is mentioned in the log; ``prop`` raises on grouped data.

    The ``prop`` checks cover ``slice_min``, ``slice_max`` and
    ``slice_sample`` on a frame grouped by ``x``.
    """
    grouped = tibble(x=f[:5]) >> group_by(f.x)

    # Only the log output matters here; the sliced frame is discarded.
    grouped >> slice(f.x == 2, _preserve=True)
    logged = caplog.text
    assert "_preserve" in logged

    with pytest.raises(ValueError):
        grouped >> slice_min(f.x, prop=0.5)

    with pytest.raises(ValueError):
        grouped >> slice_max(f.x, prop=0.5)

    with pytest.raises(ValueError):
        grouped >> slice_sample(f.x, prop=0.5)
Beispiel #11
0
def test_handles_df_cols():
    """filter() matches ``slice(0)`` on a frame holding a data-frame column.

    Both a plain column (``x``) and a nested column (``z$A``) are used as the
    predicate, first on the ungrouped frame and then on the frame grouped
    by ``x``.
    """
    df = tibble(x=[1, 2], z=tibble(A=[1, 2], B=[3, 4]))
    expect = df >> slice(0)

    out = df >> filter(f.x == 1)
    assert out.equals(expect)
    out = df >> filter(f["z$A"] == 1)
    assert out.equals(expect)

    gdf = df >> group_by(f.x)

    # Mirror the ungrouped checks. Previously the nested-column predicate was
    # asserted twice and the plain-column predicate never ran on grouped data.
    out = gdf >> filter(f.x == 1)
    assert out.equals(expect)
    out = gdf >> filter(f["z$A"] == 1)
    assert out.equals(expect)
Beispiel #12
0
def test_mixed_rows():
    """Mixed positions: ``-`` counts from the end, ``~`` excludes positions."""
    frame = tibble(x=range(5))

    # order kept
    # 0   1   2   3   4
    #        -3      -1
    #             3
    from_end_then_forward = slice(frame, c(-c(3, 1), 3))
    assert from_end_then_forward.x.tolist() == [2, 4, 3]

    # 0   1   2   3   4
    #            -2  -1
    #             3
    negated_range_then_forward = slice(frame, c(-f[1:3], 3))
    assert negated_range_then_forward.x.tolist() == [4, 3, 3]

    # 0   1   2   3   4
    # 0       2
    #                -1
    two_exclusions = slice(frame, c(~c(0, 2), ~c(-1)))
    assert two_exclusions.x.tolist() == [1, 3]

    # Excluding an open-ended range together with a single position.
    range_exclusion = frame >> slice(c(~f[3:], ~c(1)))
    assert range_exclusion.x.tolist() == [0, 2]
Beispiel #13
0
def test_rowwise():
    """A custom registered predicate can drive filter() on a rowwise frame."""

    @register_func(None)
    def grepl(a, b):
        # Pairwise substring test: is each element of `a` inside its `b` partner?
        source = a.obj
        hits = [needle in haystack for needle, haystack in zip(source, b.obj)]
        return Series(hits, index=source.index)

    pairs = tibble(
        First=c("string1", "string2"),
        Second=c("Sentence with string1", "something"),
    )
    matched = pairs >> rowwise() >> filter(grepl(f.First, f.Second))
    assert nrow(matched) == 1

    # The surviving row must be the first input row.
    expected = pairs >> slice(0)
    actual = matched >> ungroup()
    assert expected.equals(actual)
Beispiel #14
0
def test_slice_accepts_star_args():
    """Positions passed as separate arguments act like one combined vector."""
    separate = slice(mtcars, 1, 2)
    combined = slice(mtcars, [1, 2])
    assert separate.equals(combined)

    # n() - 1 as a separate argument versus an explicit last position.
    first_and_last = slice(mtcars, 0, n() - 1)
    explicit_ends = slice(mtcars, c(0, nrow(mtcars) - 1))
    assert first_and_last.equals(explicit_ends)

    by_cyl = mtcars >> group_by(f.cyl)
    grouped_separate = slice(by_cyl, 0, n() - 1)
    grouped_combined = slice(by_cyl, c(0, n() - 1))
    assert grouped_separate.equals(grouped_combined)
Beispiel #15
0
def test_slice_handles_logical_NA():
    """Slicing with a lone NA yields a frame with no rows."""
    frame = tibble(x=[1, 2, 3])
    sliced = slice(frame, NA)
    assert nrow(sliced) == 0
Beispiel #16
0
def test_slice_works_with_negative_indices():
    """Excluding ``f[:2]`` matches ``mtcars.tail(-2)``."""
    expected = mtcars.tail(-2)
    without_first_two = slice(mtcars, ~f[:2])
    assert_frame_equal(without_first_two, expected)
Beispiel #17
0
def test_slice_handles_empty_df():
    """Slicing an empty frame returns no rows and keeps the column name."""
    empty = tibble(x=[])
    sliced = empty >> slice(f[:3])
    assert nrow(sliced) == 0
    assert names(sliced) == ["x"]
Beispiel #18
0
def test_wrong_indices():
    """A string position is rejected with TypeError."""
    frame = tibble(x=f[:3])
    with pytest.raises(TypeError):
        frame >> slice("a")
Beispiel #19
0
def test_slice_works_fine_if_n_gt_nrow():
    """``slice(7)`` per group agrees with ``filter(row_number() == 8)``."""
    ordered = mtcars >> arrange(f.cyl)
    by_cyl = ordered >> group_by(f.cyl)

    via_slice = by_cyl >> slice(7)
    via_filter = by_cyl >> filter(row_number() == 8)
    assert via_slice.equals(via_filter)
Beispiel #20
0
def test_slice_strips_grouped_indices():
    """After a grouped slice, group rows are renumbered from zero."""
    by_cyl = mtcars >> group_by(f.cyl)
    one_per_group = by_cyl >> slice(1)
    augmented = one_per_group >> mutate(mpgplus=f.mpg + 1)

    assert nrow(augmented) == 3
    assert group_rows(augmented) == [[0], [1], [2]]
Beispiel #21
0
def test_empty_slice_returns_input():
    """slice() with no positions returns a frame equal to its input."""
    frame = tibble(x=[1, 2, 3])
    untouched = slice(frame)
    assert untouched.equals(frame)
Beispiel #22
0
def test_slice_works_with_0col_dfs():
    """A frame with its only column deselected can still be sliced to one row."""
    no_columns = tibble(a=[1, 2, 3]) >> select(~f.a)
    row_count = no_columns >> slice(1) >> nrow()
    assert row_count == 1
Beispiel #23
0
def test_slice_correctly_computes_positive_indices_from_negative_indices():
    """An exclusion range running past the end only drops rows that exist."""
    frame = tibble(y=range(1, 11))
    # negative in dplyr meaning exclusive
    kept = slice(frame, ~f[9:30])
    expected = tibble(y=range(1, 10))
    assert kept.equals(expected)
Beispiel #24
0
def test_slice_does_not_evaluate_the_expression_in_empty_groups():
    """Slicing after a filter that keeps only ``cyl == 6`` yields two rows."""
    by_cyl = mtcars >> group_by(f.cyl)
    six_only = by_cyl >> filter(f.cyl == 6)
    first_two = six_only >> slice(f[:2])
    assert nrow(first_two) == 2