Python filterの例、datar.dplyr.filter Pythonの例

コード例 #1

0

ファイルを表示

def test_handles_simple_symbols():
    """filter() accepts column symbols, local variables and forwarded args.

    Covers four ways of supplying the condition: a column reference
    (``f.test``) on plain and grouped data, a plain local vector defined
    inside a helper, and conditions that mix column references with locals
    and are forwarded through two layers of ``*args``.
    """
    df = tibble(x=range(1, 5), test=rep(c(True, False), each=2))
    # ``test`` is [True, True, False, False]: same mask as ``test2`` below,
    # so the expected result is the first two rows.
    res = filter(df, f.test)
    assert res.equals(df.iloc[:2, :])

    gdf = group_by(df, f.x)
    res = filter(gdf, f.test)
    # Grouped columns expose the underlying Series through ``.obj``.
    assert sorted(res.x.obj.tolist()) == [1, 2]

    def h(data):
        # The condition is an ordinary local, not a column of ``data``.
        test2 = c(True, True, False, False)
        return filter(data, test2)

    out = h(df)
    assert out.equals(df.iloc[:2, :])

    def ff(data, *args):
        # Combines a column, a comparison against a local, and extra
        # conditions passed down by the caller.
        one = 1
        return filter(data, f.test, f.x > one, *args)

    def g(data, *args):
        four = 4
        return ff(data, f.x < four, *args)

    # test & (x > 1) & (x < 4) leaves only x == 2.
    res = g(df)
    assert res.x.tolist() == [2]
    assert res.test.tolist() == [True]

    res = g(gdf)
    assert res.x.obj.tolist() == [2]
    assert res.test.obj.tolist() == [True]

コード例 #2

0

ファイルを表示

def test_complex_vec():
    """Complex-valued columns are kept intact when rows are filtered."""
    first_three = [i + 2j for i in range(1, 4)]
    frame = tibble(x=range(1, 11), y=[i + 2j for i in range(1, 11)])

    # Condition on the integer column.
    by_x = frame >> filter(f.x < 4)
    assert by_x.y.tolist() == first_three

    # Condition on re() applied to the complex column itself.
    by_real_part = frame >> filter(re(f.y) < 4)
    assert by_real_part.y.tolist() == first_three

コード例 #3

0

ファイルを表示

def test_slice_works_with_grouped_data():
    """Compare slice() on grouped tibbles with equivalent filter() calls.

    Also checks the group keys after slicing, the errors raised for invalid
    index arguments, and slicing with a grouped Series as the index.
    """
    cars_by_cyl = mtcars >> arrange(f.cyl) >> group_by(f.cyl)

    # f[:2] versus row_number() < 3.
    sliced = slice(cars_by_cyl, f[:2])
    filtered = filter(cars_by_cyl, row_number() < 3)
    assert_frame_equal(sliced, filtered)

    # ~f[:2] versus row_number() >= 3.
    sliced = slice(cars_by_cyl, ~f[:2])
    filtered = filter(cars_by_cyl, row_number() >= 3)
    assert_tibble_equal(sliced, filtered)

    by_x = group_by(tibble(x=c(1, 1, 2, 2, 2)), f.x)
    # Left disabled, as in the original test:
    # out = group_keys(slice(g, 3, _preserve=True))
    # assert out.x.tolist() == [1, 2]
    keys = group_keys(slice(by_x, 2, _preserve=False))
    assert keys.x.tolist() == [2]

    base = tibble(x=f[1:4])
    gf = base >> group_by(
        g=Categorical([1, 1, 2], categories=[1, 2, 3]),
        _drop=False,
    )

    # A string index is rejected with TypeError.
    with pytest.raises(TypeError):
        gf >> slice("a")
    # Mixing an inverted index with a plain one is rejected with ValueError.
    with pytest.raises(ValueError):
        gf >> slice(~f[:2], 1)

    first_rows = gf >> slice(0)
    assert first_rows.shape[0] == 2

    # Use a Series grouped by the tibble's own group index as the indexer.
    group_index = gf._datar["grouped"].grouper.result_index
    grouped_positions = Series([1, 0, 0]).groupby(group_index)
    picked = gf >> slice(grouped_positions)
    assert_iterable_equal(picked.x.obj, [2, 3])

コード例 #4

0

ファイルを表示

def test_if_any_if_all_single_arg():
    """if_any()/if_all() work with only a column selection and no function."""
    flags = tibble(x=[True, False], y=[True, True])

    # Every row has at least one True, so if_any keeps the whole frame.
    any_true = flags >> filter(if_any(c(f.x, f.y)))
    assert_frame_equal(any_true, flags)

    # Only the first row is True in both columns.
    all_true = flags >> filter(if_all(c(f.x, f.y)))
    assert_frame_equal(all_true, flags.iloc[[0], :])

コード例 #5

0

ファイルを表示

def test_handles_tuple_columns():
    """Columns whose cells are tuples pass through filter() unchanged."""
    long_tuple = tuple(range(1, 11))

    # Ungrouped input.
    plain = tibble(a=[1, 2], x=[tuple(range(1, 11)), tuple(range(1, 6))])
    res = plain >> filter(f.a == 1)
    assert res.x.tolist() == [long_tuple]

    # Grouped input: the column's Series is reached through ``.obj``.
    fresh = tibble(a=[1, 2], x=[tuple(range(1, 11)), tuple(range(1, 6))])
    res = fresh >> group_by(f.a) >> filter(f.a == 1)
    assert res.x.obj.tolist() == [long_tuple]

コード例 #6

0

ファイルを表示

def test_handles_named_logical():
    """A boolean column built from a dict (named values) can drive filter()."""
    truthy = tibble(a={'a': True})
    kept = truthy >> filter(f.a)
    assert kept.equals(truthy)

    falsy = tibble(a={'a': False})
    dropped = falsy >> filter(f.a)
    # The only row is False, so nothing is left.
    assert dropped.shape[0] == 0

コード例 #7

0

ファイルを表示

def test_preserves_grouping():
    """Filtering a grouped tibble keeps the grouping variable and rows."""
    grouped = tibble(g=[1, 1, 1, 2, 2], x=[1, 2, 3, 4, 5]) >> group_by(f.g)

    # One surviving row in each of the two groups.
    one_per_group = grouped >> filter(is_element(f.x, [3, 4]))
    assert group_vars(one_per_group) == ["g"]
    assert group_rows(one_per_group) == [[0], [1]]

    # Both surviving rows belong to the same group.
    single_group = grouped >> filter(f.x < 3)
    assert group_vars(single_group) == ["g"]
    assert group_rows(single_group) == [[0, 1]]

コード例 #8

0

ファイルを表示

def test_works_with_if_any_if_all():
    """if_all()/if_any() match the hand-written ``&`` / ``|`` conditions."""
    df = tibble(x1=range(1, 11), x2=c(range(1, 6), 10, 9, 8, 7, 6))

    # if_all over the x* columns versus an explicit conjunction.
    via_helper = df >> filter(if_all(starts_with("x"), lambda col: col > 6))
    via_operator = df >> filter((f.x1 > 6) & (f.x2 > 6))
    assert via_helper.equals(via_operator)

    # if_any over the x* columns versus an explicit disjunction.
    via_helper = df >> filter(if_any(starts_with("x"), lambda col: col > 6))
    via_operator = df >> filter((f.x1 > 6) | (f.x2 > 6))
    assert via_helper.equals(via_operator)

コード例 #9

0

ファイルを表示

def test_handles_scalar_results():
    """A condition built from an aggregate (``min(f.mpg) > 0``) is accepted."""
    unchanged = mtcars >> filter(min(f.mpg) > 0)
    assert unchanged.equals(mtcars)

    filtered_groups = mtcars >> group_by(f.cyl) >> filter(min(f.mpg) > 0)
    filtered_sorted = filtered_groups >> arrange(f.cyl, f.mpg)
    # See TibbleGrouped's Known issues
    plain_sorted = mtcars >> group_by(f.cyl) >> arrange(f.cyl, f.mpg)
    assert_frame_equal(filtered_sorted, plain_sorted)

コード例 #10

0

ファイルを表示

def test_slice_handles_numeric_input():
    """slice(0) selects the same rows as filter(row_number() == 1)."""
    by_cyl = mtcars >> arrange(f.cyl) >> group_by(f.cyl)

    # Grouped: one row per group, three rows in total.
    sliced = by_cyl >> slice(0)
    assert nrow(sliced) == 3
    filtered = by_cyl >> filter(row_number() == 1)
    assert_frame_equal(sliced, filtered)

    # Ungrouped: the sliced result is converted with as_tibble() first.
    sliced_plain = mtcars >> slice(0) >> as_tibble()
    filtered_plain = mtcars >> filter(row_number() == 1)
    assert_frame_equal(sliced_plain, filtered_plain)

コード例 #11

0

ファイルを表示

def test_filter_false_handles_indices(caplog):
    """filter(False) on grouped data, with and without ``_preserve``.

    With ``_preserve=True`` the test only checks that a log message
    containing "support" is captured; with ``_preserve=False`` it checks
    that no group rows remain.
    """
    preserved = mtcars >> group_by(f.cyl) >> filter(False, _preserve=True)
    assert "support" in caplog.text
    # Left disabled, as in the original test:
    # out = group_rows(out)
    # assert out == [[], [], []]

    emptied = mtcars >> group_by(f.cyl) >> filter(False, _preserve=False)
    remaining_rows = group_rows(emptied)
    assert remaining_rows == []

コード例 #12

0

ファイルを表示

def test_contains():
    """is_element() against ``letters`` filters plain and grouped data."""
    df = tibble(a=c("a", "b", "ab"), g=c(1, 1, 2))

    # Ungrouped: two of the three rows are expected to match.
    matched = df >> filter(is_element(f.a, letters))
    n_matched = nrow(matched)
    assert n_matched == 2

    # Grouped by ``g``: same expected count.
    matched = df >> group_by(f.g) >> filter(is_element(f.a, letters))
    n_matched = nrow(matched)
    assert n_matched == 2

コード例 #13

0

ファイルを表示

def test_if_any_all_na_handling():
    """if_all()/if_any() treat NA like the ``&`` / ``|`` operators do."""
    # All nine combinations of True / False / NA in two columns.
    grid = expandgrid(x=c(True, False, NA), y=c(True, False, NA))

    got = grid >> filter(if_all(c(f.x, f.y), identity))
    want = grid >> filter(f.x & f.y)
    assert_frame_equal(got, want)

    got = grid >> filter(if_any(c(f.x, f.y), identity))
    want = grid >> filter(f.x | f.y)
    assert_frame_equal(got, want)

コード例 #14

0

ファイルを表示

ファイル: test_group_by.py プロジェクト: pwwang/datar

def test_remember_drop_True():
    """``_drop=True`` is still reported after filter() and re-grouping."""
    grouped = iris >> group_by(f.Species, _drop=True)
    assert group_by_drop_default(grouped)

    filtered = grouped >> filter(f.Sepal_Length > 5)
    assert group_by_drop_default(filtered)

    filtered_no_preserve = grouped >> filter(
        f.Sepal_Length > 5,
        _preserve=False,
    )
    assert group_by_drop_default(filtered_no_preserve)

    # Grouping again without passing ``_drop`` explicitly.
    regrouped = filtered_no_preserve >> group_by(f.Species)
    assert group_by_drop_default(regrouped)

コード例 #15

0

ファイルを表示

def test_if_any_all_enforce_bool():
    """if_all()/if_any() yield booleans even when columns hold numbers.

    The columns contain 10, not True; the expected results nevertheless
    keep the row in filter() and produce ``ok=True`` in mutate().
    """
    frame = tibble(x=10, y=10)

    kept = frame >> filter(if_all(f[f.x:f.y], identity))
    assert_frame_equal(kept, frame)

    kept = frame >> filter(if_any(f[f.x:f.y], identity))
    assert_frame_equal(kept, frame)

    flagged = frame >> mutate(ok=if_all(f[f.x:f.y], identity))
    assert_frame_equal(flagged, mutate(frame, ok=True))

    flagged = frame >> mutate(ok=if_any(f[f.x:f.y], identity))
    assert_frame_equal(flagged, mutate(frame, ok=True))

コード例 #16

0

ファイルを表示

def test_group_map_respects_empty_groups():
    """Count the pieces group_map() yields, with and without filtering."""
    per_cyl = group_by(mtcars, f.cyl) >> group_map(lambda df: head(df, 2))
    assert len(list(per_cyl)) == 3

    setosa_only = (
        iris
        >> group_by(f.Species)
        >> filter(f.Species == "setosa")
        >> group_map(tally)
    )
    assert len(list(setosa_only)) == 1

    setosa_keep_levels = (
        iris
        >> group_by(f.Species, _drop=False)
        >> filter(f.Species == "setosa")
        >> group_map.list(tally)
    )
    # filter unable to keep the structure
    # assert len(res) == 3
    assert len(setosa_keep_levels) == 1

コード例 #17

0

ファイルを表示

def test_handles_df_cols():
    """filter() works on a tibble that has a data-frame column (``z``)."""
    nested = tibble(x=[1, 2], z=tibble(A=[1, 2], B=[3, 4]))
    first_row = nested >> slice(0)

    # Condition on the ordinary column.
    by_plain = nested >> filter(f.x == 1)
    assert by_plain.equals(first_row)
    # Condition on a column of the nested frame, addressed as "z$A".
    by_nested = nested >> filter(f["z$A"] == 1)
    assert by_nested.equals(first_row)

    grouped = nested >> group_by(f.x)

    # NOTE(review): the same grouped check is performed twice with an
    # identical condition; kept as-is -- confirm whether one of them was
    # meant to use a different condition.
    first_pass = grouped >> filter(f["z$A"] == 1)
    assert first_pass.equals(first_row)
    second_pass = grouped >> filter(f["z$A"] == 1)
    assert second_pass.equals(first_row)

コード例 #18

0

ファイルを表示

def test_preserve_order_across_groups():
    """Three orderings of group/filter/arrange give the same final frame."""
    df = tibble(g=c(1, 2, 1, 2, 1), time=[5, 4, 3, 2, 1], x=f.time)

    # group -> filter -> ungroup -> arrange
    grouped_first = df >> group_by(f.g) >> filter(f.x <= 4)
    res1 = grouped_first >> ungroup() >> arrange(f.g, f.time)

    # arrange -> group -> filter -> ungroup -> arrange
    sorted_first = df >> arrange(f.g) >> group_by(f.g) >> filter(f.x <= 4)
    res2 = sorted_first >> ungroup() >> arrange(f.g, f.time)

    # filter -> group -> ungroup -> arrange
    filtered_first = df >> filter(f.x <= 4) >> group_by(f.g)
    res3 = filtered_first >> ungroup() >> arrange(f.g, f.time)

    # Reset every index in place so equals() is not affected by row labels.
    for frame in (res1, res2, res3):
        frame.reset_index(drop=True, inplace=True)

    assert res1.equals(res2)
    assert res1.equals(res3)

コード例 #19

0

ファイルを表示

def test_group_modify_makes_a_grouped_df():
    """Check the row count and group rows of group_modify() results."""
    two_per_cyl = group_by(mtcars, f.cyl) >> group_modify(
        lambda df: head(df, 2)
    )
    assert nrow(two_per_cyl) == 6
    assert group_rows(two_per_cyl) == [[0, 1], [2, 3], [4, 5]]

    setosa_tally = (
        iris
        >> group_by(f.Species)
        >> filter(f.Species == "setosa")
        >> group_modify(lambda df: tally(df))
    )
    assert nrow(setosa_tally) == 1
    assert group_rows(setosa_tally) == [[0]]

    setosa_tally_keep_levels = (
        iris
        >> group_by(f.Species, _drop=False)
        >> filter(f.Species == "setosa")
        >> group_modify(lambda df: tally(df))
    )
    # The three-group expectations are left disabled, as in the original:
    # assert nrow(res) == 3
    assert nrow(setosa_tally_keep_levels) == 1
    # assert group_rows(res) == [[0], [1], [2]]
    assert group_rows(setosa_tally_keep_levels) == [[0]]

コード例 #20

0

ファイルを表示

ファイル: test_group_by.py プロジェクト: pwwang/datar

def test_rowwise_preserved_by_major_verbs():
    """Most verbs return a TibbleRowwise for rowwise input.

    arrange, filter, mutate, rename, select and slice are expected to keep
    the rowwise class and the grouping variable; summarise is expected to
    return a TibbleGrouped instead.
    """
    rf = rowwise(tibble(x=range(1, 6), y=range(5, 0, -1)), f.x)

    arranged = arrange(rf, f.y)
    assert isinstance(arranged, TibbleRowwise)
    assert group_vars(arranged) == ["x"]

    filtered = filter(rf, f.x < 3)
    assert isinstance(filtered, TibbleRowwise)
    assert group_vars(filtered) == ["x"]

    mutated = mutate(rf, x=f.x + 1)
    assert isinstance(mutated, TibbleRowwise)
    assert group_vars(mutated) == ["x"]

    # Renaming the grouping column renames the group variable as well.
    renamed = rename(rf, X=f.x)
    assert isinstance(renamed, TibbleRowwise)
    assert group_vars(renamed) == ["X"]

    selected = select(rf, "x")
    assert isinstance(selected, TibbleRowwise)
    assert group_vars(selected) == ["x"]

    sliced = slice(rf, c(0, 0))
    assert isinstance(sliced, TibbleRowwise)
    assert group_vars(sliced) == ["x"]

    # Except for summarise
    summarised = summarise(rf, z=mean(f.x, f.y))
    assert isinstance(summarised, TibbleGrouped)
    assert group_vars(summarised) == ["x"]

コード例 #21

0

ファイルを表示

ファイル: test_group_by.py プロジェクト: pwwang/datar

def test_zero_row_dfs():
    """Verbs applied to an empty grouped tibble keep shape and grouping."""
    empty = tibble(a=[], b=[], g=[])
    grouped = group_by(empty, f.g, _drop=False)
    assert grouped.shape == (0, 3)
    assert group_vars(grouped) == ["g"]
    assert group_size(grouped) == []

    # summarise: two columns and no grouping variables are expected.
    summarised = summarise(grouped, n=n())
    assert summarised.shape == (0, 2)
    assert group_vars(summarised) == []

    # mutate adds one column.
    mutated = mutate(grouped, c=f.b + 1)
    assert mutated.shape == (0, 4)
    assert group_vars(mutated) == ["g"]
    assert group_size(mutated) == []

    filtered = filter(grouped, f.a == 100)
    assert filtered.shape == (0, 3)
    assert group_vars(filtered) == ["g"]
    assert group_size(filtered) == []

    arranged = arrange(grouped, f.a, f.g)
    assert arranged.shape == (0, 3)
    assert group_vars(arranged) == ["g"]
    assert group_size(arranged) == []

    # Selecting only ``a`` still yields two columns and group variable ``g``.
    selected = select(grouped, f.a)
    assert selected.shape == (0, 2)
    assert group_vars(selected) == ["g"]
    assert group_size(selected) == []

コード例 #22

0

ファイルを表示

ファイル: test_group_by.py プロジェクト: pwwang/datar

def test_0_groups_filter():
    """Filtering a zero-row grouped tibble keeps dimensions and columns."""
    empty_grouped = tibble(x=1).loc[[], :] >> group_by(f.x)
    filtered = empty_grouped >> filter(f.x > 3)

    # dim() is evaluated before the assert, as in the original test.
    dims_before = empty_grouped >> dim()
    dims_after = filtered >> dim()
    assert dims_before == dims_after
    assert empty_grouped.columns.tolist() == filtered.columns.tolist()

コード例 #23

0

ファイルを表示

ファイル: test_group_by.py プロジェクト: pwwang/datar

def test_drop():
    """After filtering to one species, ``_drop=True`` leaves one group."""
    setosa = iris >> filter(f.Species == "setosa")
    grouped = setosa >> group_by(f.Species, _drop=True)

    # count() gives one row per group; nrow() counts those rows.
    n_groups = grouped >> count() >> nrow()
    assert n_groups == 1

コード例 #24

0

ファイルを表示

def test_grouped_filter_handles_indices():
    """mutate() after a grouped filter() keeps the same group structure.

    The filtered tibble and the filtered-then-mutated tibble must agree on
    row count, group rows and group keys.
    """
    res = iris >> group_by(f.Species) >> filter(f.Sepal_Length > 5)
    res2 = res >> mutate(Petal=f.Petal_Width * f.Petal_Length)

    assert nrow(res) == nrow(res2)
    grows1 = group_rows(res)
    grows2 = group_rows(res2)
    assert grows1 == grows2

    # Compare the key frames with DataFrame.equals(). The previous
    # ``all(a == b)`` form, if ``all`` is Python's builtin, iterates the
    # column labels of the comparison frame and is truthy for any non-empty
    # label, so it could not detect differing keys.
    keys1 = group_keys(res)
    keys2 = group_keys(res2)
    assert keys1.equals(keys2)

コード例 #25

0

ファイルを表示

ファイル: test_group_by.py プロジェクト: pwwang/datar

def test_remember_drop_False():
    """``_drop=False`` is still reported after grouping again."""
    setosa = iris >> filter(f.Species == "setosa")
    grouped = setosa >> group_by(f.Species, _drop=False)
    assert not group_by_drop_default(grouped)

    # Grouping again without passing ``_drop`` explicitly.
    regrouped = grouped >> group_by(f.Species)
    assert not group_by_drop_default(regrouped)

コード例 #26

0

ファイルを表示

def test_group_split_bind_rows_round_trip():
    """group_split.list() pieces can be bound back into the original frame."""
    # NOTE(review): this assigns into ``iris`` itself, so the categorical
    # conversion stays visible to whatever else shares that object --
    # confirm this is intended.
    species = iris["Species"]
    iris["Species"] = species.astype("category")
    setosa = iris >> filter(f.Species == "setosa")

    # Default splitting: a single piece that round-trips via bind_rows().
    pieces = setosa >> group_split.list(f.Species)
    assert len(pieces) == 1
    assert bind_rows(pieces).equals(setosa)

    # With ``_drop=False`` three pieces are expected; the first is setosa.
    pieces = setosa >> group_split.list(f.Species, _drop=False)
    assert len(pieces) == 3
    assert_frame_equal(pieces[0], setosa)

コード例 #27

0

ファイルを表示

def test_use_env_var():
    """all_of() with a local string selects the column that string names.

    The local is deliberately called ``y`` while holding ``"x"``, and the
    frame also has a column ``y``.
    """
    # not a problem, since we use f.y
    df = tibble(x=1.0, y=2.4)
    y = "x"

    summarised = df >> summarise(across(all_of(y), mean))
    only_x = tibble(x=1.0)
    assert summarised.equals(only_x)

    # The mean of a one-row column leaves the frame unchanged.
    mutated = df >> mutate(across(all_of(y), mean))
    assert mutated.equals(df)

    filtered = df >> filter(if_all(all_of(y), lambda values: values < 2))
    assert filtered.equals(df)

コード例 #28

0

ファイルを表示

def test_rowwise():
    """filter() on rowwise data with a custom registered function."""

    @register_func(None)
    def grepl(a, b):
        # Pairwise substring test over the underlying ``.obj`` values; the
        # result reuses the index of ``a.obj``.
        hits = [needle in haystack for needle, haystack in zip(a.obj, b.obj)]
        return Series(hits, index=a.obj.index)

    df = tibble(
        First=c("string1", "string2"),
        Second=c("Sentence with string1", "something"),
    )
    matched = df >> rowwise() >> filter(grepl(f.First, f.Second))
    assert nrow(matched) == 1

    # The surviving row must equal the first row of the input.
    first_row = df >> slice(0)
    matched_plain = matched >> ungroup()
    assert first_row.equals(matched_plain)

コード例 #29

0

ファイルを表示

def test_returns_input_with_no_args():
    """filter() called without any condition returns an equal frame."""
    unfiltered = filter(mtcars)
    assert unfiltered.equals(mtcars)

コード例 #30

0

ファイルを表示

def test_discards_na():
    """Rows whose condition involves NA are not kept by filter()."""
    with_na = tibble(i=range(1, 6), x=c(NA, 1, 1, 0, 0))
    ones = filter(with_na, f.x == 1)
    # Only the two rows where x is exactly 1 remain.
    n_ones = nrow(ones)
    assert n_ones == 2