Exemple #1
0
def test_order():
    """Check `order` on a plain vector, a Series and a grouped Series."""
    # vector built with c()
    assert_iterable_equal(order(c(3, 1, 2)), [1, 2, 0])

    # ungrouped Series
    series = Series([5, 2, 3, 4])
    assert_iterable_equal(order(series), [1, 2, 3, 0])

    # grouped Series: expected values are read from the result's `.obj`
    grouped = Series([1, 2, 3, 4]).groupby([1, 1, 2, 2])
    ranked = order(grouped)
    assert_iterable_equal(ranked.obj, [0, 1, 0, 1])
Exemple #2
0
def test_unique():
    """Check `unique` on a list, a scalar, a Series and a grouped Series."""
    values = [1, 2, 2, 3]
    assert_iterable_equal(unique(values), [1, 2, 3])
    # a scalar input is compared directly against the same scalar
    assert unique(3) == 3

    series = Series([1, 1, 2, 2, 2, 1])
    assert_iterable_equal(unique(series), [1, 2])

    grouped = Series([1, 1, 2, 2, 2, 1]).groupby([1, 1, 1, 2, 2, 2])
    deduped = unique(grouped)
    assert_iterable_equal(deduped, [1, 2, 2, 1])
    # the expected index repeats the group labels 1 and 2
    assert_iterable_equal(deduped.index, [1, 1, 2, 2])
Exemple #3
0
def test_transform_default():
    """Run a "transform" function built by `func_factory` over a scalar,
    an array, a list, a Series, a DataFrame and grouped/rowwise inputs."""

    @func_factory("transform", "x")
    def double(x):
        return x * 2

    # scalar
    scalar_out = double(3)
    assert scalar_out[0] == 6

    array_out = double(np.array([1, 2], dtype=int))
    assert_iterable_equal(array_out, [2, 4])

    # the function is built a second time with the same arguments and body
    @func_factory("transform", "x")
    def double(x):
        return x * 2

    list_out = double([1, 2])
    assert_iterable_equal(list_out, [2, 4])

    # default on series
    series = Series([2, 3], index=["a", "b"])
    series_out = double(series)
    assert isinstance(series_out, Series)
    assert_iterable_equal(series_out.index, ["a", "b"])
    assert_iterable_equal(series_out, [4, 6])

    # default on dataframe
    frame = DataFrame({"a": [3, 4]})
    frame_out = double(frame)
    assert isinstance(frame_out, DataFrame)
    assert_iterable_equal(frame_out.a, [6, 8])

    # default on seriesgroupby
    sgb = Series([1, 2, 1, 2]).groupby([1, 1, 2, 2])
    sgb_out = double(sgb)
    assert isinstance(sgb_out, SeriesGroupBy)
    assert_iterable_equal(sgb_out.obj, [2, 4, 2, 4])
    assert sgb_out.grouper.ngroups == 2

    # on tibble grouped
    grouped_tbl = tibble(x=[1, 2, 1, 2], g=[1, 1, 2, 2]).group_by("g")
    grouped_out = double(grouped_tbl)
    # grouping variables not included
    assert_iterable_equal(grouped_out.x.obj, [2, 4, 2, 4])

    # on tibble rowwise
    rowwise_tbl = tibble(x=[1, 2, 1, 2], g=[1, 1, 2, 2]).rowwise("g")
    rowwise_out = double(rowwise_tbl)
    assert isinstance(rowwise_out, TibbleRowwise)
    assert_frame_equal(rowwise_out, rowwise_out._datar["grouped"].obj)
    assert_iterable_equal(rowwise_out.x.obj, [2, 4, 2, 4])
    assert_iterable_equal(rowwise_out.group_vars, ["g"])
Exemple #4
0
def test_sum(caplog):
    """Check `sum` on a scalar, lists with NA, a Series and a grouped Series.

    Args:
        caplog: log-capturing fixture; its captured text is inspected after
            `sum` is called on a grouped Series with `na_rm=False`.
    """
    assert sum(1) == 1
    assert sum([1, 2]) == 3
    # both sides are wrapped in a list so the NA result is compared
    # through assert_iterable_equal rather than with ==
    assert_iterable_equal([sum([1, 2, NA], na_rm=False)], [NA])
    assert sum([1, 2, NA], na_rm=True) == 3
    # Series
    assert sum(Series([1, 2])) == 3
    # Series GroupBy
    out = sum(Series([1, 2, 3, 4]).groupby([1, 1, 2, 2]))
    assert_series_equal(out, Series([3, 7], index=[1, 2], name="x"))

    caplog.clear()
    # only the logged message is checked here, so the result is discarded
    sum(Series([1, 2, 3, 4]).groupby([1, 1, 2, 2]), na_rm=False)
    assert "always True" in caplog.text
Exemple #5
0
def test_rev():
    """Check `rev` on a scalar, a list, a float array, a Series and a
    grouped Series."""
    assert_iterable_equal(rev(3), [3])
    assert_iterable_equal(rev([1, 2]), [2, 1])

    # float array: the dtype of the result is checked as well
    floats = np.array([1, 2], dtype=float)
    reversed_floats = rev(floats)
    assert_iterable_equal(reversed_floats, [2.0, 1.0])
    assert reversed_floats.dtype == float

    # Series: values are reversed while the expected index stays 0, 1, 2
    series = Series([1, 2, 3])
    reversed_series = rev(series)
    assert_iterable_equal(reversed_series, [3, 2, 1])
    assert_iterable_equal(reversed_series.index, [0, 1, 2])

    # grouped Series: expected values are read from the result's `.obj`
    grouped = Series([1, 2, 3]).groupby([1, 1, 2])
    reversed_grouped = rev(grouped)
    assert_iterable_equal(reversed_grouped.obj, [2, 1, 3])
Exemple #6
0
def test_slice_works_with_grouped_data():
    """Check `slice` on grouped data.

    Compares `slice` results with `filter` calls on `row_number()`, then
    checks the group keys after slicing, two error cases, and slicing with
    a grouped Series as the index argument.
    """
    # sort by cyl, then group by it
    g = mtcars >> arrange(f.cyl) >> group_by(f.cyl)

    # f[:2] is expected to give the same frame as row_number() < 3
    res = slice(g, f[:2])
    exp = filter(g, row_number() < 3)
    assert_frame_equal(res, exp)

    # the negated slice is expected to match row_number() >= 3
    res = slice(g, ~f[:2])
    exp = filter(g, row_number() >= 3)
    assert_tibble_equal(res, exp)

    g = group_by(tibble(x=c(1, 1, 2, 2, 2)), f.x)
    # NOTE(review): the _preserve=True case below is commented out —
    # confirm whether it should be re-enabled or removed.
    # out = group_keys(slice(g, 3, _preserve=True))
    # assert out.x.tolist() == [1, 2]
    out = group_keys(slice(g, 2, _preserve=False))
    assert out.x.tolist() == [2]

    # group by a Categorical that declares category 3 but has no values
    # for it, passing _drop=False to group_by
    gf = tibble(x=f[1:4]) >> group_by(
        g=Categorical([1, 1, 2], categories=[1, 2, 3]),
        _drop=False,
    )
    # a string index is expected to raise TypeError
    with pytest.raises(TypeError):
        gf >> slice("a")
    # a negated slice combined with the index 1 is expected to raise
    # ValueError
    with pytest.raises(ValueError):
        gf >> slice(~f[:2], 1)

    out = gf >> slice(0)
    assert out.shape[0] == 2

    # the index argument is itself a Series grouped by the result_index of
    # gf's grouper
    out = gf >> slice(
        Series([1, 0, 0]).groupby(gf._datar["grouped"].grouper.result_index))
    assert_iterable_equal(out.x.obj, [2, 3])
Exemple #7
0
def test_compound_ungroup():
    """Check `ungroup` on a scalar, a grouped Series and grouped tibbles,
    together with the related `group_by` behaviour asserted below."""
    # a scalar is compared directly against the same scalar
    assert ungroup(1) == 1

    # a grouped Series is expected to give back its underlying `.obj`
    grouped_series = Series([1, 2, 3]).groupby([1, 1, 2])
    assert ungroup(grouped_series) is grouped_series.obj

    # passing a column to ungroup on a grouped Series raises ValueError
    with pytest.raises(ValueError):
        ungroup(grouped_series, "abc")

    grouped_df = tibble(x=1, y=2) >> group_by(f.x, f.y)
    assert group_vars(ungroup(grouped_df)) == []

    # ungrouping one variable is expected to leave the other one
    assert group_vars(ungroup(grouped_df, f.x)) == ["y"]
    assert group_vars(ungroup(grouped_df, f.y)) == ["x"]

    # adding an existing grouping variable keeps the group vars as x, y
    regrouped = group_by(grouped_df, f.y, _add=True)
    assert group_vars(regrouped) == ["x", "y"]

    # ungrouping a column of a rowwise frame raises ValueError
    rowwise_df = grouped_df >> rowwise()
    with pytest.raises(ValueError):
        ungroup(rowwise_df, f.x)

    # grouping by a column not created above (w) raises KeyError
    with pytest.raises(KeyError):
        group_by(grouped_df, f.w)
Exemple #8
0
def test_fct_inorder():
    """Check `fct_inorder` on a factor, a Series of it and a grouped Series.

    In every case the values and levels are compared with a factor whose
    levels are given as c, a, b.
    """
    values = c("c", "a", "a", "b")
    fct = factor(values, c("a", "b", "c"))
    expected = factor(c("c", "a", "a", "b"), c("c", "a", "b"))

    # plain factor
    reordered = fct_inorder(fct)
    assert_iterable_equal(reordered, expected)
    assert_iterable_equal(levels(reordered), levels(expected))

    # Series wrapping the factor
    series = Series(fct)
    reordered_series = fct_inorder(series)
    assert_iterable_equal(reordered_series, expected)
    assert_iterable_equal(levels(reordered_series), levels(expected))

    # grouped Series: the result is inspected through `.obj`
    grouped = series.groupby([1, 1, 2, 2])
    reordered_grouped = fct_inorder(grouped)
    assert_iterable_equal(reordered_grouped.obj, expected)
    assert_iterable_equal(levels(reordered_grouped.obj), levels(expected))
Exemple #9
0
def test_c():
    """Check `c` with scalars, a nested `c` call and a grouped Series."""
    assert_iterable_equal(c(1, 2, 3), [1, 2, 3])
    assert_iterable_equal(c(1, 2, 3, 4), [1, 2, 3, 4])
    # a nested c() call is expected to be flattened
    assert_iterable_equal(c(1, c(2, 3), 4, 5), [1, 2, 3, 4, 5])

    # with a grouped Series, 7, 8, 9 are expected before each group's values
    grouped = Series([1, 2, 3, 4]).groupby([1, 1, 2, 2])
    combined = c(7, [8, 9], grouped)
    assert_iterable_equal(combined.obj, [7, 8, 9, 1, 2, 7, 8, 9, 3, 4])
Exemple #10
0
def test_agg():
    """Exercise an "agg" function built by `func_factory` from `np.mean`,
    before and after registering string funcs for grouped/rowwise types."""
    men = func_factory(
        "agg",
        "a",
        name="men",
        func=np.mean,
        signature=inspect.signature(lambda a: None),
    )

    # list input
    assert men([1, 2, 3]) == 2.0

    # Series input
    assert men(Series([1, 2, 3])) == 2.0

    # SeriesGroupBy
    men.register(SeriesGroupBy, func="mean")
    grouped = Series([1, 2, 4]).groupby([1, 2, 2])
    group_means = men(grouped)
    assert_iterable_equal(group_means.index, [1, 2])
    assert_iterable_equal(group_means, [1.0, 3.0])

    # SeriesRowwise
    rowwise_df = tibble(x=[1, 2, 4]).rowwise()
    row_means = men(rowwise_df.x)
    assert_iterable_equal(row_means, rowwise_df.x.obj)

    # after registering "sum" for SeriesRowwise
    men.register(SeriesRowwise, func="sum")
    row_sums = men(rowwise_df.x)
    assert_iterable_equal(row_sums.index, [0, 1, 2])
    assert_iterable_equal(row_sums, [1.0, 2.0, 4.0])

    # TibbleRowwise
    rowwise_tbl = tibble(a=[1, 2, 3], b=[4, 5, 6]).rowwise()
    per_row = men(rowwise_tbl)
    assert_iterable_equal(per_row, [2.5, 3.5, 4.5])

    # TibbleGrouped
    grouped_tbl = tibble(a=[1, 2, 3], b=[4, 5, 5]).group_by("b")
    per_group = men(grouped_tbl)
    assert_iterable_equal(per_group.a, [1.0, 2.5])
Exemple #11
0
def test_scalar_true_false_are_vectorized():
    """Check that `if_else` applies scalar true/false values element-wise
    for both a plain condition vector and a Series condition."""
    condition = c(True, True, False, False)
    picked = if_else(condition, 1, 2)
    assert list(picked) == [1, 1, 2, 2]

    # Series
    series_condition = Series(c(True, True, False, False))
    picked_series = if_else(series_condition, 1, 2)
    assert isinstance(picked_series, Series)
    assert list(picked_series) == [1, 1, 2, 2]
Exemple #12
0
def test_args_kwargs_works_correctly():
    """Check that `recode` / `recode_factor` give the same result whether
    replacements arrive as keywords, unpacked dicts or a positional dict."""
    # test_that(".dot argument works correctly (PR #2110)", {
    chars = letters[:3]
    ints = [1, 2, 3]
    fct = factor(chars)

    # all replacements as keywords vs. all through an unpacked dict
    actual = recode(chars, a="apple", b="banana", _default=None)
    expected = recode(chars, _default=None, **{"a": "apple", "b": "banana"})
    assert_iterable_equal(actual, expected)

    # keywords vs. a mix of one keyword and an unpacked dict
    actual = recode(chars, a="apple", b="banana", _default=None)
    expected = recode(chars, a="apple", _default=None, **{"b": "banana"})
    assert_iterable_equal(actual, expected)

    # unpacked dict placed before vs. after _default
    actual = recode(ints, **{"1": 4, "2": 5}, _default=NA_integer_)
    expected = recode(ints, _default=NA_integer_, **{"1": 4, "2": 5})
    assert_iterable_equal(actual, expected)

    # unpacked dict vs. a positional dict plus an unpacked dict
    actual = recode(ints, **{"1": 4, "2": 5}, _default=NA_integer_)
    expected = recode(ints, {"1": 4}, _default=NA_integer_, **{"2": 5})
    assert_iterable_equal(actual, expected)

    # same comparison with the input wrapped in a Series
    actual = recode(Series(ints), **{"1": 4, "2": 5}, _default=NA_integer_)
    expected = recode(ints, {"1": 4}, _default=NA_integer_, **{"2": 5})
    assert_iterable_equal(actual, expected)

    # recode_factor: keywords vs. an unpacked dict
    actual = recode_factor(
        fct, a="apple", b="banana", _default=NA_character_
    )
    expected = recode_factor(
        fct,
        _default=NA_character_,
        **{"a": "apple", "b": "banana"},
    )
    assert_iterable_equal(actual, expected)

    # recode_factor on a Series, compared with the factor result above
    actual = recode_factor(
        Series(fct),
        a="apple",
        b="banana",
        _default=NA_character_,
    )
    assert_iterable_equal(actual, expected)
Exemple #13
0
def test_errors():
    """Check the exceptions `arrange` raises for duplicated names, an
    unknown column and a sort key of the wrong length."""
    column = Series(1, name="x")

    # two columns with the same name, built with _name_repair="minimal"
    duplicated = tibble(column, column, _name_repair="minimal")
    with pytest.raises(NameNonUniqueError):
        duplicated >> arrange(f.x)

    # column y was not created
    single = tibble(x=column)
    with pytest.raises(KeyError):
        single >> arrange(f.y)

    # sort key built by repeating the column twice
    with pytest.raises(ValueError, match="Length of values"):
        single >> arrange(rep(f.x, 2))
Exemple #14
0
def test_diff():
    """Check `diff` with `lag` and `differences`, and on a grouped Series."""
    cumulated = cumsum(cumsum(seq(1, 10)))

    # lag=2 is compared with a manual shifted subtraction and with squares
    assert_iterable_equal(
        diff(cumulated, lag=2), cumulated[2:] - cumulated[:-2]
    )
    assert_iterable_equal(diff(cumulated, lag=2), seq(3, 10)**2)

    # applying diff twice is compared with differences=2
    assert_iterable_equal(
        diff(diff(cumulated)), diff(cumulated, differences=2)
    )

    # differences=40 is expected to give an empty result
    assert_iterable_equal(diff(cumulated, differences=40), [])

    # grouped Series: values are read from `.obj`; 3 groups are expected
    grouped = Series([1, 2, 3, 4, 5]).groupby([1, 2, 2, 3, 3])
    differenced = diff(grouped)
    assert_iterable_equal(differenced.obj, [1, 1])
    assert differenced.grouper.ngroups == 3
Exemple #15
0
def test_transform_register():
    """Register type-specific implementations on a "transform" function
    and check which one is used for each input type."""

    @func_factory(kind="transform", data_args="x")
    def double(x):
        return x * 2

    @double.register(DataFrame)
    def _(x):
        return x * 3

    # Series before any Series-specific registration
    series = Series([2, 3])
    assert_iterable_equal(double(series), [4, 6])

    double.register(Series, lambda x: x * 4)

    # Series after registering the x * 4 implementation
    assert_iterable_equal(double(series), [8, 12])

    # a tibble is expected to use the x * 3 DataFrame implementation
    frame = tibble(a=[1, 3])
    assert_iterable_equal(double(frame).a, [3, 9])

    # a list is expected to give the x * 4 results
    assert_iterable_equal(double([1, 4]), [4, 16])

    # register an available string func for transform
    double.register(SeriesGroupBy, "sum")
    grouped = Series([1, -2]).groupby([1, 2])
    summed = double(grouped)
    assert_iterable_equal(summed.obj, [1, -2])

    # seriesrowwise
    double.register(SeriesRowwise, lambda x: x + 1)
    grouped.is_rowwise = True
    shifted = double(grouped)
    assert_iterable_equal(shifted.obj, [2, -1])
    assert shifted.is_rowwise
Exemple #16
0
def test_removes_vars_with_None():
    """Check that `mutate(col=None)` drops the column, for plain and
    grouped tibbles."""
    plain = tibble(x=range(1, 4), y=range(1, 4))
    grouped = group_by(plain, f.x)

    # plain tibble: y is removed
    without_y = plain >> mutate(y=None)
    assert without_y.columns.tolist() == ["x"]

    # grouped tibble: y is removed and the grouping is still asserted
    grouped_without_y = grouped >> mutate(y=None)
    assert grouped_without_y.columns.tolist() == ["x"]
    assert isinstance(grouped_without_y, TibbleGrouped)
    assert group_vars(grouped_without_y) == ["x"]
    assert group_rows(grouped_without_y) == [[0], [1], [2]]

    # even if it doesn't exist
    unchanged = plain >> mutate(z=None)
    assert unchanged.equals(plain)

    # a column passed positionally and set to None in the same call
    z_column = Series(1, name="z")
    unchanged = plain >> mutate(z_column, z=None)
    assert unchanged.equals(plain)

    # add z while removing both original columns
    one_row = tibble(x=1, y=1)
    only_z = mutate(one_row, z=1, x=None, y=None)
    assert only_z.equals(tibble(z=1))
Exemple #17
0
def test_droplevels():
    """Check that `droplevels` keeps the values and leaves levels 1, 2, 3
    for a Series factor declared with levels 1-4."""
    with_extra_level = Series(factor([1, 2, 3], levels=[1, 2, 3, 4]))
    dropped = droplevels(with_extra_level)
    assert_iterable_equal(with_extra_level, dropped)
    assert_iterable_equal(levels(dropped), [1, 2, 3])
Exemple #18
0
 def grepl(a, b):
     """Return a Series of `x in y` for the paired elements of `a.obj` and
     `b.obj`, indexed like `a.obj`."""
     needles, haystacks = a.obj, b.obj
     hits = [needle in haystack
             for needle, haystack in zip(needles, haystacks)]
     return Series(hits, index=needles.index)
Exemple #19
0
def test_seq_len_sgb():
    """Check `seq_len` on a grouped Series against the expected values."""
    grouped = Series([1, 2, 3, 4]).groupby([1, 1, 2, 2])
    sequence = seq_len(grouped)
    assert_iterable_equal(sequence, [1, 1, 2, 3])
Exemple #20
0
def test_transform_hooks():
    """Exercise `pre`/`post` hooks and the `meta` flag when registering
    implementations on a "transform" function built by `func_factory`.

    Registrations are made one after another on the same `times` function,
    so the order of the statements below matters.
    """
    @func_factory(kind="transform", data_args="x")
    def times(x, t):
        return x * t

    # this combination of arguments (pre=1, func=None, meta=False) is
    # expected to be rejected with ValueError
    with pytest.raises(ValueError):
        times.register(Series, meta=False, pre=1, func=None)

    # Series: hooks only (func=None); pre returns a tuple of
    # (x, (-t,), {}), post adds t to the output
    times.register(
        Series,
        func=None,
        pre=lambda x, t: (x, (-t, ), {}),
        post=lambda out, x, t: out + t,
    )

    x = Series([1, 2])
    out = times(x, -1)
    assert_iterable_equal(out, [2, 3])

    # Series: replace with a function registered with meta=False
    @times.register(Series, meta=False)
    def _(x, t):
        return x + t

    out = times(x, 10)
    assert_iterable_equal(out, [11, 12])

    # SeriesGroupBy: function registered with meta=True; it ignores t
    @times.register(SeriesGroupBy, meta=True)
    def _(x, t):
        return x + 10

    x = Series([1, 2, 1, 2]).groupby([1, 1, 2, 2])
    out = times(x, 1)
    assert_iterable_equal(out.obj, [11, 12, 11, 12])

    # SeriesGroupBy: hooks only; pre passes t + 1, post returns the output
    # unchanged
    times.register(
        SeriesGroupBy,
        func=None,
        pre=lambda x, t: (x, (t + 1, ), {}),
        post=lambda out, x, *args, **kwargs: out,
    )
    out = times(x, 1)
    assert_iterable_equal(out, [2, 4, 2, 4])

    # Series: hooks only again, this time with a pre hook that returns None
    times.register(
        Series,
        func=None,
        pre=lambda *args, **kwargs: None,
        post=lambda out, x, t: out + t,
    )
    x = Series([1, 2])
    out = times(x, 3)
    assert_iterable_equal(out, [4, 5])

    # DataFrame: function registered with meta=True, raising to the power t
    @times.register(DataFrame, meta=True)
    def _(x, t):
        return x**t

    x = tibble(a=[1, 2], b=[2, 3])
    out = times(x, 3)
    assert_iterable_equal(out.a, [1, 8])
    assert_iterable_equal(out.b, [8, 27])

    # TibbleGrouped
    # hooks only; pre passes t - 1, post reorders the rows with
    # reindex([1, 0])
    times.register(
        TibbleGrouped,
        func=None,
        pre=lambda x, t: (x, (t - 1, ), {}),
        post=lambda out, x, t: out.reindex([1, 0]),
    )
    x = x.group_by("a")
    out = times(x, 3)
    assert_iterable_equal(out.b, [6, 4])

    # TibbleGrouped: function registered with meta=False; it calls
    # x.transform itself and then overwrites the cell at row 0, column 1
    @times.register(
        TibbleGrouped,
        meta=False,
    )
    def _(x, t):
        out = x.transform(lambda d, t: d * t, 0, t - 1)
        out.iloc[0, 1] = 10
        return out

    # x = tibble(a=[1, 2], b=[2, 3])  # grouped by a
    out = times(x, 3)
    assert isinstance(out, TibbleGrouped)
    assert_iterable_equal(out.group_vars, ["a"])
    assert_iterable_equal(out.b.obj, [10, 6])
Exemple #21
0
def test_factor_sgb():
    """Check that `factor` on a grouped Series returns a SeriesGroupBy
    whose underlying values equal the factor of the same data."""
    grouped = Series([1, 2, 3]).groupby([1, 1, 3])
    as_factor = factor(grouped)
    assert isinstance(as_factor, SeriesGroupBy)
    expected = factor([1, 2, 3])
    assert_factor_equal(as_factor.obj.values, expected)