def test_order():
    """Check `order` on a plain vector, a Series and a SeriesGroupBy."""
    # plain vector built with c()
    assert_iterable_equal(order(c(3, 1, 2)), [1, 2, 0])

    # ungrouped Series
    ser = Series([5, 2, 3, 4])
    assert_iterable_equal(order(ser), [1, 2, 3, 0])

    # grouped Series: the expected positions restart in each group
    grouped = Series([1, 2, 3, 4]).groupby([1, 1, 2, 2])
    ordered = order(grouped)
    assert_iterable_equal(ordered.obj, [0, 1, 0, 1])
def test_unique():
    """Check `unique` on a list, a scalar, a Series and a SeriesGroupBy."""
    # list input
    values = [1, 2, 2, 3]
    assert_iterable_equal(unique(values), [1, 2, 3])

    # scalar input
    assert unique(3) == 3

    # ungrouped Series
    ser = Series([1, 1, 2, 2, 2, 1])
    assert_iterable_equal(unique(ser), [1, 2])

    # grouped Series: both the values and the resulting index are checked
    grouped = Series([1, 1, 2, 2, 2, 1]).groupby([1, 1, 1, 2, 2, 2])
    deduped = unique(grouped)
    assert_iterable_equal(deduped, [1, 2, 2, 1])
    assert_iterable_equal(deduped.index, [1, 1, 2, 2])
def test_transform_default():
    """Exercise a "transform" function on each kind of input.

    Inputs covered: a scalar, a numpy array, a list, a Series, a
    DataFrame, a SeriesGroupBy, a grouped tibble and a rowwise tibble.
    """

    @func_factory("transform", "x")
    def double(x):
        return x * 2

    # scalar input: the result is indexed to read the doubled value
    assert double(3)[0] == 6

    # numpy array input
    doubled = double(np.array([1, 2], dtype=int))
    assert_iterable_equal(doubled, [2, 4])

    # the function is built a second time before being given a list
    @func_factory("transform", "x")
    def double(x):
        return x * 2

    assert_iterable_equal(double([1, 2]), [2, 4])

    # Series: type, index and values are checked
    series_in = Series([2, 3], index=["a", "b"])
    doubled = double(series_in)
    assert isinstance(doubled, Series)
    assert_iterable_equal(doubled.index, ["a", "b"])
    assert_iterable_equal(doubled, [4, 6])

    # DataFrame
    frame_in = DataFrame({"a": [3, 4]})
    doubled = double(frame_in)
    assert isinstance(doubled, DataFrame)
    assert_iterable_equal(doubled.a, [6, 8])

    # SeriesGroupBy: the result stays grouped with the same group count
    grouped_series = Series([1, 2, 1, 2]).groupby([1, 1, 2, 2])
    doubled = double(grouped_series)
    assert isinstance(doubled, SeriesGroupBy)
    assert_iterable_equal(doubled.obj, [2, 4, 2, 4])
    assert doubled.grouper.ngroups == 2

    # grouped tibble
    grouped_tbl = tibble(x=[1, 2, 1, 2], g=[1, 1, 2, 2]).group_by("g")
    doubled = double(grouped_tbl)
    # grouping variables not included
    assert_iterable_equal(doubled.x.obj, [2, 4, 2, 4])

    # rowwise tibble
    rowwise_tbl = tibble(x=[1, 2, 1, 2], g=[1, 1, 2, 2]).rowwise("g")
    doubled = double(rowwise_tbl)
    assert isinstance(doubled, TibbleRowwise)
    assert_frame_equal(doubled, doubled._datar["grouped"].obj)
    assert_iterable_equal(doubled.x.obj, [2, 4, 2, 4])
    assert_iterable_equal(doubled.group_vars, ["g"])
def test_sum(caplog):
    """Check `sum` on scalars, lists with NA, a Series and a SeriesGroupBy.

    Args:
        caplog: pytest log-capture fixture, used to check the message
            logged when `na_rm=False` is passed for a grouped Series.
    """
    # scalars and lists, with and without NA removal
    assert sum(1) == 1
    assert sum([1, 2]) == 3
    assert_iterable_equal([sum([1, 2, NA], na_rm=False)], [NA])
    assert sum([1, 2, NA], na_rm=True) == 3

    # Series
    # (this heading was a bare `Series` expression statement, i.e. a
    # no-op, rather than a comment like the other section headings)
    assert sum(Series([1, 2])) == 3

    # Series GroupBy
    out = sum(Series([1, 2, 3, 4]).groupby([1, 1, 2, 2]))
    assert_series_equal(out, Series([3, 7], index=[1, 2], name="x"))

    # Only the logged message matters here, so the return value is not
    # bound to a name.
    caplog.clear()
    sum(Series([1, 2, 3, 4]).groupby([1, 1, 2, 2]), na_rm=False)
    assert "always True" in caplog.text
def test_rev():
    """Check `rev` on a scalar, a list, an array, a Series and a group."""
    # scalar and list
    assert_iterable_equal(rev(3), [3])
    assert_iterable_equal(rev([1, 2]), [2, 1])

    # float array: the dtype is expected to be kept
    arr = np.array([1, 2], dtype=float)
    reversed_arr = rev(arr)
    assert_iterable_equal(reversed_arr, [2.0, 1.0])
    assert reversed_arr.dtype == float

    # Series: values are reversed while the index reads 0, 1, 2
    ser = Series([1, 2, 3])
    reversed_ser = rev(ser)
    assert_iterable_equal(reversed_ser, [3, 2, 1])
    assert_iterable_equal(reversed_ser.index, [0, 1, 2])

    # SeriesGroupBy: the expected values are reversed inside each group
    grouped = Series([1, 2, 3]).groupby([1, 1, 2])
    reversed_grouped = rev(grouped)
    assert_iterable_equal(reversed_grouped.obj, [2, 1, 3])
def test_slice_works_with_grouped_data():
    """Check `slice` on grouped frames against equivalent `filter` calls."""
    cars_by_cyl = mtcars >> arrange(f.cyl) >> group_by(f.cyl)

    # first two rows of each group
    actual = slice(cars_by_cyl, f[:2])
    expected = filter(cars_by_cyl, row_number() < 3)
    assert_frame_equal(actual, expected)

    # everything except the first two rows of each group
    actual = slice(cars_by_cyl, ~f[:2])
    expected = filter(cars_by_cyl, row_number() >= 3)
    assert_tibble_equal(actual, expected)

    small_groups = group_by(tibble(x=c(1, 1, 2, 2, 2)), f.x)
    # Disabled check, kept as found:
    # out = group_keys(slice(g, 3, _preserve=True))
    # assert out.x.tolist() == [1, 2]
    keys = group_keys(slice(small_groups, 2, _preserve=False))
    assert keys.x.tolist() == [2]

    # grouping by a categorical that has an unused category
    gf = tibble(x=f[1:4]) >> group_by(
        g=Categorical([1, 1, 2], categories=[1, 2, 3]),
        _drop=False,
    )

    # invalid index arguments
    with pytest.raises(TypeError):
        gf >> slice("a")
    with pytest.raises(ValueError):
        gf >> slice(~f[:2], 1)

    first_rows = gf >> slice(0)
    assert first_rows.shape[0] == 2

    # a grouped Series used as the row selector
    selector = Series([1, 0, 0]).groupby(
        gf._datar["grouped"].grouper.result_index
    )
    picked = gf >> slice(selector)
    assert_iterable_equal(picked.x.obj, [2, 3])
def test_compound_ungroup():
    """Check `ungroup` on scalars, grouped Series and grouped tibbles."""
    # a scalar is compared equal to itself after ungrouping
    assert ungroup(1) == 1

    # a SeriesGroupBy gives back its underlying object
    grouped_series = Series([1, 2, 3]).groupby([1, 1, 2])
    assert ungroup(grouped_series) is grouped_series.obj

    # extra arguments are rejected for a SeriesGroupBy
    with pytest.raises(ValueError):
        ungroup(grouped_series, "abc")

    grouped_df = tibble(x=1, y=2) >> group_by(f.x, f.y)

    # dropping all, or only one, of the grouping variables
    assert group_vars(ungroup(grouped_df)) == []
    assert group_vars(ungroup(grouped_df, f.x)) == ["y"]
    assert group_vars(ungroup(grouped_df, f.y)) == ["x"]

    # re-adding an existing grouping variable leaves both in place
    regrouped = group_by(grouped_df, f.y, _add=True)
    assert group_vars(regrouped) == ["x", "y"]

    # rowwise frames reject selective ungrouping
    rowwise_df = grouped_df >> rowwise()
    with pytest.raises(ValueError):
        ungroup(rowwise_df, f.x)

    # grouping by a column that does not exist
    with pytest.raises(KeyError):
        group_by(grouped_df, f.w)
def test_fct_inorder():
    """Check `fct_inorder` on a factor, a Series and a SeriesGroupBy."""
    original = factor(c("c", "a", "a", "b"), c("a", "b", "c"))
    # same values, with levels listed in order of first appearance
    expected = factor(c("c", "a", "a", "b"), c("c", "a", "b"))

    # plain factor
    reordered = fct_inorder(original)
    assert_iterable_equal(reordered, expected)
    assert_iterable_equal(levels(reordered), levels(expected))

    # Series wrapping the factor
    ser = Series(original)
    reordered_ser = fct_inorder(ser)
    assert_iterable_equal(reordered_ser, expected)
    assert_iterable_equal(levels(reordered_ser), levels(expected))

    # grouped Series
    grouped = ser.groupby([1, 1, 2, 2])
    reordered_grouped = fct_inorder(grouped)
    assert_iterable_equal(reordered_grouped.obj, expected)
    assert_iterable_equal(levels(reordered_grouped.obj), levels(expected))
def test_c():
    """Check `c` with flat, nested and grouped arguments."""
    # flat arguments
    assert_iterable_equal(c(1, 2, 3), [1, 2, 3])
    assert_iterable_equal(c(1, 2, 3, 4), [1, 2, 3, 4])

    # a nested c() call is flattened
    assert_iterable_equal(c(1, c(2, 3), 4, 5), [1, 2, 3, 4, 5])

    # with a SeriesGroupBy, the expected output repeats the other
    # arguments in front of each group's values
    grouped = Series([1, 2, 3, 4]).groupby([1, 1, 2, 2])
    combined = c(7, [8, 9], grouped)
    assert_iterable_equal(combined.obj, [7, 8, 9, 1, 2, 7, 8, 9, 3, 4])
def test_agg():
    """Exercise an "agg" function on several input types.

    Inputs covered: a list, a Series, a SeriesGroupBy, a rowwise Series,
    a rowwise tibble and a grouped tibble.
    """
    mean_fn = func_factory(
        "agg",
        "a",
        name="men",
        func=np.mean,
        signature=inspect.signature(lambda a: None),
    )

    # list and Series
    assert mean_fn([1, 2, 3]) == 2.0
    assert mean_fn(Series([1, 2, 3])) == 2.0

    # SeriesGroupBy
    mean_fn.register(SeriesGroupBy, func="mean")
    grouped = Series([1, 2, 4]).groupby([1, 2, 2])
    result = mean_fn(grouped)
    assert_iterable_equal(result.index, [1, 2])
    assert_iterable_equal(result, [1.0, 3.0])

    # SeriesRowwise, before and after registering "sum" for it
    df = tibble(x=[1, 2, 4]).rowwise()
    result = mean_fn(df.x)
    assert_iterable_equal(result, df.x.obj)

    mean_fn.register(SeriesRowwise, func="sum")
    result = mean_fn(df.x)
    assert_iterable_equal(result.index, [0, 1, 2])
    assert_iterable_equal(result, [1.0, 2.0, 4.0])

    # TibbleRowwise
    rowwise_tbl = tibble(a=[1, 2, 3], b=[4, 5, 6]).rowwise()
    result = mean_fn(rowwise_tbl)
    assert_iterable_equal(result, [2.5, 3.5, 4.5])

    # TibbleGrouped
    grouped_tbl = tibble(a=[1, 2, 3], b=[4, 5, 5]).group_by("b")
    result = mean_fn(grouped_tbl)
    assert_iterable_equal(result.a, [1.0, 2.5])
def test_scalar_true_false_are_vectorized():
    """Check that `if_else` spreads scalar branches over the condition."""
    # plain vector condition
    condition = c(True, True, False, False)
    picked = if_else(condition, 1, 2)
    assert list(picked) == [1, 1, 2, 2]

    # Series condition: the result is expected to be a Series too
    condition = Series(c(True, True, False, False))
    picked = if_else(condition, 1, 2)
    assert isinstance(picked, Series)
    assert list(picked) == [1, 1, 2, 2]
def test_args_kwargs_works_correctly():
    """Check that `recode`/`recode_factor` treat equivalent spellings alike.

    Replacements given as keywords, as an unpacked dict, as a positional
    dict, or as a mix of these are expected to give equal results.
    """
    # test_that(".dot argument works correctly (PR #2110)", {
    chars = letters[:3]
    nums = [1, 2, 3]
    fct = factor(chars)

    # keywords vs. a fully unpacked dict
    actual = recode(chars, a="apple", b="banana", _default=None)
    expected = recode(
        chars, _default=None, **{"a": "apple", "b": "banana"}
    )
    assert_iterable_equal(actual, expected)

    # keywords vs. one keyword plus an unpacked dict
    actual = recode(chars, a="apple", b="banana", _default=None)
    expected = recode(chars, a="apple", _default=None, **{"b": "banana"})
    assert_iterable_equal(actual, expected)

    # numeric input: position of the unpacked dict does not matter
    actual = recode(nums, **{"1": 4, "2": 5}, _default=NA_integer_)
    expected = recode(nums, _default=NA_integer_, **{"1": 4, "2": 5})
    assert_iterable_equal(actual, expected)

    # numeric input: a positional dict mixed with an unpacked dict
    actual = recode(nums, **{"1": 4, "2": 5}, _default=NA_integer_)
    expected = recode(nums, {"1": 4}, _default=NA_integer_, **{"2": 5})
    assert_iterable_equal(actual, expected)

    # the same comparison with the input wrapped in a Series
    actual = recode(Series(nums), **{"1": 4, "2": 5}, _default=NA_integer_)
    expected = recode(nums, {"1": 4}, _default=NA_integer_, **{"2": 5})
    assert_iterable_equal(actual, expected)

    # recode_factor: keywords vs. an unpacked dict
    actual = recode_factor(
        fct, a="apple", b="banana", _default=NA_character_
    )
    expected = recode_factor(
        fct,
        _default=NA_character_,
        **{"a": "apple", "b": "banana"},
    )
    assert_iterable_equal(actual, expected)

    # recode_factor on a Series, compared with the expectation above
    actual = recode_factor(
        Series(fct), a="apple", b="banana", _default=NA_character_
    )
    assert_iterable_equal(actual, expected)
def test_errors():
    """Check the exceptions raised by invalid `arrange` calls."""
    col = Series(1, name="x")

    # duplicated column names
    dup_df = tibble(col, col, _name_repair="minimal")
    with pytest.raises(NameNonUniqueError):
        dup_df >> arrange(f.x)

    single_df = tibble(x=col)

    # unknown column
    with pytest.raises(KeyError):
        single_df >> arrange(f.y)

    # sort key whose length differs from the frame's
    with pytest.raises(ValueError, match="Length of values"):
        single_df >> arrange(rep(f.x, 2))
def test_diff():
    """Check `diff` with `lag`, `differences` and a SeriesGroupBy."""
    sums = cumsum(cumsum(seq(1, 10)))

    # a lag-2 difference, checked two ways
    assert_iterable_equal(diff(sums, lag=2), sums[2:] - sums[:-2])
    assert_iterable_equal(diff(sums, lag=2), seq(3, 10)**2)

    # differencing twice equals differences=2
    assert_iterable_equal(diff(diff(sums)), diff(sums, differences=2))

    # more differences than elements gives an empty result
    assert_iterable_equal(diff(sums, differences=40), [])

    # grouped Series: the group count is expected to stay at 3
    grouped = Series([1, 2, 3, 4, 5]).groupby([1, 2, 2, 3, 3])
    differenced = diff(grouped)
    assert_iterable_equal(differenced.obj, [1, 1])
    assert differenced.grouper.ngroups == 3
def test_transform_register():
    """Check type-specific registrations on a "transform" function."""

    @func_factory(kind="transform", data_args="x")
    def double(x):
        return x * 2

    @double.register(DataFrame)
    def _(x):
        return x * 3

    # Series uses the default implementation until one is registered
    ser = Series([2, 3])
    assert_iterable_equal(double(ser), [4, 6])

    double.register(Series, lambda x: x * 4)
    assert_iterable_equal(double(ser), [8, 12])

    # tibble input picks up the DataFrame registration
    tbl = tibble(a=[1, 3])
    assert_iterable_equal(double(tbl).a, [3, 9])

    # for a list, the expected values match the Series registration
    assert_iterable_equal(double([1, 4]), [4, 16])

    # register an available string func for transform
    double.register(SeriesGroupBy, "sum")
    grouped = Series([1, -2]).groupby([1, 2])
    result = double(grouped)
    assert_iterable_equal(result.obj, [1, -2])

    # seriesrowwise: same object, flagged as rowwise before the call
    double.register(SeriesRowwise, lambda x: x + 1)
    grouped.is_rowwise = True
    result = double(grouped)
    assert_iterable_equal(result.obj, [2, -1])
    assert result.is_rowwise
def test_removes_vars_with_None():
    """Check that `mutate(col=None)` removes the column."""
    plain = tibble(x=range(1, 4), y=range(1, 4))
    grouped = group_by(plain, f.x)

    # ungrouped frame
    result = plain >> mutate(y=None)
    assert result.columns.tolist() == ["x"]

    # grouped frame: the grouping structure is expected to be kept
    result = grouped >> mutate(y=None)
    assert result.columns.tolist() == ["x"]
    assert isinstance(result, TibbleGrouped)
    assert group_vars(result) == ["x"]
    assert group_rows(result) == [[0], [1], [2]]

    # even if it doesn't exist
    result = plain >> mutate(z=None)
    assert result.equals(plain)

    # a column added positionally and removed by keyword in one call
    z_col = Series(1, name="z")
    result = plain >> mutate(z_col, z=None)
    assert result.equals(plain)

    # adding one column while removing all the existing ones
    single_row = tibble(x=1, y=1)
    result = mutate(single_row, z=1, x=None, y=None)
    assert result.equals(tibble(z=1))
def test_droplevels():
    """Check that `droplevels` removes a level that has no values."""
    # level 4 is declared but never used
    with_unused = Series(factor([1, 2, 3], levels=[1, 2, 3, 4]))
    dropped = droplevels(with_unused)

    # values are unchanged; only the used levels remain
    assert_iterable_equal(with_unused, dropped)
    assert_iterable_equal(levels(dropped), [1, 2, 3])
def grepl(a, b):
    """Test, pair by pair, whether each item of `a` is contained in `b`.

    Both arguments must expose the underlying data as `.obj`. Items are
    paired positionally with `zip`, and each pair is checked with the
    `in` operator.

    Args:
        a: Object whose `.obj` holds the items to look for.
        b: Object whose `.obj` holds the containers to look in.

    Returns:
        A Series of booleans carrying the index of `a.obj`.
    """
    needles = a.obj
    haystacks = b.obj
    hits = [needle in haystack for needle, haystack in zip(needles, haystacks)]
    return Series(hits, index=needles.index)
def test_seq_len_sgb():
    """Check `seq_len` applied to a SeriesGroupBy."""
    grouped = Series([1, 2, 3, 4]).groupby([1, 1, 2, 2])
    assert_iterable_equal(seq_len(grouped), [1, 1, 2, 3])
def test_transform_hooks():
    """Exercise `pre`/`post` hooks and `meta` on transform registrations.

    Registrations are made one after another on the same `times`
    function, so each assertion depends on the registrations before it.
    """

    @func_factory(kind="transform", data_args="x")
    def times(x, t):
        return x * t

    # A non-callable `pre` (here the integer 1) is expected to be rejected.
    with pytest.raises(ValueError):
        times.register(Series, meta=False, pre=1, func=None)

    # Series: no replacement func, only hooks. `pre` returns a tuple of
    # (x, new positional args, new keyword args) with `t` negated; `post`
    # adds `t` to the result.
    times.register(
        Series,
        func=None,
        pre=lambda x, t: (x, (-t, ), {}),
        post=lambda out, x, t: out + t,
    )

    x = Series([1, 2])
    out = times(x, -1)
    assert_iterable_equal(out, [2, 3])

    # Series: register a replacement func with meta=False.
    @times.register(Series, meta=False)
    def _(x, t):
        return x + t

    out = times(x, 10)
    assert_iterable_equal(out, [11, 12])

    # SeriesGroupBy: a replacement func that ignores `t`, with meta=True.
    @times.register(SeriesGroupBy, meta=True)
    def _(x, t):
        return x + 10

    x = Series([1, 2, 1, 2]).groupby([1, 1, 2, 2])
    out = times(x, 1)
    assert_iterable_equal(out.obj, [11, 12, 11, 12])

    # SeriesGroupBy: hooks only; `pre` bumps `t` by one and `post`
    # returns the result untouched.
    times.register(
        SeriesGroupBy,
        func=None,
        pre=lambda x, t: (x, (t + 1, ), {}),
        post=lambda out, x, *args, **kwargs: out,
    )

    out = times(x, 1)
    assert_iterable_equal(out, [2, 4, 2, 4])

    # Series: a `pre` hook that returns None, plus a `post` hook.
    times.register(
        Series,
        func=None,
        pre=lambda *args, **kwargs: None,
        post=lambda out, x, t: out + t,
    )

    x = Series([1, 2])
    out = times(x, 3)
    assert_iterable_equal(out, [4, 5])

    # DataFrame: a replacement func raising to the power `t`, meta=True.
    @times.register(DataFrame, meta=True)
    def _(x, t):
        return x**t

    x = tibble(a=[1, 2], b=[2, 3])
    out = times(x, 3)
    assert_iterable_equal(out.a, [1, 8])
    assert_iterable_equal(out.b, [8, 27])

    # TibbleGrouped
    # Hooks only: `pre` lowers `t` by one; `post` reindexes the result
    # to [1, 0].
    times.register(
        TibbleGrouped,
        func=None,
        pre=lambda x, t: (x, (t - 1, ), {}),
        post=lambda out, x, t: out.reindex([1, 0]),
    )

    x = x.group_by("a")
    out = times(x, 3)
    assert_iterable_equal(out.b, [6, 4])

    # TibbleGrouped: a replacement func with meta=False that also
    # overwrites one cell of its result.
    @times.register(
        TibbleGrouped,
        meta=False,
    )
    def _(x, t):
        out = x.transform(lambda d, t: d * t, 0, t - 1)
        out.iloc[0, 1] = 10
        return out

    # x = tibble(a=[1, 2], b=[2, 3])  # grouped by a
    out = times(x, 3)
    assert isinstance(out, TibbleGrouped)
    assert_iterable_equal(out.group_vars, ["a"])
    assert_iterable_equal(out.b.obj, [10, 6])
def test_factor_sgb():
    """Check that `factor` on a SeriesGroupBy returns a SeriesGroupBy."""
    grouped = Series([1, 2, 3]).groupby([1, 1, 3])
    as_factor = factor(grouped)

    assert isinstance(as_factor, SeriesGroupBy)
    # the underlying values match a factor built from the raw data
    assert_factor_equal(as_factor.obj.values, factor([1, 2, 3]))