class TestDataFrameAggregate:
    """Checks for ``DataFrame.agg`` / ``transform`` / ``apply`` with function specs."""

    def test_agg_transform(self, axis, float_frame):
        """Compare transform/apply of ufuncs with the ufuncs applied directly."""
        along_index = axis in {0, "index"}
        other_axis = 1 if along_index else 0
        # The labels that pick up an extra function-name level in the
        # list-like cases below.
        base_labels = float_frame.columns if along_index else float_frame.index

        with np.errstate(all="ignore"):
            abs_frame = np.abs(float_frame)
            sqrt_frame = np.sqrt(float_frame)

            # ufunc
            expected = sqrt_frame.copy()
            result = float_frame.transform(np.sqrt, axis=axis)
            tm.assert_frame_equal(result, expected)

            result = float_frame.apply(np.sqrt, axis=axis)
            tm.assert_frame_equal(result, expected)

            result = float_frame.transform(np.sqrt, axis=axis)
            tm.assert_frame_equal(result, expected)

            # list-like
            result = float_frame.apply([np.sqrt], axis=axis)
            expected = sqrt_frame.copy()
            sqrt_labels = pd.MultiIndex.from_product([base_labels, ["sqrt"]])
            if along_index:
                expected.columns = sqrt_labels
            else:
                expected.index = sqrt_labels
            tm.assert_frame_equal(result, expected)

            result = float_frame.transform([np.sqrt], axis=axis)
            tm.assert_frame_equal(result, expected)

            # multiple items in list
            # these are in the order as if we are applying both
            # functions per series and then concatting
            result = float_frame.apply([np.abs, np.sqrt], axis=axis)
            expected = zip_frames([abs_frame, sqrt_frame], axis=other_axis)
            pair_labels = pd.MultiIndex.from_product(
                [base_labels, ["absolute", "sqrt"]]
            )
            if along_index:
                expected.columns = pair_labels
            else:
                expected.index = pair_labels
            tm.assert_frame_equal(result, expected)

            result = float_frame.transform([np.abs, "sqrt"], axis=axis)
            tm.assert_frame_equal(result, expected)

    def test_transform_and_agg_err(self, axis, float_frame):
        """Expect ValueError when reducers and transformers are combined."""
        # cannot both transform and agg
        with pytest.raises(ValueError):
            float_frame.transform(["max", "min"], axis=axis)

        with pytest.raises(ValueError), np.errstate(all="ignore"):
            float_frame.agg(["max", "sqrt"], axis=axis)

        with pytest.raises(ValueError), np.errstate(all="ignore"):
            float_frame.transform(["max", "sqrt"], axis=axis)

        df = pd.DataFrame({"A": range(5), "B": 5})

        # NOTE(review): this helper is defined but never invoked in this
        # test, so the dict-based call is not exercised -- confirm intent.
        def f():
            with np.errstate(all="ignore"):
                df.agg({"A": ["abs", "sum"], "B": ["mean", "max"]}, axis=axis)

    @pytest.mark.parametrize(
        "method", ["abs", "shift", "pct_change", "cumsum", "rank"]
    )
    def test_transform_method_name(self, method):
        """A method name passed to transform matches calling that method."""
        # GH 19760
        df = pd.DataFrame({"A": [-1, 2]})
        call_method = operator.methodcaller(method)

        result = df.transform(method)
        expected = call_method(df)
        tm.assert_frame_equal(result, expected)

    def test_demo(self):
        """Demonstrate agg with a list of names and with a dict of lists."""
        # demonstration tests
        df = pd.DataFrame({"A": range(5), "B": 5})

        result = df.agg(["min", "max"])
        expected = DataFrame(
            {"A": [0, 4], "B": [5, 5]},
            columns=["A", "B"],
            index=["min", "max"],
        )
        tm.assert_frame_equal(result, expected)

        result = df.agg({"A": ["min", "max"], "B": ["sum", "max"]})
        expected = DataFrame(
            {"A": [4.0, 0.0, np.nan], "B": [5.0, np.nan, 25.0]},
            columns=["A", "B"],
            index=["max", "min", "sum"],
        )
        tm.assert_frame_equal(result.reindex_like(expected), expected)

    def test_agg_multiple_mixed_no_warning(self):
        """Aggregating a mixed-dtype frame with two reducers emits no warning."""
        # GH 20909
        mdf = pd.DataFrame(
            {
                "A": [1, 2, 3],
                "B": [1.0, 2.0, 3.0],
                "C": ["foo", "bar", "baz"],
                "D": pd.date_range("20130101", periods=3),
            }
        )
        expected = pd.DataFrame(
            {
                "A": [1, 6],
                "B": [1.0, 6.0],
                "C": ["bar", "foobarbaz"],
                "D": [pd.Timestamp("2013-01-01"), pd.NaT],
            },
            index=["min", "sum"],
        )

        # sorted index
        with tm.assert_produces_warning(None):
            result = mdf.agg(["min", "sum"])
        tm.assert_frame_equal(result, expected)

        reordered = ["D", "C", "B", "A"]
        with tm.assert_produces_warning(None):
            result = mdf[reordered].agg(["sum", "min"])

        # For backwards compatibility, the result's index is
        # still sorted by function name, so it's ['min', 'sum']
        # not ['sum', 'min'].
        expected = expected[["D", "C", "B", "A"]]
        tm.assert_frame_equal(result, expected)

    def test_agg_dict_nested_renaming_depr(self):
        """A nested renaming dict passed to agg emits a FutureWarning."""
        df = pd.DataFrame({"A": range(5), "B": 5})
        nested_spec = {"A": {"foo": "min"}, "B": {"bar": "max"}}

        # nested renaming
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            df.agg(nested_spec)

    def test_agg_reduce(self, axis, float_frame):
        """Reducers given as a list, dict of scalars and dict of lists."""
        other_axis = 1 if axis in {0, "index"} else 0
        name1, name2 = float_frame.axes[other_axis].unique()[:2].sort_values()
        # The two labelled slices every dict-based expectation is built from.
        first = float_frame.loc(other_axis)[name1]
        second = float_frame.loc(other_axis)[name2]

        # all reducers
        expected = pd.concat(
            [
                getattr(float_frame, reducer)(axis=axis)
                for reducer in ("mean", "max", "sum")
            ],
            axis=1,
        )
        expected.columns = ["mean", "max", "sum"]
        if axis in {0, "index"}:
            expected = expected.T

        result = float_frame.agg(["mean", "max", "sum"], axis=axis)
        tm.assert_frame_equal(result, expected)

        # dict input with scalars
        func = OrderedDict([(name1, "mean"), (name2, "sum")])
        result = float_frame.agg(func, axis=axis)
        expected = Series([first.mean(), second.sum()], index=[name1, name2])
        tm.assert_series_equal(result, expected)

        # dict input with lists
        func = OrderedDict([(name1, ["mean"]), (name2, ["sum"])])
        result = float_frame.agg(func, axis=axis)
        expected = DataFrame(
            {
                name1: Series([first.mean()], index=["mean"]),
                name2: Series([second.sum()], index=["sum"]),
            }
        )
        if axis in {1, "columns"}:
            expected = expected.T
        tm.assert_frame_equal(result, expected)

        # dict input with lists with multiple
        func = OrderedDict([(name1, ["mean", "sum"]), (name2, ["sum", "max"])])
        result = float_frame.agg(func, axis=axis)
        per_label = OrderedDict(
            [
                (
                    name1,
                    Series([first.mean(), first.sum()], index=["mean", "sum"]),
                ),
                (
                    name2,
                    Series([second.sum(), second.max()], index=["sum", "max"]),
                ),
            ]
        )
        expected = DataFrame(per_label)
        if axis in {1, "columns"}:
            expected = expected.T
        tm.assert_frame_equal(result, expected)

    def test_nuiscance_columns(self):
        """'min' keeps the datetime column; the 'sum' expectations omit it."""
        # GH 15015
        first_day = pd.Timestamp("20130101")
        df = DataFrame(
            {
                "A": [1, 2, 3],
                "B": [1.0, 2.0, 3.0],
                "C": ["foo", "bar", "baz"],
                "D": pd.date_range("20130101", periods=3),
            }
        )

        result = df.agg("min")
        expected = Series([1, 1.0, "bar", first_day], index=df.columns)
        tm.assert_series_equal(result, expected)

        result = df.agg(["min"])
        expected = DataFrame(
            [[1, 1.0, "bar", first_day]], index=["min"], columns=df.columns
        )
        tm.assert_frame_equal(result, expected)

        summed_columns = ["A", "B", "C"]

        result = df.agg("sum")
        expected = Series([6, 6.0, "foobarbaz"], index=summed_columns)
        tm.assert_series_equal(result, expected)

        result = df.agg(["sum"])
        expected = DataFrame(
            [[6, 6.0, "foobarbaz"]], index=["sum"], columns=summed_columns
        )
        tm.assert_frame_equal(result, expected)

    def test_non_callable_aggregates(self):
        """agg accepts names of non-callable attributes such as 'size'."""
        # GH 16405
        # 'size' is a property of frame/series
        # validate that this is working
        df = DataFrame(
            {
                "A": [None, 2, 3],
                "B": [1.0, np.nan, 3.0],
                "C": ["foo", None, "bar"],
            }
        )

        # Function aggregate
        result = df.agg({"A": "count"})
        expected = Series({"A": 2})
        tm.assert_series_equal(result, expected)

        # Non-function aggregate
        result = df.agg({"A": "size"})
        expected = Series({"A": 3})
        tm.assert_series_equal(result, expected)

        # Mix function and non-function aggs
        result1 = df.agg(["count", "size"])
        result2 = df.agg({label: ["count", "size"] for label in ("A", "B", "C")})
        expected = pd.DataFrame(
            {label: {"count": 2, "size": 3} for label in ("A", "B", "C")}
        )
        tm.assert_frame_equal(result1, result2, check_like=True)
        tm.assert_frame_equal(result2, expected, check_like=True)

        # Just functional string arg is same as calling df.arg()
        result = df.agg("count")
        expected = df.count()
        tm.assert_series_equal(result, expected)

        # Just a string attribute arg same as calling df.arg
        result = df.agg("size")
        expected = df.size
        assert result == expected

    @pytest.mark.parametrize(
        "df, func, expected",
        chain(
            _get_cython_table_params(
                DataFrame(),
                [
                    ("sum", Series()),
                    ("max", Series()),
                    ("min", Series()),
                    ("all", Series(dtype=bool)),
                    ("any", Series(dtype=bool)),
                    ("mean", Series()),
                    ("prod", Series()),
                    ("std", Series()),
                    ("var", Series()),
                    ("median", Series()),
                ],
            ),
            _get_cython_table_params(
                DataFrame([[np.nan, 1], [1, 2]]),
                [
                    ("sum", Series([1.0, 3])),
                    ("max", Series([1.0, 2])),
                    ("min", Series([1.0, 1])),
                    ("all", Series([True, True])),
                    ("any", Series([True, True])),
                    ("mean", Series([1, 1.5])),
                    ("prod", Series([1.0, 2])),
                    ("std", Series([np.nan, 0.707107])),
                    ("var", Series([np.nan, 0.5])),
                    ("median", Series([1, 1.5])),
                ],
            ),
        ),
    )
    def test_agg_cython_table(self, df, func, expected, axis):
        """Each parametrized reducer yields the expected Series."""
        # GH 21224
        # test reducing functions in
        # pandas.core.base.SelectionMixin._cython_table
        aggregated = df.agg(func, axis=axis)
        tm.assert_series_equal(aggregated, expected)

    @pytest.mark.parametrize(
        "df, func, expected",
        chain(
            _get_cython_table_params(
                DataFrame(),
                [("cumprod", DataFrame()), ("cumsum", DataFrame())],
            ),
            _get_cython_table_params(
                DataFrame([[np.nan, 1], [1, 2]]),
                [
                    ("cumprod", DataFrame([[np.nan, 1], [1.0, 2.0]])),
                    ("cumsum", DataFrame([[np.nan, 1], [1.0, 3.0]])),
                ],
            ),
        ),
    )
    def test_agg_cython_table_transform(self, df, func, expected, axis):
        """Each parametrized cumulative function yields the expected frame."""
        # GH 21224
        # test transforming functions in
        # pandas.core.base.SelectionMixin._cython_table (cumprod, cumsum)
        transformed = df.agg(func, axis=axis)
        tm.assert_frame_equal(transformed, expected)

    @pytest.mark.parametrize(
        "df, func, expected",
        _get_cython_table_params(
            DataFrame([["a", "b"], ["b", "a"]]), [["cumprod", TypeError]]
        ),
    )
    def test_agg_cython_table_raises(self, df, func, expected, axis):
        """agg raises the parametrized exception type for a string frame."""
        # GH 21224
        with pytest.raises(expected):
            df.agg(func, axis=axis)

    @pytest.mark.parametrize("num_cols", [2, 3, 5])
    def test_frequency_is_original(self, num_cols):
        """apply leaves the freq of the frame's DatetimeIndex unchanged."""
        # GH 22150
        index = pd.DatetimeIndex(["1950-06-30", "1952-10-24", "1953-05-29"])
        original = index.copy()
        df = DataFrame(1, index=index, columns=range(num_cols))

        df.apply(lambda x: x)

        assert index.freq == original.freq

    def test_apply_datetime_tz_issue(self):
        """Row-wise apply returning ``x.name`` gives back the tz-aware labels."""
        # GH 29052
        timestamps = [
            pd.Timestamp("2019-03-15 12:34:31.909000+0000", tz="UTC"),
            pd.Timestamp("2019-03-15 12:34:34.359000+0000", tz="UTC"),
            pd.Timestamp("2019-03-15 12:34:34.660000+0000", tz="UTC"),
        ]
        df = DataFrame(data=[0, 1, 2], index=timestamps)

        result = df.apply(lambda x: x.name, axis=1)
        expected = pd.Series(index=timestamps, data=timestamps)

        tm.assert_series_equal(result, expected)
class TestDataFrameAggregate:
    """Tests for ``DataFrame.agg`` / ``transform`` / ``apply`` with function specs.

    All frame/series comparisons go through the ``tm`` namespace so the
    class uses a single, consistent set of assertion helpers.
    """

    def test_agg_transform(self, axis, float_frame):
        """Compare transform/apply of ufuncs with the ufuncs applied directly."""
        other_axis = 1 if axis in {0, "index"} else 0

        with np.errstate(all="ignore"):
            f_abs = np.abs(float_frame)
            f_sqrt = np.sqrt(float_frame)

            # ufunc
            result = float_frame.transform(np.sqrt, axis=axis)
            expected = f_sqrt.copy()
            tm.assert_frame_equal(result, expected)

            result = float_frame.apply(np.sqrt, axis=axis)
            tm.assert_frame_equal(result, expected)

            result = float_frame.transform(np.sqrt, axis=axis)
            tm.assert_frame_equal(result, expected)

            # list-like
            result = float_frame.apply([np.sqrt], axis=axis)
            expected = f_sqrt.copy()
            if axis in {0, "index"}:
                expected.columns = pd.MultiIndex.from_product(
                    [float_frame.columns, ["sqrt"]]
                )
            else:
                expected.index = pd.MultiIndex.from_product(
                    [float_frame.index, ["sqrt"]]
                )
            tm.assert_frame_equal(result, expected)

            result = float_frame.transform([np.sqrt], axis=axis)
            tm.assert_frame_equal(result, expected)

            # multiple items in list
            # these are in the order as if we are applying both
            # functions per series and then concatting
            result = float_frame.apply([np.abs, np.sqrt], axis=axis)
            expected = zip_frames([f_abs, f_sqrt], axis=other_axis)
            if axis in {0, "index"}:
                expected.columns = pd.MultiIndex.from_product(
                    [float_frame.columns, ["absolute", "sqrt"]]
                )
            else:
                expected.index = pd.MultiIndex.from_product(
                    [float_frame.index, ["absolute", "sqrt"]]
                )
            tm.assert_frame_equal(result, expected)

            result = float_frame.transform([np.abs, "sqrt"], axis=axis)
            tm.assert_frame_equal(result, expected)

    def test_transform_and_agg_err(self, axis, float_frame):
        """Expect ValueError when reducers and transformers are combined."""
        # cannot both transform and agg
        with pytest.raises(ValueError):
            float_frame.transform(["max", "min"], axis=axis)

        with pytest.raises(ValueError):
            with np.errstate(all="ignore"):
                float_frame.agg(["max", "sqrt"], axis=axis)

        with pytest.raises(ValueError):
            with np.errstate(all="ignore"):
                float_frame.transform(["max", "sqrt"], axis=axis)

        df = pd.DataFrame({"A": range(5), "B": 5})

        # NOTE(review): this helper is defined but never invoked in this
        # test, so the dict-based call is not exercised -- confirm intent.
        def f():
            with np.errstate(all="ignore"):
                df.agg({"A": ["abs", "sum"], "B": ["mean", "max"]}, axis=axis)

    @pytest.mark.parametrize(
        "method", ["abs", "shift", "pct_change", "cumsum", "rank"]
    )
    def test_transform_method_name(self, method):
        """A method name passed to transform matches calling that method."""
        # GH 19760
        df = pd.DataFrame({"A": [-1, 2]})
        result = df.transform(method)
        expected = operator.methodcaller(method)(df)
        tm.assert_frame_equal(result, expected)

    def test_demo(self):
        """Demonstrate agg with a list of names and with a dict of lists."""
        # demonstration tests
        df = pd.DataFrame({"A": range(5), "B": 5})

        result = df.agg(["min", "max"])
        expected = DataFrame(
            {"A": [0, 4], "B": [5, 5]}, columns=["A", "B"], index=["min", "max"]
        )
        tm.assert_frame_equal(result, expected)

        result = df.agg({"A": ["min", "max"], "B": ["sum", "max"]})
        expected = DataFrame(
            {"A": [4.0, 0.0, np.nan], "B": [5.0, np.nan, 25.0]},
            columns=["A", "B"],
            index=["max", "min", "sum"],
        )
        tm.assert_frame_equal(result.reindex_like(expected), expected)

    def test_agg_multiple_mixed_no_warning(self):
        """Aggregating a mixed-dtype frame with two reducers emits no warning."""
        # GH 20909
        mdf = pd.DataFrame(
            {
                "A": [1, 2, 3],
                "B": [1.0, 2.0, 3.0],
                "C": ["foo", "bar", "baz"],
                "D": pd.date_range("20130101", periods=3),
            }
        )
        expected = pd.DataFrame(
            {
                "A": [1, 6],
                "B": [1.0, 6.0],
                "C": ["bar", "foobarbaz"],
                "D": [pd.Timestamp("2013-01-01"), pd.NaT],
            },
            index=["min", "sum"],
        )
        # sorted index
        with tm.assert_produces_warning(None):
            result = mdf.agg(["min", "sum"])

        tm.assert_frame_equal(result, expected)

        with tm.assert_produces_warning(None):
            result = mdf[["D", "C", "B", "A"]].agg(["sum", "min"])

        # For backwards compatibility, the result's index is
        # still sorted by function name, so it's ['min', 'sum']
        # not ['sum', 'min'].
        expected = expected[["D", "C", "B", "A"]]
        tm.assert_frame_equal(result, expected)

    def test_agg_dict_nested_renaming_depr(self):
        """A nested renaming dict passed to agg emits a FutureWarning."""
        df = pd.DataFrame({"A": range(5), "B": 5})

        # nested renaming
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            df.agg({"A": {"foo": "min"}, "B": {"bar": "max"}})

    def test_agg_reduce(self, axis, float_frame):
        """Reducers given as a list, dict of scalars and dict of lists."""
        other_axis = 1 if axis in {0, "index"} else 0
        name1, name2 = float_frame.axes[other_axis].unique()[:2].sort_values()

        # all reducers
        expected = pd.concat(
            [
                float_frame.mean(axis=axis),
                float_frame.max(axis=axis),
                float_frame.sum(axis=axis),
            ],
            axis=1,
        )
        expected.columns = ["mean", "max", "sum"]
        expected = expected.T if axis in {0, "index"} else expected

        result = float_frame.agg(["mean", "max", "sum"], axis=axis)
        tm.assert_frame_equal(result, expected)

        # dict input with scalars
        func = OrderedDict([(name1, "mean"), (name2, "sum")])
        result = float_frame.agg(func, axis=axis)
        expected = Series(
            [
                float_frame.loc(other_axis)[name1].mean(),
                float_frame.loc(other_axis)[name2].sum(),
            ],
            index=[name1, name2],
        )
        tm.assert_series_equal(result, expected)

        # dict input with lists
        func = OrderedDict([(name1, ["mean"]), (name2, ["sum"])])
        result = float_frame.agg(func, axis=axis)
        expected = DataFrame(
            {
                name1: Series(
                    [float_frame.loc(other_axis)[name1].mean()], index=["mean"]
                ),
                name2: Series(
                    [float_frame.loc(other_axis)[name2].sum()], index=["sum"]
                ),
            }
        )
        expected = expected.T if axis in {1, "columns"} else expected
        tm.assert_frame_equal(result, expected)

        # dict input with lists with multiple
        func = OrderedDict([(name1, ["mean", "sum"]), (name2, ["sum", "max"])])
        result = float_frame.agg(func, axis=axis)
        expected = DataFrame(
            OrderedDict(
                [
                    (
                        name1,
                        Series(
                            [
                                float_frame.loc(other_axis)[name1].mean(),
                                float_frame.loc(other_axis)[name1].sum(),
                            ],
                            index=["mean", "sum"],
                        ),
                    ),
                    (
                        name2,
                        Series(
                            [
                                float_frame.loc(other_axis)[name2].sum(),
                                float_frame.loc(other_axis)[name2].max(),
                            ],
                            index=["sum", "max"],
                        ),
                    ),
                ]
            )
        )
        expected = expected.T if axis in {1, "columns"} else expected
        tm.assert_frame_equal(result, expected)

    def test_nuiscance_columns(self):
        """'min' keeps the datetime column; the 'sum' expectations omit it."""
        # GH 15015
        df = DataFrame(
            {
                "A": [1, 2, 3],
                "B": [1.0, 2.0, 3.0],
                "C": ["foo", "bar", "baz"],
                "D": pd.date_range("20130101", periods=3),
            }
        )

        result = df.agg("min")
        expected = Series(
            [1, 1.0, "bar", pd.Timestamp("20130101")], index=df.columns
        )
        tm.assert_series_equal(result, expected)

        result = df.agg(["min"])
        expected = DataFrame(
            [[1, 1.0, "bar", pd.Timestamp("20130101")]],
            index=["min"],
            columns=df.columns,
        )
        tm.assert_frame_equal(result, expected)

        result = df.agg("sum")
        expected = Series([6, 6.0, "foobarbaz"], index=["A", "B", "C"])
        tm.assert_series_equal(result, expected)

        result = df.agg(["sum"])
        expected = DataFrame(
            [[6, 6.0, "foobarbaz"]], index=["sum"], columns=["A", "B", "C"]
        )
        tm.assert_frame_equal(result, expected)

    def test_non_callable_aggregates(self):
        """agg accepts names of non-callable attributes such as 'size'."""
        # GH 16405
        # 'size' is a property of frame/series
        # validate that this is working
        df = DataFrame(
            {
                "A": [None, 2, 3],
                "B": [1.0, np.nan, 3.0],
                "C": ["foo", None, "bar"],
            }
        )

        # Function aggregate
        result = df.agg({"A": "count"})
        expected = Series({"A": 2})
        tm.assert_series_equal(result, expected)

        # Non-function aggregate
        result = df.agg({"A": "size"})
        expected = Series({"A": 3})
        tm.assert_series_equal(result, expected)

        # Mix function and non-function aggs
        result1 = df.agg(["count", "size"])
        result2 = df.agg(
            {
                "A": ["count", "size"],
                "B": ["count", "size"],
                "C": ["count", "size"],
            }
        )
        expected = pd.DataFrame(
            {
                "A": {"count": 2, "size": 3},
                "B": {"count": 2, "size": 3},
                "C": {"count": 2, "size": 3},
            }
        )
        tm.assert_frame_equal(result1, result2, check_like=True)
        tm.assert_frame_equal(result2, expected, check_like=True)

        # Just functional string arg is same as calling df.arg()
        result = df.agg("count")
        expected = df.count()
        tm.assert_series_equal(result, expected)

        # Just a string attribute arg same as calling df.arg
        result = df.agg("size")
        expected = df.size
        assert result == expected

    @pytest.mark.parametrize(
        "df, func, expected",
        chain(
            _get_cython_table_params(
                DataFrame(),
                [
                    ("sum", Series()),
                    ("max", Series()),
                    ("min", Series()),
                    ("all", Series(dtype=bool)),
                    ("any", Series(dtype=bool)),
                    ("mean", Series()),
                    ("prod", Series()),
                    ("std", Series()),
                    ("var", Series()),
                    ("median", Series()),
                ],
            ),
            _get_cython_table_params(
                DataFrame([[np.nan, 1], [1, 2]]),
                [
                    ("sum", Series([1.0, 3])),
                    ("max", Series([1.0, 2])),
                    ("min", Series([1.0, 1])),
                    ("all", Series([True, True])),
                    ("any", Series([True, True])),
                    ("mean", Series([1, 1.5])),
                    ("prod", Series([1.0, 2])),
                    ("std", Series([np.nan, 0.707107])),
                    ("var", Series([np.nan, 0.5])),
                    ("median", Series([1, 1.5])),
                ],
            ),
        ),
    )
    def test_agg_cython_table(self, df, func, expected, axis):
        """Each parametrized reducer yields the expected Series."""
        # GH 21224
        # test reducing functions in
        # pandas.core.base.SelectionMixin._cython_table
        result = df.agg(func, axis=axis)
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize(
        "df, func, expected",
        chain(
            _get_cython_table_params(
                DataFrame(),
                [("cumprod", DataFrame()), ("cumsum", DataFrame())],
            ),
            _get_cython_table_params(
                DataFrame([[np.nan, 1], [1, 2]]),
                [
                    ("cumprod", DataFrame([[np.nan, 1], [1.0, 2.0]])),
                    ("cumsum", DataFrame([[np.nan, 1], [1.0, 3.0]])),
                ],
            ),
        ),
    )
    def test_agg_cython_table_transform(self, df, func, expected, axis):
        """Each parametrized cumulative function yields the expected frame."""
        # GH 21224
        # test transforming functions in
        # pandas.core.base.SelectionMixin._cython_table (cumprod, cumsum)
        result = df.agg(func, axis=axis)
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize(
        "df, func, expected",
        _get_cython_table_params(
            DataFrame([["a", "b"], ["b", "a"]]), [["cumprod", TypeError]]
        ),
    )
    def test_agg_cython_table_raises(self, df, func, expected, axis):
        """agg raises the parametrized exception type for a string frame."""
        # GH 21224
        with pytest.raises(expected):
            df.agg(func, axis=axis)

    @pytest.mark.parametrize("num_cols", [2, 3, 5])
    def test_frequency_is_original(self, num_cols):
        """apply leaves the freq of the frame's DatetimeIndex unchanged."""
        # GH 22150
        index = pd.DatetimeIndex(["1950-06-30", "1952-10-24", "1953-05-29"])
        original = index.copy()
        df = DataFrame(1, index=index, columns=range(num_cols))
        df.apply(lambda x: x)
        assert index.freq == original.freq
class TestSeriesAggregate:
    """Checks for ``Series.agg`` / ``transform`` / ``apply`` with function specs."""

    def test_transform(self, string_series):
        """Compare transform/apply of ufuncs with the ufuncs applied directly."""
        # transforming functions
        with np.errstate(all="ignore"):
            sqrt_series = np.sqrt(string_series)
            abs_series = np.abs(string_series)

            # ufunc
            expected = sqrt_series.copy()
            result = string_series.transform(np.sqrt)
            tm.assert_series_equal(result, expected)

            result = string_series.apply(np.sqrt)
            tm.assert_series_equal(result, expected)

            # list-like
            expected = sqrt_series.to_frame().copy()
            expected.columns = ["sqrt"]
            for spec in ([np.sqrt], [np.sqrt], ["sqrt"]):
                result = string_series.transform(spec)
                tm.assert_frame_equal(result, expected)

            # multiple items in list
            # these are in the order as if we are applying both functions per
            # series and then concatting
            expected = pd.concat([sqrt_series, abs_series], axis=1)
            expected.columns = ["sqrt", "absolute"]
            result = string_series.apply([np.sqrt, np.abs])
            tm.assert_frame_equal(result, expected)

            result = string_series.transform(["sqrt", "abs"])
            expected.columns = ["sqrt", "abs"]
            tm.assert_frame_equal(result, expected)

            # dict, provide renaming
            renamed = pd.concat([sqrt_series, abs_series], axis=1)
            renamed.columns = ["foo", "bar"]
            expected = renamed.unstack().rename("series")

            result = string_series.apply({"foo": np.sqrt, "bar": np.abs})
            tm.assert_series_equal(result.reindex_like(expected), expected)

    def test_transform_and_agg_error(self, string_series):
        """Expect ValueError when reducers and transformers are combined."""
        # we are trying to transform with an aggregator
        with pytest.raises(ValueError):
            string_series.transform(["min", "max"])

        with pytest.raises(ValueError), np.errstate(all="ignore"):
            string_series.agg(["sqrt", "max"])

        with pytest.raises(ValueError), np.errstate(all="ignore"):
            string_series.transform(["sqrt", "max"])

        with pytest.raises(ValueError), np.errstate(all="ignore"):
            string_series.agg({"foo": np.sqrt, "bar": "sum"})

    def test_demo(self):
        """Demonstrate agg with a list, a renaming dict and a nested renamer."""
        # demonstration tests
        s = Series(range(6), dtype="int64", name="series")

        result = s.agg(["min", "max"])
        expected = Series([0, 5], index=["min", "max"], name="series")
        tm.assert_series_equal(result, expected)

        result = s.agg({"foo": "min"})
        expected = Series([0], index=["foo"], name="series")
        tm.assert_series_equal(result, expected)

        # nested renaming
        msg = "nested renamer is not supported"
        nested_spec = {"foo": ["min", "max"]}
        with pytest.raises(SpecificationError, match=msg):
            s.agg(nested_spec)

    def test_multiple_aggregators_with_dict_api(self):
        """A dict mapping two names to lists raises SpecificationError."""
        s = Series(range(6), dtype="int64", name="series")
        nested_spec = {"foo": ["min", "max"], "bar": ["sum", "mean"]}

        # nested renaming
        msg = "nested renamer is not supported"
        with pytest.raises(SpecificationError, match=msg):
            s.agg(nested_spec)

    def test_agg_apply_evaluate_lambdas_the_same(self, string_series):
        """apply and agg give equal results for a str lambda and for ``str``."""
        # test that we are evaluating row-by-row first
        # before vectorized evaluation
        for stringify in (lambda x: str(x), str):
            result = string_series.apply(stringify)
            expected = string_series.agg(stringify)
            tm.assert_series_equal(result, expected)

    def test_with_nested_series(self, datetime_series):
        """A function returning a Series yields a frame via apply and agg."""
        # GH 2316
        # .agg with a reducer and a transform, what to do

        def value_and_square(x):
            return Series([x, x ** 2], index=["x", "x^2"])

        expected = DataFrame({"x": datetime_series, "x^2": datetime_series ** 2})

        result = datetime_series.apply(value_and_square)
        tm.assert_frame_equal(result, expected)

        result = datetime_series.agg(value_and_square)
        tm.assert_frame_equal(result, expected)

    def test_replicate_describe(self, string_series):
        """An ordered dict of reducers passed to apply reproduces describe()."""
        # this also tests a result set that is all scalars
        expected = string_series.describe()

        describe_spec = OrderedDict(
            [
                ("count", "count"),
                ("mean", "mean"),
                ("std", "std"),
                ("min", "min"),
                ("25%", lambda x: x.quantile(0.25)),
                ("50%", "median"),
                ("75%", lambda x: x.quantile(0.75)),
                ("max", "max"),
            ]
        )
        result = string_series.apply(describe_spec)
        tm.assert_series_equal(result, expected)

    def test_reduce(self, string_series):
        """agg with two reducer names returns a Series labelled by those names."""
        # reductions with named functions
        result = string_series.agg(["sum", "mean"])

        values = [string_series.sum(), string_series.mean()]
        expected = Series(values, ["sum", "mean"], name=string_series.name)
        tm.assert_series_equal(result, expected)

    def test_non_callable_aggregates(self):
        """agg accepts the name of the non-callable attribute 'size'."""
        # test agg using non-callable series attributes
        s = Series([1, 2, None])

        # Calling agg w/ just a string arg same as calling s.arg
        result = s.agg("size")
        expected = s.size
        assert result == expected

        # test when mixed w/ callable reducers
        result = s.agg(["size", "count", "mean"])
        pairs = [("size", 3.0), ("count", 2.0), ("mean", 1.5)]
        expected = Series(OrderedDict(pairs))
        tm.assert_series_equal(result[expected.index], expected)

    @pytest.mark.parametrize(
        "series, func, expected",
        chain(
            _get_cython_table_params(
                Series(),
                [
                    ("sum", 0),
                    ("max", np.nan),
                    ("min", np.nan),
                    ("all", True),
                    ("any", False),
                    ("mean", np.nan),
                    ("prod", 1),
                    ("std", np.nan),
                    ("var", np.nan),
                    ("median", np.nan),
                ],
            ),
            _get_cython_table_params(
                Series([np.nan, 1, 2, 3]),
                [
                    ("sum", 6),
                    ("max", 3),
                    ("min", 1),
                    ("all", True),
                    ("any", True),
                    ("mean", 2),
                    ("prod", 6),
                    ("std", 1),
                    ("var", 1),
                    ("median", 2),
                ],
            ),
            _get_cython_table_params(
                Series("a b c".split()),
                [
                    ("sum", "abc"),
                    ("max", "c"),
                    ("min", "a"),
                    ("all", "c"),  # see GH12863
                    ("any", "a"),
                ],
            ),
        ),
    )
    def test_agg_cython_table(self, series, func, expected):
        """Each parametrized reducer yields the expected scalar."""
        # GH21224
        # test reducing functions in
        # pandas.core.base.SelectionMixin._cython_table
        result = series.agg(func)
        if tm.is_number(expected):
            # Numeric expectations are compared approximately, NaN == NaN.
            assert np.isclose(result, expected, equal_nan=True)
        else:
            assert result == expected

    @pytest.mark.parametrize(
        "series, func, expected",
        chain(
            _get_cython_table_params(
                Series(),
                [
                    ("cumprod", Series([], Index([]))),
                    ("cumsum", Series([], Index([]))),
                ],
            ),
            _get_cython_table_params(
                Series([np.nan, 1, 2, 3]),
                [
                    ("cumprod", Series([np.nan, 1, 2, 6])),
                    ("cumsum", Series([np.nan, 1, 3, 6])),
                ],
            ),
            _get_cython_table_params(
                Series("a b c".split()),
                [("cumsum", Series(["a", "ab", "abc"]))],
            ),
        ),
    )
    def test_agg_cython_table_transform(self, series, func, expected):
        """Each parametrized cumulative function yields the expected Series."""
        # GH21224
        # test transforming functions in
        # pandas.core.base.SelectionMixin._cython_table (cumprod, cumsum)
        transformed = series.agg(func)
        tm.assert_series_equal(transformed, expected)

    @pytest.mark.parametrize(
        "series, func, expected",
        chain(
            _get_cython_table_params(
                Series("a b c".split()),
                [
                    ("mean", TypeError),  # mean raises TypeError
                    ("prod", TypeError),
                    ("std", TypeError),
                    ("var", TypeError),
                    ("median", TypeError),
                    ("cumprod", TypeError),
                ],
            )
        ),
    )
    def test_agg_cython_table_raises(self, series, func, expected):
        """agg raises the parametrized exception type for a string Series."""
        # GH21224
        with pytest.raises(expected):
            # e.g. Series('a b'.split()).cumprod() will raise
            series.agg(func)
class TestSeriesAggregate:
    """Tests for ``Series.agg`` / ``transform`` / ``apply`` with function specs.

    All frame/series comparisons go through the ``tm`` namespace so the
    class uses a single, consistent set of assertion helpers.
    """

    def test_transform(self, string_series):
        """Compare transform/apply of ufuncs with the ufuncs applied directly."""
        # transforming functions
        with np.errstate(all="ignore"):
            f_sqrt = np.sqrt(string_series)
            f_abs = np.abs(string_series)

            # ufunc
            result = string_series.transform(np.sqrt)
            expected = f_sqrt.copy()
            tm.assert_series_equal(result, expected)

            result = string_series.apply(np.sqrt)
            tm.assert_series_equal(result, expected)

            # list-like
            result = string_series.transform([np.sqrt])
            expected = f_sqrt.to_frame().copy()
            expected.columns = ["sqrt"]
            tm.assert_frame_equal(result, expected)

            result = string_series.transform([np.sqrt])
            tm.assert_frame_equal(result, expected)

            result = string_series.transform(["sqrt"])
            tm.assert_frame_equal(result, expected)

            # multiple items in list
            # these are in the order as if we are applying both functions per
            # series and then concatting
            expected = pd.concat([f_sqrt, f_abs], axis=1)
            expected.columns = ["sqrt", "absolute"]
            result = string_series.apply([np.sqrt, np.abs])
            tm.assert_frame_equal(result, expected)

            result = string_series.transform(["sqrt", "abs"])
            expected.columns = ["sqrt", "abs"]
            tm.assert_frame_equal(result, expected)

            # dict, provide renaming
            expected = pd.concat([f_sqrt, f_abs], axis=1)
            expected.columns = ["foo", "bar"]
            expected = expected.unstack().rename("series")

            result = string_series.apply({"foo": np.sqrt, "bar": np.abs})
            tm.assert_series_equal(result.reindex_like(expected), expected)

    def test_transform_and_agg_error(self, string_series):
        """Expect ValueError when reducers and transformers are combined."""
        # we are trying to transform with an aggregator
        with pytest.raises(ValueError):
            string_series.transform(["min", "max"])

        with pytest.raises(ValueError):
            with np.errstate(all="ignore"):
                string_series.agg(["sqrt", "max"])

        with pytest.raises(ValueError):
            with np.errstate(all="ignore"):
                string_series.transform(["sqrt", "max"])

        with pytest.raises(ValueError):
            with np.errstate(all="ignore"):
                string_series.agg({"foo": np.sqrt, "bar": "sum"})

    def test_demo(self):
        """Demonstrate agg with a list, a renaming dict and a nested renamer."""
        # demonstration tests
        s = Series(range(6), dtype="int64", name="series")

        result = s.agg(["min", "max"])
        expected = Series([0, 5], index=["min", "max"], name="series")
        tm.assert_series_equal(result, expected)

        result = s.agg({"foo": "min"})
        expected = Series([0], index=["foo"], name="series")
        tm.assert_series_equal(result, expected)

        # nested renaming
        with tm.assert_produces_warning(FutureWarning):
            result = s.agg({"foo": ["min", "max"]})

        expected = (
            DataFrame({"foo": [0, 5]}, index=["min", "max"])
            .unstack()
            .rename("series")
        )
        tm.assert_series_equal(result, expected)

    def test_multiple_aggregators_with_dict_api(self):
        """A dict mapping two names to lists warns and returns a stacked Series."""
        s = Series(range(6), dtype="int64", name="series")

        # nested renaming
        with tm.assert_produces_warning(FutureWarning):
            result = s.agg({"foo": ["min", "max"], "bar": ["sum", "mean"]})

        expected = (
            DataFrame(
                {
                    "foo": [5.0, np.nan, 0.0, np.nan],
                    "bar": [np.nan, 2.5, np.nan, 15.0],
                },
                columns=["foo", "bar"],
                index=["max", "mean", "min", "sum"],
            )
            .unstack()
            .rename("series")
        )
        tm.assert_series_equal(result.reindex_like(expected), expected)

    def test_agg_apply_evaluate_lambdas_the_same(self, string_series):
        """apply and agg give equal results for a str lambda and for ``str``."""
        # test that we are evaluating row-by-row first
        # before vectorized evaluation
        result = string_series.apply(lambda x: str(x))
        expected = string_series.agg(lambda x: str(x))
        tm.assert_series_equal(result, expected)

        result = string_series.apply(str)
        expected = string_series.agg(str)
        tm.assert_series_equal(result, expected)

    def test_with_nested_series(self, datetime_series):
        """A function returning a Series yields a frame via apply and agg."""
        # GH 2316
        # .agg with a reducer and a transform, what to do
        result = datetime_series.apply(
            lambda x: Series([x, x ** 2], index=["x", "x^2"])
        )
        expected = DataFrame({"x": datetime_series, "x^2": datetime_series ** 2})
        tm.assert_frame_equal(result, expected)

        result = datetime_series.agg(
            lambda x: Series([x, x ** 2], index=["x", "x^2"])
        )
        tm.assert_frame_equal(result, expected)

    def test_replicate_describe(self, string_series):
        """An ordered dict of reducers passed to apply reproduces describe()."""
        # this also tests a result set that is all scalars
        expected = string_series.describe()
        result = string_series.apply(
            OrderedDict(
                [
                    ("count", "count"),
                    ("mean", "mean"),
                    ("std", "std"),
                    ("min", "min"),
                    ("25%", lambda x: x.quantile(0.25)),
                    ("50%", "median"),
                    ("75%", lambda x: x.quantile(0.75)),
                    ("max", "max"),
                ]
            )
        )
        tm.assert_series_equal(result, expected)

    def test_reduce(self, string_series):
        """agg with two reducer names returns a Series labelled by those names."""
        # reductions with named functions
        result = string_series.agg(["sum", "mean"])
        expected = Series(
            [string_series.sum(), string_series.mean()],
            ["sum", "mean"],
            name=string_series.name,
        )
        tm.assert_series_equal(result, expected)

    def test_non_callable_aggregates(self):
        """agg accepts the name of the non-callable attribute 'size'."""
        # test agg using non-callable series attributes
        s = Series([1, 2, None])

        # Calling agg w/ just a string arg same as calling s.arg
        result = s.agg("size")
        expected = s.size
        assert result == expected

        # test when mixed w/ callable reducers
        result = s.agg(["size", "count", "mean"])
        expected = Series(
            OrderedDict([("size", 3.0), ("count", 2.0), ("mean", 1.5)])
        )
        tm.assert_series_equal(result[expected.index], expected)

    @pytest.mark.parametrize(
        "series, func, expected",
        chain(
            _get_cython_table_params(
                Series(),
                [
                    ("sum", 0),
                    ("max", np.nan),
                    ("min", np.nan),
                    ("all", True),
                    ("any", False),
                    ("mean", np.nan),
                    ("prod", 1),
                    ("std", np.nan),
                    ("var", np.nan),
                    ("median", np.nan),
                ],
            ),
            _get_cython_table_params(
                Series([np.nan, 1, 2, 3]),
                [
                    ("sum", 6),
                    ("max", 3),
                    ("min", 1),
                    ("all", True),
                    ("any", True),
                    ("mean", 2),
                    ("prod", 6),
                    ("std", 1),
                    ("var", 1),
                    ("median", 2),
                ],
            ),
            _get_cython_table_params(
                Series("a b c".split()),
                [
                    ("sum", "abc"),
                    ("max", "c"),
                    ("min", "a"),
                    ("all", "c"),  # see GH12863
                    ("any", "a"),
                ],
            ),
        ),
    )
    def test_agg_cython_table(self, series, func, expected):
        """Each parametrized reducer yields the expected scalar."""
        # GH21224
        # test reducing functions in
        # pandas.core.base.SelectionMixin._cython_table
        result = series.agg(func)
        if tm.is_number(expected):
            # Numeric expectations are compared approximately, NaN == NaN.
            assert np.isclose(result, expected, equal_nan=True)
        else:
            assert result == expected

    @pytest.mark.parametrize(
        "series, func, expected",
        chain(
            _get_cython_table_params(
                Series(),
                [
                    ("cumprod", Series([], Index([]))),
                    ("cumsum", Series([], Index([]))),
                ],
            ),
            _get_cython_table_params(
                Series([np.nan, 1, 2, 3]),
                [
                    ("cumprod", Series([np.nan, 1, 2, 6])),
                    ("cumsum", Series([np.nan, 1, 3, 6])),
                ],
            ),
            _get_cython_table_params(
                Series("a b c".split()),
                [("cumsum", Series(["a", "ab", "abc"]))],
            ),
        ),
    )
    def test_agg_cython_table_transform(self, series, func, expected):
        """Each parametrized cumulative function yields the expected Series."""
        # GH21224
        # test transforming functions in
        # pandas.core.base.SelectionMixin._cython_table (cumprod, cumsum)
        result = series.agg(func)
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize(
        "series, func, expected",
        chain(
            _get_cython_table_params(
                Series("a b c".split()),
                [
                    ("mean", TypeError),  # mean raises TypeError
                    ("prod", TypeError),
                    ("std", TypeError),
                    ("var", TypeError),
                    ("median", TypeError),
                    ("cumprod", TypeError),
                ],
            )
        ),
    )
    def test_agg_cython_table_raises(self, series, func, expected):
        """agg raises the parametrized exception type for a string Series."""
        # GH21224
        with pytest.raises(expected):
            # e.g. Series('a b'.split()).cumprod() will raise
            series.agg(func)