コード例 #1
0
ファイル: test_apply.py プロジェクト: yangyayun2016/pandas
class TestDataFrameAggregate:
    def test_agg_transform(self, axis, float_frame):
        """Element-wise functions agree between ``apply`` and ``transform``.

        Exercises a bare ufunc, a single-item list and a two-item list of
        functions along the ``axis`` supplied by the fixture.
        """
        along_index = axis in {0, "index"}
        other_axis = 1 if along_index else 0

        def label_with(frame, func_names):
            # Pair every original label with each function name: on the
            # columns when working along the index, on the index otherwise.
            if along_index:
                frame.columns = pd.MultiIndex.from_product(
                    [float_frame.columns, func_names])
            else:
                frame.index = pd.MultiIndex.from_product(
                    [float_frame.index, func_names])
            return frame

        with np.errstate(all="ignore"):
            abs_frame = np.abs(float_frame)
            sqrt_frame = np.sqrt(float_frame)

            # ufunc
            expected = sqrt_frame.copy()
            tm.assert_frame_equal(
                float_frame.transform(np.sqrt, axis=axis), expected)
            tm.assert_frame_equal(
                float_frame.apply(np.sqrt, axis=axis), expected)
            tm.assert_frame_equal(
                float_frame.transform(np.sqrt, axis=axis), expected)

            # list-like
            expected = label_with(sqrt_frame.copy(), ["sqrt"])
            tm.assert_frame_equal(
                float_frame.apply([np.sqrt], axis=axis), expected)
            tm.assert_frame_equal(
                float_frame.transform([np.sqrt], axis=axis), expected)

            # multiple items in list
            # these are in the order as if we are applying both
            # functions per series and then concatting
            expected = label_with(
                zip_frames([abs_frame, sqrt_frame], axis=other_axis),
                ["absolute", "sqrt"])
            tm.assert_frame_equal(
                float_frame.apply([np.abs, np.sqrt], axis=axis), expected)
            tm.assert_frame_equal(
                float_frame.transform([np.abs, "sqrt"], axis=axis), expected)

    def test_transform_and_agg_err(self, axis, float_frame):
        """Mixing reducing and transforming functions raises ``ValueError``.

        Checked for ``transform`` given only reducers, and for both ``agg``
        and ``transform`` given a reducer together with a transformer.
        """
        # cannot both transform and agg
        with pytest.raises(ValueError):
            float_frame.transform(["max", "min"], axis=axis)

        with pytest.raises(ValueError), np.errstate(all="ignore"):
            float_frame.agg(["max", "sqrt"], axis=axis)

        with pytest.raises(ValueError), np.errstate(all="ignore"):
            float_frame.transform(["max", "sqrt"], axis=axis)

        # NOTE(review): a dict-spec case
        # (``{"A": ["abs", "sum"], "B": ["mean", "max"]}`` on a small frame)
        # used to be wrapped in a local function here that was never called,
        # so it asserted nothing. The dead code was dropped; re-add the case
        # with an explicit expected exception once its behaviour for each
        # ``axis`` value is confirmed.

    @pytest.mark.parametrize(
        "method", ["abs", "shift", "pct_change", "cumsum", "rank"])
    def test_transform_method_name(self, method):
        """Transforming by method name matches calling that method (GH 19760)."""
        frame = pd.DataFrame({"A": [-1, 2]})
        via_transform = frame.transform(method)
        via_method = getattr(frame, method)()
        tm.assert_frame_equal(via_transform, via_method)

    def test_demo(self):
        """Walk through list-based and dict-based ``agg`` on a tiny frame."""
        frame = pd.DataFrame({"A": range(5), "B": 5})

        # A list of names: the expected frame has one row per aggregation.
        expected = DataFrame(
            {"A": [0, 4], "B": [5, 5]},
            columns=["A", "B"],
            index=["min", "max"],
        )
        tm.assert_frame_equal(frame.agg(["min", "max"]), expected)

        # Per-column lists: the expected frame holds NaN wherever a column
        # did not request that aggregation.
        per_column = {"A": ["min", "max"], "B": ["sum", "max"]}
        expected = DataFrame(
            {"A": [4.0, 0.0, np.nan], "B": [5.0, np.nan, 25.0]},
            columns=["A", "B"],
            index=["max", "min", "sum"],
        )
        aggregated = frame.agg(per_column)
        tm.assert_frame_equal(aggregated.reindex_like(expected), expected)

    def test_agg_multiple_mixed_no_warning(self):
        """Aggregating a mixed-dtype frame with a list emits no warning (GH 20909)."""
        mixed = pd.DataFrame({
            "A": [1, 2, 3],
            "B": [1.0, 2.0, 3.0],
            "C": ["foo", "bar", "baz"],
            "D": pd.date_range("20130101", periods=3),
        })
        expected_values = {
            "A": [1, 6],
            "B": [1.0, 6.0],
            "C": ["bar", "foobarbaz"],
            "D": [pd.Timestamp("2013-01-01"), pd.NaT],
        }
        expected = pd.DataFrame(expected_values, index=["min", "sum"])

        # sorted index
        with tm.assert_produces_warning(None):
            result = mixed.agg(["min", "sum"])
        tm.assert_frame_equal(result, expected)

        reversed_columns = ["D", "C", "B", "A"]
        with tm.assert_produces_warning(None):
            result = mixed[reversed_columns].agg(["sum", "min"])

        # For backwards compatibility, the result's index is
        # still sorted by function name, so it's ['min', 'sum']
        # not ['sum', 'min'].
        tm.assert_frame_equal(result, expected[reversed_columns])

    def test_agg_dict_nested_renaming_depr(self):
        """A dict-of-dicts (nested renaming) spec must emit a FutureWarning."""
        frame = pd.DataFrame({"A": range(5), "B": 5})
        nested_spec = {"A": {"foo": "min"}, "B": {"bar": "max"}}

        # nested renaming
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            frame.agg(nested_spec)

    def test_agg_reduce(self, axis, float_frame):
        """Reducing ``agg`` calls with list and dict specs match direct reductions.

        ``name1`` and ``name2`` are the first two unique labels (sorted) on
        the axis opposite to ``axis``; the dict specs are keyed by them.
        """
        along_index = axis in {0, "index"}
        along_columns = axis in {1, "columns"}
        other_axis = 1 if along_index else 0
        name1, name2 = float_frame.axes[other_axis].unique()[:2].sort_values()

        # Label-based selections that the dict-based expectations reduce.
        first = float_frame.loc(other_axis)[name1]
        second = float_frame.loc(other_axis)[name2]

        # all reducers
        reduced = [
            float_frame.mean(axis=axis),
            float_frame.max(axis=axis),
            float_frame.sum(axis=axis),
        ]
        expected = pd.concat(reduced, axis=1)
        expected.columns = ["mean", "max", "sum"]
        if along_index:
            expected = expected.T
        tm.assert_frame_equal(
            float_frame.agg(["mean", "max", "sum"], axis=axis), expected)

        # dict input with scalars
        spec = OrderedDict([(name1, "mean"), (name2, "sum")])
        expected = Series([first.mean(), second.sum()], index=[name1, name2])
        tm.assert_series_equal(float_frame.agg(spec, axis=axis), expected)

        # dict input with lists
        spec = OrderedDict([(name1, ["mean"]), (name2, ["sum"])])
        expected = DataFrame({
            name1: Series([first.mean()], index=["mean"]),
            name2: Series([second.sum()], index=["sum"]),
        })
        if along_columns:
            expected = expected.T
        tm.assert_frame_equal(float_frame.agg(spec, axis=axis), expected)

        # dict input with lists with multiple
        spec = OrderedDict([(name1, ["mean", "sum"]), (name2, ["sum", "max"])])
        first_stats = Series(
            [first.mean(), first.sum()], index=["mean", "sum"])
        second_stats = Series(
            [second.sum(), second.max()], index=["sum", "max"])
        expected = DataFrame(
            OrderedDict([(name1, first_stats), (name2, second_stats)]))
        if along_columns:
            expected = expected.T
        tm.assert_frame_equal(float_frame.agg(spec, axis=axis), expected)

    def test_nuiscance_columns(self):
        """``agg`` on a frame with int, float, string and datetime columns.

        GH 15015.
        """
        frame = DataFrame({
            "A": [1, 2, 3],
            "B": [1.0, 2.0, 3.0],
            "C": ["foo", "bar", "baz"],
            "D": pd.date_range("20130101", periods=3),
        })

        # "min": the expectations are labelled with every column of the frame.
        minima = [1, 1.0, "bar", pd.Timestamp("20130101")]
        tm.assert_series_equal(
            frame.agg("min"), Series(minima, index=frame.columns))
        tm.assert_frame_equal(
            frame.agg(["min"]),
            DataFrame([minima], index=["min"], columns=frame.columns),
        )

        # "sum": the expectations list only columns A, B and C.
        totals = [6, 6.0, "foobarbaz"]
        tm.assert_series_equal(
            frame.agg("sum"), Series(totals, index=["A", "B", "C"]))
        tm.assert_frame_equal(
            frame.agg(["sum"]),
            DataFrame([totals], index=["sum"], columns=["A", "B", "C"]),
        )

    def test_non_callable_aggregates(self):
        """Attribute names such as ``'size'`` are accepted by ``agg`` (GH 16405).

        'size' is a property of frame/series rather than a method; this checks
        it works alone and mixed with the method name ``'count'``.
        """
        frame = DataFrame({
            "A": [None, 2, 3],
            "B": [1.0, np.nan, 3.0],
            "C": ["foo", None, "bar"]
        })

        # Function aggregate
        tm.assert_series_equal(frame.agg({"A": "count"}), Series({"A": 2}))

        # Non-function aggregate
        tm.assert_series_equal(frame.agg({"A": "size"}), Series({"A": 3}))

        # Mix function and non-function aggs
        labels = ["A", "B", "C"]
        from_list = frame.agg(["count", "size"])
        from_dict = frame.agg({label: ["count", "size"] for label in labels})
        expected = pd.DataFrame(
            {label: {"count": 2, "size": 3} for label in labels})

        tm.assert_frame_equal(from_list, from_dict, check_like=True)
        tm.assert_frame_equal(from_dict, expected, check_like=True)

        # A method name given as a string matches calling that method
        tm.assert_series_equal(frame.agg("count"), frame.count())

        # An attribute name given as a string matches reading that attribute
        assert frame.agg("size") == frame.size

    @pytest.mark.parametrize(
        "df, func, expected",
        chain(
            _get_cython_table_params(DataFrame(), [
                ("sum", Series()),
                ("max", Series()),
                ("min", Series()),
                ("all", Series(dtype=bool)),
                ("any", Series(dtype=bool)),
                ("mean", Series()),
                ("prod", Series()),
                ("std", Series()),
                ("var", Series()),
                ("median", Series()),
            ]),
            _get_cython_table_params(DataFrame([[np.nan, 1], [1, 2]]), [
                ("sum", Series([1.0, 3])),
                ("max", Series([1.0, 2])),
                ("min", Series([1.0, 1])),
                ("all", Series([True, True])),
                ("any", Series([True, True])),
                ("mean", Series([1, 1.5])),
                ("prod", Series([1.0, 2])),
                ("std", Series([np.nan, 0.707107])),
                ("var", Series([np.nan, 0.5])),
                ("median", Series([1, 1.5])),
            ]),
        ))
    def test_agg_cython_table(self, df, func, expected, axis):
        """Reducing functions passed to ``agg`` give the expected Series.

        GH 21224; the functions under test are the reducers in
        pandas.core.base.SelectionMixin._cython_table.
        """
        reduced = df.agg(func, axis=axis)
        tm.assert_series_equal(reduced, expected)

    @pytest.mark.parametrize(
        "df, func, expected",
        chain(
            _get_cython_table_params(
                DataFrame(),
                [
                    ("cumprod", DataFrame()),
                    ("cumsum", DataFrame()),
                ],
            ),
            _get_cython_table_params(
                DataFrame([[np.nan, 1], [1, 2]]),
                [
                    ("cumprod", DataFrame([[np.nan, 1], [1.0, 2.0]])),
                    ("cumsum", DataFrame([[np.nan, 1], [1.0, 3.0]])),
                ],
            ),
        ))
    def test_agg_cython_table_transform(self, df, func, expected, axis):
        """Transforming functions passed to ``agg`` give the expected frame.

        GH 21224; covers cumprod and cumsum from
        pandas.core.base.SelectionMixin._cython_table.
        """
        transformed = df.agg(func, axis=axis)
        tm.assert_frame_equal(transformed, expected)

    @pytest.mark.parametrize(
        "df, func, expected",
        _get_cython_table_params(
            DataFrame([["a", "b"], ["b", "a"]]),
            [["cumprod", TypeError]],
        ),
    )
    def test_agg_cython_table_raises(self, df, func, expected, axis):
        """``agg`` raises the parametrized exception type (GH 21224)."""
        error_type = expected
        with pytest.raises(error_type):
            df.agg(func, axis=axis)

    @pytest.mark.parametrize("num_cols", [2, 3, 5])
    def test_frequency_is_original(self, num_cols):
        """Applying an identity function leaves the index ``freq`` as it was.

        GH 22150.
        """
        dates = ["1950-06-30", "1952-10-24", "1953-05-29"]
        index = pd.DatetimeIndex(dates)
        # Snapshot taken before ``apply`` so the two freqs can be compared.
        snapshot = index.copy()
        frame = DataFrame(1, index=index, columns=range(num_cols))
        frame.apply(lambda column: column)
        assert index.freq == snapshot.freq

    def test_apply_datetime_tz_issue(self):
        """Row-wise ``apply`` returning each row's name yields the tz-aware labels.

        GH 29052.
        """
        stamps = [
            pd.Timestamp(text, tz="UTC")
            for text in (
                "2019-03-15 12:34:31.909000+0000",
                "2019-03-15 12:34:34.359000+0000",
                "2019-03-15 12:34:34.660000+0000",
            )
        ]
        frame = DataFrame(data=[0, 1, 2], index=stamps)

        row_names = frame.apply(lambda row: row.name, axis=1)

        tm.assert_series_equal(row_names, pd.Series(data=stamps, index=stamps))
コード例 #2
0
ファイル: test_apply.py プロジェクト: yuyisky88/pandas
class TestDataFrameAggregate():
    def test_agg_transform(self, axis, float_frame):
        """Element-wise functions agree between ``apply`` and ``transform``.

        Exercises a bare ufunc, a single-item list and a two-item list of
        functions along the ``axis`` supplied by the fixture.
        """
        along_index = axis in {0, 'index'}
        other_axis = 1 if along_index else 0

        def label_with(frame, func_names):
            # Pair every original label with each function name: on the
            # columns when working along the index, on the index otherwise.
            if along_index:
                frame.columns = pd.MultiIndex.from_product(
                    [float_frame.columns, func_names])
            else:
                frame.index = pd.MultiIndex.from_product(
                    [float_frame.index, func_names])
            return frame

        with np.errstate(all='ignore'):
            abs_frame = np.abs(float_frame)
            sqrt_frame = np.sqrt(float_frame)

            # ufunc
            expected = sqrt_frame.copy()
            assert_frame_equal(
                float_frame.transform(np.sqrt, axis=axis), expected)
            assert_frame_equal(
                float_frame.apply(np.sqrt, axis=axis), expected)
            assert_frame_equal(
                float_frame.transform(np.sqrt, axis=axis), expected)

            # list-like
            expected = label_with(sqrt_frame.copy(), ['sqrt'])
            assert_frame_equal(
                float_frame.apply([np.sqrt], axis=axis), expected)
            assert_frame_equal(
                float_frame.transform([np.sqrt], axis=axis), expected)

            # multiple items in list
            # these are in the order as if we are applying both
            # functions per series and then concatting
            expected = label_with(
                zip_frames([abs_frame, sqrt_frame], axis=other_axis),
                ['absolute', 'sqrt'])
            assert_frame_equal(
                float_frame.apply([np.abs, np.sqrt], axis=axis), expected)
            assert_frame_equal(
                float_frame.transform([np.abs, 'sqrt'], axis=axis), expected)

    def test_transform_and_agg_err(self, axis, float_frame):
        """Mixing reducing and transforming functions raises ``ValueError``.

        Checked for ``transform`` given only reducers, and for both ``agg``
        and ``transform`` given a reducer together with a transformer.
        """
        # cannot both transform and agg
        with pytest.raises(ValueError):
            float_frame.transform(['max', 'min'], axis=axis)

        with pytest.raises(ValueError), np.errstate(all='ignore'):
            float_frame.agg(['max', 'sqrt'], axis=axis)

        with pytest.raises(ValueError), np.errstate(all='ignore'):
            float_frame.transform(['max', 'sqrt'], axis=axis)

        # NOTE(review): a dict-spec case
        # (``{'A': ['abs', 'sum'], 'B': ['mean', 'max']}`` on a small frame)
        # used to be wrapped in a local function here that was never called,
        # so it asserted nothing. The dead code was dropped; re-add the case
        # with an explicit expected exception once its behaviour for each
        # ``axis`` value is confirmed.

    @pytest.mark.parametrize(
        'method', ['abs', 'shift', 'pct_change', 'cumsum', 'rank'])
    def test_transform_method_name(self, method):
        """Transforming by method name matches calling that method (GH 19760)."""
        frame = pd.DataFrame({"A": [-1, 2]})
        via_transform = frame.transform(method)
        via_method = getattr(frame, method)()
        tm.assert_frame_equal(via_transform, via_method)

    def test_demo(self):
        """Walk through list-based and dict-based ``agg`` on a tiny frame."""
        frame = pd.DataFrame({'A': range(5), 'B': 5})

        # A list of names: the expected frame has one row per aggregation.
        expected = DataFrame(
            {'A': [0, 4], 'B': [5, 5]},
            columns=['A', 'B'],
            index=['min', 'max'])
        tm.assert_frame_equal(frame.agg(['min', 'max']), expected)

        # Per-column lists: the expected frame holds NaN wherever a column
        # did not request that aggregation.
        per_column = {'A': ['min', 'max'], 'B': ['sum', 'max']}
        expected = DataFrame(
            {'A': [4.0, 0.0, np.nan], 'B': [5.0, np.nan, 25.0]},
            columns=['A', 'B'],
            index=['max', 'min', 'sum'])
        aggregated = frame.agg(per_column)
        tm.assert_frame_equal(aggregated.reindex_like(expected), expected)

    def test_agg_multiple_mixed_no_warning(self):
        """Aggregating a mixed-dtype frame with a list emits no warning (GH 20909)."""
        mixed = pd.DataFrame({
            'A': [1, 2, 3],
            'B': [1., 2., 3.],
            'C': ['foo', 'bar', 'baz'],
            'D': pd.date_range('20130101', periods=3)
        })
        expected_values = {
            "A": [1, 6],
            'B': [1.0, 6.0],
            "C": ['bar', 'foobarbaz'],
            "D": [pd.Timestamp('2013-01-01'), pd.NaT]
        }
        expected = pd.DataFrame(expected_values, index=['min', 'sum'])

        # sorted index
        with tm.assert_produces_warning(None):
            result = mixed.agg(['min', 'sum'])
        tm.assert_frame_equal(result, expected)

        reversed_columns = ['D', 'C', 'B', 'A']
        with tm.assert_produces_warning(None):
            result = mixed[reversed_columns].agg(['sum', 'min'])

        # For backwards compatibility, the result's index is
        # still sorted by function name, so it's ['min', 'sum']
        # not ['sum', 'min'].
        tm.assert_frame_equal(result, expected[reversed_columns])

    def test_agg_dict_nested_renaming_depr(self):
        """A dict-of-dicts (nested renaming) spec must emit a FutureWarning."""
        frame = pd.DataFrame({'A': range(5), 'B': 5})
        nested_spec = {'A': {'foo': 'min'}, 'B': {'bar': 'max'}}

        # nested renaming
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            frame.agg(nested_spec)

    def test_agg_reduce(self, axis, float_frame):
        """Reducing ``agg`` calls with list and dict specs match direct reductions.

        ``name1`` and ``name2`` are the first two unique labels (sorted) on
        the axis opposite to ``axis``; the dict specs are keyed by them.
        """
        along_index = axis in {0, 'index'}
        along_columns = axis in {1, 'columns'}
        other_axis = 1 if along_index else 0
        name1, name2 = float_frame.axes[other_axis].unique()[:2].sort_values()

        # Label-based selections that the dict-based expectations reduce.
        first = float_frame.loc(other_axis)[name1]
        second = float_frame.loc(other_axis)[name2]

        # all reducers
        reduced = [
            float_frame.mean(axis=axis),
            float_frame.max(axis=axis),
            float_frame.sum(axis=axis),
        ]
        expected = pd.concat(reduced, axis=1)
        expected.columns = ['mean', 'max', 'sum']
        if along_index:
            expected = expected.T
        assert_frame_equal(
            float_frame.agg(['mean', 'max', 'sum'], axis=axis), expected)

        # dict input with scalars
        spec = OrderedDict([(name1, 'mean'), (name2, 'sum')])
        expected = Series([first.mean(), second.sum()], index=[name1, name2])
        assert_series_equal(float_frame.agg(spec, axis=axis), expected)

        # dict input with lists
        spec = OrderedDict([(name1, ['mean']), (name2, ['sum'])])
        expected = DataFrame({
            name1: Series([first.mean()], index=['mean']),
            name2: Series([second.sum()], index=['sum']),
        })
        if along_columns:
            expected = expected.T
        assert_frame_equal(float_frame.agg(spec, axis=axis), expected)

        # dict input with lists with multiple
        spec = OrderedDict([(name1, ['mean', 'sum']), (name2, ['sum', 'max'])])
        first_stats = Series(
            [first.mean(), first.sum()], index=['mean', 'sum'])
        second_stats = Series(
            [second.sum(), second.max()], index=['sum', 'max'])
        expected = DataFrame(
            OrderedDict([(name1, first_stats), (name2, second_stats)]))
        if along_columns:
            expected = expected.T
        assert_frame_equal(float_frame.agg(spec, axis=axis), expected)

    def test_nuiscance_columns(self):
        """``agg`` on a frame with int, float, string and datetime columns.

        GH 15015.
        """
        frame = DataFrame({
            'A': [1, 2, 3],
            'B': [1., 2., 3.],
            'C': ['foo', 'bar', 'baz'],
            'D': pd.date_range('20130101', periods=3)
        })

        # 'min': the expectations are labelled with every column of the frame.
        minima = [1, 1., 'bar', pd.Timestamp('20130101')]
        assert_series_equal(
            frame.agg('min'), Series(minima, index=frame.columns))
        assert_frame_equal(
            frame.agg(['min']),
            DataFrame([minima], index=['min'], columns=frame.columns))

        # 'sum': the expectations list only columns A, B and C.
        totals = [6, 6., 'foobarbaz']
        assert_series_equal(
            frame.agg('sum'), Series(totals, index=['A', 'B', 'C']))
        assert_frame_equal(
            frame.agg(['sum']),
            DataFrame([totals], index=['sum'], columns=['A', 'B', 'C']))

    def test_non_callable_aggregates(self):
        """Attribute names such as ``'size'`` are accepted by ``agg`` (GH 16405).

        'size' is a property of frame/series rather than a method; this checks
        it works alone and mixed with the method name ``'count'``.
        """
        frame = DataFrame({
            'A': [None, 2, 3],
            'B': [1.0, np.nan, 3.0],
            'C': ['foo', None, 'bar']
        })

        # Function aggregate
        assert_series_equal(frame.agg({'A': 'count'}), Series({'A': 2}))

        # Non-function aggregate
        assert_series_equal(frame.agg({'A': 'size'}), Series({'A': 3}))

        # Mix function and non-function aggs
        labels = ['A', 'B', 'C']
        from_list = frame.agg(['count', 'size'])
        from_dict = frame.agg({label: ['count', 'size'] for label in labels})
        expected = pd.DataFrame(
            {label: {'count': 2, 'size': 3} for label in labels})

        assert_frame_equal(from_list, from_dict, check_like=True)
        assert_frame_equal(from_dict, expected, check_like=True)

        # A method name given as a string matches calling that method
        assert_series_equal(frame.agg('count'), frame.count())

        # An attribute name given as a string matches reading that attribute
        assert frame.agg('size') == frame.size

    @pytest.mark.parametrize(
        "df, func, expected",
        chain(
            _get_cython_table_params(
                DataFrame(),
                [
                    ('sum', Series()),
                    ('max', Series()),
                    ('min', Series()),
                    ('all', Series(dtype=bool)),
                    ('any', Series(dtype=bool)),
                    ('mean', Series()),
                    ('prod', Series()),
                    ('std', Series()),
                    ('var', Series()),
                    ('median', Series()),
                ],
            ),
            _get_cython_table_params(
                DataFrame([[np.nan, 1], [1, 2]]),
                [
                    ('sum', Series([1., 3])),
                    ('max', Series([1., 2])),
                    ('min', Series([1., 1])),
                    ('all', Series([True, True])),
                    ('any', Series([True, True])),
                    ('mean', Series([1, 1.5])),
                    ('prod', Series([1., 2])),
                    ('std', Series([np.nan, 0.707107])),
                    ('var', Series([np.nan, 0.5])),
                    ('median', Series([1, 1.5])),
                ],
            ),
        ),
    )
    def test_agg_cython_table(self, df, func, expected, axis):
        """Reducing functions passed to ``agg`` give the expected Series.

        GH 21224; the functions under test are the reducers in
        pandas.core.base.SelectionMixin._cython_table.
        """
        reduced = df.agg(func, axis=axis)
        tm.assert_series_equal(reduced, expected)

    @pytest.mark.parametrize(
        "df, func, expected",
        chain(
            _get_cython_table_params(
                DataFrame(),
                [
                    ('cumprod', DataFrame()),
                    ('cumsum', DataFrame()),
                ],
            ),
            _get_cython_table_params(
                DataFrame([[np.nan, 1], [1, 2]]),
                [
                    ('cumprod', DataFrame([[np.nan, 1], [1., 2.]])),
                    ('cumsum', DataFrame([[np.nan, 1], [1., 3.]])),
                ],
            ),
        ),
    )
    def test_agg_cython_table_transform(self, df, func, expected, axis):
        """Transforming functions passed to ``agg`` give the expected frame.

        GH 21224; covers cumprod and cumsum from
        pandas.core.base.SelectionMixin._cython_table.
        """
        transformed = df.agg(func, axis=axis)
        tm.assert_frame_equal(transformed, expected)

    @pytest.mark.parametrize(
        "df, func, expected",
        _get_cython_table_params(
            DataFrame([['a', 'b'], ['b', 'a']]),
            [['cumprod', TypeError]],
        ),
    )
    def test_agg_cython_table_raises(self, df, func, expected, axis):
        """``agg`` raises the parametrized exception type (GH 21224)."""
        error_type = expected
        with pytest.raises(error_type):
            df.agg(func, axis=axis)

    @pytest.mark.parametrize("num_cols", [2, 3, 5])
    def test_frequency_is_original(self, num_cols):
        """Applying an identity function leaves the index ``freq`` as it was.

        GH 22150.
        """
        dates = ["1950-06-30", "1952-10-24", "1953-05-29"]
        index = pd.DatetimeIndex(dates)
        # Snapshot taken before ``apply`` so the two freqs can be compared.
        snapshot = index.copy()
        frame = DataFrame(1, index=index, columns=range(num_cols))
        frame.apply(lambda column: column)
        assert index.freq == snapshot.freq
コード例 #3
0
class TestSeriesAggregate:
    """Tests for ``Series.apply``, ``Series.agg`` and ``Series.transform``.

    ``string_series`` and ``datetime_series`` are presumably pytest fixtures
    provided outside this class (TODO confirm against the conftest).
    """

    def test_transform(self, string_series):
        """Check ``transform``/``apply`` with a ufunc, lists and a dict."""
        # Floating-point warnings raised by the ufuncs are silenced throughout.
        with np.errstate(all="ignore"):

            f_sqrt = np.sqrt(string_series)
            f_abs = np.abs(string_series)

            # ufunc
            result = string_series.transform(np.sqrt)
            expected = f_sqrt.copy()
            tm.assert_series_equal(result, expected)

            result = string_series.apply(np.sqrt)
            tm.assert_series_equal(result, expected)

            # list-like
            result = string_series.transform([np.sqrt])
            expected = f_sqrt.to_frame().copy()
            expected.columns = ["sqrt"]
            tm.assert_frame_equal(result, expected)

            # apply with the same one-element list must agree with transform
            result = string_series.apply([np.sqrt])
            tm.assert_frame_equal(result, expected)

            result = string_series.transform(["sqrt"])
            tm.assert_frame_equal(result, expected)

            # multiple items in list
            # these are in the order as if we are applying both functions per
            # series and then concatting
            expected = pd.concat([f_sqrt, f_abs], axis=1)
            expected.columns = ["sqrt", "absolute"]
            result = string_series.apply([np.sqrt, np.abs])
            tm.assert_frame_equal(result, expected)

            # string names label the columns with the strings themselves
            result = string_series.transform(["sqrt", "abs"])
            expected.columns = ["sqrt", "abs"]
            tm.assert_frame_equal(result, expected)

            # dict, provide renaming
            expected = pd.concat([f_sqrt, f_abs], axis=1)
            expected.columns = ["foo", "bar"]
            expected = expected.unstack().rename("series")

            result = string_series.apply({"foo": np.sqrt, "bar": np.abs})
            tm.assert_series_equal(result.reindex_like(expected), expected)

    def test_transform_and_agg_error(self, string_series):
        """Check that mixing transformers and reducers raises ValueError."""
        # we are trying to transform with an aggregator
        with pytest.raises(ValueError):
            string_series.transform(["min", "max"])

        with pytest.raises(ValueError):
            with np.errstate(all="ignore"):
                string_series.agg(["sqrt", "max"])

        with pytest.raises(ValueError):
            with np.errstate(all="ignore"):
                string_series.transform(["sqrt", "max"])

        with pytest.raises(ValueError):
            with np.errstate(all="ignore"):
                string_series.agg({"foo": np.sqrt, "bar": "sum"})

    def test_demo(self):
        """Demonstrate ``agg`` with a list, a dict and a nested renamer."""
        # demonstration tests
        s = Series(range(6), dtype="int64", name="series")

        result = s.agg(["min", "max"])
        expected = Series([0, 5], index=["min", "max"], name="series")
        tm.assert_series_equal(result, expected)

        result = s.agg({"foo": "min"})
        expected = Series([0], index=["foo"], name="series")
        tm.assert_series_equal(result, expected)

        # nested renaming
        msg = "nested renamer is not supported"
        with pytest.raises(SpecificationError, match=msg):
            s.agg({"foo": ["min", "max"]})

    def test_multiple_aggregators_with_dict_api(self):
        """Check that a dict of lists passed to ``agg`` is rejected."""
        s = Series(range(6), dtype="int64", name="series")
        # nested renaming
        msg = "nested renamer is not supported"
        with pytest.raises(SpecificationError, match=msg):
            s.agg({"foo": ["min", "max"], "bar": ["sum", "mean"]})

    def test_agg_apply_evaluate_lambdas_the_same(self, string_series):
        """Check that ``apply`` and ``agg`` agree for ``str`` callables."""
        # test that we are evaluating row-by-row first
        # before vectorized evaluation
        # The lambda wrapper is deliberate: both a lambda and the bare
        # builtin are exercised.
        result = string_series.apply(lambda x: str(x))
        expected = string_series.agg(lambda x: str(x))
        tm.assert_series_equal(result, expected)

        result = string_series.apply(str)
        expected = string_series.agg(str)
        tm.assert_series_equal(result, expected)

    def test_with_nested_series(self, datetime_series):
        """Check ``apply``/``agg`` with a function that returns a Series.

        GH 2316.
        """
        # .agg with a reducer and a transform, what to do
        result = datetime_series.apply(
            lambda x: Series([x, x**2], index=["x", "x^2"]))
        expected = DataFrame({"x": datetime_series, "x^2": datetime_series**2})
        tm.assert_frame_equal(result, expected)

        result = datetime_series.agg(
            lambda x: Series([x, x**2], index=["x", "x^2"]))
        tm.assert_frame_equal(result, expected)

    def test_replicate_describe(self, string_series):
        """Rebuild ``describe()`` output through ``apply`` with a mapping."""
        # this also tests a result set that is all scalars
        expected = string_series.describe()
        result = string_series.apply(
            OrderedDict([
                ("count", "count"),
                ("mean", "mean"),
                ("std", "std"),
                ("min", "min"),
                ("25%", lambda x: x.quantile(0.25)),
                ("50%", "median"),
                ("75%", lambda x: x.quantile(0.75)),
                ("max", "max"),
            ]))
        tm.assert_series_equal(result, expected)

    def test_reduce(self, string_series):
        """Check ``agg`` with a list of named reductions."""
        # reductions with named functions
        result = string_series.agg(["sum", "mean"])
        expected = Series(
            [string_series.sum(), string_series.mean()],
            ["sum", "mean"],
            name=string_series.name,
        )
        tm.assert_series_equal(result, expected)

    def test_non_callable_aggregates(self):
        """Check ``agg`` with the name of a non-callable attribute."""
        # test agg using non-callable series attributes
        s = Series([1, 2, None])

        # Calling agg w/ just a string arg same as calling s.arg
        result = s.agg("size")
        expected = s.size
        assert result == expected

        # test when mixed w/ callable reducers
        result = s.agg(["size", "count", "mean"])
        expected = Series(
            OrderedDict([("size", 3.0), ("count", 2.0), ("mean", 1.5)]))
        tm.assert_series_equal(result[expected.index], expected)

    @pytest.mark.parametrize(
        "series, func, expected",
        chain(
            _get_cython_table_params(
                Series(),
                [
                    ("sum", 0),
                    ("max", np.nan),
                    ("min", np.nan),
                    ("all", True),
                    ("any", False),
                    ("mean", np.nan),
                    ("prod", 1),
                    ("std", np.nan),
                    ("var", np.nan),
                    ("median", np.nan),
                ],
            ),
            _get_cython_table_params(
                Series([np.nan, 1, 2, 3]),
                [
                    ("sum", 6),
                    ("max", 3),
                    ("min", 1),
                    ("all", True),
                    ("any", True),
                    ("mean", 2),
                    ("prod", 6),
                    ("std", 1),
                    ("var", 1),
                    ("median", 2),
                ],
            ),
            _get_cython_table_params(
                Series("a b c".split()),
                [
                    ("sum", "abc"),
                    ("max", "c"),
                    ("min", "a"),
                    ("all", "c"),  # see GH12863
                    ("any", "a"),
                ],
            ),
        ),
    )
    def test_agg_cython_table(self, series, func, expected):
        """Check the scalar result of reducing functions passed to ``agg``.

        GH21224: covers the reducing functions in
        pandas.core.base.SelectionMixin._cython_table.
        """
        result = series.agg(func)
        if tm.is_number(expected):
            # Numeric expectations are compared approximately; NaN == NaN.
            assert np.isclose(result, expected, equal_nan=True)
        else:
            assert result == expected

    @pytest.mark.parametrize(
        "series, func, expected",
        chain(
            _get_cython_table_params(
                Series(),
                [("cumprod", Series([], Index([]))),
                 ("cumsum", Series([], Index([])))],
            ),
            _get_cython_table_params(
                Series([np.nan, 1, 2, 3]),
                [
                    ("cumprod", Series([np.nan, 1, 2, 6])),
                    ("cumsum", Series([np.nan, 1, 3, 6])),
                ],
            ),
            _get_cython_table_params(Series("a b c".split()),
                                     [("cumsum", Series(["a", "ab", "abc"]))]),
        ),
    )
    def test_agg_cython_table_transform(self, series, func, expected):
        """Check the Series result of cumprod/cumsum passed to ``agg``.

        GH21224: covers the transforming functions (cumprod, cumsum) in
        pandas.core.base.SelectionMixin._cython_table.
        """
        result = series.agg(func)
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize(
        "series, func, expected",
        _get_cython_table_params(
            Series("a b c".split()),
            [
                ("mean", TypeError),  # mean raises TypeError
                ("prod", TypeError),
                ("std", TypeError),
                ("var", TypeError),
                ("median", TypeError),
                ("cumprod", TypeError),
            ],
        ),
    )
    def test_agg_cython_table_raises(self, series, func, expected):
        """Check that ``agg`` on a string Series raises the given exception.

        GH21224.
        """
        with pytest.raises(expected):
            # e.g. Series('a b'.split()).cumprod() will raise
            series.agg(func)
コード例 #4
0
class TestSeriesAggregate:
    """Tests for ``Series.apply``, ``Series.agg`` and ``Series.transform``.

    ``string_series`` and ``datetime_series`` are presumably pytest fixtures
    provided outside this class (TODO confirm against the conftest).
    """

    def test_transform(self, string_series):
        """Check ``transform``/``apply`` with a ufunc, lists and a dict."""
        # Floating-point warnings raised by the ufuncs are silenced throughout.
        with np.errstate(all='ignore'):

            f_sqrt = np.sqrt(string_series)
            f_abs = np.abs(string_series)

            # ufunc
            result = string_series.transform(np.sqrt)
            expected = f_sqrt.copy()
            tm.assert_series_equal(result, expected)

            result = string_series.apply(np.sqrt)
            tm.assert_series_equal(result, expected)

            # list-like
            result = string_series.transform([np.sqrt])
            expected = f_sqrt.to_frame().copy()
            expected.columns = ['sqrt']
            tm.assert_frame_equal(result, expected)

            # apply with the same one-element list must agree with transform
            result = string_series.apply([np.sqrt])
            tm.assert_frame_equal(result, expected)

            result = string_series.transform(['sqrt'])
            tm.assert_frame_equal(result, expected)

            # multiple items in list
            # these are in the order as if we are applying both functions per
            # series and then concatting
            expected = pd.concat([f_sqrt, f_abs], axis=1)
            expected.columns = ['sqrt', 'absolute']
            result = string_series.apply([np.sqrt, np.abs])
            tm.assert_frame_equal(result, expected)

            # string names label the columns with the strings themselves
            result = string_series.transform(['sqrt', 'abs'])
            expected.columns = ['sqrt', 'abs']
            tm.assert_frame_equal(result, expected)

            # dict, provide renaming
            expected = pd.concat([f_sqrt, f_abs], axis=1)
            expected.columns = ['foo', 'bar']
            expected = expected.unstack().rename('series')

            result = string_series.apply({'foo': np.sqrt, 'bar': np.abs})
            tm.assert_series_equal(result.reindex_like(expected), expected)

    def test_transform_and_agg_error(self, string_series):
        """Check that mixing transformers and reducers raises ValueError."""
        # we are trying to transform with an aggregator
        with pytest.raises(ValueError):
            string_series.transform(['min', 'max'])

        with pytest.raises(ValueError):
            with np.errstate(all='ignore'):
                string_series.agg(['sqrt', 'max'])

        with pytest.raises(ValueError):
            with np.errstate(all='ignore'):
                string_series.transform(['sqrt', 'max'])

        with pytest.raises(ValueError):
            with np.errstate(all='ignore'):
                string_series.agg({'foo': np.sqrt, 'bar': 'sum'})

    def test_demo(self):
        """Demonstrate ``agg`` with a list, a dict and a nested renamer."""
        # demonstration tests
        s = Series(range(6), dtype='int64', name='series')

        result = s.agg(['min', 'max'])
        expected = Series([0, 5], index=['min', 'max'], name='series')
        tm.assert_series_equal(result, expected)

        result = s.agg({'foo': 'min'})
        expected = Series([0], index=['foo'], name='series')
        tm.assert_series_equal(result, expected)

        # nested renaming: expected to emit a FutureWarning and still
        # return a result
        with tm.assert_produces_warning(FutureWarning):
            result = s.agg({'foo': ['min', 'max']})

        expected = DataFrame({
            'foo': [0, 5]
        }, index=['min', 'max']).unstack().rename('series')
        tm.assert_series_equal(result, expected)

    def test_multiple_aggregators_with_dict_api(self):
        """Check ``agg`` with a dict mapping two names to lists of reducers."""
        s = Series(range(6), dtype='int64', name='series')
        # nested renaming: expected to emit a FutureWarning and still
        # return a result
        with tm.assert_produces_warning(FutureWarning):
            result = s.agg({'foo': ['min', 'max'], 'bar': ['sum', 'mean']})

        expected = DataFrame(
            {
                'foo': [5.0, np.nan, 0.0, np.nan],
                'bar': [np.nan, 2.5, np.nan, 15.0]
            },
            columns=['foo', 'bar'],
            index=['max', 'mean', 'min', 'sum']).unstack().rename('series')
        tm.assert_series_equal(result.reindex_like(expected), expected)

    def test_agg_apply_evaluate_lambdas_the_same(self, string_series):
        """Check that ``apply`` and ``agg`` agree for ``str`` callables."""
        # test that we are evaluating row-by-row first
        # before vectorized evaluation
        # The lambda wrapper is deliberate: both a lambda and the bare
        # builtin are exercised.
        result = string_series.apply(lambda x: str(x))
        expected = string_series.agg(lambda x: str(x))
        tm.assert_series_equal(result, expected)

        result = string_series.apply(str)
        expected = string_series.agg(str)
        tm.assert_series_equal(result, expected)

    def test_with_nested_series(self, datetime_series):
        """Check ``apply``/``agg`` with a function that returns a Series.

        GH 2316.
        """
        # .agg with a reducer and a transform, what to do
        result = datetime_series.apply(
            lambda x: Series([x, x**2], index=['x', 'x^2']))
        expected = DataFrame({'x': datetime_series, 'x^2': datetime_series**2})
        tm.assert_frame_equal(result, expected)

        result = datetime_series.agg(
            lambda x: Series([x, x**2], index=['x', 'x^2']))
        tm.assert_frame_equal(result, expected)

    def test_replicate_describe(self, string_series):
        """Rebuild ``describe()`` output through ``apply`` with a mapping."""
        # this also tests a result set that is all scalars
        expected = string_series.describe()
        result = string_series.apply(
            OrderedDict([('count', 'count'), ('mean', 'mean'), ('std', 'std'),
                         ('min', 'min'), ('25%', lambda x: x.quantile(0.25)),
                         ('50%', 'median'),
                         ('75%', lambda x: x.quantile(0.75)), ('max', 'max')]))
        tm.assert_series_equal(result, expected)

    def test_reduce(self, string_series):
        """Check ``agg`` with a list of named reductions."""
        # reductions with named functions
        result = string_series.agg(['sum', 'mean'])
        expected = Series(
            [string_series.sum(), string_series.mean()], ['sum', 'mean'],
            name=string_series.name)
        tm.assert_series_equal(result, expected)

    def test_non_callable_aggregates(self):
        """Check ``agg`` with the name of a non-callable attribute."""
        # test agg using non-callable series attributes
        s = Series([1, 2, None])

        # Calling agg w/ just a string arg same as calling s.arg
        result = s.agg('size')
        expected = s.size
        assert result == expected

        # test when mixed w/ callable reducers
        result = s.agg(['size', 'count', 'mean'])
        expected = Series(
            OrderedDict([('size', 3.0), ('count', 2.0), ('mean', 1.5)]))
        tm.assert_series_equal(result[expected.index], expected)

    @pytest.mark.parametrize(
        "series, func, expected",
        chain(
            _get_cython_table_params(Series(), [
                ('sum', 0),
                ('max', np.nan),
                ('min', np.nan),
                ('all', True),
                ('any', False),
                ('mean', np.nan),
                ('prod', 1),
                ('std', np.nan),
                ('var', np.nan),
                ('median', np.nan),
            ]),
            _get_cython_table_params(Series([np.nan, 1, 2, 3]), [
                ('sum', 6),
                ('max', 3),
                ('min', 1),
                ('all', True),
                ('any', True),
                ('mean', 2),
                ('prod', 6),
                ('std', 1),
                ('var', 1),
                ('median', 2),
            ]),
            _get_cython_table_params(
                Series('a b c'.split()),
                [
                    ('sum', 'abc'),
                    ('max', 'c'),
                    ('min', 'a'),
                    ('all', 'c'),  # see GH12863
                    ('any', 'a'),
                ]),
        ))
    def test_agg_cython_table(self, series, func, expected):
        """Check the scalar result of reducing functions passed to ``agg``.

        GH21224: covers the reducing functions in
        pandas.core.base.SelectionMixin._cython_table.
        """
        result = series.agg(func)
        if tm.is_number(expected):
            # Numeric expectations are compared approximately; NaN == NaN.
            assert np.isclose(result, expected, equal_nan=True)
        else:
            assert result == expected

    @pytest.mark.parametrize(
        "series, func, expected",
        chain(
            _get_cython_table_params(Series(), [
                ('cumprod', Series([], Index([]))),
                ('cumsum', Series([], Index([]))),
            ]),
            _get_cython_table_params(Series([np.nan, 1, 2, 3]), [
                ('cumprod', Series([np.nan, 1, 2, 6])),
                ('cumsum', Series([np.nan, 1, 3, 6])),
            ]),
            _get_cython_table_params(Series('a b c'.split()), [
                ('cumsum', Series(['a', 'ab', 'abc'])),
            ]),
        ))
    def test_agg_cython_table_transform(self, series, func, expected):
        """Check the Series result of cumprod/cumsum passed to ``agg``.

        GH21224: covers the transforming functions (cumprod, cumsum) in
        pandas.core.base.SelectionMixin._cython_table.
        """
        result = series.agg(func)
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize(
        "series, func, expected",
        _get_cython_table_params(
            Series('a b c'.split()),
            [
                ('mean', TypeError),  # mean raises TypeError
                ('prod', TypeError),
                ('std', TypeError),
                ('var', TypeError),
                ('median', TypeError),
                ('cumprod', TypeError),
            ]))
    def test_agg_cython_table_raises(self, series, func, expected):
        """Check that ``agg`` on a string Series raises the given exception.

        GH21224.
        """
        with pytest.raises(expected):
            # e.g. Series('a b'.split()).cumprod() will raise
            series.agg(func)