コード例 #1
0
    def _aggregate_series_pure_python(self, obj: Series, func: F):
        """
        Aggregate ``obj`` one group at a time using a plain Python loop.

        Parameters
        ----------
        obj : Series
        func : F
            Callable invoked once on every group produced by the splitter.

        Returns
        -------
        tuple
            ``(out, counts)``: ``out`` holds one converted result per group,
            ``counts`` holds ``group.shape[0]`` for each group.
        """
        ids, _, n_groups = self.group_info

        # One integer slot per group for the sizes, one object slot per group
        #  for the raw results.
        sizes = np.zeros(n_groups, dtype=int)
        collected = np.empty(n_groups, dtype="O")

        pieces = get_splitter(obj, ids, n_groups, axis=0)
        for pos, piece in enumerate(pieces):
            # Mirrors a single step of
            #  libreduction._BaseGrouper._apply_to_group
            value = libreduction.extract_result(func(piece))

            if pos == 0:
                # Validation is performed for the first result only
                libreduction.check_result_array(value, 0)

            sizes[pos] = piece.shape[0]
            collected[pos] = value

        converted = lib.maybe_convert_objects(collected, try_float=False)
        return (
            maybe_cast_pointwise_result(converted, obj.dtype, numeric_only=True),
            sizes,
        )
コード例 #2
0
    def agg_series(self, obj: Series, func: F) -> ArrayLike:
        """
        Aggregate ``obj`` with ``func``, dispatching to either the fast or the
        pure-Python implementation.

        Parameters
        ----------
        obj : Series
        func : F
            Aggregation callable, forwarded unchanged to the chosen helper.

        Returns
        -------
        ArrayLike
            The converted result; it is additionally passed through
            ``maybe_cast_pointwise_result`` when the pure-Python path was used.
        """
        # Ruling out the zero-group case is the caller's job.
        assert self.ngroups != 0

        # The pure-Python path is required when any of the following holds
        #  (checked in this order, short-circuiting):
        #  - obj is empty: SeriesGrouper would raise in _aggregate_series_fast
        #  - obj._values is not an ndarray: _aggregate_series_fast would raise
        #    TypeError when calling libreduction.Slider, and in the
        #    datetime64tz case it would incorrectly cast to tz-naive
        #    TODO: can we get a performant workaround for EAs backed by ndarray?
        #  - the index has complex internals: preempt a TypeError in
        #    _aggregate_series_fast
        # Results from that path are the ones that get cast back afterwards.
        cast_back = (
            len(obj) == 0
            or not isinstance(obj._values, np.ndarray)
            or obj.index._has_complex_internals
        )

        if cast_back:
            aggregated = self._aggregate_series_pure_python(obj, func)
        else:
            aggregated = self._aggregate_series_fast(obj, func)

        converted = lib.maybe_convert_objects(aggregated, try_float=False)
        if not cast_back:
            return converted

        # TODO: Is there a documented reason why we dont always cast_back?
        return maybe_cast_pointwise_result(converted, obj.dtype, numeric_only=True)
コード例 #3
0
ファイル: ops.py プロジェクト: rth/pandas
    def agg_series(
        self, obj: Series, func: F, preserve_dtype: bool = False
    ) -> ArrayLike:
        """
        Aggregate ``obj`` with ``func`` and optionally cast the result back
        towards ``obj.dtype``.

        Parameters
        ----------
        obj : Series
        func : function taking a Series and returning a scalar-like
        preserve_dtype : bool, default False
            Pass True when the aggregation is known to keep the dtype. It is
            forced to True whenever ``obj`` is non-empty and its values are
            not a plain ndarray.

        Returns
        -------
        np.ndarray or ExtensionArray
        """
        # NB: test_groupby_empty_with_category reaches this method with
        #  self.ngroups == 0 and len(obj) > 0

        empty = len(obj) == 0
        # Only inspected for non-empty input, matching the dispatch order below
        non_ndarray = not empty and not isinstance(obj._values, np.ndarray)

        # Reasons to avoid _aggregate_series_fast, evaluated in order:
        #  - empty obj: SeriesGrouper would raise
        #  - non-ndarray values: libreduction.Slider would raise TypeError,
        #    and datetime64tz would be incorrectly cast to tz-naive
        #    TODO: can we get a performant workaround for EAs backed by ndarray?
        #  - index with complex internals: preempt a TypeError
        #  - BinGrouper: the BaseGrouper aggregate_series_fast cannot be
        #    removed yet, as test_crosstab.test_categorical breaks without it
        use_python = (
            empty
            or non_ndarray
            or obj.index._has_complex_internals
            or isinstance(self, BinGrouper)
        )

        if use_python:
            aggregated = self._aggregate_series_pure_python(obj, func)
        else:
            aggregated = self._aggregate_series_fast(obj, func)

        if non_ndarray:
            # With an EA dtype we can be more aggressive about preserving,
            #  since maybe_cast_pointwise_result does a try/except with
            #  _from_sequence.  NB this assumes _from_sequence is strict
            #  enough to cast appropriately.
            preserve_dtype = True

        converted = lib.maybe_convert_objects(aggregated, try_float=False)
        if preserve_dtype:
            return maybe_cast_pointwise_result(
                converted, obj.dtype, numeric_only=True
            )
        return converted
コード例 #4
0
    def agg_series(self,
                   obj: Series,
                   func: F,
                   preserve_dtype: bool = False) -> ArrayLike:
        """
        Aggregate ``obj`` with ``func`` through the pure-Python implementation
        and optionally cast the result back towards ``obj.dtype``.

        Parameters
        ----------
        obj : Series
        func : function taking a Series and returning a scalar-like
        preserve_dtype : bool, default False
            Pass True when the aggregation is known to keep the dtype. It is
            forced to True whenever ``obj`` is non-empty and its values are
            not a plain ndarray.

        Returns
        -------
        np.ndarray or ExtensionArray
        """
        # NB: test_groupby_empty_with_category reaches this method with
        #  self.ngroups == 0 and len(obj) > 0

        # Decided before aggregating; obj._values is only inspected for
        #  non-empty input.
        non_ndarray_values = len(obj) != 0 and not isinstance(
            obj._values, np.ndarray
        )

        # Every input goes through the same pure-Python aggregation.
        aggregated = self._aggregate_series_pure_python(obj, func)

        if non_ndarray_values:
            # With an EA dtype we can be more aggressive about preserving,
            #  since maybe_cast_pointwise_result does a try/except with
            #  _from_sequence.  NB this assumes _from_sequence is strict
            #  enough to cast appropriately.
            preserve_dtype = True

        converted = lib.maybe_convert_objects(aggregated, try_float=False)
        if not preserve_dtype:
            return converted
        return maybe_cast_pointwise_result(converted, obj.dtype, numeric_only=True)