Example #1
0
    def _aggregate_series_pure_python(
        self,
        obj: Series,
        func: F,
        *args,
        engine: str = "cython",
        engine_kwargs=None,
        **kwargs,
    ):
        """
        Aggregate ``obj`` one group at a time in a plain Python loop.

        Parameters
        ----------
        obj : Series
            Values to split into groups according to ``self.group_info``.
        func : callable
            Reduction applied to every group. On the non-numba path it is
            called as ``func(group, *args, **kwargs)``; on the numba path it
            is first wrapped by ``generate_numba_func`` and then called as
            ``numba_func(values, index, *args)``.
        *args
            Extra positional arguments forwarded to the reduction.
        engine : str, default "cython"
            Passed to ``maybe_use_numba`` to decide which path is taken.
        engine_kwargs : dict, default None
            Forwarded to ``generate_numba_func`` on the numba path.
        **kwargs
            Extra keyword arguments; forwarded to ``func`` on the non-numba
            path and to ``generate_numba_func`` on the numba path.

        Returns
        -------
        tuple
            ``(result, counts)`` where ``result`` holds one entry per group
            (after ``lib.maybe_convert_objects``) and ``counts`` holds
            ``group.shape[0]`` for every group label.

        Raises
        ------
        ValueError
            If the first group's result is a Series, Index or ndarray whose
            length is not 1 ("Function does not reduce").
        """
        # The engine is fixed for the whole call, so decide once instead of
        # re-evaluating the check for every group.
        use_numba = maybe_use_numba(engine)
        if use_numba:
            numba_func, cache_key = generate_numba_func(
                func, engine_kwargs, kwargs, "groupby_agg")

        group_index, _, ngroups = self.group_info

        counts = np.zeros(ngroups, dtype=int)
        result = None

        splitter = get_splitter(obj, group_index, ngroups, axis=0)

        for label, group in splitter:
            if use_numba:
                values, index = split_for_numba(group)
                res = numba_func(values, index, *args)
                # Only register the function once it has run successfully.
                if cache_key not in NUMBA_FUNC_CACHE:
                    NUMBA_FUNC_CACHE[cache_key] = numba_func
            else:
                res = func(group, *args, **kwargs)

            if result is None:
                # First group: verify that func reduces before allocating
                # the output buffer.
                if isinstance(res, (Series, Index, np.ndarray)):
                    if len(res) == 1:
                        # e.g. test_agg_lambda_with_timezone lambda e: e.head(1)
                        # FIXME: are we potentially losing important res.index info?
                        res = res.item()
                    else:
                        raise ValueError("Function does not reduce")
                result = np.empty(ngroups, dtype="O")

            counts[label] = group.shape[0]
            result[label] = res

        # The splitter must have yielded at least one group.
        assert result is not None
        result = lib.maybe_convert_objects(result, try_float=0)
        # TODO: maybe_cast_to_extension_array?

        return result, counts
Example #2
0
 def mean(self, *args, engine=None, engine_kwargs=None, **kwargs):
     """
     Compute the exponentially weighted mean with the selected engine.

     Parameters
     ----------
     *args, **kwargs
         Handed to ``nv.validate_window_func`` on the cython path; the
         numba path does not read them.
     engine : str, default None
         An engine accepted by ``maybe_use_numba`` selects the numba
         kernels; ``"cython"`` or ``None`` selects
         ``window_aggregations.ewma``.
     engine_kwargs : dict, default None
         Forwarded to the numba function generator. Must be ``None`` on
         the cython path.

     Returns
     -------
     The value returned by ``self._apply`` for the chosen kernel.

     Raises
     ------
     ValueError
         If ``engine_kwargs`` is given with the cython engine, or if
         ``engine`` is neither usable with numba nor ``"cython"``/``None``.
     """
     if maybe_use_numba(engine):
         # Pick the generator and the cache tag together, then build once.
         if self.method == "single":
             build_kernel, cache_tag = generate_numba_ewma_func, "ewma"
         else:
             build_kernel, cache_tag = (
                 generate_ewma_numba_table_func,
                 "ewma_table",
             )
         ewma_func = build_kernel(
             engine_kwargs,
             self._com,
             self.adjust,
             self.ignore_na,
             self._deltas,
         )
         return self._apply(
             ewma_func,
             numba_cache_key=(lambda x: x, cache_tag),
         )

     if engine not in ("cython", None):
         raise ValueError("engine must be either 'numba' or 'cython'")

     if engine_kwargs is not None:
         raise ValueError("cython engine does not accept engine_kwargs")

     nv.validate_window_func("mean", args, kwargs)
     cython_kernel = partial(
         window_aggregations.ewma,
         com=self._com,
         adjust=self.adjust,
         ignore_na=self.ignore_na,
         deltas=self._deltas,
     )
     return self._apply(cython_kernel)
Example #3
0
    def sum(self, *args, engine=None, engine_kwargs=None, **kwargs):
        """
        Compute the exponentially weighted sum with the selected engine.

        Parameters
        ----------
        *args, **kwargs
            Handed to ``nv.validate_window_func`` on the cython path; the
            numba path does not read them.
        engine : str, default None
            An engine accepted by ``maybe_use_numba`` selects the numba
            kernels; ``"cython"`` or ``None`` selects
            ``window_aggregations.ewm``.
        engine_kwargs : dict, default None
            Passed through ``get_jit_arguments`` on the numba path. Must be
            ``None`` on the cython path.

        Returns
        -------
        The value returned by ``self._apply`` for the chosen kernel.

        Raises
        ------
        NotImplementedError
            If ``self.adjust`` is falsy.
        ValueError
            If ``engine_kwargs`` is given with the cython engine, or if
            ``engine`` is neither usable with numba nor ``"cython"``/``None``.
        """
        if not self.adjust:
            raise NotImplementedError(
                "sum is not implemented with adjust=False")

        if maybe_use_numba(engine):
            make_kernel = (
                generate_numba_ewm_func
                if self.method == "single"
                else generate_numba_ewm_table_func
            )
            numba_kernel = make_kernel(
                **get_jit_arguments(engine_kwargs),
                com=self._com,
                adjust=self.adjust,
                ignore_na=self.ignore_na,
                deltas=tuple(self._deltas),
                normalize=False,
            )
            return self._apply(numba_kernel)

        if engine not in ("cython", None):
            raise ValueError("engine must be either 'numba' or 'cython'")

        if engine_kwargs is not None:
            raise ValueError("cython engine does not accept engine_kwargs")
        nv.validate_window_func("sum", args, kwargs)

        # Deltas are only handed to the cython kernel when times were given.
        if self.times is None:
            time_deltas = None
        else:
            time_deltas = self._deltas

        cython_kernel = partial(
            window_aggregations.ewm,
            com=self._com,
            adjust=self.adjust,
            ignore_na=self.ignore_na,
            deltas=time_deltas,
            normalize=False,
        )
        return self._apply(cython_kernel)
Example #4
0
    def mean(self, engine=None, engine_kwargs=None):
        """
        Compute the grouped exponentially weighted mean.

        Parameters
        ----------
        engine : str, default None
            * ``'cython'`` : Runs mean through C-extensions from cython.
            * ``'numba'`` : Runs mean through JIT compiled code from numba.
            * ``None`` : Defaults to ``'cython'`` or globally setting
              ``compute.use_numba``

              .. versionadded:: 1.2.0

        engine_kwargs : dict, default None
            * For ``'cython'`` engine, there are no accepted ``engine_kwargs``
            * For ``'numba'`` engine, the engine can accept ``nopython``,
              ``nogil`` and ``parallel`` dictionary keys. The values must
              either be ``True`` or ``False``. The default ``engine_kwargs``
              for the ``'numba'`` engine is
              ``{'nopython': True, 'nogil': False, 'parallel': False}``.

              .. versionadded:: 1.2.0

        Returns
        -------
        Series or DataFrame
            Return type is determined by the caller.

        Raises
        ------
        ValueError
            If ``engine_kwargs`` is given with the cython engine, or if
            ``engine`` is neither usable with numba nor ``'cython'``/``None``.
        """
        if maybe_use_numba(engine):
            numba_kernel = generate_numba_groupby_ewma_func(
                engine_kwargs, self.com, self.adjust, self.ignore_na
            )
            cache_key = (lambda x: x, "groupby_ewma")
            return self._apply(numba_kernel, numba_cache_key=cache_key)

        if engine not in ("cython", None):
            raise ValueError("engine must be either 'numba' or 'cython'")

        if engine_kwargs is not None:
            raise ValueError("cython engine does not accept engine_kwargs")

        def f(chunk):
            # Re-wrap each group in a window object bound to the same
            # groupby, then take its mean.
            return self._shallow_copy(chunk, groupby=self._groupby).mean()

        return self._groupby.apply(f)
Example #5
0
    def agg_series(
        self,
        obj: Series,
        func: F,
        *args,
        engine: str = "cython",
        engine_kwargs=None,
        **kwargs,
    ):
        """
        Aggregate ``obj`` with ``func``, choosing between the fast path and
        the pure-python fallback.

        Parameters
        ----------
        obj : Series
            Values to aggregate.
        func : callable
            Reduction to apply to each group.
        *args, **kwargs
            Forwarded only when the numba engine is selected; the other
            paths call the helpers with ``obj`` and ``func`` alone.
        engine : str, default "cython"
            Passed to ``maybe_use_numba`` to decide whether the numba path
            is taken.
        engine_kwargs : dict, default None
            Forwarded on the numba path.

        Returns
        -------
        The value returned by ``_aggregate_series_fast`` or
        ``_aggregate_series_pure_python``.
        """
        # Caller is responsible for checking ngroups != 0
        assert self.ngroups != 0

        if maybe_use_numba(engine):
            return self._aggregate_series_pure_python(
                obj,
                func,
                *args,
                engine=engine,
                engine_kwargs=engine_kwargs,
                **kwargs,
            )

        # Cases in which _aggregate_series_fast is known not to work:
        #  * empty obj: SeriesGrouper would raise
        #  * extension dtype: libreduction.Slider would raise TypeError, and
        #    datetime64tz would incorrectly be cast to tz-naive
        #    TODO: can we get a performant workaround for EAs backed by ndarray?
        #  * index with complex internals: preempt TypeError
        needs_python_path = (
            len(obj) == 0
            or is_extension_array_dtype(obj.dtype)
            or obj.index._has_complex_internals
        )
        if needs_python_path:
            return self._aggregate_series_pure_python(obj, func)

        try:
            return self._aggregate_series_fast(obj, func)
        except ValueError as err:
            # "Function does not reduce" is raised in libreduction; anything
            # else is a genuine error and must propagate.
            if "Function does not reduce" not in str(err):
                raise

        return self._aggregate_series_pure_python(obj, func)
Example #6
0
File: ewm.py Project: samize/pandas
 def __init__(
     self,
     obj: NDFrame,
     com: float | None = None,
     span: float | None = None,
     halflife: float | TimedeltaConvertibleTypes | None = None,
     alpha: float | None = None,
     min_periods: int | None = 0,
     adjust: bool = True,
     ignore_na: bool = False,
     axis: Axis = 0,
     times: str | np.ndarray | NDFrame | None = None,
     engine: str = "numba",
     engine_kwargs: dict[str, bool] | None = None,
     *,
     selection=None,
 ):
     """
     Set up the online exponentially weighted window.

     All window parameters are forwarded unchanged to the parent
     initializer; ``engine`` and ``engine_kwargs`` are stored on the
     instance.

     Raises
     ------
     NotImplementedError
         If ``times`` is not ``None``.
     ValueError
         If ``maybe_use_numba(engine)`` is falsy.
     """
     # Online operations cannot work with explicit times.
     if times is not None:
         raise NotImplementedError(
             "times is not implemented with online operations."
         )

     super().__init__(
         obj=obj,
         com=com,
         span=span,
         halflife=halflife,
         alpha=alpha,
         min_periods=min_periods,
         adjust=adjust,
         ignore_na=ignore_na,
         axis=axis,
         times=times,
         selection=selection,
     )

     # Running state for the mean, built from the attributes set up by the
     # parent initializer.
     self._mean = EWMMeanState(
         self._com, self.adjust, self.ignore_na, self.axis, obj.shape
     )

     if not maybe_use_numba(engine):
         raise ValueError("'numba' is the only supported engine")
     self.engine = engine
     self.engine_kwargs = engine_kwargs
Example #7
0
File: ewm.py Project: tnir/pandas
    def mean(
        self,
        numeric_only: bool = False,
        *args,
        engine=None,
        engine_kwargs=None,
        **kwargs,
    ):
        """
        Compute the exponentially weighted mean with the selected engine.

        Parameters
        ----------
        numeric_only : bool, default False
            Forwarded to ``self._apply`` for both engines.
        *args, **kwargs
            Passed to ``maybe_warn_args_and_kwargs`` and, on the cython
            path, to ``nv.validate_window_func``.
        engine : str, default None
            An engine accepted by ``maybe_use_numba`` selects the numba
            kernels; ``"cython"`` or ``None`` selects
            ``window_aggregations.ewm``.
        engine_kwargs : dict, default None
            Passed through ``get_jit_arguments`` on the numba path. Must be
            ``None`` on the cython path.

        Returns
        -------
        The value returned by ``self._apply`` for the chosen kernel.

        Raises
        ------
        ValueError
            If ``engine_kwargs`` is given with the cython engine, or if
            ``engine`` is neither usable with numba nor ``"cython"``/``None``.
        """
        maybe_warn_args_and_kwargs(type(self), "mean", args, kwargs)
        if maybe_use_numba(engine):
            if self.method == "single":
                func = generate_numba_ewm_func
            else:
                func = generate_numba_ewm_table_func
            ewm_func = func(
                **get_jit_arguments(engine_kwargs),
                com=self._com,
                adjust=self.adjust,
                ignore_na=self.ignore_na,
                deltas=tuple(self._deltas),
                normalize=True,
            )
            # Forward numeric_only here too, so the flag is honoured the
            # same way regardless of the engine.
            return self._apply(
                ewm_func,
                name="mean",
                numeric_only=numeric_only,
            )
        elif engine in ("cython", None):
            if engine_kwargs is not None:
                raise ValueError("cython engine does not accept engine_kwargs")
            nv.validate_window_func("mean", args, kwargs)

            # Deltas are only handed to the cython kernel when times were
            # given.
            deltas = None if self.times is None else self._deltas
            window_func = partial(
                window_aggregations.ewm,
                com=self._com,
                adjust=self.adjust,
                ignore_na=self.ignore_na,
                deltas=deltas,
                normalize=True,
            )
            return self._apply(
                window_func,
                name="mean",
                numeric_only=numeric_only,
            )
        else:
            raise ValueError("engine must be either 'numba' or 'cython'")