예제 #1
0
    def sum(self, *args, engine=None, engine_kwargs=None, **kwargs):
        """
        Calculate the exponentially weighted sum (un-normalized ewm).

        Parameters
        ----------
        *args, **kwargs
            Only handed to ``nv.validate_window_func`` on the cython path.
        engine : str, default None
            ``"cython"`` or ``None`` select the cython kernel; whether the
            numba kernel is used is decided by ``maybe_use_numba(engine)``.
        engine_kwargs : dict, default None
            Forwarded to ``get_jit_arguments`` on the numba path. Must be
            ``None`` when the cython kernel is used.

        Returns
        -------
        The value returned by ``self._apply`` for the selected kernel.

        Raises
        ------
        NotImplementedError
            If ``self.adjust`` is falsy.
        ValueError
            If ``engine`` is not recognised, or ``engine_kwargs`` is supplied
            together with the cython engine.
        """
        if not self.adjust:
            raise NotImplementedError(
                "sum is not implemented with adjust=False")

        if maybe_use_numba(engine):
            # Column-wise kernel for method="single", table-wise otherwise.
            numba_factory = (
                generate_numba_ewm_func
                if self.method == "single"
                else generate_numba_ewm_table_func
            )
            jitted_kernel = numba_factory(
                **get_jit_arguments(engine_kwargs),
                com=self._com,
                adjust=self.adjust,
                ignore_na=self.ignore_na,
                deltas=tuple(self._deltas),
                normalize=False,
            )
            return self._apply(jitted_kernel)

        if engine not in ("cython", None):
            raise ValueError("engine must be either 'numba' or 'cython'")
        if engine_kwargs is not None:
            raise ValueError("cython engine does not accept engine_kwargs")

        nv.validate_window_func("sum", args, kwargs)

        # The deltas are only passed on when ``times`` was supplied.
        time_deltas = self._deltas if self.times is not None else None
        cython_kernel = partial(
            window_aggregations.ewm,
            com=self._com,
            adjust=self.adjust,
            ignore_na=self.ignore_na,
            deltas=time_deltas,
            normalize=False,
        )
        return self._apply(cython_kernel)
예제 #2
0
def generate_numba_agg_func(
    kwargs: dict[str, Any],
    func: Callable[..., Scalar],
    engine_kwargs: dict[str, bool] | None,
) -> Callable[[np.ndarray, np.ndarray, np.ndarray, np.ndarray, int, Any],
              np.ndarray]:
    """
    Build a numba-jitted groupby aggregation loop around a user function.

    The user function is validated and jitted, then wrapped in a jitted loop
    that calls it once per (group, column) pair. The same jit options are
    used for the user function and for the wrapping loop.

    Parameters
    ----------
    kwargs : dict
        **kwargs intended for the user function; handed to
        ``get_jit_arguments`` together with ``engine_kwargs``
    func : function
        function applied to each group's column slice; will be JITed
    engine_kwargs : dict
        dictionary of arguments to be passed into numba.jit

    Returns
    -------
    Numba function
        A previously cached function is returned when
        ``(func, "groupby_agg")`` is already present in ``NUMBA_FUNC_CACHE``.
    """
    nopython, nogil, parallel = get_jit_arguments(engine_kwargs, kwargs)

    validate_udf(func)
    cache_key = (func, "groupby_agg")
    if cache_key in NUMBA_FUNC_CACHE:
        return NUMBA_FUNC_CACHE[cache_key]

    numba_func = jit_user_function(func, nopython, nogil, parallel)
    numba = import_optional_dependency("numba")

    @numba.jit(nopython=nopython, nogil=nogil, parallel=parallel)
    def group_agg(
        values: np.ndarray,
        index: np.ndarray,
        begin: np.ndarray,
        end: np.ndarray,
        num_columns: int,
        *args: Any,
    ) -> np.ndarray:

        assert len(begin) == len(end)
        num_groups = len(begin)

        # One output row per group, one output column per input column.
        result = np.empty((num_groups, num_columns))
        for i in numba.prange(num_groups):
            lo = begin[i]
            hi = end[i]
            group_index = index[lo:hi]
            for j in numba.prange(num_columns):
                result[i, j] = numba_func(values[lo:hi, j], group_index, *args)
        return result

    return group_agg
예제 #3
0
def generate_numba_apply_func(
    args: Tuple,
    kwargs: Dict[str, Any],
    func: Callable[..., Scalar],
    engine_kwargs: Optional[Dict[str, bool]],
):
    """
    Build a numba-jitted rolling apply loop around a user function.

    The user function is jitted and then called once per window from a
    jitted loop. The same jit options are used for the user function and
    for the wrapping loop.

    Parameters
    ----------
    args : tuple
        *args passed to the user function after the window
    kwargs : dict
        **kwargs intended for the user function; handed to
        ``get_jit_arguments`` together with ``engine_kwargs``
    func : function
        function to be applied to each window and will be JITed
    engine_kwargs : dict
        dictionary of arguments to be passed into numba.jit

    Returns
    -------
    Numba function
        A previously cached function is returned when
        ``(func, "rolling_apply")`` is already present in
        ``NUMBA_FUNC_CACHE``.
    """
    nopython, nogil, parallel = get_jit_arguments(engine_kwargs, kwargs)

    cache_key = (func, "rolling_apply")
    if cache_key in NUMBA_FUNC_CACHE:
        return NUMBA_FUNC_CACHE[cache_key]

    numba_func = jit_user_function(func, nopython, nogil, parallel)
    numba = import_optional_dependency("numba")
    # prange is only used when parallel execution was requested.
    loop_range = numba.prange if parallel else range

    @numba.jit(nopython=nopython, nogil=nogil, parallel=parallel)
    def roll_apply(
        values: np.ndarray, begin: np.ndarray, end: np.ndarray, minimum_periods: int
    ) -> np.ndarray:
        result = np.empty(len(begin))
        for i in loop_range(len(result)):
            window = values[begin[i]:end[i]]
            valid_count = len(window) - np.sum(np.isnan(window))
            # Windows with too few non-NaN entries yield NaN.
            if valid_count >= minimum_periods:
                result[i] = numba_func(window, *args)
            else:
                result[i] = np.nan
        return result

    return roll_apply
예제 #4
0
    def _aggregate_series_pure_python(
        self,
        obj: Series,
        func: F,
        *args,
        engine: str = "cython",
        engine_kwargs=None,
        **kwargs,
    ):
        """
        Aggregate ``obj`` group by group with a Python-level loop.

        Parameters
        ----------
        obj : Series
            Object that is split into groups via ``get_splitter``.
        func : callable
            Reducer applied to each group. With ``engine="numba"`` it is
            jitted and called as ``numba_func(values, index, *args)``;
            otherwise it is called as ``func(group, *args, **kwargs)``.
        *args
            Positional arguments forwarded to the reducer.
        engine : str, default "cython"
            ``"numba"`` selects the jitted path; any other value uses the
            plain Python call.
        engine_kwargs : dict, default None
            Passed to ``get_jit_arguments`` on the numba path.
        **kwargs
            Keyword arguments forwarded to ``func`` on the non-numba path and
            checked by ``check_kwargs_and_nopython`` on the numba path.

        Returns
        -------
        tuple
            ``(result, counts)``: the per-group results after
            ``lib.maybe_convert_objects`` and the per-group row counts.

        Raises
        ------
        ValueError
            If the first group's result is a Series, Index or ndarray whose
            length is not 1 ("Function does not reduce").
        """

        if engine == "numba":
            nopython, nogil, parallel = get_jit_arguments(engine_kwargs)
            check_kwargs_and_nopython(kwargs, nopython)
            validate_udf(func)
            cache_key = (func, "groupby_agg")
            # Look the cache up lazily: ``dict.get(key, default)`` evaluates
            # its default eagerly, which re-JITed ``func`` on every call even
            # when a compiled version was already cached.
            if cache_key in NUMBA_FUNC_CACHE:
                numba_func = NUMBA_FUNC_CACHE[cache_key]
            else:
                numba_func = jit_user_function(func, nopython, nogil, parallel)

        group_index, _, ngroups = self.group_info

        counts = np.zeros(ngroups, dtype=int)
        result = None

        splitter = get_splitter(obj, group_index, ngroups, axis=0)

        for label, group in splitter:
            if engine == "numba":
                values, index = split_for_numba(group)
                res = numba_func(values, index, *args)
                # Only cache once the jitted function has run successfully.
                if cache_key not in NUMBA_FUNC_CACHE:
                    NUMBA_FUNC_CACHE[cache_key] = numba_func
            else:
                res = func(group, *args, **kwargs)

            if result is None:
                # The reduction check is only performed on the first group.
                if isinstance(res, (Series, Index, np.ndarray)):
                    if len(res) == 1:
                        # e.g. test_agg_lambda_with_timezone lambda e: e.head(1)
                        # FIXME: are we potentially losing important res.index info?
                        res = res.item()
                    else:
                        raise ValueError("Function does not reduce")
                result = np.empty(ngroups, dtype="O")

            counts[label] = group.shape[0]
            result[label] = res

        # ``result`` stays None when the splitter yields no groups.
        assert result is not None
        result = lib.maybe_convert_objects(result, try_float=0)
        # TODO: maybe_cast_to_extension_array?

        return result, counts
예제 #5
0
def generate_shared_aggregator(
    func: Callable[..., Scalar],
    engine_kwargs: dict[str, bool] | None,
    cache_key_str: str,
):
    """
    Wrap a 1D numba kernel in a jitted loop over the columns of a 2D array.

    Parameters
    ----------
    func : function
        aggregation kernel called once per column as
        ``func(column, start, end, min_periods, *args)``
    engine_kwargs : dict
        dictionary of arguments to be passed into numba.jit
    cache_key_str: str
        second element of the cache key, of the form
        <caller_type>_<aggregation_type> e.g. rolling_mean, groupby_mean

    Returns
    -------
    Numba function
        A previously cached function is returned when
        ``(func, cache_key_str)`` is already present in ``NUMBA_FUNC_CACHE``.
    """
    nopython, nogil, parallel = get_jit_arguments(engine_kwargs, None)

    cache_key = (func, cache_key_str)
    if cache_key in NUMBA_FUNC_CACHE:
        return NUMBA_FUNC_CACHE[cache_key]

    if not TYPE_CHECKING:
        numba = import_optional_dependency("numba")
    else:
        import numba

    @numba.jit(nopython=nopython, nogil=nogil, parallel=parallel)
    def column_looper(
        values: np.ndarray,
        start: np.ndarray,
        end: np.ndarray,
        min_periods: int,
        *args,
    ):
        num_columns = values.shape[1]
        # One output row per window, one output column per input column.
        result = np.empty((len(start), num_columns), dtype=np.float64)
        for col in numba.prange(num_columns):
            result[:, col] = func(values[:, col], start, end, min_periods, *args)
        return result

    return column_looper
예제 #6
0
파일: ewm.py 프로젝트: tnir/pandas
    def mean(
        self,
        numeric_only: bool = False,
        *args,
        engine=None,
        engine_kwargs=None,
        **kwargs,
    ):
        """
        Calculate the exponentially weighted mean (normalized ewm).

        Parameters
        ----------
        numeric_only : bool, default False
            Forwarded to ``self._apply`` on the cython path.
        *args, **kwargs
            Reported through ``maybe_warn_args_and_kwargs`` and, on the
            cython path, checked by ``nv.validate_window_func``.
        engine : str, default None
            ``"cython"`` or ``None`` select the cython kernel; whether the
            numba kernel is used is decided by ``maybe_use_numba(engine)``.
        engine_kwargs : dict, default None
            Forwarded to ``get_jit_arguments`` on the numba path. Must be
            ``None`` when the cython kernel is used.

        Returns
        -------
        The value returned by ``self._apply`` for the selected kernel.

        Raises
        ------
        ValueError
            If ``engine`` is not recognised, or ``engine_kwargs`` is supplied
            together with the cython engine.
        """
        maybe_warn_args_and_kwargs(type(self), "mean", args, kwargs)

        if maybe_use_numba(engine):
            # Column-wise kernel for method="single", table-wise otherwise.
            numba_factory = (
                generate_numba_ewm_func
                if self.method == "single"
                else generate_numba_ewm_table_func
            )
            jitted_kernel = numba_factory(
                **get_jit_arguments(engine_kwargs),
                com=self._com,
                adjust=self.adjust,
                ignore_na=self.ignore_na,
                deltas=tuple(self._deltas),
                normalize=True,
            )
            # NOTE(review): numeric_only is not forwarded on this path,
            # unlike the cython path below - confirm this is intended.
            return self._apply(jitted_kernel, name="mean")

        if engine not in ("cython", None):
            raise ValueError("engine must be either 'numba' or 'cython'")
        if engine_kwargs is not None:
            raise ValueError("cython engine does not accept engine_kwargs")

        nv.validate_window_func("mean", args, kwargs)

        # The deltas are only passed on when ``times`` was supplied.
        time_deltas = self._deltas if self.times is not None else None
        cython_kernel = partial(
            window_aggregations.ewm,
            com=self._com,
            adjust=self.adjust,
            ignore_na=self.ignore_na,
            deltas=time_deltas,
            normalize=True,
        )
        return self._apply(
            cython_kernel, name="mean", numeric_only=numeric_only
        )
예제 #7
0
파일: numba_.py 프로젝트: zbrookle/pandas
def generate_numba_func(
    func: Callable,
    engine_kwargs: Optional[Dict[str, bool]],
    kwargs: dict,
    cache_key_str: str,
) -> Tuple[Callable, Tuple[Callable, str]]:
    """
    Return a JITed function and cache key for the NUMBA_FUNC_CACHE

    This _may_ be specific to groupby (as it's only used there currently).

    The cached function is returned when ``(func, cache_key_str)`` is already
    in ``NUMBA_FUNC_CACHE``; otherwise ``func`` is jitted. This function does
    not store the result in the cache itself.

    Parameters
    ----------
    func : function
        user defined function
    engine_kwargs : dict or None
        numba.jit arguments
    kwargs : dict
        kwargs for func
    cache_key_str : str
        string representing the second part of the cache key tuple

    Returns
    -------
    (JITed function, cache key)

    Raises
    ------
    NumbaUtilError
    """
    nopython, nogil, parallel = get_jit_arguments(engine_kwargs)
    check_kwargs_and_nopython(kwargs, nopython)
    validate_udf(func)
    cache_key = (func, cache_key_str)
    # Look the cache up lazily: ``dict.get(key, default)`` evaluates its
    # default eagerly, which re-JITed ``func`` on every call even when a
    # compiled version was already cached.
    if cache_key in NUMBA_FUNC_CACHE:
        numba_func = NUMBA_FUNC_CACHE[cache_key]
    else:
        numba_func = jit_user_function(func, nopython, nogil, parallel)
    return numba_func, cache_key
예제 #8
0
파일: online.py 프로젝트: Aathi410/Pro123
def generate_online_numba_ewma_func(engine_kwargs: Optional[Dict[str, bool]]):
    """
    Generate a numba jitted online ewma function specified by values
    from engine_kwargs.

    Parameters
    ----------
    engine_kwargs : dict
        dictionary of arguments to be passed into numba.jit

    Returns
    -------
    Numba function
        Called as ``online_ewma(values, deltas, minimum_periods,
        old_wt_factor, new_wt, old_wt, adjust, ignore_na)``; returns
        ``(result, old_wt)``. ``old_wt`` is updated in place.
    """
    nopython, nogil, parallel = get_jit_arguments(engine_kwargs)

    # NOTE(review): a new lambda object is created on every call and function
    # objects compare by identity, so this key cannot match an entry stored by
    # an earlier call - confirm whether a shared sentinel was intended.
    cache_key = (lambda x: x, "online_ewma")
    if cache_key in NUMBA_FUNC_CACHE:
        return NUMBA_FUNC_CACHE[cache_key]

    numba = import_optional_dependency("numba")

    @numba.jit(nopython=nopython, nogil=nogil, parallel=parallel)
    def online_ewma(
        values: np.ndarray,
        deltas: np.ndarray,
        minimum_periods: int,
        old_wt_factor: float,
        new_wt: float,
        old_wt: np.ndarray,
        adjust: bool,
        ignore_na: bool,
    ):
        """
        Compute online exponentially weighted mean per column over 2D values.

        Takes the first observation as is, then computes the subsequent
        exponentially weighted mean accounting minimum periods.
        """
        result = np.empty(values.shape)
        # Copy the first row: the running average is updated element-wise
        # below and must not write through into the caller's ``values``.
        weighted_avg = values[0].copy()
        nobs = (~np.isnan(weighted_avg)).astype(np.int64)
        result[0] = np.where(nobs >= minimum_periods, weighted_avg, np.nan)

        for i in range(1, len(values)):
            cur = values[i]
            is_observations = ~np.isnan(cur)
            nobs += is_observations.astype(np.int64)
            for j in numba.prange(len(cur)):
                if not np.isnan(weighted_avg[j]):
                    if is_observations[j] or not ignore_na:

                        # note that len(deltas) = len(vals) - 1 and deltas[i] is to be
                        # used in conjunction with vals[i+1]
                        # NOTE(review): this indexes ``deltas`` with the column
                        # counter ``j`` although the note above describes a
                        # per-row pairing; left unchanged pending confirmation
                        # of how callers size ``deltas``.
                        old_wt[j] *= old_wt_factor**deltas[j - 1]
                        if is_observations[j]:
                            # avoid numerical errors on constant series
                            if weighted_avg[j] != cur[j]:
                                weighted_avg[j] = (
                                    (old_wt[j] * weighted_avg[j]) +
                                    (new_wt * cur[j])) / (old_wt[j] + new_wt)
                            if adjust:
                                old_wt[j] += new_wt
                            else:
                                old_wt[j] = 1.0
                elif is_observations[j]:
                    # First non-NaN value seen for this column.
                    weighted_avg[j] = cur[j]

            result[i] = np.where(nobs >= minimum_periods, weighted_avg, np.nan)

        return result, old_wt

    return online_ewma
예제 #9
0
def generate_numba_groupby_ewma_func(
    engine_kwargs: Optional[Dict[str, bool]],
    com: float,
    adjust: bool,
    ignore_na: bool,
):
    """
    Generate a numba jitted groupby ewma function specified by values
    from engine_kwargs.

    The returned kernel computes an exponentially weighted mean separately
    over every ``values[begin[i]:end[i]]`` slice and writes each slice's
    result back to the same positions of the output array.

    Parameters
    ----------
    engine_kwargs : dict
        dictionary of arguments to be passed into numba.jit
    com : float
        The kernel derives ``alpha = 1 / (1 + com)`` from this value.
    adjust : bool
        If True, a new observation gets weight 1 and the accumulated weight
        keeps growing; if False, a new observation gets weight ``alpha`` and
        the accumulated weight is reset to 1 after each observation.
    ignore_na : bool
        If True, NaN entries do not decay the accumulated weight.

    Returns
    -------
    Numba function
    """
    nopython, nogil, parallel = get_jit_arguments(engine_kwargs)

    # NOTE(review): a new lambda object is created on every call and function
    # objects compare by identity, so this key cannot match an entry stored by
    # an earlier call - confirm whether a shared sentinel was intended.
    cache_key = (lambda x: x, "groupby_ewma")
    if cache_key in NUMBA_FUNC_CACHE:
        return NUMBA_FUNC_CACHE[cache_key]

    numba = import_optional_dependency("numba")
    # prange is only used when parallel execution was requested.
    if parallel:
        loop_range = numba.prange
    else:
        loop_range = range

    @numba.jit(nopython=nopython, nogil=nogil, parallel=parallel)
    def groupby_ewma(
        values: np.ndarray,
        begin: np.ndarray,
        end: np.ndarray,
        minimum_periods: int,
    ) -> np.ndarray:
        result = np.empty(len(values))
        alpha = 1.0 / (1.0 + com)
        for i in loop_range(len(begin)):
            start = begin[i]
            stop = end[i]
            window = values[start:stop]
            sub_result = np.empty(len(window))

            old_wt_factor = 1.0 - alpha
            new_wt = 1.0 if adjust else alpha

            # Seed the running average with the first element of the slice.
            weighted_avg = window[0]
            nobs = int(not np.isnan(weighted_avg))
            sub_result[0] = weighted_avg if nobs >= minimum_periods else np.nan
            old_wt = 1.0

            for j in range(1, len(window)):
                cur = window[j]
                is_observation = not np.isnan(cur)
                nobs += is_observation
                if not np.isnan(weighted_avg):

                    # Decay the accumulated weight unless this is a NaN that
                    # ignore_na tells us to skip.
                    if is_observation or not ignore_na:

                        old_wt *= old_wt_factor
                        if is_observation:

                            # avoid numerical errors on constant series
                            if weighted_avg != cur:
                                weighted_avg = (
                                    (old_wt * weighted_avg) +
                                    (new_wt * cur)) / (old_wt + new_wt)
                            if adjust:
                                old_wt += new_wt
                            else:
                                old_wt = 1.0
                elif is_observation:
                    # First non-NaN value seen in this slice.
                    weighted_avg = cur

                # Emit NaN until enough observations have been seen.
                sub_result[
                    j] = weighted_avg if nobs >= minimum_periods else np.nan

            result[start:stop] = sub_result

        return result

    return groupby_ewma
예제 #10
0
파일: numba_.py 프로젝트: zbrookle/pandas
def generate_numba_transform_func(
    args: Tuple,
    kwargs: Dict[str, Any],
    func: Callable[..., Scalar],
    engine_kwargs: Optional[Dict[str, bool]],
) -> Callable[[np.ndarray, np.ndarray, np.ndarray, np.ndarray, int, int], np.ndarray]:
    """
    Build a numba-jitted groupby transform loop around a user function.

    The user function is validated and jitted, then wrapped in a jitted loop
    that calls it once per (group, column) pair and writes its output back
    to the rows belonging to that group. The same jit options are used for
    the user function and for the wrapping loop.

    Parameters
    ----------
    args : tuple
        *args passed to the user function after the group values and index
    kwargs : dict
        **kwargs intended for the user function; checked by
        ``check_kwargs_and_nopython``
    func : function
        function applied to each group's column slice; will be JITed
    engine_kwargs : dict
        dictionary of arguments to be passed into numba.jit

    Returns
    -------
    Numba function
    """
    nopython, nogil, parallel = get_jit_arguments(engine_kwargs)
    check_kwargs_and_nopython(kwargs, nopython)
    validate_udf(func)

    numba_func = jit_user_function(func, nopython, nogil, parallel)
    numba = import_optional_dependency("numba")

    # prange is only used when parallel execution was requested.
    loop_range = numba.prange if parallel else range

    @numba.jit(nopython=nopython, nogil=nogil, parallel=parallel)
    def group_transform(
        values: np.ndarray,
        index: np.ndarray,
        begin: np.ndarray,
        end: np.ndarray,
        num_groups: int,
        num_columns: int,
    ) -> np.ndarray:
        # Same number of rows as the input, one column per input column.
        result = np.empty((len(values), num_columns))
        for i in loop_range(num_groups):
            lo = begin[i]
            hi = end[i]
            group_index = index[lo:hi]
            for j in loop_range(num_columns):
                result[lo:hi, j] = numba_func(values[lo:hi, j], group_index, *args)
        return result

    return group_transform
예제 #11
0
파일: numba_.py 프로젝트: ukarroum/pandas
def generate_numba_ewm_func(
    engine_kwargs: dict[str, bool] | None,
    com: float,
    adjust: bool,
    ignore_na: bool,
    deltas: np.ndarray,
    normalize: bool,
):
    """
    Generate a numba jitted ewm mean or sum function specified by values
    from engine_kwargs.

    The returned kernel processes every ``values[begin[i]:end[i]]`` slice
    independently and writes each slice's result back to the same positions
    of the output array.

    Parameters
    ----------
    engine_kwargs : dict
        dictionary of arguments to be passed into numba.jit
    com : float
        The kernel derives ``alpha = 1 / (1 + com)`` from this value.
    adjust : bool
        If True, a new observation gets weight 1; otherwise ``alpha``.
    ignore_na : bool
        If True, NaN entries do not decay the running state.
    deltas : numpy.ndarray
        Exponents applied to the decay factor; only read when ``normalize``
        is True.
    normalize : bool
        True computes the weighted mean, False the weighted sum.

    Returns
    -------
    Numba function
    """
    nopython, nogil, parallel = get_jit_arguments(engine_kwargs)

    str_key = "ewm_mean" if normalize else "ewm_sum"
    # NOTE(review): a new lambda object is created on every call and function
    # objects compare by identity, so this key cannot match an entry stored by
    # an earlier call - confirm whether a shared sentinel was intended.
    cache_key = (lambda x: x, str_key)
    if cache_key in NUMBA_FUNC_CACHE:
        return NUMBA_FUNC_CACHE[cache_key]

    numba = import_optional_dependency("numba")

    # error: Untyped decorator makes function "ewma" untyped
    @numba.jit(nopython=nopython, nogil=nogil,
               parallel=parallel)  # type: ignore[misc]
    def ewm(
        values: np.ndarray,
        begin: np.ndarray,
        end: np.ndarray,
        minimum_periods: int,
    ) -> np.ndarray:
        result = np.empty(len(values))
        alpha = 1.0 / (1.0 + com)
        old_wt_factor = 1.0 - alpha
        new_wt = 1.0 if adjust else alpha

        for i in numba.prange(len(begin)):
            start = begin[i]
            stop = end[i]
            window = values[start:stop]
            sub_result = np.empty(len(window))

            # Seed the running value with the first element of the slice.
            weighted = window[0]
            nobs = int(not np.isnan(weighted))
            sub_result[0] = weighted if nobs >= minimum_periods else np.nan
            old_wt = 1.0

            for j in range(1, len(window)):
                cur = window[j]
                is_observation = not np.isnan(cur)
                nobs += is_observation
                if not np.isnan(weighted):

                    # Decay the running state unless this is a NaN that
                    # ignore_na tells us to skip.
                    if is_observation or not ignore_na:
                        if normalize:
                            # note that len(deltas) = len(vals) - 1 and deltas[i]
                            # is to be used in conjunction with vals[i+1]
                            old_wt *= old_wt_factor**deltas[start + j - 1]
                        else:
                            # Sum mode decays the running total directly and
                            # does not read ``deltas``.
                            weighted = old_wt_factor * weighted
                        if is_observation:
                            if normalize:
                                # avoid numerical errors on constant series
                                if weighted != cur:
                                    weighted = old_wt * weighted + new_wt * cur
                                    # Always true here: already inside the
                                    # ``if normalize`` branch above.
                                    if normalize:
                                        weighted = weighted / (old_wt + new_wt)
                                if adjust:
                                    old_wt += new_wt
                                else:
                                    old_wt = 1.0
                            else:
                                weighted += cur
                elif is_observation:
                    # First non-NaN value seen in this slice.
                    weighted = cur

                # Emit NaN until enough observations have been seen.
                sub_result[j] = weighted if nobs >= minimum_periods else np.nan

            result[start:stop] = sub_result

        return result

    return ewm
예제 #12
0
파일: numba_.py 프로젝트: ukarroum/pandas
def generate_numba_ewm_table_func(
    engine_kwargs: dict[str, bool] | None,
    com: float,
    adjust: bool,
    ignore_na: bool,
    deltas: np.ndarray,
    normalize: bool,
):
    """
    Generate a numba jitted ewm mean or sum function applied table wise specified
    by values from engine_kwargs.

    The returned kernel walks the rows of a 2D ``values`` array once and
    keeps a separate running value and weight for every column. Its
    ``begin`` and ``end`` arguments are accepted but not read.

    Parameters
    ----------
    engine_kwargs : dict
        dictionary of arguments to be passed into numba.jit
    com : float
        The kernel derives ``alpha = 1 / (1 + com)`` from this value.
    adjust : bool
        If True, a new observation gets weight 1; otherwise ``alpha``.
    ignore_na : bool
        If True, NaN entries do not decay the running state.
    deltas : numpy.ndarray
        Exponents applied to the decay factor, indexed by row; only read
        when ``normalize`` is True.
    normalize: bool
        True computes the weighted mean, False the weighted sum.

    Returns
    -------
    Numba function
    """
    nopython, nogil, parallel = get_jit_arguments(engine_kwargs)

    str_key = "ewm_mean_table" if normalize else "ewm_sum_table"
    # NOTE(review): a new lambda object is created on every call and function
    # objects compare by identity, so this key cannot match an entry stored by
    # an earlier call - confirm whether a shared sentinel was intended.
    cache_key = (lambda x: x, str_key)
    if cache_key in NUMBA_FUNC_CACHE:
        return NUMBA_FUNC_CACHE[cache_key]

    numba = import_optional_dependency("numba")

    # error: Untyped decorator makes function "ewm_table" untyped
    @numba.jit(nopython=nopython, nogil=nogil,
               parallel=parallel)  # type: ignore[misc]
    def ewm_table(
        values: np.ndarray,
        begin: np.ndarray,
        end: np.ndarray,
        minimum_periods: int,
    ) -> np.ndarray:
        alpha = 1.0 / (1.0 + com)
        old_wt_factor = 1.0 - alpha
        new_wt = 1.0 if adjust else alpha
        # One accumulated weight per column.
        old_wt = np.ones(values.shape[1])

        result = np.empty(values.shape)
        # Copy so that updating the running values does not write into the
        # first row of ``values``.
        weighted = values[0].copy()
        nobs = (~np.isnan(weighted)).astype(np.int64)
        result[0] = np.where(nobs >= minimum_periods, weighted, np.nan)
        for i in range(1, len(values)):
            cur = values[i]
            is_observations = ~np.isnan(cur)
            nobs += is_observations.astype(np.int64)
            for j in numba.prange(len(cur)):
                if not np.isnan(weighted[j]):
                    # Decay the running state unless this is a NaN that
                    # ignore_na tells us to skip.
                    if is_observations[j] or not ignore_na:
                        if normalize:
                            # note that len(deltas) = len(vals) - 1 and deltas[i]
                            # is to be used in conjunction with vals[i+1]
                            old_wt[j] *= old_wt_factor**deltas[i - 1]
                        else:
                            # Sum mode decays the running total directly and
                            # does not read ``deltas``.
                            weighted[j] = old_wt_factor * weighted[j]
                        if is_observations[j]:
                            if normalize:
                                # avoid numerical errors on constant series
                                if weighted[j] != cur[j]:
                                    weighted[j] = (old_wt[j] * weighted[j] +
                                                   new_wt * cur[j])
                                    # Always true here: already inside the
                                    # ``if normalize`` branch above.
                                    if normalize:
                                        weighted[j] = weighted[j] / (
                                            old_wt[j] + new_wt)
                                if adjust:
                                    old_wt[j] += new_wt
                                else:
                                    old_wt[j] = 1.0
                            else:
                                weighted[j] += cur[j]
                elif is_observations[j]:
                    # First non-NaN value seen for this column.
                    weighted[j] = cur[j]

            # Emit NaN for columns that have not seen enough observations.
            result[i] = np.where(nobs >= minimum_periods, weighted, np.nan)

        return result

    return ewm_table
예제 #13
0
파일: numba_.py 프로젝트: ukarroum/pandas
def generate_numba_apply_func(
    kwargs: dict[str, Any],
    func: Callable[..., Scalar],
    engine_kwargs: dict[str, bool] | None,
    name: str,
):
    """
    Build a numba-jitted rolling apply loop around a user function.

    The user function is jitted and then called once per window from a
    jitted loop. The same jit options are used for the user function and
    for the wrapping loop.

    Parameters
    ----------
    kwargs : dict
        **kwargs intended for the user function; handed to
        ``get_jit_arguments`` together with ``engine_kwargs``
    func : function
        function to be applied to each window and will be JITed
    engine_kwargs : dict
        dictionary of arguments to be passed into numba.jit
    name: str
        name of the caller (Rolling/Expanding); used to build the cache key

    Returns
    -------
    Numba function
        A previously cached function is returned when
        ``(func, f"{name}_apply_single")`` is already present in
        ``NUMBA_FUNC_CACHE``.
    """
    nopython, nogil, parallel = get_jit_arguments(engine_kwargs, kwargs)

    cache_key = (func, f"{name}_apply_single")
    if cache_key in NUMBA_FUNC_CACHE:
        return NUMBA_FUNC_CACHE[cache_key]

    numba_func = jit_user_function(func, nopython, nogil, parallel)
    numba = import_optional_dependency("numba")

    # error: Untyped decorator makes function "roll_apply" untyped
    @numba.jit(nopython=nopython, nogil=nogil,
               parallel=parallel)  # type: ignore[misc]
    def roll_apply(
        values: np.ndarray,
        begin: np.ndarray,
        end: np.ndarray,
        minimum_periods: int,
        *args: Any,
    ) -> np.ndarray:
        result = np.empty(len(begin))
        for i in numba.prange(len(result)):
            window = values[begin[i]:end[i]]
            valid_count = len(window) - np.sum(np.isnan(window))
            # Windows with too few non-NaN entries yield NaN.
            if valid_count >= minimum_periods:
                result[i] = numba_func(window, *args)
            else:
                result[i] = np.nan
        return result

    return roll_apply
예제 #14
0
파일: numba_.py 프로젝트: ukarroum/pandas
def generate_numba_table_func(
    kwargs: dict[str, Any],
    func: Callable[..., np.ndarray],
    engine_kwargs: dict[str, bool] | None,
    name: str,
):
    """
    Build a numba-jitted loop that applies a window function table-wise.

    ``func`` receives an M (window size) x N (number of columns) array and
    must return a 1 x N array. Func is intended to operate row-wise, but the
    result will be transposed for axis=1.

    The user function is jitted and then called once per window from a
    jitted loop.

    Parameters
    ----------
    kwargs : dict
        **kwargs intended for the user function; handed to
        ``get_jit_arguments`` together with ``engine_kwargs``
    func : function
        function to be applied to each window and will be JITed
    engine_kwargs : dict
        dictionary of arguments to be passed into numba.jit
    name : str
        caller (Rolling/Expanding) and original method name for numba cache key

    Returns
    -------
    Numba function
        A previously cached function is returned when
        ``(func, f"{name}_table")`` is already present in
        ``NUMBA_FUNC_CACHE``.
    """
    nopython, nogil, parallel = get_jit_arguments(engine_kwargs, kwargs)

    cache_key = (func, f"{name}_table")
    if cache_key in NUMBA_FUNC_CACHE:
        return NUMBA_FUNC_CACHE[cache_key]

    numba_func = jit_user_function(func, nopython, nogil, parallel)
    numba = import_optional_dependency("numba")

    # error: Untyped decorator makes function "roll_table" untyped
    @numba.jit(nopython=nopython, nogil=nogil,
               parallel=parallel)  # type: ignore[misc]
    def roll_table(
        values: np.ndarray,
        begin: np.ndarray,
        end: np.ndarray,
        minimum_periods: int,
        *args: Any,
    ):
        result = np.empty(values.shape)
        min_periods_mask = np.empty(values.shape)
        for i in numba.prange(len(result)):
            window = values[begin[i]:end[i]]
            # Per-column count of non-NaN entries, taken before func runs.
            valid_counts = len(window) - np.sum(np.isnan(window), axis=0)
            result[i, :] = numba_func(window, *args)
            min_periods_mask[i, :] = valid_counts >= minimum_periods
        # Blank out entries whose column had too few observations.
        return np.where(min_periods_mask, result, np.nan)

    return roll_table
예제 #15
0
    def mean(self, *args, update=None, update_times=None, **kwargs):
        """
        Calculate an online exponentially weighted mean.

        Parameters
        ----------
        update: DataFrame or Series, default None
            New values from which to continue the exponentially weighted
            mean, starting from the last stored values and weights.
            Values should be float64 dtype.

            ``update`` must be ``None`` on the first call; passing it before
            a mean has been computed raises ``ValueError``.

        update_times: Series or 1-D np.ndarray, default None
            New times from which to continue the exponentially weighted
            mean. If ``None``, values are assumed to be evenly spaced
            in time.
            This feature is currently unsupported.

        Returns
        -------
        DataFrame or Series

        Raises
        ------
        NotImplementedError
            If ``update_times`` is not ``None``.
        ValueError
            If ``update`` is given before any mean has been computed.

        Examples
        --------
        >>> df = pd.DataFrame({"a": range(5), "b": range(5, 10)})
        >>> online_ewm = df.head(2).ewm(0.5).online()
        >>> online_ewm.mean()
              a     b
        0  0.00  5.00
        1  0.75  5.75
        >>> online_ewm.mean(update=df.tail(3))
                  a         b
        2  1.615385  6.615385
        3  2.550000  7.550000
        4  3.520661  8.520661
        >>> online_ewm.reset()
        >>> online_ewm.mean()
              a     b
        0  0.00  5.00
        1  0.75  5.75
        """
        selected = self._selected_obj
        is_frame = selected.ndim == 2

        if update_times is not None:
            raise NotImplementedError("update_times is not implemented.")
        # Evenly spaced observations: every delta is 1.
        num_deltas = max(selected.shape[self.axis - 1] - 1, 0)
        update_deltas = np.ones(num_deltas, dtype=np.float64)

        if update is None:
            # First pass: run over the originally selected data.
            result_from = 0
            result_kwargs = {"index": selected.index}
            if is_frame:
                result_kwargs["columns"] = selected.columns
            else:
                result_kwargs["name"] = selected.name
            np_array = selected.astype(np.float64).to_numpy()
        else:
            last_ewm = self._mean.last_ewm
            if last_ewm is None:
                raise ValueError(
                    "Must call mean with update=None first before passing update"
                )
            # The previous mean is prepended as a seed row and dropped again
            # from the output via ``result_from``.
            result_from = 1
            result_kwargs = {"index": update.index}
            if is_frame:
                last_value = last_ewm[np.newaxis, :]
                result_kwargs["columns"] = update.columns
            else:
                last_value = last_ewm
                result_kwargs["name"] = update.name
            np_array = np.concatenate((last_value, update.to_numpy()))

        ewma_func = generate_online_numba_ewma_func(
            **get_jit_arguments(self.engine_kwargs))
        # The state object is always fed a 2D array.
        values_2d = np_array if is_frame else np_array[:, np.newaxis]
        result = self._mean.run_ewm(
            values_2d,
            update_deltas,
            self.min_periods,
            ewma_func,
        )
        if not is_frame:
            result = result.squeeze()
        trimmed = result[result_from:]
        return self._selected_obj._constructor(trimmed, **result_kwargs)