def sum(self, *args, engine=None, engine_kwargs=None, **kwargs):
    """
    Calculate the exponentially weighted sum (the un-normalized ewm).

    Parameters
    ----------
    *args
        Only forwarded to ``nv.validate_window_func`` on the cython path.
    engine : str, default None
        ``"cython"`` or ``None`` selects ``window_aggregations.ewm``; any
        value accepted by ``maybe_use_numba`` selects the numba kernels.
    engine_kwargs : dict, default None
        Handed to ``get_jit_arguments`` for the numba engine. Must be
        ``None`` for the cython engine.
    **kwargs
        Only forwarded to ``nv.validate_window_func`` on the cython path.

    Returns
    -------
    Whatever ``self._apply`` returns for the selected kernel.

    Raises
    ------
    NotImplementedError
        If ``self.adjust`` is falsy.
    ValueError
        If ``engine`` is not recognised, or ``engine_kwargs`` is given
        together with the cython engine.
    """
    if not self.adjust:
        raise NotImplementedError("sum is not implemented with adjust=False")

    if maybe_use_numba(engine):
        # "single" works column by column; anything else uses the
        # table-wise kernel.
        factory = (
            generate_numba_ewm_func
            if self.method == "single"
            else generate_numba_ewm_table_func
        )
        numba_kernel = factory(
            **get_jit_arguments(engine_kwargs),
            com=self._com,
            adjust=self.adjust,
            ignore_na=self.ignore_na,
            deltas=tuple(self._deltas),
            normalize=False,
        )
        return self._apply(numba_kernel)

    if engine not in ("cython", None):
        raise ValueError("engine must be either 'numba' or 'cython'")
    if engine_kwargs is not None:
        raise ValueError("cython engine does not accept engine_kwargs")

    nv.validate_window_func("sum", args, kwargs)

    # Deltas are only passed along when explicit times were supplied.
    if self.times is None:
        deltas = None
    else:
        deltas = self._deltas
    cython_kernel = partial(
        window_aggregations.ewm,
        com=self._com,
        adjust=self.adjust,
        ignore_na=self.ignore_na,
        deltas=deltas,
        normalize=False,
    )
    return self._apply(cython_kernel)
def generate_numba_agg_func(
    kwargs: dict[str, Any],
    func: Callable[..., Scalar],
    engine_kwargs: dict[str, bool] | None,
) -> Callable[[np.ndarray, np.ndarray, np.ndarray, np.ndarray, int, Any], np.ndarray]:
    """
    Build a numba-compiled groupby aggregation kernel around a user function.

    The user function is jitted first and then called from a jitted loop
    over groups and columns. The jit flags derived from ``engine_kwargs``
    are applied to both the user function and that loop.

    Parameters
    ----------
    kwargs : dict
        **kwargs intended for ``func``; here they are only handed to
        ``get_jit_arguments``.
    func : function
        Function evaluated once per (group, column); it is JITed.
    engine_kwargs : dict
        Arguments for ``numba.jit``.

    Returns
    -------
    Numba function
        The cached kernel when ``(func, "groupby_agg")`` is already in
        ``NUMBA_FUNC_CACHE``, otherwise a newly built one.
    """
    nopython, nogil, parallel = get_jit_arguments(engine_kwargs, kwargs)

    validate_udf(func)

    key = (func, "groupby_agg")
    if key in NUMBA_FUNC_CACHE:
        return NUMBA_FUNC_CACHE[key]

    jitted_udf = jit_user_function(func, nopython, nogil, parallel)
    numba = import_optional_dependency("numba")

    @numba.jit(nopython=nopython, nogil=nogil, parallel=parallel)
    def group_agg(
        values: np.ndarray,
        index: np.ndarray,
        begin: np.ndarray,
        end: np.ndarray,
        num_columns: int,
        *args: Any,
    ) -> np.ndarray:

        assert len(begin) == len(end)
        n_groups = len(begin)

        out = np.empty((n_groups, num_columns))
        for grp in numba.prange(n_groups):
            lo = begin[grp]
            hi = end[grp]
            # The index slice is shared by every column of the group.
            grp_index = index[lo:hi]
            for col in numba.prange(num_columns):
                out[grp, col] = jitted_udf(values[lo:hi, col], grp_index, *args)
        return out

    return group_agg
def generate_numba_apply_func(
    args: Tuple,
    kwargs: Dict[str, Any],
    func: Callable[..., Scalar],
    engine_kwargs: Optional[Dict[str, bool]],
):
    """
    Build the numba-compiled kernel behind a rolling ``apply``.

    The user function is jitted first and then called from a jitted loop
    over windows. The jit flags derived from ``engine_kwargs`` are applied
    to both the user function and that loop.

    Parameters
    ----------
    args : tuple
        *args passed to ``func`` after the window.
    kwargs : dict
        **kwargs intended for ``func``; here they are only handed to
        ``get_jit_arguments``.
    func : function
        Function evaluated on each window; it is JITed.
    engine_kwargs : dict
        Arguments for ``numba.jit``.

    Returns
    -------
    Numba function
        The cached kernel when ``(func, "rolling_apply")`` is already in
        ``NUMBA_FUNC_CACHE``, otherwise a newly built one.
    """
    nopython, nogil, parallel = get_jit_arguments(engine_kwargs, kwargs)

    key = (func, "rolling_apply")
    if key in NUMBA_FUNC_CACHE:
        return NUMBA_FUNC_CACHE[key]

    jitted_udf = jit_user_function(func, nopython, nogil, parallel)
    numba = import_optional_dependency("numba")

    # prange is only used when a parallel build was requested.
    loop_range = numba.prange if parallel else range

    @numba.jit(nopython=nopython, nogil=nogil, parallel=parallel)
    def roll_apply(
        values: np.ndarray, begin: np.ndarray, end: np.ndarray, minimum_periods: int
    ) -> np.ndarray:
        out = np.empty(len(begin))
        for pos in loop_range(len(out)):
            window = values[begin[pos] : end[pos]]
            n_missing = np.sum(np.isnan(window))
            # Windows with too few non-NaN entries yield NaN without
            # calling the user function.
            if len(window) - n_missing >= minimum_periods:
                out[pos] = jitted_udf(window, *args)
            else:
                out[pos] = np.nan
        return out

    return roll_apply
def _aggregate_series_pure_python(
    self,
    obj: Series,
    func: F,
    *args,
    engine: str = "cython",
    engine_kwargs=None,
    **kwargs,
):
    """
    Aggregate ``obj`` group by group with a Python-level loop.

    Parameters
    ----------
    obj : Series
        Values to aggregate; split according to ``self.group_info``.
    func : function
        Reducer applied to every group.
    *args
        Positional arguments forwarded to ``func``.
    engine : str, default "cython"
        With ``"numba"`` the function is jitted and receives the group's
        values and index as produced by ``split_for_numba``; otherwise
        ``func(group, *args, **kwargs)`` is called directly.
    engine_kwargs : dict, default None
        Arguments for ``numba.jit``; only read for the numba engine.
    **kwargs
        Keyword arguments forwarded to ``func`` (non-numba path).

    Returns
    -------
    tuple
        ``(result, counts)``: the per-group results after
        ``lib.maybe_convert_objects`` and the per-group sizes.

    Raises
    ------
    ValueError
        If the first group's result is a Series/Index/ndarray whose length
        is not 1 ("Function does not reduce").
    """
    if engine == "numba":
        nopython, nogil, parallel = get_jit_arguments(engine_kwargs)
        check_kwargs_and_nopython(kwargs, nopython)
        validate_udf(func)
        cache_key = (func, "groupby_agg")
        # Look the key up first: ``dict.get(key, default)`` evaluates its
        # default eagerly, which re-wrapped the UDF on every call even when
        # a compiled version was already cached.
        if cache_key in NUMBA_FUNC_CACHE:
            numba_func = NUMBA_FUNC_CACHE[cache_key]
        else:
            numba_func = jit_user_function(func, nopython, nogil, parallel)

    group_index, _, ngroups = self.group_info

    counts = np.zeros(ngroups, dtype=int)
    result = None

    splitter = get_splitter(obj, group_index, ngroups, axis=0)

    for label, group in splitter:
        if engine == "numba":
            values, index = split_for_numba(group)
            res = numba_func(values, index, *args)
            # Only cache once the jitted function has run successfully.
            if cache_key not in NUMBA_FUNC_CACHE:
                NUMBA_FUNC_CACHE[cache_key] = numba_func
        else:
            res = func(group, *args, **kwargs)

        if result is None:
            # The first result decides whether ``func`` counts as a reducer.
            if isinstance(res, (Series, Index, np.ndarray)):
                if len(res) == 1:
                    # e.g. test_agg_lambda_with_timezone lambda e: e.head(1)
                    # FIXME: are we potentially losing important res.index info?
                    res = res.item()
                else:
                    raise ValueError("Function does not reduce")
            result = np.empty(ngroups, dtype="O")

        counts[label] = group.shape[0]
        result[label] = res

    assert result is not None
    result = lib.maybe_convert_objects(result, try_float=0)
    # TODO: maybe_cast_to_extension_array?

    return result, counts
def generate_shared_aggregator(
    func: Callable[..., Scalar],
    engine_kwargs: dict[str, bool] | None,
    cache_key_str: str,
):
    """
    Wrap a 1D numba kernel so that it is applied to each column of a 2D array.

    Parameters
    ----------
    func : function
        Aggregation kernel called once per column as
        ``func(column, start, end, min_periods, *args)``.
    engine_kwargs : dict
        Arguments for ``numba.jit``.
    cache_key_str : str
        Second element of the ``NUMBA_FUNC_CACHE`` key, of the form
        <caller_type>_<aggregation_type>, e.g. rolling_mean, groupby_mean.

    Returns
    -------
    Numba function
        The cached looper when ``(func, cache_key_str)`` is already in
        ``NUMBA_FUNC_CACHE``, otherwise a newly built one.
    """
    nopython, nogil, parallel = get_jit_arguments(engine_kwargs, None)

    key = (func, cache_key_str)
    if key in NUMBA_FUNC_CACHE:
        return NUMBA_FUNC_CACHE[key]

    if TYPE_CHECKING:
        import numba
    else:
        numba = import_optional_dependency("numba")

    @numba.jit(nopython=nopython, nogil=nogil, parallel=parallel)
    def column_looper(
        values: np.ndarray,
        start: np.ndarray,
        end: np.ndarray,
        min_periods: int,
        *args,
    ):
        n_cols = values.shape[1]
        # One output row per window, one output column per input column.
        out = np.empty((len(start), n_cols), dtype=np.float64)
        for col in numba.prange(n_cols):
            out[:, col] = func(values[:, col], start, end, min_periods, *args)
        return out

    return column_looper
def mean(
    self,
    numeric_only: bool = False,
    *args,
    engine=None,
    engine_kwargs=None,
    **kwargs,
):
    """
    Calculate the exponentially weighted mean (the normalized ewm).

    Parameters
    ----------
    numeric_only : bool, default False
        Forwarded to ``self._apply`` on the cython path only.
    *args
        Passed to ``maybe_warn_args_and_kwargs`` and, on the cython path,
        to ``nv.validate_window_func``.
    engine : str, default None
        ``"cython"`` or ``None`` selects ``window_aggregations.ewm``; any
        value accepted by ``maybe_use_numba`` selects the numba kernels.
    engine_kwargs : dict, default None
        Handed to ``get_jit_arguments`` for the numba engine. Must be
        ``None`` for the cython engine.
    **kwargs
        Handled like ``*args``.

    Returns
    -------
    Whatever ``self._apply`` returns for the selected kernel.

    Raises
    ------
    ValueError
        If ``engine`` is not recognised, or ``engine_kwargs`` is given
        together with the cython engine.
    """
    maybe_warn_args_and_kwargs(type(self), "mean", args, kwargs)

    if maybe_use_numba(engine):
        # "single" works column by column; anything else uses the
        # table-wise kernel.
        factory = (
            generate_numba_ewm_func
            if self.method == "single"
            else generate_numba_ewm_table_func
        )
        numba_kernel = factory(
            **get_jit_arguments(engine_kwargs),
            com=self._com,
            adjust=self.adjust,
            ignore_na=self.ignore_na,
            deltas=tuple(self._deltas),
            normalize=True,
        )
        return self._apply(numba_kernel, name="mean")

    if engine not in ("cython", None):
        raise ValueError("engine must be either 'numba' or 'cython'")
    if engine_kwargs is not None:
        raise ValueError("cython engine does not accept engine_kwargs")

    nv.validate_window_func("mean", args, kwargs)

    # Deltas are only passed along when explicit times were supplied.
    if self.times is None:
        deltas = None
    else:
        deltas = self._deltas
    cython_kernel = partial(
        window_aggregations.ewm,
        com=self._com,
        adjust=self.adjust,
        ignore_na=self.ignore_na,
        deltas=deltas,
        normalize=True,
    )
    return self._apply(cython_kernel, name="mean", numeric_only=numeric_only)
def generate_numba_func(
    func: Callable,
    engine_kwargs: Optional[Dict[str, bool]],
    kwargs: dict,
    cache_key_str: str,
) -> Tuple[Callable, Tuple[Callable, str]]:
    """
    Return a JITed function and cache key for the NUMBA_FUNC_CACHE

    This _may_ be specific to groupby (as it's only used there currently).

    The function does not write to ``NUMBA_FUNC_CACHE``; the key is returned
    so that the caller can store the jitted function itself.

    Parameters
    ----------
    func : function
        user defined function
    engine_kwargs : dict or None
        numba.jit arguments
    kwargs : dict
        kwargs for func
    cache_key_str : str
        string representing the second part of the cache key tuple

    Returns
    -------
    (JITed function, cache key)

    Raises
    ------
    NumbaUtilError
    """
    nopython, nogil, parallel = get_jit_arguments(engine_kwargs)
    check_kwargs_and_nopython(kwargs, nopython)
    validate_udf(func)
    cache_key = (func, cache_key_str)
    # Check the cache before jitting: ``dict.get(key, default)`` evaluates
    # its default eagerly, so the previous form wrapped ``func`` again on
    # every call and threw the wrapper away on a cache hit.
    if cache_key in NUMBA_FUNC_CACHE:
        numba_func = NUMBA_FUNC_CACHE[cache_key]
    else:
        numba_func = jit_user_function(func, nopython, nogil, parallel)
    return numba_func, cache_key
def generate_online_numba_ewma_func(engine_kwargs: Optional[Dict[str, bool]]):
    """
    Generate a numba jitted online ewma function specified by values
    from engine_kwargs.

    Parameters
    ----------
    engine_kwargs : dict
        dictionary of arguments to be passed into numba.jit

    Returns
    -------
    Numba function
        ``online_ewma(values, deltas, minimum_periods, old_wt_factor, new_wt,
        old_wt, adjust, ignore_na)`` returning ``(result, old_wt)``.
    """
    nopython, nogil, parallel = get_jit_arguments(engine_kwargs)

    # NOTE(review): the key holds a lambda created by this very call, and
    # function objects compare by identity, so this lookup looks like it can
    # never hit - confirm whether caching is intended here.
    cache_key = (lambda x: x, "online_ewma")
    if cache_key in NUMBA_FUNC_CACHE:
        return NUMBA_FUNC_CACHE[cache_key]

    numba = import_optional_dependency("numba")

    @numba.jit(nopython=nopython, nogil=nogil, parallel=parallel)
    def online_ewma(
        values: np.ndarray,
        deltas: np.ndarray,
        minimum_periods: int,
        old_wt_factor: float,
        new_wt: float,
        old_wt: np.ndarray,
        adjust: bool,
        ignore_na: bool,
    ):
        """
        Compute online exponentially weighted mean per column over 2D values.

        Takes the first observation as is, then computes the subsequent
        exponentially weighted mean accounting minimum periods.
        """
        result = np.empty(values.shape)
        # values[0] is not copied, so the in-place updates of weighted_avg
        # below also write into the first row of ``values``.
        weighted_avg = values[0]
        nobs = (~np.isnan(weighted_avg)).astype(np.int64)
        result[0] = np.where(nobs >= minimum_periods, weighted_avg, np.nan)

        for i in range(1, len(values)):
            cur = values[i]
            is_observations = ~np.isnan(cur)
            nobs += is_observations.astype(np.int64)
            for j in numba.prange(len(cur)):
                if not np.isnan(weighted_avg[j]):
                    if is_observations[j] or not ignore_na:

                        # note that len(deltas) = len(vals) - 1 and deltas[i] is to be
                        # used in conjunction with vals[i+1]
                        # NOTE(review): the index below is the column counter
                        # ``j``, not the row counter ``i`` described above -
                        # confirm together with how the caller sizes deltas.
                        old_wt[j] *= old_wt_factor**deltas[j - 1]
                        if is_observations[j]:
                            # avoid numerical errors on constant series
                            if weighted_avg[j] != cur[j]:
                                weighted_avg[j] = (
                                    (old_wt[j] * weighted_avg[j]) + (new_wt * cur[j])
                                ) / (old_wt[j] + new_wt)
                            if adjust:
                                old_wt[j] += new_wt
                            else:
                                old_wt[j] = 1.0
                elif is_observations[j]:
                    # No running average yet: start from this observation.
                    weighted_avg[j] = cur[j]

            result[i] = np.where(nobs >= minimum_periods, weighted_avg, np.nan)

        # old_wt was updated in place; it is returned alongside the result.
        return result, old_wt

    return online_ewma
def generate_numba_groupby_ewma_func(
    engine_kwargs: Optional[Dict[str, bool]],
    com: float,
    adjust: bool,
    ignore_na: bool,
):
    """
    Build a numba-compiled exponentially weighted mean evaluated per group.

    Parameters
    ----------
    engine_kwargs : dict
        Arguments for ``numba.jit``.
    com : float
        Used as ``alpha = 1 / (1 + com)``.
    adjust : bool
        When true a new observation gets weight 1 and the old weight
        accumulates; otherwise the new weight is ``alpha`` and the old
        weight is reset to 1 after each observation.
    ignore_na : bool
        When true, NaN entries do not decay the old weight.

    Returns
    -------
    Numba function
        ``groupby_ewma(values, begin, end, minimum_periods)``, where
        ``begin[i]:end[i]`` delimits group ``i`` inside ``values``.
    """
    nopython, nogil, parallel = get_jit_arguments(engine_kwargs)

    cache_key = (lambda x: x, "groupby_ewma")
    if cache_key in NUMBA_FUNC_CACHE:
        return NUMBA_FUNC_CACHE[cache_key]

    numba = import_optional_dependency("numba")

    # prange is only used when a parallel build was requested.
    loop_range = numba.prange if parallel else range

    @numba.jit(nopython=nopython, nogil=nogil, parallel=parallel)
    def groupby_ewma(
        values: np.ndarray,
        begin: np.ndarray,
        end: np.ndarray,
        minimum_periods: int,
    ) -> np.ndarray:
        out = np.empty(len(values))
        alpha = 1.0 / (1.0 + com)
        # Both factors are the same for every group.
        decay = 1.0 - alpha
        weight_new = 1.0 if adjust else alpha

        for grp in loop_range(len(begin)):
            lo = begin[grp]
            hi = end[grp]
            window = values[lo:hi]
            group_out = np.empty(len(window))

            avg = window[0]
            nobs = int(not np.isnan(avg))
            if nobs >= minimum_periods:
                group_out[0] = avg
            else:
                group_out[0] = np.nan
            weight_old = 1.0

            for pos in range(1, len(window)):
                cur = window[pos]
                observed = not np.isnan(cur)
                nobs += observed

                if np.isnan(avg):
                    # No running average yet: start from the first
                    # observation.
                    if observed:
                        avg = cur
                elif observed or not ignore_na:
                    weight_old *= decay
                    if observed:
                        # avoid numerical errors on constant series
                        if avg != cur:
                            avg = ((weight_old * avg) + (weight_new * cur)) / (
                                weight_old + weight_new
                            )
                        if adjust:
                            weight_old += weight_new
                        else:
                            weight_old = 1.0

                if nobs >= minimum_periods:
                    group_out[pos] = avg
                else:
                    group_out[pos] = np.nan

            out[lo:hi] = group_out

        return out

    return groupby_ewma
def generate_numba_transform_func(
    args: Tuple,
    kwargs: Dict[str, Any],
    func: Callable[..., Scalar],
    engine_kwargs: Optional[Dict[str, bool]],
) -> Callable[[np.ndarray, np.ndarray, np.ndarray, np.ndarray, int, int], np.ndarray]:
    """
    Build a numba-compiled groupby transform kernel around a user function.

    The user function is jitted first and then called from a jitted loop
    over groups and columns. The jit flags derived from ``engine_kwargs``
    are applied to both the user function and that loop.

    Parameters
    ----------
    args : tuple
        *args passed to ``func`` after the group values and index.
    kwargs : dict
        **kwargs intended for ``func``; checked by
        ``check_kwargs_and_nopython``.
    func : function
        Function evaluated once per (group, column); it is JITed.
    engine_kwargs : dict
        Arguments for ``numba.jit``.

    Returns
    -------
    Numba function
        ``group_transform(values, index, begin, end, num_groups,
        num_columns)``; each call result is written back over the rows of
        its group.
    """
    nopython, nogil, parallel = get_jit_arguments(engine_kwargs)

    check_kwargs_and_nopython(kwargs, nopython)

    validate_udf(func)

    jitted_udf = jit_user_function(func, nopython, nogil, parallel)

    numba = import_optional_dependency("numba")

    # prange is only used when a parallel build was requested.
    loop_range = numba.prange if parallel else range

    @numba.jit(nopython=nopython, nogil=nogil, parallel=parallel)
    def group_transform(
        values: np.ndarray,
        index: np.ndarray,
        begin: np.ndarray,
        end: np.ndarray,
        num_groups: int,
        num_columns: int,
    ) -> np.ndarray:
        out = np.empty((len(values), num_columns))
        for grp in loop_range(num_groups):
            lo = begin[grp]
            hi = end[grp]
            # The index slice is shared by every column of the group.
            grp_index = index[lo:hi]
            for col in loop_range(num_columns):
                out[lo:hi, col] = jitted_udf(values[lo:hi, col], grp_index, *args)
        return out

    return group_transform
def generate_numba_ewm_func(
    engine_kwargs: dict[str, bool] | None,
    com: float,
    adjust: bool,
    ignore_na: bool,
    deltas: np.ndarray,
    normalize: bool,
):
    """
    Generate a numba jitted ewm mean or sum function specified by values
    from engine_kwargs.

    Parameters
    ----------
    engine_kwargs : dict
        dictionary of arguments to be passed into numba.jit
    com : float
        Used as ``alpha = 1 / (1 + com)``.
    adjust : bool
        When true a new observation gets weight 1 and the old weight
        accumulates; otherwise the new weight is ``alpha``.
    ignore_na : bool
        When true, NaN entries trigger neither the weight decay nor the
        decay of the running sum.
    deltas : numpy.ndarray
        Exponents applied to the decay factor; only read when
        ``normalize`` is true.
    normalize : bool
        True computes the weighted mean, False the weighted sum.

    Returns
    -------
    Numba function
        ``ewm(values, begin, end, minimum_periods)``, where
        ``begin[i]:end[i]`` delimits window ``i`` inside ``values``.
    """
    nopython, nogil, parallel = get_jit_arguments(engine_kwargs)

    str_key = "ewm_mean" if normalize else "ewm_sum"
    # NOTE(review): the key holds a lambda created by this very call, and
    # function objects compare by identity, so this lookup looks like it can
    # never hit - confirm whether caching is intended here.
    cache_key = (lambda x: x, str_key)
    if cache_key in NUMBA_FUNC_CACHE:
        return NUMBA_FUNC_CACHE[cache_key]

    numba = import_optional_dependency("numba")

    # error: Untyped decorator makes function "ewm" untyped
    @numba.jit(nopython=nopython, nogil=nogil, parallel=parallel)  # type: ignore[misc]
    def ewm(
        values: np.ndarray,
        begin: np.ndarray,
        end: np.ndarray,
        minimum_periods: int,
    ) -> np.ndarray:
        result = np.empty(len(values))
        alpha = 1.0 / (1.0 + com)
        old_wt_factor = 1.0 - alpha
        new_wt = 1.0 if adjust else alpha

        for i in numba.prange(len(begin)):
            start = begin[i]
            stop = end[i]
            window = values[start:stop]
            sub_result = np.empty(len(window))

            # The first element of the window seeds the running value.
            weighted = window[0]
            nobs = int(not np.isnan(weighted))
            sub_result[0] = weighted if nobs >= minimum_periods else np.nan
            old_wt = 1.0

            for j in range(1, len(window)):
                cur = window[j]
                is_observation = not np.isnan(cur)
                nobs += is_observation
                if not np.isnan(weighted):

                    if is_observation or not ignore_na:
                        if normalize:
                            # note that len(deltas) = len(vals) - 1 and deltas[i]
                            # is to be used in conjunction with vals[i+1]
                            old_wt *= old_wt_factor**deltas[start + j - 1]
                        else:
                            # Sum mode decays the running sum itself.
                            weighted = old_wt_factor * weighted
                        if is_observation:
                            if normalize:
                                # avoid numerical errors on constant series
                                if weighted != cur:
                                    weighted = old_wt * weighted + new_wt * cur
                                    # This branch is already inside
                                    # ``if normalize:``, so the check below
                                    # is always true here.
                                    if normalize:
                                        weighted = weighted / (old_wt + new_wt)
                                if adjust:
                                    old_wt += new_wt
                                else:
                                    old_wt = 1.0
                            else:
                                weighted += cur
                elif is_observation:
                    # No running value yet: start from this observation.
                    weighted = cur

                sub_result[j] = weighted if nobs >= minimum_periods else np.nan

            result[start:stop] = sub_result

        return result

    return ewm
def generate_numba_ewm_table_func(
    engine_kwargs: dict[str, bool] | None,
    com: float,
    adjust: bool,
    ignore_na: bool,
    deltas: np.ndarray,
    normalize: bool,
):
    """
    Generate a numba jitted ewm mean or sum function applied table wise specified
    by values from engine_kwargs.

    Parameters
    ----------
    engine_kwargs : dict
        dictionary of arguments to be passed into numba.jit
    com : float
        Used as ``alpha = 1 / (1 + com)``.
    adjust : bool
        When true a new observation gets weight 1 and the old weight
        accumulates; otherwise the new weight is ``alpha``.
    ignore_na : bool
        When true, NaN entries trigger neither the weight decay nor the
        decay of the running sum.
    deltas : numpy.ndarray
        Exponents applied to the decay factor, indexed by row; only read
        when ``normalize`` is true.
    normalize: bool
        True computes the weighted mean, False the weighted sum.

    Returns
    -------
    Numba function
        ``ewm_table(values, begin, end, minimum_periods)`` returning an
        array with the shape of ``values``.
    """
    nopython, nogil, parallel = get_jit_arguments(engine_kwargs)

    str_key = "ewm_mean_table" if normalize else "ewm_sum_table"
    # NOTE(review): the key holds a lambda created by this very call, and
    # function objects compare by identity, so this lookup looks like it can
    # never hit - confirm whether caching is intended here.
    cache_key = (lambda x: x, str_key)
    if cache_key in NUMBA_FUNC_CACHE:
        return NUMBA_FUNC_CACHE[cache_key]

    numba = import_optional_dependency("numba")

    # error: Untyped decorator makes function "ewm_table" untyped
    @numba.jit(nopython=nopython, nogil=nogil, parallel=parallel)  # type: ignore[misc]
    def ewm_table(
        values: np.ndarray,
        begin: np.ndarray,
        end: np.ndarray,
        minimum_periods: int,
    ) -> np.ndarray:
        # begin and end are accepted but never read in this body; rows of
        # ``values`` are processed in order from the first to the last.
        alpha = 1.0 / (1.0 + com)
        old_wt_factor = 1.0 - alpha
        new_wt = 1.0 if adjust else alpha
        # One running weight per column.
        old_wt = np.ones(values.shape[1])

        result = np.empty(values.shape)
        # Copy so that the in-place updates below leave ``values`` untouched.
        weighted = values[0].copy()
        nobs = (~np.isnan(weighted)).astype(np.int64)
        result[0] = np.where(nobs >= minimum_periods, weighted, np.nan)
        for i in range(1, len(values)):
            cur = values[i]
            is_observations = ~np.isnan(cur)
            nobs += is_observations.astype(np.int64)
            for j in numba.prange(len(cur)):
                if not np.isnan(weighted[j]):
                    if is_observations[j] or not ignore_na:
                        if normalize:
                            # note that len(deltas) = len(vals) - 1 and deltas[i]
                            # is to be used in conjunction with vals[i+1]
                            old_wt[j] *= old_wt_factor**deltas[i - 1]
                        else:
                            # Sum mode decays the running sum itself.
                            weighted[j] = old_wt_factor * weighted[j]
                        if is_observations[j]:
                            if normalize:
                                # avoid numerical errors on constant series
                                if weighted[j] != cur[j]:
                                    weighted[j] = (
                                        old_wt[j] * weighted[j] + new_wt * cur[j]
                                    )
                                    # This branch is already inside
                                    # ``if normalize:``, so the check below
                                    # is always true here.
                                    if normalize:
                                        weighted[j] = weighted[j] / (
                                            old_wt[j] + new_wt
                                        )
                                if adjust:
                                    old_wt[j] += new_wt
                                else:
                                    old_wt[j] = 1.0
                            else:
                                weighted[j] += cur[j]
                elif is_observations[j]:
                    # No running value yet: start from this observation.
                    weighted[j] = cur[j]

            result[i] = np.where(nobs >= minimum_periods, weighted, np.nan)

        return result

    return ewm_table
def generate_numba_apply_func(
    kwargs: dict[str, Any],
    func: Callable[..., Scalar],
    engine_kwargs: dict[str, bool] | None,
    name: str,
):
    """
    Build the numba-compiled kernel behind a window ``apply``.

    The user function is jitted first and then called from a jitted loop
    over windows. The jit flags derived from ``engine_kwargs`` are applied
    to both the user function and that loop.

    Parameters
    ----------
    kwargs : dict
        **kwargs intended for ``func``; here they are only handed to
        ``get_jit_arguments``.
    func : function
        Function evaluated on each window; it is JITed.
    engine_kwargs : dict
        Arguments for ``numba.jit``.
    name: str
        Name of the caller (Rolling/Expanding); becomes part of the cache
        key as ``f"{name}_apply_single"``.

    Returns
    -------
    Numba function
        The cached kernel when the key is already in ``NUMBA_FUNC_CACHE``,
        otherwise a newly built one.
    """
    nopython, nogil, parallel = get_jit_arguments(engine_kwargs, kwargs)

    key = (func, f"{name}_apply_single")
    if key in NUMBA_FUNC_CACHE:
        return NUMBA_FUNC_CACHE[key]

    jitted_udf = jit_user_function(func, nopython, nogil, parallel)
    numba = import_optional_dependency("numba")

    # error: Untyped decorator makes function "roll_apply" untyped
    @numba.jit(nopython=nopython, nogil=nogil, parallel=parallel)  # type: ignore[misc]
    def roll_apply(
        values: np.ndarray,
        begin: np.ndarray,
        end: np.ndarray,
        minimum_periods: int,
        *args: Any,
    ) -> np.ndarray:
        out = np.empty(len(begin))
        for pos in numba.prange(len(out)):
            window = values[begin[pos] : end[pos]]
            n_missing = np.sum(np.isnan(window))
            # Windows with too few non-NaN entries yield NaN without
            # calling the user function.
            if len(window) - n_missing >= minimum_periods:
                out[pos] = jitted_udf(window, *args)
            else:
                out[pos] = np.nan
        return out

    return roll_apply
def generate_numba_table_func(
    kwargs: dict[str, Any],
    func: Callable[..., np.ndarray],
    engine_kwargs: dict[str, bool] | None,
    name: str,
):
    """
    Build a numba-compiled kernel that applies window calculations table-wise.

    Func will be passed a M window size x N number of columns array, and
    must return a 1 x N number of columns array. Func is intended to operate
    row-wise, but the result will be transposed for axis=1.

    The user function is jitted first and then called from a jitted loop
    over windows.

    Parameters
    ----------
    kwargs : dict
        **kwargs intended for ``func``; here they are only handed to
        ``get_jit_arguments``.
    func : function
        Function evaluated on each window; it is JITed.
    engine_kwargs : dict
        Arguments for ``numba.jit``.
    name : str
        Caller (Rolling/Expanding) and original method name; becomes part
        of the cache key as ``f"{name}_table"``.

    Returns
    -------
    Numba function
        The cached kernel when the key is already in ``NUMBA_FUNC_CACHE``,
        otherwise a newly built one.
    """
    nopython, nogil, parallel = get_jit_arguments(engine_kwargs, kwargs)

    key = (func, f"{name}_table")
    if key in NUMBA_FUNC_CACHE:
        return NUMBA_FUNC_CACHE[key]

    jitted_udf = jit_user_function(func, nopython, nogil, parallel)
    numba = import_optional_dependency("numba")

    # error: Untyped decorator makes function "roll_table" untyped
    @numba.jit(nopython=nopython, nogil=nogil, parallel=parallel)  # type: ignore[misc]
    def roll_table(
        values: np.ndarray,
        begin: np.ndarray,
        end: np.ndarray,
        minimum_periods: int,
        *args: Any,
    ):
        out = np.empty(values.shape)
        enough_obs = np.empty(values.shape)
        for row in numba.prange(len(out)):
            window = values[begin[row] : end[row]]
            n_missing = np.sum(np.isnan(window), axis=0)
            row_result = jitted_udf(window, *args)
            # Per column: does the window hold enough non-NaN entries?
            enough_obs[row, :] = len(window) - n_missing >= minimum_periods
            out[row, :] = row_result
        # Blank out every cell whose window was too sparse.
        return np.where(enough_obs, out, np.nan)

    return roll_table
def mean(self, *args, update=None, update_times=None, **kwargs):
    """
    Calculate an online exponentially weighted mean.

    Parameters
    ----------
    update: DataFrame or Series, default None
        New values to continue calculating the
        exponentially weighted mean from the last values and weights.
        Values should be float64 dtype.

        ``update`` needs to be ``None`` the first time the
        exponentially weighted mean is calculated.

    update_times: Series or 1-D np.ndarray, default None
        New times to continue calculating the
        exponentially weighted mean from the last values and weights.
        If ``None``, values are assumed to be evenly spaced
        in time.
        This feature is currently unsupported.

    Returns
    -------
    DataFrame or Series

    Raises
    ------
    NotImplementedError
        If ``update_times`` is given.
    ValueError
        If ``update`` is given before a first call with ``update=None``.

    Examples
    --------
    >>> df = pd.DataFrame({"a": range(5), "b": range(5, 10)})
    >>> online_ewm = df.head(2).ewm(0.5).online()
    >>> online_ewm.mean()
          a     b
    0  0.00  5.00
    1  0.75  5.75
    >>> online_ewm.mean(update=df.tail(3))
              a         b
    2  1.615385  6.615385
    3  2.550000  7.550000
    4  3.520661  8.520661
    >>> online_ewm.reset()
    >>> online_ewm.mean()
          a     b
    0  0.00  5.00
    1  0.75  5.75
    """
    result_kwargs = {}
    is_frame = self._selected_obj.ndim == 2

    if update_times is not None:
        raise NotImplementedError("update_times is not implemented.")
    # Evenly spaced observations: every delta is 1.
    n_deltas = max(self._selected_obj.shape[self.axis - 1] - 1, 0)
    update_deltas = np.ones(n_deltas, dtype=np.float64)

    if update is None:
        # First pass: work on the selected object itself.
        result_from = 0
        result_kwargs["index"] = self._selected_obj.index
        if is_frame:
            result_kwargs["columns"] = self._selected_obj.columns
        else:
            result_kwargs["name"] = self._selected_obj.name
        np_array = self._selected_obj.astype(np.float64).to_numpy()
    else:
        if self._mean.last_ewm is None:
            raise ValueError(
                "Must call mean with update=None first before passing update"
            )
        # The previous result is prepended as the seed row and dropped from
        # the output again via result_from.
        result_from = 1
        result_kwargs["index"] = update.index
        if is_frame:
            last_value = self._mean.last_ewm[np.newaxis, :]
            result_kwargs["columns"] = update.columns
        else:
            last_value = self._mean.last_ewm
            result_kwargs["name"] = update.name
        np_array = np.concatenate((last_value, update.to_numpy()))

    ewma_func = generate_online_numba_ewma_func(
        **get_jit_arguments(self.engine_kwargs)
    )
    # run_ewm is always given 2D input; a Series becomes a single column.
    values_2d = np_array if is_frame else np_array[:, np.newaxis]
    result = self._mean.run_ewm(
        values_2d,
        update_deltas,
        self.min_periods,
        ewma_func,
    )
    if not is_frame:
        result = result.squeeze()
    result = result[result_from:]
    return self._selected_obj._constructor(result, **result_kwargs)