def generate_numba_agg_func(
    kwargs: dict[str, Any],
    func: Callable[..., Scalar],
    engine_kwargs: dict[str, bool] | None,
) -> Callable[[np.ndarray, np.ndarray, np.ndarray, np.ndarray, int, Any], np.ndarray]:
    """
    Build the numba-compiled groupby aggregation kernel for a user function.

    The user's function is jitted first and then called from a jitted
    per-group / per-column loop.  The ``nopython``, ``nogil`` and
    ``parallel`` flags derived from ``engine_kwargs`` are applied to both
    the user's function and the loop.

    If ``NUMBA_FUNC_CACHE`` already holds an entry for
    ``(func, "groupby_agg")`` that entry is returned unchanged.

    Parameters
    ----------
    kwargs : dict
        keyword arguments intended for ``func``; in this function they are
        only forwarded to ``get_jit_arguments``
    func : function
        function to be applied to each group and will be JITed
    engine_kwargs : dict or None
        options used to derive the arguments passed to ``numba.jit``

    Returns
    -------
    Numba function
        Called as ``(values, index, begin, end, num_columns, *args)``; it
        returns an array of shape ``(len(begin), num_columns)``.
    """
    nopython, nogil, parallel = get_jit_arguments(engine_kwargs, kwargs)

    validate_udf(func)

    # Reuse a previously registered kernel for this function when available.
    cache_key = (func, "groupby_agg")
    try:
        return NUMBA_FUNC_CACHE[cache_key]
    except KeyError:
        pass

    jitted_udf = jit_user_function(func, nopython, nogil, parallel)
    numba = import_optional_dependency("numba")

    @numba.jit(nopython=nopython, nogil=nogil, parallel=parallel)
    def group_agg(
        values: np.ndarray,
        index: np.ndarray,
        begin: np.ndarray,
        end: np.ndarray,
        num_columns: int,
        *args: Any,
    ) -> np.ndarray:
        num_groups = len(begin)
        assert num_groups == len(end)

        # One output row per group, one output column per input column.
        out = np.empty((num_groups, num_columns))
        for grp in numba.prange(num_groups):
            lo = begin[grp]
            hi = end[grp]
            group_index = index[lo:hi]
            for col in numba.prange(num_columns):
                out[grp, col] = jitted_udf(values[lo:hi, col], group_index, *args)
        return out

    return group_agg
def generate_numba_table_func(
    func: Callable[..., np.ndarray],
    nopython: bool,
    nogil: bool,
    parallel: bool,
):
    """
    Build a numba-compiled kernel that applies ``func`` to whole-table windows.

    ``func`` receives an M (window size) x N (number of columns) array and
    must return a 1 x N array.  Func is intended to operate row-wise, but
    the result will be transposed for axis=1.

    1. jit the user's function
    2. Return a rolling apply function with the jitted function inline

    Parameters
    ----------
    func : function
        function to be applied to each window and will be JITed
    nopython : bool
        nopython to be passed into numba.jit
    nogil : bool
        nogil to be passed into numba.jit
    parallel : bool
        parallel to be passed into numba.jit

    Returns
    -------
    Numba function
        Called as ``(values, begin, end, minimum_periods, *args)``; it
        returns an array of shape ``(len(begin), values.shape[1])``.
    """
    jitted_udf = jit_user_function(func, nopython, nogil, parallel)
    if TYPE_CHECKING:
        import numba
    else:
        numba = import_optional_dependency("numba")

    @numba.jit(nopython=nopython, nogil=nogil, parallel=parallel)
    def roll_table(
        values: np.ndarray,
        begin: np.ndarray,
        end: np.ndarray,
        minimum_periods: int,
        *args: Any,
    ):
        result = np.empty((len(begin), values.shape[1]))
        min_periods_mask = np.empty(result.shape)
        for row in numba.prange(len(result)):
            window = values[begin[row] : end[row]]
            # Per-column count of NaNs inside this window.
            count_nan = np.sum(np.isnan(window), axis=0)
            sub_result = jitted_udf(window, *args)
            # Per column: does the window hold enough non-NaN observations?
            min_periods_mask[row, :] = len(window) - count_nan >= minimum_periods
            result[row, :] = sub_result
        # Blank out entries whose window did not satisfy minimum_periods.
        return np.where(min_periods_mask, result, np.nan)

    return roll_table
def generate_numba_transform_func(
    func: Callable[..., np.ndarray],
    nopython: bool,
    nogil: bool,
    parallel: bool,
) -> Callable[[np.ndarray, np.ndarray, np.ndarray, np.ndarray, int, Any], np.ndarray]:
    """
    Build the numba-compiled groupby transform kernel for a user function.

    The user's function is jitted first and then called from a jitted
    per-group / per-column loop; the same jit flags are applied to both.

    Parameters
    ----------
    func : function
        function to be applied to each group and will be JITed
    nopython : bool
        nopython to be passed into numba.jit
    nogil : bool
        nogil to be passed into numba.jit
    parallel : bool
        parallel to be passed into numba.jit

    Returns
    -------
    Numba function
        Called as ``(values, index, begin, end, num_columns, *args)``; it
        returns an array of shape ``(len(values), num_columns)``.
    """
    jitted_udf = jit_user_function(func, nopython, nogil, parallel)
    if TYPE_CHECKING:
        import numba
    else:
        numba = import_optional_dependency("numba")

    @numba.jit(nopython=nopython, nogil=nogil, parallel=parallel)
    def group_transform(
        values: np.ndarray,
        index: np.ndarray,
        begin: np.ndarray,
        end: np.ndarray,
        num_columns: int,
        *args: Any,
    ) -> np.ndarray:
        num_groups = len(begin)
        assert num_groups == len(end)

        # The output keeps one row per input row.
        out = np.empty((len(values), num_columns))
        for grp in numba.prange(num_groups):
            lo = begin[grp]
            hi = end[grp]
            group_index = index[lo:hi]
            for col in numba.prange(num_columns):
                # Write the transformed group back into the rows it came from.
                out[lo:hi, col] = jitted_udf(values[lo:hi, col], group_index, *args)
        return out

    return group_transform
def generate_numba_apply_func(
    args: Tuple,
    kwargs: Dict[str, Any],
    func: Callable[..., Scalar],
    engine_kwargs: Optional[Dict[str, bool]],
):
    """
    Build a numba-compiled rolling-apply kernel for a user function.

    The user's function is jitted first and then called from a jitted loop
    over the windows.  The jit flags derived from ``engine_kwargs`` are
    applied to both the user's function and the loop.

    If ``NUMBA_FUNC_CACHE`` already holds an entry for
    ``(func, "rolling_apply")`` that entry is returned unchanged.

    Parameters
    ----------
    args : tuple
        *args to be passed into the function
    kwargs : dict
        keyword arguments intended for ``func``; in this function they are
        only forwarded to ``get_jit_arguments``
    func : function
        function to be applied to each window and will be JITed
    engine_kwargs : dict
        options used to derive the arguments passed to ``numba.jit``

    Returns
    -------
    Numba function
        Called as ``(values, begin, end, minimum_periods)``; it returns a
        1-D array with one entry per window.
    """
    nopython, nogil, parallel = get_jit_arguments(engine_kwargs, kwargs)

    # NOTE(review): ``args`` is captured by the closure below while the cache
    # key is only (func, "rolling_apply"); a cached kernel therefore keeps the
    # ``args`` it was built with -- confirm callers account for this.
    cache_key = (func, "rolling_apply")
    try:
        return NUMBA_FUNC_CACHE[cache_key]
    except KeyError:
        pass

    jitted_udf = jit_user_function(func, nopython, nogil, parallel)
    numba = import_optional_dependency("numba")

    # Only use numba's parallel range when parallel execution was requested.
    loop_range = numba.prange if parallel else range

    @numba.jit(nopython=nopython, nogil=nogil, parallel=parallel)
    def roll_apply(
        values: np.ndarray,
        begin: np.ndarray,
        end: np.ndarray,
        minimum_periods: int,
    ) -> np.ndarray:
        out = np.empty(len(begin))
        for pos in loop_range(len(out)):
            window = values[begin[pos] : end[pos]]
            n_valid = len(window) - np.sum(np.isnan(window))
            if n_valid < minimum_periods:
                # Too few non-NaN observations in this window.
                out[pos] = np.nan
            else:
                out[pos] = jitted_udf(window, *args)
        return out

    return roll_apply
def generate_numba_apply_func(
    func: Callable[..., Scalar],
    nopython: bool,
    nogil: bool,
    parallel: bool,
):
    """
    Build a numba-compiled rolling-apply kernel for a user function.

    The user's function is jitted first and then called from a jitted loop
    over the windows; the same jit flags are applied to both.

    Parameters
    ----------
    func : function
        function to be applied to each window and will be JITed
    nopython : bool
        nopython to be passed into numba.jit
    nogil : bool
        nogil to be passed into numba.jit
    parallel : bool
        parallel to be passed into numba.jit

    Returns
    -------
    Numba function
        Called as ``(values, begin, end, minimum_periods, *args)``; it
        returns a 1-D array with one entry per window.
    """
    jitted_udf = jit_user_function(func, nopython, nogil, parallel)
    if TYPE_CHECKING:
        import numba
    else:
        numba = import_optional_dependency("numba")

    @numba.jit(nopython=nopython, nogil=nogil, parallel=parallel)
    def roll_apply(
        values: np.ndarray,
        begin: np.ndarray,
        end: np.ndarray,
        minimum_periods: int,
        *args: Any,
    ) -> np.ndarray:
        out = np.empty(len(begin))
        for pos in numba.prange(len(out)):
            window = values[begin[pos] : end[pos]]
            n_valid = len(window) - np.sum(np.isnan(window))
            if n_valid < minimum_periods:
                # Too few non-NaN observations in this window.
                out[pos] = np.nan
            else:
                out[pos] = jitted_udf(window, *args)
        return out

    return roll_apply
def _aggregate_series_pure_python(
    self,
    obj: Series,
    func: F,
    *args,
    engine: str = "cython",
    engine_kwargs=None,
    **kwargs,
):
    """
    Aggregate ``obj`` group by group with a Python-level loop.

    For ``engine="numba"`` the user function is jitted (or taken from
    ``NUMBA_FUNC_CACHE``) and called as ``numba_func(values, index, *args)``
    on the pieces returned by ``split_for_numba``; ``kwargs`` are only
    validated via ``check_kwargs_and_nopython`` and not forwarded.  For any
    other engine ``func(group, *args, **kwargs)`` is called directly.

    Parameters
    ----------
    obj : Series
        object to split into groups and aggregate
    func : function
        reduction applied to each group
    *args
        positional arguments forwarded to ``func``
    engine : str, default "cython"
        ``"numba"`` selects the jitted code path
    engine_kwargs : dict or None
        options passed to ``get_jit_arguments`` (numba engine only)
    **kwargs
        keyword arguments forwarded to ``func`` (non-numba engine only)

    Returns
    -------
    tuple
        ``(result, counts)``: the per-group results after
        ``lib.maybe_convert_objects`` and the per-group row counts.

    Raises
    ------
    ValueError
        If the result for the first group is a Series, Index or ndarray
        whose length is not 1.
    """
    if engine == "numba":
        nopython, nogil, parallel = get_jit_arguments(engine_kwargs)
        check_kwargs_and_nopython(kwargs, nopython)
        validate_udf(func)
        cache_key = (func, "groupby_agg")
        # Look the cache up lazily: ``dict.get(key, default)`` evaluates its
        # default eagerly, which re-jitted the UDF even on a cache hit.
        if cache_key in NUMBA_FUNC_CACHE:
            numba_func = NUMBA_FUNC_CACHE[cache_key]
        else:
            numba_func = jit_user_function(func, nopython, nogil, parallel)

    group_index, _, ngroups = self.group_info

    counts = np.zeros(ngroups, dtype=int)
    result = None

    splitter = get_splitter(obj, group_index, ngroups, axis=0)

    for label, group in splitter:
        if engine == "numba":
            values, index = split_for_numba(group)
            res = numba_func(values, index, *args)
            # Only register the jitted function once a call has succeeded.
            if cache_key not in NUMBA_FUNC_CACHE:
                NUMBA_FUNC_CACHE[cache_key] = numba_func
        else:
            res = func(group, *args, **kwargs)

        if result is None:
            # First group: check that the function reduces and allocate the
            # output buffer.
            if isinstance(res, (Series, Index, np.ndarray)):
                if len(res) == 1:
                    # e.g. test_agg_lambda_with_timezone lambda e: e.head(1)
                    # FIXME: are we potentially losing important res.index info?
                    res = res.item()
                else:
                    raise ValueError("Function does not reduce")
            result = np.empty(ngroups, dtype="O")

        counts[label] = group.shape[0]
        result[label] = res

    assert result is not None
    result = lib.maybe_convert_objects(result, try_float=0)
    # TODO: maybe_cast_to_extension_array?

    return result, counts
def generate_numba_func(
    func: Callable,
    engine_kwargs: Optional[Dict[str, bool]],
    kwargs: dict,
    cache_key_str: str,
) -> Tuple[Callable, Tuple[Callable, str]]:
    """
    Return a JITed function and cache key for the NUMBA_FUNC_CACHE

    This _may_ be specific to groupby (as it's only used there currently).

    The function is taken from ``NUMBA_FUNC_CACHE`` when an entry for the
    cache key exists; otherwise it is jitted.  This function does not
    write to the cache itself.

    Parameters
    ----------
    func : function
        user defined function
    engine_kwargs : dict or None
        numba.jit arguments
    kwargs : dict
        kwargs for func
    cache_key_str : str
        string representing the second part of the cache key tuple

    Returns
    -------
    (JITed function, cache key)

    Raises
    ------
    NumbaUtilError
        Presumably raised by the validation helpers called here -- confirm
        against their definitions.
    """
    nopython, nogil, parallel = get_jit_arguments(engine_kwargs)
    check_kwargs_and_nopython(kwargs, nopython)
    validate_udf(func)
    cache_key = (func, cache_key_str)
    # Look the cache up lazily: ``dict.get(key, default)`` evaluates its
    # default eagerly, which re-jitted the UDF even on a cache hit.
    if cache_key in NUMBA_FUNC_CACHE:
        numba_func = NUMBA_FUNC_CACHE[cache_key]
    else:
        numba_func = jit_user_function(func, nopython, nogil, parallel)
    return numba_func, cache_key
def generate_numba_transform_func(
    args: Tuple,
    kwargs: Dict[str, Any],
    func: Callable[..., Scalar],
    engine_kwargs: Optional[Dict[str, bool]],
) -> Callable[[np.ndarray, np.ndarray, np.ndarray, np.ndarray, int, int], np.ndarray]:
    """
    Build the numba-compiled groupby transform kernel for a user function.

    The user's function is validated and jitted, then called from a jitted
    per-group / per-column loop.  The jit flags derived from
    ``engine_kwargs`` are applied to both the user's function and the loop.

    Parameters
    ----------
    args : tuple
        *args to be passed into the function
    kwargs : dict
        keyword arguments intended for ``func``; in this function they are
        only checked by ``check_kwargs_and_nopython``
    func : function
        function to be applied to each group and will be JITed
    engine_kwargs : dict
        options used to derive the arguments passed to ``numba.jit``

    Returns
    -------
    Numba function
        Called as ``(values, index, begin, end, num_groups, num_columns)``;
        it returns an array of shape ``(len(values), num_columns)``.
    """
    nopython, nogil, parallel = get_jit_arguments(engine_kwargs)

    check_kwargs_and_nopython(kwargs, nopython)

    validate_udf(func)

    jitted_udf = jit_user_function(func, nopython, nogil, parallel)

    numba = import_optional_dependency("numba")

    # Only use numba's parallel range when parallel execution was requested.
    loop_range = numba.prange if parallel else range

    @numba.jit(nopython=nopython, nogil=nogil, parallel=parallel)
    def group_transform(
        values: np.ndarray,
        index: np.ndarray,
        begin: np.ndarray,
        end: np.ndarray,
        num_groups: int,
        num_columns: int,
    ) -> np.ndarray:
        # The output keeps one row per input row.
        out = np.empty((len(values), num_columns))
        for grp in loop_range(num_groups):
            lo = begin[grp]
            hi = end[grp]
            group_index = index[lo:hi]
            for col in loop_range(num_columns):
                # Write the transformed group back into the rows it came from.
                out[lo:hi, col] = jitted_udf(values[lo:hi, col], group_index, *args)
        return out

    return group_transform
def generate_numba_apply_func(
    kwargs: dict[str, Any],
    func: Callable[..., Scalar],
    engine_kwargs: dict[str, bool] | None,
    name: str,
):
    """
    Build a numba-compiled rolling-apply kernel for a user function.

    The user's function is jitted first and then called from a jitted loop
    over the windows.  The jit flags derived from ``engine_kwargs`` are
    applied to both the user's function and the loop.

    If ``NUMBA_FUNC_CACHE`` already holds an entry for
    ``(func, f"{name}_apply_single")`` that entry is returned unchanged.

    Parameters
    ----------
    kwargs : dict
        keyword arguments intended for ``func``; in this function they are
        only forwarded to ``get_jit_arguments``
    func : function
        function to be applied to each window and will be JITed
    engine_kwargs : dict
        options used to derive the arguments passed to ``numba.jit``
    name: str
        name of the caller (Rolling/Expanding)

    Returns
    -------
    Numba function
        Called as ``(values, begin, end, minimum_periods, *args)``; it
        returns a 1-D array with one entry per window.
    """
    nopython, nogil, parallel = get_jit_arguments(engine_kwargs, kwargs)

    # Reuse a previously registered kernel for this function/caller pair.
    cache_key = (func, f"{name}_apply_single")
    try:
        return NUMBA_FUNC_CACHE[cache_key]
    except KeyError:
        pass

    jitted_udf = jit_user_function(func, nopython, nogil, parallel)
    numba = import_optional_dependency("numba")

    # error: Untyped decorator makes function "roll_apply" untyped
    @numba.jit(nopython=nopython, nogil=nogil, parallel=parallel)  # type: ignore[misc]
    def roll_apply(
        values: np.ndarray,
        begin: np.ndarray,
        end: np.ndarray,
        minimum_periods: int,
        *args: Any,
    ) -> np.ndarray:
        out = np.empty(len(begin))
        for pos in numba.prange(len(out)):
            window = values[begin[pos] : end[pos]]
            n_valid = len(window) - np.sum(np.isnan(window))
            if n_valid < minimum_periods:
                # Too few non-NaN observations in this window.
                out[pos] = np.nan
            else:
                out[pos] = jitted_udf(window, *args)
        return out

    return roll_apply
def generate_numba_table_func(
    kwargs: dict[str, Any],
    func: Callable[..., np.ndarray],
    engine_kwargs: dict[str, bool] | None,
    name: str,
):
    """
    Generate a numba jitted function to apply window calculations table-wise.

    Func will be passed a M window size x N number of columns array, and
    must return a 1 x N number of columns array. Func is intended to operate
    row-wise, but the result will be transposed for axis=1.

    1. jit the user's function
    2. Return a rolling apply function with the jitted function inline

    If ``NUMBA_FUNC_CACHE`` already holds an entry for
    ``(func, f"{name}_table")`` that entry is returned unchanged.

    Parameters
    ----------
    kwargs : dict
        keyword arguments intended for ``func``; in this function they are
        only forwarded to ``get_jit_arguments``
    func : function
        function to be applied to each window and will be JITed
    engine_kwargs : dict
        dictionary of arguments to be passed into numba.jit
    name : str
        caller (Rolling/Expanding) and original method name for numba cache key

    Returns
    -------
    Numba function
        Called as ``(values, begin, end, minimum_periods, *args)``; it
        returns an array of shape ``(len(begin), values.shape[1])``.
    """
    nopython, nogil, parallel = get_jit_arguments(engine_kwargs, kwargs)

    cache_key = (func, f"{name}_table")
    if cache_key in NUMBA_FUNC_CACHE:
        return NUMBA_FUNC_CACHE[cache_key]

    numba_func = jit_user_function(func, nopython, nogil, parallel)
    numba = import_optional_dependency("numba")

    # error: Untyped decorator makes function "roll_table" untyped
    @numba.jit(nopython=nopython, nogil=nogil, parallel=parallel)  # type: ignore[misc]
    def roll_table(
        values: np.ndarray,
        begin: np.ndarray,
        end: np.ndarray,
        minimum_periods: int,
        *args: Any,
    ):
        # Size the output by the number of windows rather than by the number
        # of input rows: the loop indexes ``begin``/``end``, so iterating
        # over ``len(values)`` would read out of bounds (or leave rows
        # uninitialised) whenever the two lengths differ.
        result = np.empty((len(begin), values.shape[1]))
        min_periods_mask = np.empty(result.shape)
        for i in numba.prange(len(result)):
            start = begin[i]
            stop = end[i]
            window = values[start:stop]
            # Per-column count of NaNs inside this window.
            count_nan = np.sum(np.isnan(window), axis=0)
            sub_result = numba_func(window, *args)
            # Per column: does the window hold enough non-NaN observations?
            nan_mask = len(window) - count_nan >= minimum_periods
            min_periods_mask[i, :] = nan_mask
            result[i, :] = sub_result
        # Blank out entries whose window did not satisfy minimum_periods.
        result = np.where(min_periods_mask, result, np.nan)
        return result

    return roll_table