def _aggregate_series_pure_python(self, obj: Series, func: F): group_index, _, ngroups = self.group_info counts = np.zeros(ngroups, dtype=int) result = np.empty(ngroups, dtype="O") initialized = False splitter = get_splitter(obj, group_index, ngroups, axis=0) for label, group in splitter: # Each step of this loop corresponds to # libreduction._BaseGrouper._apply_to_group res = func(group) res = libreduction.extract_result(res) if not initialized: # We only do this validation on the first iteration libreduction.check_result_array(res, 0) initialized = True counts[label] = group.shape[0] result[label] = res out = lib.maybe_convert_objects(result, try_float=False) out = maybe_cast_result(out, obj, numeric_only=True) return out, counts
def _aggregate_series_pure_python(self, obj: Series, func: F): group_index, _, ngroups = self.group_info counts = np.zeros(ngroups, dtype=int) result = np.empty(ngroups, dtype="O") initialized = False splitter = get_splitter(obj, group_index, ngroups, axis=0) for label, group in splitter: # Each step of this loop corresponds to # libreduction._BaseGrouper._apply_to_group res = func(group) res = libreduction.extract_result(res) if not initialized: # We only do this validation on the first iteration libreduction.check_result_array(res, 0) initialized = True counts[label] = group.shape[0] result[label] = res result = lib.maybe_convert_objects(result, try_float=False) # error: Incompatible types in assignment (expression has type # "Union[ExtensionArray, ndarray]", variable has type "ndarray") result = maybe_cast_result( # type: ignore[assignment] result, obj, numeric_only=True) return result, counts
def _ea_wrap_cython_operation( self, kind: str, values, how: str, axis: int, min_count: int = -1, **kwargs) -> Tuple[np.ndarray, Optional[List[str]]]: """ If we have an ExtensionArray, unwrap, call _cython_operation, and re-wrap if appropriate. """ # TODO: general case implementation overrideable by EAs. orig_values = values if is_datetime64tz_dtype(values.dtype) or is_period_dtype( values.dtype): # All of the functions implemented here are ordinal, so we can # operate on the tz-naive equivalents values = values.view("M8[ns]") res_values = self._cython_operation(kind, values, how, axis, min_count, **kwargs) if how in ["rank"]: # preserve float64 dtype return res_values res_values = res_values.astype("i8", copy=False) result = type(orig_values)._simple_new(res_values, dtype=orig_values.dtype) return result elif is_integer_dtype(values.dtype) or is_bool_dtype(values.dtype): # IntegerArray or BooleanArray values = ensure_int_or_float(values) res_values = self._cython_operation(kind, values, how, axis, min_count, **kwargs) result = maybe_cast_result(result=res_values, obj=orig_values, how=how) return result raise NotImplementedError(values.dtype)
def _cython_operation(self, kind: str, values, how: str, axis: int, min_count: int = -1, **kwargs) -> Tuple[np.ndarray, Optional[List[str]]]: """ Returns the values of a cython operation as a Tuple of [data, names]. Names is only useful when dealing with 2D results, like ohlc (see self._name_functions). """ assert kind in ["transform", "aggregate"] orig_values = values if values.ndim > 2: raise NotImplementedError( "number of dimensions is currently limited to 2") elif values.ndim == 2: # Note: it is *not* the case that axis is always 0 for 1-dim values, # as we can have 1D ExtensionArrays that we need to treat as 2D assert axis == 1, axis # can we do this operation with our cython functions # if not raise NotImplementedError # we raise NotImplemented if this is an invalid operation # entirely, e.g. adding datetimes # categoricals are only 1d, so we # are not setup for dim transforming if is_categorical_dtype(values.dtype) or is_sparse(values.dtype): raise NotImplementedError(f"{values.dtype} dtype not supported") elif is_datetime64_any_dtype(values.dtype): if how in ["add", "prod", "cumsum", "cumprod"]: raise NotImplementedError( f"datetime64 type does not support {how} operations") elif is_timedelta64_dtype(values.dtype): if how in ["prod", "cumprod"]: raise NotImplementedError( f"timedelta64 type does not support {how} operations") if is_datetime64tz_dtype(values.dtype): # Cast to naive; we'll cast back at the end of the function # TODO: possible need to reshape? # TODO(EA2D):kludge can be avoided when 2D EA is allowed. values = values.view("M8[ns]") is_datetimelike = needs_i8_conversion(values.dtype) is_numeric = is_numeric_dtype(values.dtype) if is_datetimelike: values = values.view("int64") is_numeric = True elif is_bool_dtype(values.dtype): values = ensure_float64(values) elif is_integer_dtype(values): # we use iNaT for the missing value on ints # so pre-convert to guard this condition if (values == iNaT).any(): values = ensure_float64(values) else: values = ensure_int_or_float(values) elif is_numeric and not is_complex_dtype(values): values = ensure_float64(values) else: values = values.astype(object) arity = self._cython_arity.get(how, 1) vdim = values.ndim swapped = False if vdim == 1: values = values[:, None] out_shape = (self.ngroups, arity) else: if axis > 0: swapped = True assert axis == 1, axis values = values.T if arity > 1: raise NotImplementedError( "arity of more than 1 is not supported for the 'how' argument" ) out_shape = (self.ngroups, ) + values.shape[1:] func, values = self._get_cython_func_and_vals(kind, how, values, is_numeric) if how == "rank": out_dtype = "float" else: if is_numeric: out_dtype = f"{values.dtype.kind}{values.dtype.itemsize}" else: out_dtype = "object" codes, _, _ = self.group_info if kind == "aggregate": result = _maybe_fill(np.empty(out_shape, dtype=out_dtype), fill_value=np.nan) counts = np.zeros(self.ngroups, dtype=np.int64) result = self._aggregate(result, counts, values, codes, func, min_count) elif kind == "transform": result = _maybe_fill(np.empty_like(values, dtype=out_dtype), fill_value=np.nan) # TODO: min_count result = self._transform(result, values, codes, func, is_datetimelike, **kwargs) if is_integer_dtype(result) and not is_datetimelike: mask = result == iNaT if mask.any(): result = result.astype("float64") result[mask] = np.nan if kind == "aggregate" and self._filter_empty_groups and not counts.all( ): assert result.ndim != 2 result = result[counts > 0] if vdim == 1 and arity == 1: result = result[:, 0] names: Optional[List[str]] = self._name_functions.get(how, None) if swapped: result = result.swapaxes(0, axis) if is_datetime64tz_dtype(orig_values.dtype) or is_period_dtype( orig_values.dtype): # We need to use the constructors directly for these dtypes # since numpy won't recognize them # https://github.com/pandas-dev/pandas/issues/31471 result = type(orig_values)(result.astype(np.int64), dtype=orig_values.dtype) elif is_datetimelike and kind == "aggregate": result = result.astype(orig_values.dtype) if is_extension_array_dtype(orig_values.dtype): result = maybe_cast_result(result=result, obj=orig_values, how=how) return result, names