def _cython_operation( self, kind: str, values, how: str, axis: int, min_count: int = -1, **kwargs ) -> Tuple[np.ndarray, Optional[List[str]]]: """ Returns the values of a cython operation as a Tuple of [data, names]. Names is only useful when dealing with 2D results, like ohlc (see self._name_functions). """ assert kind in ["transform", "aggregate"] orig_values = values if values.ndim > 2: raise NotImplementedError("number of dimensions is currently limited to 2") elif values.ndim == 2: # Note: it is *not* the case that axis is always 0 for 1-dim values, # as we can have 1D ExtensionArrays that we need to treat as 2D assert axis == 1, axis # can we do this operation with our cython functions # if not raise NotImplementedError # we raise NotImplemented if this is an invalid operation # entirely, e.g. adding datetimes # categoricals are only 1d, so we # are not setup for dim transforming if is_categorical_dtype(values.dtype) or is_sparse(values.dtype): raise NotImplementedError(f"{values.dtype} dtype not supported") elif is_datetime64_any_dtype(values.dtype): if how in ["add", "prod", "cumsum", "cumprod"]: raise NotImplementedError( f"datetime64 type does not support {how} operations" ) elif is_timedelta64_dtype(values.dtype): if how in ["prod", "cumprod"]: raise NotImplementedError( f"timedelta64 type does not support {how} operations" ) if is_datetime64tz_dtype(values.dtype): # Cast to naive; we'll cast back at the end of the function # TODO: possible need to reshape? # TODO(EA2D):kludge can be avoided when 2D EA is allowed. values = values.view("M8[ns]") is_datetimelike = needs_i8_conversion(values.dtype) is_numeric = is_numeric_dtype(values.dtype) if is_datetimelike: values = values.view("int64") is_numeric = True elif is_bool_dtype(values.dtype): values = ensure_int_or_float(values) elif is_integer_dtype(values): # we use iNaT for the missing value on ints # so pre-convert to guard this condition if (values == iNaT).any(): values = ensure_float64(values) else: values = ensure_int_or_float(values) elif is_numeric and not is_complex_dtype(values): values = ensure_float64(ensure_float(values)) else: values = values.astype(object) arity = self._cython_arity.get(how, 1) vdim = values.ndim swapped = False if vdim == 1: values = values[:, None] out_shape = (self.ngroups, arity) else: if axis > 0: swapped = True assert axis == 1, axis values = values.T if arity > 1: raise NotImplementedError( "arity of more than 1 is not supported for the 'how' argument" ) out_shape = (self.ngroups,) + values.shape[1:] func, values = self._get_cython_func_and_vals(kind, how, values, is_numeric) if how == "rank": out_dtype = "float" else: if is_numeric: out_dtype = f"{values.dtype.kind}{values.dtype.itemsize}" else: out_dtype = "object" codes, _, _ = self.group_info if kind == "aggregate": result = maybe_fill(np.empty(out_shape, dtype=out_dtype), fill_value=np.nan) counts = np.zeros(self.ngroups, dtype=np.int64) result = self._aggregate(result, counts, values, codes, func, min_count) elif kind == "transform": result = maybe_fill( np.empty_like(values, dtype=out_dtype), fill_value=np.nan ) # TODO: min_count result = self._transform( result, values, codes, func, is_datetimelike, **kwargs ) if is_integer_dtype(result) and not is_datetimelike: mask = result == iNaT if mask.any(): result = result.astype("float64") result[mask] = np.nan if kind == "aggregate" and self._filter_empty_groups and not counts.all(): assert result.ndim != 2 result = result[counts > 0] if vdim == 1 and arity == 1: result = result[:, 0] names: Optional[List[str]] = self._name_functions.get(how, None) if swapped: result = result.swapaxes(0, axis) if is_datetime64tz_dtype(orig_values.dtype) or is_period_dtype( orig_values.dtype ): # We need to use the constructors directly for these dtypes # since numpy won't recognize them # https://github.com/pandas-dev/pandas/issues/31471 result = type(orig_values)(result.astype(np.int64), dtype=orig_values.dtype) elif is_datetimelike and kind == "aggregate": result = result.astype(orig_values.dtype) if is_extension_array_dtype(orig_values.dtype): result = maybe_cast_result(result=result, obj=orig_values, how=how) return result, names
def _cython_operation( self, kind: str, values, how: str, axis: int, min_count: int = -1, **kwargs ) -> np.ndarray: """ Returns the values of a cython operation. """ orig_values = values assert kind in ["transform", "aggregate"] if values.ndim > 2: raise NotImplementedError("number of dimensions is currently limited to 2") elif values.ndim == 2: # Note: it is *not* the case that axis is always 0 for 1-dim values, # as we can have 1D ExtensionArrays that we need to treat as 2D assert axis == 1, axis # can we do this operation with our cython functions # if not raise NotImplementedError self._disallow_invalid_ops(values, how) if is_extension_array_dtype(values.dtype): return self._ea_wrap_cython_operation( kind, values, how, axis, min_count, **kwargs ) is_datetimelike = needs_i8_conversion(values.dtype) is_numeric = is_numeric_dtype(values.dtype) if is_datetimelike: values = values.view("int64") is_numeric = True elif is_bool_dtype(values.dtype): values = ensure_int_or_float(values) elif is_integer_dtype(values): # we use iNaT for the missing value on ints # so pre-convert to guard this condition if (values == iNaT).any(): values = ensure_float64(values) else: values = ensure_int_or_float(values) elif is_numeric and not is_complex_dtype(values): values = ensure_float64(ensure_float(values)) else: values = values.astype(object) arity = self._cython_arity.get(how, 1) vdim = values.ndim swapped = False if vdim == 1: values = values[:, None] out_shape = (self.ngroups, arity) else: if axis > 0: swapped = True assert axis == 1, axis values = values.T if arity > 1: raise NotImplementedError( "arity of more than 1 is not supported for the 'how' argument" ) out_shape = (self.ngroups,) + values.shape[1:] func, values = self._get_cython_func_and_vals(kind, how, values, is_numeric) if how == "rank": out_dtype = "float" else: if is_numeric: out_dtype = f"{values.dtype.kind}{values.dtype.itemsize}" else: out_dtype = "object" codes, _, _ = self.group_info if kind == "aggregate": result = maybe_fill(np.empty(out_shape, dtype=out_dtype), fill_value=np.nan) counts = np.zeros(self.ngroups, dtype=np.int64) result = self._aggregate(result, counts, values, codes, func, min_count) elif kind == "transform": result = maybe_fill( np.empty_like(values, dtype=out_dtype), fill_value=np.nan ) # TODO: min_count result = self._transform( result, values, codes, func, is_datetimelike, **kwargs ) if is_integer_dtype(result) and not is_datetimelike: mask = result == iNaT if mask.any(): result = result.astype("float64") result[mask] = np.nan if kind == "aggregate" and self._filter_empty_groups and not counts.all(): assert result.ndim != 2 result = result[counts > 0] if vdim == 1 and arity == 1: result = result[:, 0] if swapped: result = result.swapaxes(0, axis) if how not in base.cython_cast_blocklist: # e.g. if we are int64 and need to restore to datetime64/timedelta64 # "rank" is the only member of cython_cast_blocklist we get here dtype = maybe_cast_result_dtype(orig_values.dtype, how) result = maybe_downcast_to_dtype(result, dtype) return result