def quantile_ea_compat(
    values: ExtensionArray, qs, interpolation: str, axis: int
) -> ExtensionArray:
    """
    Compute quantiles of an ExtensionArray through ``quantile_with_mask``.

    A 1D ExtensionArray of shape (N,) is presented to the ndarray-based
    implementation as if it had shape (1, N).

    Parameters
    ----------
    values : ExtensionArray
    qs : a scalar or list of the quantiles to be computed
    interpolation: str
    axis : int

    Returns
    -------
    ExtensionArray
    """
    # TODO(EA2D): make-believe not needed with 2D EAs

    # np.asarray is needed for Sparse, see GH#24600
    na_mask = np.atleast_2d(np.asarray(values.isna()))

    # Use fresh names rather than rebinding ``values`` so the original
    # array stays available for re-wrapping below.
    flat, na_fill = values._values_for_factorize()
    flat_2d = np.atleast_2d(flat)  # type: ignore[call-overload]

    result = quantile_with_mask(
        flat_2d, na_mask, na_fill, qs, interpolation, axis
    )

    if is_sparse(values.dtype):
        # Sparse input: hand back exactly what quantile_with_mask produced.
        return result  # type: ignore[return-value]

    ea_cls = type(values)
    # shape[0] should be 1 as long as EAs are 1D
    if result.ndim == 1:
        # i.e. qs was originally a scalar
        assert result.shape == (1,), result.shape
        return ea_cls._from_factorized(result, values)

    assert result.shape == (1, len(qs)), result.shape
    return ea_cls._from_factorized(result[0], values)
def _ea_wrap_cython_operation(
    self,
    values: ExtensionArray,
    min_count: int,
    ngroups: int,
    comp_ids: np.ndarray,
    **kwargs,
) -> ArrayLike:
    """
    Run the cython operation on an ExtensionArray.

    The array is unwrapped to an ndarray, passed to
    ``_cython_op_ndim_compat``, and the result is re-wrapped as an
    ExtensionArray unless the operation is "rank".
    """
    # TODO: general case implementation overridable by EAs.
    if isinstance(values, BaseMaskedArray) and self.uses_mask():
        # Masked arrays have a dedicated mask-aware path.
        return self._masked_ea_wrap_cython_operation(
            values,
            min_count=min_count,
            ngroups=ngroups,
            comp_ids=comp_ids,
            **kwargs,
        )

    if isinstance(values, (DatetimeArray, PeriodArray, TimedeltaArray)):
        # All of the functions implemented here are ordinal, so we can
        # operate on the tz-naive equivalents
        unwrapped = values._ndarray.view("M8[ns]")
    elif isinstance(values.dtype, (BooleanDtype, IntegerDtype)):
        # IntegerArray or BooleanArray
        unwrapped = values.to_numpy("float64", na_value=np.nan)
    elif isinstance(values.dtype, FloatingDtype):
        # FloatingArray
        unwrapped = values.to_numpy(values.dtype.numpy_dtype, na_value=np.nan)
    elif isinstance(values.dtype, StringDtype):
        # StringArray
        unwrapped = values.to_numpy(object, na_value=np.nan)
    else:
        raise NotImplementedError(
            f"function is not implemented for this dtype: {values.dtype}"
        )

    raw_result = self._cython_op_ndim_compat(
        unwrapped,
        min_count=min_count,
        ngroups=ngroups,
        comp_ids=comp_ids,
        mask=None,
        **kwargs,
    )

    if self.how == "rank":
        # i.e. how in WrappedCythonOp.cast_blocklist, since
        # other cast_blocklist methods dont go through cython_operation
        return raw_result

    return self._reconstruct_ea_result(values, raw_result)
def _quantile_ea_fallback(
    values: ExtensionArray, qs: np.ndarray, interpolation: str
) -> ExtensionArray:
    """
    Quantile path for ExtensionArray subclasses that do not implement
    `_from_factorized`, e.g. IntegerArray.

    Notes
    -----
    We assume that all impacted cases are 1D-only.
    """
    na_mask = np.asarray(values.isna())
    data = np.asarray(values)

    # The masked implementation is called with 2D inputs; promote both.
    quantiles = _quantile_with_mask(
        np.atleast_2d(data),
        mask=np.atleast_2d(na_mask),
        fill_value=values.dtype.na_value,
        qs=qs,
        interpolation=interpolation,
    )

    # A single input row must yield a single output row.
    assert quantiles.ndim == 2
    assert quantiles.shape[0] == 1

    return type(values)._from_sequence(quantiles[0], dtype=values.dtype)
def _reconstruct_ea_result(
    self, values: ExtensionArray, res_values: np.ndarray
) -> ExtensionArray:
    """
    Build an ExtensionArray result from the ndarray ``res_values``.

    The kind of array constructed is chosen from the dtype/type of the
    original ``values``. Raises NotImplementedError for any other input.
    """
    ea_dtype = values.dtype

    if isinstance(ea_dtype, StringDtype):
        string_cls = ea_dtype.construct_array_type()
        return string_cls._from_sequence(res_values, dtype=ea_dtype)

    if isinstance(ea_dtype, BaseMaskedDtype):
        # The result dtype is derived from the underlying numpy dtype and
        # then mapped back to the corresponding masked dtype.
        np_result_dtype = self._get_result_dtype(ea_dtype.numpy_dtype)
        masked_dtype = BaseMaskedDtype.from_numpy_dtype(np_result_dtype)
        masked_cls = masked_dtype.construct_array_type()
        return masked_cls._from_sequence(res_values, dtype=masked_dtype)

    if isinstance(values, (DatetimeArray, TimedeltaArray, PeriodArray)):
        # In to_cython_values we took a view as M8[ns]
        assert res_values.dtype == "M8[ns]"
        backing = res_values.view(values._ndarray.dtype)
        return values._from_backing_data(backing)

    raise NotImplementedError
def _reorder_for_extension_array_stack( arr: ExtensionArray, n_rows: int, n_columns: int ) -> ExtensionArray: """ Re-orders the values when stacking multiple extension-arrays. The indirect stacking method used for EAs requires a followup take to get the order correct. Parameters ---------- arr : ExtensionArray n_rows, n_columns : int The number of rows and columns in the original DataFrame. Returns ------- taken : ExtensionArray The original `arr` with elements re-ordered appropriately Examples -------- >>> arr = np.array(['a', 'b', 'c', 'd', 'e', 'f']) >>> _reorder_for_extension_array_stack(arr, 2, 3) array(['a', 'c', 'e', 'b', 'd', 'f'], dtype='<U1') >>> _reorder_for_extension_array_stack(arr, 3, 2) array(['a', 'd', 'b', 'e', 'c', 'f'], dtype='<U1') """ # final take to get the order correct. # idx is an indexer like # [c0r0, c1r0, c2r0, ..., # c0r1, c1r1, c2r1, ...] idx = np.arange(n_rows * n_columns).reshape(n_columns, n_rows).T.ravel() return arr.take(idx)
def nargminmax(values: ExtensionArray, method: str, axis: int = 0):
    """
    np.argmin/np.argmax for an ExtensionArray, with handling of missing
    values.

    Parameters
    ----------
    values : ExtensionArray
    method : {"argmax", "argmin"}
    axis : int, default 0

    Returns
    -------
    int
        For input with more than one dimension, the result is instead
        whatever the per-axis reduction produces (an ndarray when any
        value is missing).
    """
    assert method in {"argmax", "argmin"}
    if method == "argmax":
        reducer = np.argmax
    else:
        reducer = np.argmin

    na_mask = np.asarray(isna(values))
    sortable = values._values_for_argsort()

    if sortable.ndim <= 1:
        return _nanargminmax(sortable, na_mask, reducer)

    if not na_mask.any():
        # Nothing missing: numpy can reduce along the axis directly.
        return reducer(sortable, axis=axis)

    # Missing values present: reduce one slice at a time. For axis=1 the
    # slices are the rows; otherwise iterate over the transposes.
    if axis == 1:
        slices = zip(sortable, na_mask)
    else:
        slices = zip(sortable.T, na_mask.T)

    positions = [
        _nanargminmax(vals, missing, reducer) for vals, missing in slices
    ]
    return np.array(positions)
def _quantile_ea_fallback(
    values: ExtensionArray, qs: npt.NDArray[np.float64], interpolation: str
) -> ExtensionArray:
    """
    Quantile path for ExtensionArray subclasses that do not implement
    `_from_factorized`, e.g. IntegerArray.

    Notes
    -----
    We assume that all impacted cases are 1D-only.

    If the result cannot be converted back to the original dtype
    (``_from_sequence`` raises TypeError), a 2D float64 ndarray is
    returned instead of an ExtensionArray.
    """
    na_mask = np.asarray(values.isna())
    data = np.asarray(values)

    # The masked implementation is called with 2D inputs; promote both.
    quantiles = _quantile_with_mask(
        np.atleast_2d(data),
        mask=np.atleast_2d(na_mask),
        fill_value=values.dtype.na_value,
        qs=qs,
        interpolation=interpolation,
    )

    # A single input row must yield a single output row.
    assert quantiles.ndim == 2
    assert quantiles.shape[0] == 1
    row = quantiles[0]

    ea_cls = type(values)
    try:
        return ea_cls._from_sequence(row, dtype=values.dtype)
    except TypeError:
        # GH#42626: not able to safely cast Int64
        # for floating point output
        return np.atleast_2d(np.asarray(row, dtype=np.float64))
def _reconstruct_ea_result(
    self, values: ExtensionArray, res_values: np.ndarray
) -> ExtensionArray:
    """
    Build an ExtensionArray result from the ndarray ``res_values``.

    The kind of array constructed is chosen from the dtype/type of the
    original ``values``. Raises NotImplementedError for any other input.
    """
    orig_dtype = values.dtype

    if isinstance(orig_dtype, StringDtype):
        array_cls = orig_dtype.construct_array_type()
        return array_cls._from_sequence(res_values, dtype=orig_dtype)

    if isinstance(orig_dtype, BaseMaskedDtype):
        # The result dtype is derived from the underlying numpy dtype and
        # then mapped back to the corresponding masked dtype.
        result_np_dtype = self._get_result_dtype(orig_dtype.numpy_dtype)
        result_dtype = BaseMaskedDtype.from_numpy_dtype(result_np_dtype)
        array_cls = result_dtype.construct_array_type()
        return array_cls._from_sequence(res_values, dtype=result_dtype)

    if isinstance(values, (DatetimeArray, TimedeltaArray, PeriodArray)):
        # In to_cython_values we took a view as M8[ns]
        assert res_values.dtype == "M8[ns]"
        native = res_values.view(values._ndarray.dtype)
        return values._from_backing_data(native)

    raise NotImplementedError
def shift(self, periods=1, fill_value=None):
    """
    Shift values by desired number.

    Newly introduced missing values are filled with
    ``self.dtype.na_value``.

    .. versionadded:: 0.24.0

    Parameters
    ----------
    periods : int, default 1
        The number of periods to shift. Negative values are allowed
        for shifting backwards.
    fill_value : optional, default NaT

        .. versionadded:: 0.24.0

    Returns
    -------
    shifted : PeriodArray
    """
    # TODO(DatetimeArray): remove
    # Index.shift and EA.shift have different semantics, so delegate
    # explicitly to the ExtensionArray implementation.
    shifted = ExtensionArray.shift(self, periods, fill_value=fill_value)
    return shifted
def quantile_ea_compat(
    values: ExtensionArray, qs, interpolation: str, axis: int
) -> ExtensionArray:
    """
    Compute quantiles of an ExtensionArray through ``quantile_with_mask``.

    A 1D ExtensionArray of shape (N,) is presented to the ndarray-based
    implementation as if it had shape (1, N).

    Parameters
    ----------
    values : ExtensionArray
    qs : a scalar or list of the quantiles to be computed
    interpolation: str
    axis : int

    Returns
    -------
    ExtensionArray
    """
    # TODO(EA2D): make-believe not needed with 2D EAs

    # np.asarray is needed for Sparse, see GH#24600
    isna_2d = np.atleast_2d(np.asarray(values.isna()))

    factorizable, fill_value = values._values_for_factorize()
    factorizable = np.atleast_2d(factorizable)

    result = quantile_with_mask(
        factorizable, isna_2d, fill_value, qs, interpolation, axis
    )

    if is_sparse(values.dtype):
        # Sparse input: hand back exactly what quantile_with_mask produced.
        return result

    rebuild = type(values)._from_factorized
    # shape[0] should be 1 as long as EAs are 1D
    if result.ndim == 1:
        # i.e. qs was originally a scalar
        assert result.shape == (1,), result.shape
        return rebuild(result, values)

    assert result.shape == (1, len(qs)), result.shape
    return rebuild(result[0], values)
def _ea_wrap_cython_operation(
    self,
    values: ExtensionArray,
    min_count: int,
    ngroups: int,
    comp_ids: np.ndarray,
    **kwargs,
) -> ArrayLike:
    """
    Run the cython operation on an ExtensionArray.

    The array is unwrapped to an ndarray, passed to
    ``_cython_op_ndim_compat``, and the result is re-wrapped as an
    ExtensionArray when the operation is not in ``cast_blocklist``.
    """
    if isinstance(values, BaseMaskedArray) and self.uses_mask():
        # Masked arrays have a dedicated mask-aware path.
        return self._masked_ea_wrap_cython_operation(
            values,
            min_count=min_count,
            ngroups=ngroups,
            comp_ids=comp_ids,
            **kwargs,
        )

    if isinstance(values, Categorical) and self.uses_mask():
        assert self.how == "rank"  # the only one implemented ATM
        assert values.ordered  # checked earlier
        na_mask = values.isna()
        backing = values._ndarray

        # If we ever have more than just "rank" here, we'll need to do
        # `if self.how in self.cast_blocklist` like we do for other dtypes.
        return self._cython_op_ndim_compat(
            backing,
            min_count=min_count,
            ngroups=ngroups,
            comp_ids=comp_ids,
            mask=na_mask,
            **kwargs,
        )

    unwrapped = self._ea_to_cython_values(values)
    raw_result = self._cython_op_ndim_compat(
        unwrapped,
        min_count=min_count,
        ngroups=ngroups,
        comp_ids=comp_ids,
        mask=None,
        **kwargs,
    )

    if self.how not in self.cast_blocklist:
        return self._reconstruct_ea_result(values, raw_result)

    # i.e. how in ["rank"], since other cast_blocklist methods dont go
    # through cython_operation
    return raw_result
def _quantile_ea_compat(
    values: ExtensionArray, qs: np.ndarray, interpolation: str
) -> ExtensionArray:
    """
    Compute quantiles of an ExtensionArray through ``_quantile_with_mask``.

    A 1D ExtensionArray of shape (N,) is presented to the ndarray-based
    implementation as if it had shape (1, N).

    Parameters
    ----------
    values : ExtensionArray
    qs : np.ndarray[float64]
    interpolation: str

    Returns
    -------
    ExtensionArray
    """
    # TODO(EA2D): make-believe not needed with 2D EAs

    # np.asarray is needed for Sparse, see GH#24600
    na_mask = np.atleast_2d(np.asarray(values.isna()))

    flat, na_fill = values._values_for_factorize()
    flat_2d = np.atleast_2d(flat)

    result = _quantile_with_mask(flat_2d, na_mask, na_fill, qs, interpolation)

    if is_sparse(values.dtype):
        # Sparse input: hand back exactly what _quantile_with_mask produced.
        # error: Incompatible return value type (got "ndarray", expected
        # "ExtensionArray")
        return result  # type: ignore[return-value]

    ea_cls = type(values)
    if values.ndim == 2:
        # i.e. DatetimeArray
        return ea_cls._from_factorized(result, values)

    # shape[0] should be 1 as long as EAs are 1D
    assert result.shape == (1, len(qs)), result.shape
    return ea_cls._from_factorized(result[0], values)
def _ea_to_cython_values(self, values: ExtensionArray) -> np.ndarray:
    """
    Convert an ExtensionArray to the ndarray passed to the cython op.

    Raises NotImplementedError for array types not handled below.
    """
    # GH#43682
    if isinstance(values, (DatetimeArray, PeriodArray, TimedeltaArray)):
        # All of the functions implemented here are ordinal, so we can
        # operate on the tz-naive equivalents
        return values._ndarray.view("M8[ns]")

    ea_dtype = values.dtype

    if isinstance(ea_dtype, (BooleanDtype, IntegerDtype)):
        # IntegerArray or BooleanArray
        return values.to_numpy("float64", na_value=np.nan)

    if isinstance(ea_dtype, FloatingDtype):
        # FloatingArray
        return values.to_numpy(ea_dtype.numpy_dtype, na_value=np.nan)

    if isinstance(ea_dtype, StringDtype):
        # StringArray
        return values.to_numpy(object, na_value=np.nan)

    raise NotImplementedError(
        f"function is not implemented for this dtype: {values.dtype}"
    )
def shift(self, periods=1):
    """
    Shift values by desired number.

    Newly introduced missing values are filled with
    ``self.dtype.na_value``.

    .. versionadded:: 0.24.0

    Parameters
    ----------
    periods : int, default 1
        The number of periods to shift. Negative values are allowed
        for shifting backwards.

    Returns
    -------
    shifted : PeriodArray
    """
    # TODO(DatetimeArray): remove
    # Index.shift and EA.shift have different semantics, so delegate
    # explicitly to the ExtensionArray implementation.
    shifted = ExtensionArray.shift(self, periods)
    return shifted
def _ea_wrap_cython_operation(
    self,
    values: ExtensionArray,
    min_count: int,
    ngroups: int,
    comp_ids: np.ndarray,
    **kwargs,
) -> ArrayLike:
    """
    Run the cython operation on an ExtensionArray.

    The array is unwrapped to an ndarray, passed to
    ``_cython_op_ndim_compat``, and the result is re-wrapped as an
    ExtensionArray unless the operation is "rank".
    """
    # TODO: general case implementation overridable by EAs.
    if isinstance(values, BaseMaskedArray) and self.uses_mask():
        # Masked arrays have a dedicated mask-aware path.
        return self._masked_ea_wrap_cython_operation(
            values,
            min_count=min_count,
            ngroups=ngroups,
            comp_ids=comp_ids,
            **kwargs,
        )

    # Step 1: unwrap to an ndarray and remember how to re-wrap afterwards.
    if isinstance(values, (DatetimeArray, PeriodArray)):
        # All of the functions implemented here are ordinal, so we can
        # operate on the tz-naive equivalents
        unwrapped = values._ndarray.view("M8[ns]")
        rewrap_as = "datetime_or_period"
    elif isinstance(values, TimedeltaArray):
        # We have an ExtensionArray but not ExtensionDtype
        unwrapped = values._ndarray
        rewrap_as = "timedelta"
    elif isinstance(values.dtype, (BooleanDtype, _IntegerDtype)):
        # IntegerArray or BooleanArray
        unwrapped = values.to_numpy("float64", na_value=np.nan)
        rewrap_as = "masked"
    elif isinstance(values.dtype, FloatingDtype):
        # FloatingArray
        unwrapped = values.to_numpy(
            values.dtype.numpy_dtype,
            na_value=np.nan,
        )
        rewrap_as = "masked"
    else:
        raise NotImplementedError(
            f"function is not implemented for this dtype: {values.dtype}")

    # Step 2: run the operation on the plain ndarray.
    raw_result = self._cython_op_ndim_compat(
        unwrapped,
        min_count=min_count,
        ngroups=ngroups,
        comp_ids=comp_ids,
        mask=None,
        **kwargs,
    )

    if self.how == "rank":
        # i.e. how in WrappedCythonOp.cast_blocklist, since
        # other cast_blocklist methods dont go through cython_operation
        # preserve float64 dtype
        return raw_result

    # Step 3: re-wrap according to the original array type.
    if rewrap_as == "datetime_or_period":
        as_int = raw_result.view("i8")
        return type(values)(as_int, dtype=values.dtype)

    if rewrap_as == "timedelta":
        # res_values has the same dtype as original values
        return type(values)(raw_result)

    result_dtype = self._get_result_dtype(values.dtype)
    result_cls = result_dtype.construct_array_type()
    return result_cls._from_sequence(raw_result, dtype=result_dtype)
def _ea_wrap_cython_operation(
    self,
    values: ExtensionArray,
    min_count: int,
    ngroups: int,
    comp_ids: np.ndarray,
    **kwargs,
) -> ArrayLike:
    """
    Run the cython operation on an ExtensionArray.

    The array is unwrapped to an ndarray, passed to
    ``_cython_op_ndim_compat``, and the result is re-wrapped as an
    ExtensionArray unless the operation is "rank".
    """
    # TODO: general case implementation overridable by EAs.
    ea_dtype = values.dtype

    # Step 1: unwrap to an ndarray and remember how to re-wrap afterwards.
    if is_datetime64tz_dtype(ea_dtype) or is_period_dtype(ea_dtype):
        # All of the functions implemented here are ordinal, so we can
        # operate on the tz-naive equivalents
        unwrapped = values.view("M8[ns]")
        is_datetimelike = True
    elif is_integer_dtype(ea_dtype) or is_bool_dtype(ea_dtype):
        # IntegerArray or BooleanArray
        unwrapped = values.to_numpy("float64", na_value=np.nan)
        is_datetimelike = False
    elif is_float_dtype(ea_dtype):
        # FloatingArray
        # error: "ExtensionDtype" has no attribute "numpy_dtype"
        unwrapped = values.to_numpy(
            values.dtype.numpy_dtype,  # type: ignore[attr-defined]
            na_value=np.nan,
        )
        is_datetimelike = False
    else:
        raise NotImplementedError(
            f"function is not implemented for this dtype: {values.dtype}")

    # Step 2: run the operation on the unwrapped values.
    raw_result = self._cython_op_ndim_compat(
        # error: Argument 1 to "_cython_op_ndim_compat" of
        # "WrappedCythonOp" has incompatible type
        # "Union[ExtensionArray, ndarray]"; expected "ndarray"
        unwrapped,  # type: ignore[arg-type]
        min_count=min_count,
        ngroups=ngroups,
        comp_ids=comp_ids,
        mask=None,
        **kwargs,
    )

    if self.how == "rank":
        # i.e. how in WrappedCythonOp.cast_blocklist, since
        # other cast_blocklist methods dont go through cython_operation
        # preserve float64 dtype
        return raw_result

    # Step 3: re-wrap according to the original dtype.
    if is_datetimelike:
        as_int = raw_result.astype("i8", copy=False)
        # error: Too many arguments for "ExtensionArray"
        return type(values)(  # type: ignore[call-arg]
            as_int, dtype=values.dtype)

    result_dtype = self.get_result_dtype(values.dtype)
    # error: Item "dtype[Any]" of "Union[dtype[Any], ExtensionDtype]"
    # has no attribute "construct_array_type"
    result_cls = result_dtype.construct_array_type()  # type: ignore[union-attr]
    return result_cls._from_sequence(raw_result, dtype=result_dtype)
def _putmask(self, mask: npt.NDArray[np.bool_], value) -> None:
    """
    Set ``value`` at the positions selected by ``mask``.

    Delegates to the ``ExtensionArray`` base implementation, which uses
    __setitem__. The super() method
    NDArrayBackedExtensionArray._putmask uses np.putmask, which doesn't
    properly handle None/pd.NA.
    """
    base_putmask = ExtensionArray._putmask
    base_putmask(self, mask, value)