def factorize(
    self,
    na_sentinel: int | lib.NoDefault = lib.no_default,
    use_na_sentinel: bool | lib.NoDefault = lib.no_default,
) -> tuple[np.ndarray, ExtensionArray]:
    """
    Factorize the masked data into codes and an array of unique values.

    Parameters
    ----------
    na_sentinel : int or lib.NoDefault, default lib.no_default
        Combined with `use_na_sentinel` by ``algos.resolve_na_sentinel``;
        the resolved value is what gets handed to ``factorize_array``.
    use_na_sentinel : bool or lib.NoDefault, default lib.no_default
        Second input to ``algos.resolve_na_sentinel``.

    Returns
    -------
    codes : np.ndarray
        First item returned by ``factorize_array``.
    uniques : ExtensionArray
        A new ``type(self)`` wrapping the unique values, paired with an
        all-False mask.

    Raises
    ------
    NotImplementedError
        If the sentinel arguments resolve to ``None``.
    """
    sentinel = algos.resolve_na_sentinel(na_sentinel, use_na_sentinel)
    if sentinel is None:
        raise NotImplementedError("Encoding NaN values is not yet implemented")

    codes, distinct = factorize_array(
        self._data, na_sentinel=sentinel, mask=self._mask
    )

    # Sanity check: factorize_array is expected to hand back the same numpy
    # dtype that backs this array.
    assert distinct.dtype == self.dtype.numpy_dtype, (distinct.dtype, self.dtype)

    # None of the unique values is flagged as missing.
    nothing_masked = np.zeros(len(distinct), dtype=bool)
    return codes, type(self)(distinct, nothing_masked)
def factorize(self, na_sentinel: int = -1) -> tuple[np.ndarray, ExtensionArray]:
    """
    Factorize the masked data into codes and an array of unique values.

    Parameters
    ----------
    na_sentinel : int, default -1
        Passed through unchanged to ``factorize_array``.

    Returns
    -------
    codes : np.ndarray
        First item returned by ``factorize_array``.
    uniques : ExtensionArray
        A new ``type(self)`` built from the unique values cast to
        ``self.dtype.numpy_dtype``, paired with an all-False mask.
    """
    codes, distinct = factorize_array(
        self._data, na_sentinel=na_sentinel, mask=self._mask
    )

    # The hashtables don't cover every bit width, so coerce the result back
    # to this array's numpy dtype (no copy is made when it already matches).
    distinct = distinct.astype(self.dtype.numpy_dtype, copy=False)

    # None of the unique values is flagged as missing.
    nothing_masked = np.zeros(len(distinct), dtype=bool)
    return codes, type(self)(distinct, nothing_masked)
def factorize(self, na_sentinel: int = -1) -> tuple[np.ndarray, ExtensionArray]:
    """
    Factorize the masked data into codes and an array of unique values.

    Parameters
    ----------
    na_sentinel : int, default -1
        Passed through unchanged to ``factorize_array``.

    Returns
    -------
    codes : np.ndarray
        First item returned by ``factorize_array``.
    uniques : ExtensionArray
        A new ``type(self)`` wrapping the unique values, paired with an
        all-False mask.
    """
    codes, distinct = factorize_array(
        self._data, na_sentinel=na_sentinel, mask=self._mask
    )

    # Sanity check: factorize_array is expected to hand back the same numpy
    # dtype that backs this array.
    assert distinct.dtype == self.dtype.numpy_dtype, (distinct.dtype, self.dtype)

    # None of the unique values is flagged as missing.
    nothing_masked = np.zeros(len(distinct), dtype=bool)
    return codes, type(self)(distinct, nothing_masked)
def factorize(self, na_sentinel: int = -1) -> Tuple[np.ndarray, ExtensionArray]:
    """
    Encode the extension array as an enumerated type.

    Parameters
    ----------
    na_sentinel : int, default -1
        Value placed in the `codes` array wherever a value is missing.

    Returns
    -------
    codes : ndarray
        Integer NumPy array that acts as an indexer into the original
        ExtensionArray.
    uniques : ExtensionArray
        ExtensionArray holding the unique values of `self`.

        .. note::

           When `self` contains missing values, uniques will *not*
           include an entry for the ExtensionArray's NA value.

    See Also
    --------
    factorize : Top-level factorize method that dispatches here.

    Notes
    -----
    :meth:`pandas.factorize` additionally offers a `sort` keyword.
    """
    # Implementer note: subclasses can customise how pandas.factorize
    # treats them in one of two ways.
    # 1. Override _values_for_factorize and _from_factorized.
    #    These choose the values given to pandas' internal factorization
    #    routines and how the result is turned back into the original
    #    ExtensionArray type.
    # 2. Override ExtensionArray.factorize itself.
    #    This gives complete control over factorization.
    values, na_value = self._values_for_factorize()
    codes, raw_uniques = factorize_array(
        values, na_sentinel=na_sentinel, na_value=na_value
    )

    # Binding the converted result to its own name (instead of rebinding
    # the ndarray variable) keeps the declared return type accurate.
    uniques_ea = self._from_factorized(raw_uniques, self)
    return codes, uniques_ea
def factorize(self, na_sentinel: int = -1) -> tuple[np.ndarray, ExtensionArray]:
    """
    Factorize the masked data into codes and an array of unique values.

    Parameters
    ----------
    na_sentinel : int, default -1
        Passed through unchanged to ``factorize_array``.

    Returns
    -------
    codes : np.ndarray
        First item returned by ``factorize_array``.
    uniques : ExtensionArray
        A new ``type(self)`` built from the unique values cast to
        ``self.dtype.numpy_dtype``, paired with an all-False mask.
    """
    codes, raw_uniques = factorize_array(
        self._data, na_sentinel=na_sentinel, mask=self._mask
    )

    # The hashtables don't cover every bit width, so coerce the result back
    # to this array's numpy dtype (no copy is made when it already matches).
    typed_uniques = raw_uniques.astype(self.dtype.numpy_dtype, copy=False)

    # Wrap under a separate name rather than rebinding the ndarray variable;
    # the all-False mask marks every unique value as present.
    wrapped = type(self)(typed_uniques, np.zeros(len(typed_uniques), dtype=bool))
    return codes, wrapped