def factorize(
    self,
    sort: bool = False,
    na_sentinel: int | lib.NoDefault = lib.no_default,
    use_na_sentinel: bool | lib.NoDefault = lib.no_default,
) -> tuple[npt.NDArray[np.intp], RangeIndex]:
    """
    Encode this index as positional codes plus its unique values.

    The codes are ``0 .. len(self) - 1`` and the uniques are the index
    itself. When ``sort`` is truthy and ``self.step`` is negative, both the
    codes and the uniques are returned reversed.

    Parameters
    ----------
    sort : bool, default False
        Whether a descending index (negative ``step``) should be flipped.
    na_sentinel : int or lib.no_default
        Forwarded to ``resolve_na_sentinel``; otherwise unused here.
    use_na_sentinel : bool or lib.no_default
        Forwarded to ``resolve_na_sentinel``; otherwise unused here.

    Returns
    -------
    tuple
        ``(codes, uniques)`` where ``codes`` is an ``np.intp`` array.
    """
    # The resolved value is discarded on purpose: the call is made only so
    # that a warning can be emitted if appropriate for these arguments.
    resolve_na_sentinel(na_sentinel, use_na_sentinel)

    positions = np.arange(len(self), dtype=np.intp)

    # Common case: nothing to flip, the index is its own set of uniques.
    if not (sort and self.step < 0):
        return positions, self

    return positions[::-1], self[::-1]
def factorize(
    self,
    na_sentinel: int | lib.NoDefault = lib.no_default,
    use_na_sentinel: bool | lib.NoDefault = lib.no_default,
) -> tuple[np.ndarray, ExtensionArray]:
    """
    Encode the array as integer codes plus an array of unique values.

    The underlying ``self._data`` is dictionary-encoded; the per-chunk
    indices become the codes and the dictionary becomes the uniques.

    Parameters
    ----------
    na_sentinel : int or lib.no_default
        Passed to ``resolve_na_sentinel`` to determine the code written at
        NaN positions.
    use_na_sentinel : bool or lib.no_default
        Passed to ``resolve_na_sentinel`` together with ``na_sentinel``.

    Returns
    -------
    tuple
        ``(codes, uniques)``: ``codes`` is an ``np.int64`` ndarray and
        ``uniques`` is an instance of ``type(self)``.

    Raises
    ------
    NotImplementedError
        If the resolved sentinel is ``None``.
    """
    sentinel = resolve_na_sentinel(na_sentinel, use_na_sentinel)
    if sentinel is None:
        raise NotImplementedError("Encoding NaN values is not yet implemented")

    dict_encoded = self._data.dictionary_encode()

    # Re-assemble the per-chunk index arrays into a single pandas object.
    chunk_indices = [chunk.indices for chunk in dict_encoded.chunks]
    codes = pa.chunked_array(
        chunk_indices, type=dict_encoded.type.index_type
    ).to_pandas()

    # A float result can carry NaN entries; those positions receive the
    # sentinel before everything is cast to int64.
    if codes.dtype.kind == "f":
        codes[np.isnan(codes)] = sentinel
    codes = codes.astype(np.int64, copy=False)

    # NOTE(review): only the first chunk's dictionary is used -- presumably
    # all chunks share one dictionary after encoding; confirm.
    if dict_encoded.num_chunks:
        unique_values = dict_encoded.chunk(0).dictionary
    else:
        # No chunks at all: build an empty array of the encoded value type.
        unique_values = pa.array([], type=dict_encoded.type.value_type)
    uniques = type(self)(unique_values)

    return codes.values, uniques
def factorize(
    self,
    na_sentinel: int | lib.NoDefault = lib.no_default,
    use_na_sentinel: bool | lib.NoDefault = lib.no_default,
) -> tuple[np.ndarray, ExtensionArray]:
    """
    Encode the masked array as integer codes plus its unique values.

    Delegates to ``factorize_array`` with ``self._data`` and ``self._mask``,
    then wraps the resulting uniques in a new ``type(self)`` instance whose
    mask is all ``False``.

    Parameters
    ----------
    na_sentinel : int or lib.no_default
        Passed to ``algos.resolve_na_sentinel``; the resolved value is
        forwarded to ``factorize_array``.
    use_na_sentinel : bool or lib.no_default
        Passed to ``algos.resolve_na_sentinel`` together with
        ``na_sentinel``.

    Returns
    -------
    tuple
        ``(codes, uniques)`` where ``uniques`` is an instance of
        ``type(self)``.

    Raises
    ------
    NotImplementedError
        If the resolved sentinel is ``None``.
    """
    sentinel = algos.resolve_na_sentinel(na_sentinel, use_na_sentinel)
    if sentinel is None:
        raise NotImplementedError("Encoding NaN values is not yet implemented")

    codes, unique_values = factorize_array(
        self._data, na_sentinel=sentinel, mask=self._mask
    )

    # Sanity check: factorize_array must hand back uniques in the numpy
    # dtype that backs this array's dtype.
    assert unique_values.dtype == self.dtype.numpy_dtype, (
        unique_values.dtype,
        self.dtype,
    )

    # The uniques are wrapped with an all-False mask of matching length.
    return codes, type(self)(
        unique_values, np.zeros(len(unique_values), dtype=bool)
    )