Ejemplo n.º 1
0
Archivo: range.py Proyecto: tnir/pandas
 def factorize(
     self,
     sort: bool = False,
     na_sentinel: int | lib.NoDefault = lib.no_default,
     use_na_sentinel: bool | lib.NoDefault = lib.no_default,
 ) -> tuple[npt.NDArray[np.intp], RangeIndex]:
     """
     Return positional codes for this index together with its uniques.

     The codes are ``0 .. len(self) - 1`` and the uniques are the index
     itself. When ``sort`` is true and the step is negative, both the
     codes and the uniques are returned reversed.

     Parameters
     ----------
     sort : bool, default False
         Whether to reverse the result for a negative-step index.
     na_sentinel, use_na_sentinel
         Only forwarded to ``resolve_na_sentinel``; the resolved value is
         not used here.

     Returns
     -------
     tuple
         ``(codes, uniques)``.
     """
     # Called purely for its side effect (emitting a warning if
     # appropriate); the return value is deliberately discarded.
     resolve_na_sentinel(na_sentinel, use_na_sentinel)

     positions = np.arange(len(self), dtype=np.intp)
     if not (sort and self.step < 0):
         return positions, self
     return positions[::-1], self[::-1]
Ejemplo n.º 2
0
    def factorize(
        self,
        na_sentinel: int | lib.NoDefault = lib.no_default,
        use_na_sentinel: bool | lib.NoDefault = lib.no_default,
    ) -> tuple[np.ndarray, ExtensionArray]:
        """
        Dictionary-encode the underlying data into codes and unique values.

        Parameters
        ----------
        na_sentinel, use_na_sentinel
            Passed to ``resolve_na_sentinel`` to determine the code written
            in place of NaN indices.

        Returns
        -------
        tuple
            ``(codes, uniques)`` where ``codes`` are the int64 values of the
            encoded indices and ``uniques`` is a new instance of this
            array's type wrapping the dictionary of the first chunk (or an
            empty array of the value type when there are no chunks).

        Raises
        ------
        NotImplementedError
            If the resolved sentinel is ``None``.
        """
        sentinel = resolve_na_sentinel(na_sentinel, use_na_sentinel)
        if sentinel is None:
            raise NotImplementedError(
                "Encoding NaN values is not yet implemented")

        encoded = self._data.dictionary_encode()
        per_chunk_indices = [chunk.indices for chunk in encoded.chunks]
        codes = pa.chunked_array(
            per_chunk_indices, type=encoded.type.index_type
        ).to_pandas()

        # A float result is patched before the integer cast: NaN entries
        # are overwritten with the sentinel (presumably these correspond
        # to missing values in the data -- confirm against pyarrow).
        if codes.dtype.kind == "f":
            nan_positions = np.isnan(codes)
            codes[nan_positions] = sentinel
        codes = codes.astype(np.int64, copy=False)

        # Uniques come from the first chunk's dictionary; with no chunks
        # at all, fall back to an empty array of the encoded value type.
        if encoded.num_chunks:
            unique_values = encoded.chunk(0).dictionary
        else:
            unique_values = pa.array([], type=encoded.type.value_type)
        uniques = type(self)(unique_values)

        return codes.values, uniques
Ejemplo n.º 3
0
    def factorize(
        self,
        na_sentinel: int | lib.NoDefault = lib.no_default,
        use_na_sentinel: bool | lib.NoDefault = lib.no_default,
    ) -> tuple[np.ndarray, ExtensionArray]:
        """
        Factorize the masked data into codes and an array of uniques.

        Parameters
        ----------
        na_sentinel, use_na_sentinel
            Passed to ``algos.resolve_na_sentinel``; the resolved value is
            forwarded to ``factorize_array`` as ``na_sentinel``.

        Returns
        -------
        tuple
            ``(codes, uniques)`` where ``uniques`` is a new instance of this
            array's type built from the unique values and an all-False
            mask of the same length.

        Raises
        ------
        NotImplementedError
            If the resolved sentinel is ``None``.
        """
        sentinel = algos.resolve_na_sentinel(na_sentinel, use_na_sentinel)
        if sentinel is None:
            raise NotImplementedError(
                "Encoding NaN values is not yet implemented")

        codes, uniques = factorize_array(
            self._data, na_sentinel=sentinel, mask=self._mask
        )

        # check that factorize_array correctly preserves dtype.
        assert uniques.dtype == self.dtype.numpy_dtype, (
            uniques.dtype,
            self.dtype,
        )

        # Every position of the uniques array gets a False mask entry.
        all_false_mask = np.zeros(len(uniques), dtype=bool)
        return codes, type(self)(uniques, all_false_mask)