예제 #1
0
파일: masked.py 프로젝트: tnir/pandas
    def factorize(
        self,
        na_sentinel: int | lib.NoDefault = lib.no_default,
        use_na_sentinel: bool | lib.NoDefault = lib.no_default,
    ) -> tuple[np.ndarray, ExtensionArray]:
        """
        Factorize the array's data into integer codes and unique values.

        Parameters
        ----------
        na_sentinel : int or lib.NoDefault
            Combined with ``use_na_sentinel`` through
            ``algos.resolve_na_sentinel``; the resolved value is what gets
            forwarded to ``factorize_array``.
        use_na_sentinel : bool or lib.NoDefault
            See ``na_sentinel``.

        Returns
        -------
        codes : np.ndarray
            First element returned by ``factorize_array``.
        uniques : ExtensionArray
            A new instance of ``type(self)`` wrapping the unique values,
            paired with an all-False boolean mask of the same length.

        Raises
        ------
        NotImplementedError
            If the two sentinel arguments resolve to ``None``.
        """
        sentinel = algos.resolve_na_sentinel(na_sentinel, use_na_sentinel)
        if sentinel is None:
            raise NotImplementedError("Encoding NaN values is not yet implemented")

        codes, unique_values = factorize_array(
            self._data, na_sentinel=sentinel, mask=self._mask
        )

        # Internal invariant: factorize_array must hand back uniques in this
        # array's underlying numpy dtype.
        assert unique_values.dtype == self.dtype.numpy_dtype, (
            unique_values.dtype,
            self.dtype,
        )

        # Every unique is flagged as not-missing in the new mask.
        unmasked = np.zeros(len(unique_values), dtype=bool)
        return codes, type(self)(unique_values, unmasked)
예제 #2
0
    def factorize(self, na_sentinel: int = -1) -> tuple[np.ndarray, ExtensionArray]:
        """
        Factorize the array's data into integer codes and unique values.

        Parameters
        ----------
        na_sentinel : int, default -1
            Forwarded unchanged to ``factorize_array`` (presumably the code
            assigned to masked entries; confirm against ``factorize_array``).

        Returns
        -------
        codes : np.ndarray
            First element returned by ``factorize_array``.
        uniques : ExtensionArray
            A new instance of ``type(self)`` wrapping the unique values cast
            to ``self.dtype.numpy_dtype``, paired with an all-False mask.
        """
        codes, raw_uniques = factorize_array(
            self._data, na_sentinel=na_sentinel, mask=self._mask
        )

        # The hashtables do not cover every bit width, so cast the result
        # back to this array's numpy dtype.
        native_uniques = raw_uniques.astype(self.dtype.numpy_dtype, copy=False)

        # Every unique is flagged as not-missing in the new mask.
        unmasked = np.zeros(len(native_uniques), dtype=bool)
        return codes, type(self)(native_uniques, unmasked)
예제 #3
0
    def factorize(self, na_sentinel: int = -1) -> tuple[np.ndarray, ExtensionArray]:
        """
        Factorize the array's data into integer codes and unique values.

        Parameters
        ----------
        na_sentinel : int, default -1
            Forwarded unchanged to ``factorize_array`` (presumably the code
            assigned to masked entries; confirm against ``factorize_array``).

        Returns
        -------
        codes : np.ndarray
            First element returned by ``factorize_array``.
        uniques : ExtensionArray
            A new instance of ``type(self)`` wrapping the unique values,
            paired with an all-False boolean mask of the same length.
        """
        codes, unique_values = factorize_array(
            self._data, na_sentinel=na_sentinel, mask=self._mask
        )

        # Internal invariant: factorize_array must hand back uniques in this
        # array's underlying numpy dtype.
        assert unique_values.dtype == self.dtype.numpy_dtype, (
            unique_values.dtype,
            self.dtype,
        )

        # Every unique is flagged as not-missing in the new mask.
        unmasked = np.zeros(len(unique_values), dtype=bool)
        return codes, type(self)(unique_values, unmasked)
예제 #4
0
파일: base.py 프로젝트: zaghambajwa/pandas
    def factorize(self, na_sentinel: int = -1) -> Tuple[np.ndarray, ExtensionArray]:
        """
        Encode the extension array as an enumerated type.

        Parameters
        ----------
        na_sentinel : int, default -1
            Code written into `codes` at positions holding missing values.

        Returns
        -------
        codes : ndarray
            Integer NumPy array acting as an indexer into the original
            ExtensionArray.
        uniques : ExtensionArray
            ExtensionArray holding the unique values of `self`.

            .. note::

               When `self` contains missing values, `uniques` does *not*
               include an entry for the ExtensionArray's NA value.

        See Also
        --------
        factorize : Top-level factorize method that dispatches here.

        Notes
        -----
        :meth:`pandas.factorize` additionally offers a `sort` keyword.
        """
        # Implementer note: subclasses can customise pandas.factorize in
        # one of two ways.
        # 1. Override _values_for_factorize and _from_factorized to choose
        #    which values reach pandas' internal factorization routines and
        #    how the resulting uniques are turned back into the original
        #    ExtensionArray type.
        # 2. Override ExtensionArray.factorize itself for complete control
        #    over factorization.
        values, na_value = self._values_for_factorize()

        codes, raw_uniques = factorize_array(
            values, na_sentinel=na_sentinel, na_value=na_value
        )

        # A separate name for the converted uniques keeps the ndarray and the
        # ExtensionArray apart, so no type-ignore is needed on the return.
        uniques_ea = self._from_factorized(raw_uniques, self)
        return codes, uniques_ea
예제 #5
0
파일: masked.py 프로젝트: prakhar987/pandas
    def factorize(self, na_sentinel: int = -1) -> tuple[np.ndarray, ExtensionArray]:
        """
        Factorize the array's data into integer codes and unique values.

        Parameters
        ----------
        na_sentinel : int, default -1
            Forwarded unchanged to ``factorize_array`` (presumably the code
            assigned to masked entries; confirm against ``factorize_array``).

        Returns
        -------
        codes : np.ndarray
            First element returned by ``factorize_array``.
        uniques : ExtensionArray
            A new instance of ``type(self)`` wrapping the unique values cast
            to ``self.dtype.numpy_dtype``, paired with an all-False mask.
        """
        codes, raw_uniques = factorize_array(
            self._data, na_sentinel=na_sentinel, mask=self._mask
        )

        # The hashtables do not cover every bit width, so cast the result
        # back to this array's numpy dtype.
        native_uniques = raw_uniques.astype(self.dtype.numpy_dtype, copy=False)

        # Distinct names for the ndarray and the wrapped array avoid rebinding
        # one variable to two types, so no type-ignore comments are required.
        unmasked = np.zeros(len(native_uniques), dtype=bool)
        uniques_ea = type(self)(native_uniques, unmasked)
        return codes, uniques_ea