예제 #1
0
    def inverse_transform(self, y: cudf.Series) -> cudf.Series:
        """Map ordinal labels back to their original string labels.

        Parameters
        ----------
        y : cudf.Series, dtype=int32
            Ordinal labels to be reverted. The series is passed through
            ``_enforce_npint32`` before it is used.

        Returns
        -------
        reverted : cudf.Series
            Reverted labels, built from the strings gathered out of
            ``self._cats``.

        Raises
        ------
        TypeError
            If ``y`` is not a ``cudf.Series``.
        ValueError
            If a distinct value of ``y`` is below zero or not smaller than
            the number of keys held by ``self._cats``.
        """
        # NOTE(review): presumably raises when the encoder has not been
        # fitted yet -- confirm against _check_is_fitted
        self._check_is_fitted()

        # only cudf.Series input is accepted
        if not isinstance(y, cudf.Series):
            message = 'Input of type {} is not cudf.Series'.format(type(y))
            raise TypeError(message)

        # make sure the labels are np.int32, converting them if they are not
        y = _enforce_npint32(y)

        # every distinct ordinal has to fall inside [0, number of categories)
        seen_ordinals = y.unique()
        n_categories = len(self._cats.keys())
        for ordinal in seen_ordinals:
            if ordinal < 0 or ordinal >= n_categories:
                message = 'y contains previously unseen label {}'.format(
                    ordinal)
                raise ValueError(message)

        # hand the address of y's device buffer, plus its length, to the
        # category object so it can gather the matching strings
        device_address = y.data.mem.device_ctypes_pointer.value
        gathered = self._cats.gather_strings(device_address, len(y))

        return cudf.Series(gathered)
예제 #2
0
    def inverse_transform(self, y: cudf.Series) -> cudf.Series:
        """Map ordinal labels back to the original labels.

        Parameters
        ----------
        y : cudf.Series, dtype=int32
            Ordinal labels to be reverted

        Returns
        -------
        reverted : cudf.Series
            Reverted labels

        Raises
        ------
        TypeError
            If ``y`` is not a ``cudf.Series``.
        ValueError
            If ``self.handle_unknown == 'error'`` and a distinct value of
            ``y`` is below zero or not smaller than ``len(self.classes_)``.
        """
        # NOTE(review): presumably raises when the encoder has not been
        # fitted yet -- confirm against _check_is_fitted
        self._check_is_fitted()

        # only cudf.Series input is accepted
        if not isinstance(y, cudf.Series):
            message = 'Input of type {} is not cudf.Series'.format(type(y))
            raise TypeError(message)

        # range validation runs on the values as given, before the cast to
        # self.dtype further down
        distinct_ordinals = y.unique()
        n_classes = len(self.classes_)
        if self.handle_unknown == 'error':
            # the distinct values are checked one by one on the host
            for ordinal in distinct_ordinals.values_host:
                if ordinal < 0 or ordinal >= n_classes:
                    message = 'y contains previously unseen label {}'.format(
                        ordinal)
                    raise ValueError(message)

        y = y.astype(self.dtype)

        # positions 0 .. n_classes - 1, in the same dtype as the labels
        positions = cudf.Series(cp.arange(n_classes))
        positions = positions.astype(self.dtype)

        # swap every position found in y for the class stored at that position
        # NOTE(review): the trailing False is presumably find_and_replace's
        # all_nan flag -- confirm against the cudf column API
        replaced = y._column.find_and_replace(positions, self.classes_, False)

        return cudf.Series(replaced)
예제 #3
0
    def inverse_transform(self, y: cudf.Series) -> cudf.Series:
        """Map ordinal labels back to the original labels.

        Parameters
        ----------
        y : cudf.Series, pandas.Series, cupy.ndarray or numpy.ndarray
            dtype=int32
            Ordinal labels to be reverted. The input is first passed
            through ``self._to_cudf_series``.

        Returns
        -------
        reverted : cudf.Series
            Reverted labels. The result is always wrapped in a
            ``cudf.Series`` by this method.

        Raises
        ------
        ValueError
            If ``self.handle_unknown == 'error'`` and a distinct value of
            ``y`` is below zero or not smaller than ``len(self.classes_)``.
        """
        # NOTE(review): presumably raises when the encoder has not been
        # fitted yet -- confirm against _check_is_fitted
        self._check_is_fitted()

        # NOTE(review): presumably converts the accepted input kinds to a
        # cudf.Series -- confirm against _to_cudf_series
        y = self._to_cudf_series(y)

        # range validation runs on the converted values, before the cast to
        # self.dtype further down
        distinct_ordinals = y.unique()
        n_classes = len(self.classes_)
        if self.handle_unknown == 'error':
            # the distinct values are checked one by one on the host
            for ordinal in distinct_ordinals.values_host:
                if ordinal < 0 or ordinal >= n_classes:
                    message = 'y contains previously unseen label {}'.format(
                        ordinal)
                    raise ValueError(message)

        y = y.astype(self.dtype)

        # positions 0 .. n_classes - 1, in the same dtype as the labels
        positions = cudf.Series(cp.arange(n_classes))
        positions = positions.astype(self.dtype)

        # swap every position found in y for the class stored at that position
        # NOTE(review): the trailing False is presumably find_and_replace's
        # all_nan flag -- confirm against the cudf column API
        replaced = y._column.find_and_replace(positions, self.classes_, False)

        return cudf.Series(replaced)
예제 #4
0
def test_label_encode_dtype(ncats, cat_dtype):
    """Check the dtype produced by label-encoding a string Series.

    A Series of ``ncats + 1`` stringified integers (``i % ncats``) is built,
    so one category value appears twice. It is then encoded against its own
    unique values, and the dtype of the encoded column must equal
    ``cat_dtype``.

    NOTE(review): ``ncats`` and ``cat_dtype`` are presumably supplied by a
    pytest parametrization outside this view -- confirm.
    """
    raw_labels = [str(i % ncats) for i in range(ncats + 1)]
    series = Series(raw_labels)

    # categories are the distinct values, kept in the Series' own dtype
    categories = series.unique().astype(series.dtype)
    encoded = series.label_encoding(cats=categories)

    np.testing.assert_equal(encoded.dtype, cat_dtype)
예제 #5
0
 def _fit_unique(self, y: cudf.Series):
     """Store the distinct values of ``y`` in ``self._cats_parts``.

     The unique values of ``y`` are converted with ``to_pandas()`` and the
     result is appended to ``self._cats_parts``. Nothing is returned.
     """
     unique_as_pandas = y.unique().to_pandas()
     self._cats_parts.append(unique_as_pandas)