def inverse_transform(self, X):
    """
    Map one-hot encoded data back to its original categorical values.

    Rows whose encoded block for a feature is all zeros are decoded as
    ``None`` when ``handle_unknown`` is ``'ignore'``; when a category was
    dropped instead, such rows are decoded as that dropped category.

    When ``self.input_type`` is ``'array'`` the decoded frame is converted
    to a cupy array. If that conversion raises ``ValueError`` a warning is
    emitted and the DataFrame is returned as-is.

    Parameters
    ----------
    X : array-like or sparse matrix, shape [n_samples, n_encoded_features]
        The transformed data.

    Returns
    -------
    X_tr : cudf.DataFrame or cupy.ndarray
        Inverse transformed array.
    """
    self._check_is_fitted()

    if cp.sparse.issparse(X):
        # Densify first: cupy.sparse 7.x has no argmax. After moving to
        # cupy 8.x this could become `X = X.tocsc()`, guarded by an extra
        # `and not cp.sparse.issparsecsc(X)` in the condition above.
        X = X.toarray()

    decoded = DataFrame(columns=self._encoders.keys())
    col_start = 0  # first column of X belonging to the current feature

    for feature, encoder in self._encoders.items():
        categories = encoder.classes_

        if self.drop is not None:
            # Mask out the category that was dropped for this feature.
            drop_positions = Series(self.drop_idx_[feature])
            is_dropped = Series(categories).isin(categories[drop_positions])
            if len(categories) == 1:
                # Only one category exists: every row decodes to it and
                # no columns of X are consumed for this feature.
                constant = GenericIndex(categories[0]).repeat(X.shape[0])
                decoded[feature] = Series(constant)
                continue
            categories = categories[~is_dropped]

        width = len(categories)
        col_stop = col_start + width
        block = X[:, col_start:col_stop]

        # The position of the largest entry in each row selects the
        # category for that row.
        winners = cp.argmax(block, axis=1)
        picked = categories.iloc[winners]
        column = Series(picked).reset_index(drop=True)

        if self.handle_unknown == 'ignore':
            # All-zero rows have no known category.
            has_category = block.any(axis=1)
            column.iloc[~has_category] = None
        elif self.drop is not None:
            # Either drop is None or handle_unknown is 'error'. With a
            # dropped category, an all-zero row can only stand for that
            # dropped value.
            row_totals = block.sum(axis=1)
            all_zero_rows = cp.asarray(row_totals == 0).flatten()
            if all_zero_rows.any():
                dropped_code = Series(self.drop_idx_[feature])
                dropped_value = encoder.inverse_transform(dropped_code)[0]
                column[all_zero_rows] = dropped_value

        decoded[feature] = column
        col_start = col_stop

    if self.input_type != 'array':
        return decoded

    try:
        return cp.asarray(decoded.as_gpu_matrix())
    except ValueError:
        warnings.warn("The input one hot encoding contains rows with "
                      "unknown categories. Arrays do not support null "
                      "values. Returning output as a DataFrame "
                      "instead.")
    return decoded