def inverse_transform(self, y, threshold=None):
    """
    Map a binarized label matrix back to the original class labels.

    Parameters
    ----------

    y : array of shape [n_samples, n_classes]
        Binarized labels. May be a cupyx sparse matrix, a scipy sparse
        matrix, or anything else that ``cp.asarray`` accepts.
    threshold : float
        this value is currently ignored

    Returns
    -------

    arr : array with original labels
    """
    # scipy is optional: without it, the scipy sparsity check is replaced
    # by a stand-in imported from cuml.common.import_utils.
    if not has_scipy():
        from cuml.common.import_utils import dummy_function_always_false \
            as is_scipy_sparse
    else:
        from scipy.sparse import isspmatrix as is_scipy_sparse

    # Each branch produces one class position per label; the positions are
    # then translated into the fitted classes by invert_labels().
    if cupyx.scipy.sparse.isspmatrix(y):
        # Device sparse input: the CSR column indices are the positions.
        # NOTE(review): presumably one stored entry per row - confirm.
        csr_indices = y.tocsr().indices
        y_mapped = csr_indices.astype(self._classes_.dtype)
    elif is_scipy_sparse(y):
        # Host sparse input: move the CSR column indices to the device.
        y = y.tocsr()
        y_mapped = rmm_cupy_ary(cp.array, y.indices,
                                dtype=y.indices.dtype)
    else:
        # Everything else: the position of the row-wise maximum.
        device_y = rmm_cupy_ary(cp.asarray, y, dtype=y.dtype)
        positions = rmm_cupy_ary(cp.argmax, device_y, axis=1)
        y_mapped = positions.astype(y.dtype)

    return invert_labels(y_mapped, self._classes_)
def predict(self, X) -> CumlArray:
    """
    Classify each row of the test matrix X.

    The predicted class of a row is the one with the largest value in
    ``self._joint_log_likelihood(X)``, translated back to the entries of
    ``self.classes_``.
    """
    # scipy is optional: without it, the scipy sparsity check is replaced
    # by a stand-in imported from cuml.common.import_utils.
    if not has_scipy():
        from cuml.common.import_utils import dummy_function_always_false \
            as is_scipy_sparse
    else:
        from scipy.sparse import isspmatrix as is_scipy_sparse

    # todo: use a sparse CumlArray style approach when ready
    # https://github.com/rapidsai/cuml/issues/2216
    sparse_input = is_scipy_sparse(X) or cupyx.scipy.sparse.isspmatrix(X)
    if sparse_input:
        # Rebuild the matrix as a cupyx COO matrix from cupy arrays.
        coo = X.tocoo()
        row_idx = cp.asarray(coo.row, dtype=coo.row.dtype)
        col_idx = cp.asarray(coo.col, dtype=coo.col.dtype)
        values = cp.asarray(coo.data, dtype=coo.data.dtype)
        X = cupyx.scipy.sparse.coo_matrix((values, (row_idx, col_idx)),
                                          shape=coo.shape)
    else:
        X = input_to_cupy_array(X, order='K').array

    scores = self._joint_log_likelihood(X)
    best = cp.argmax(scores, axis=1).astype(self.classes_.dtype)
    return invert_labels(best, classes=self.classes_)
def _conv_array_to_sparse(arr):
    """
    Converts an array (or cudf.DataFrame) to a cupy sparse matrix

    :param arr: scipy or cupy sparse matrix, cudf DataFrame,
                dense numpy or cupy array
    :return: cupy sparse matrix. scipy sparse and dense inputs are
             converted to CSR; a cupy sparse input is returned unchanged,
             in whatever format it already has; a cudf DataFrame is
             delegated to ``_conv_df_to_sparse``.
    :raises ValueError: if ``arr`` is none of the supported types
    """
    # scipy is optional: without it, the scipy sparsity check is replaced
    # by a stand-in imported from cuml.common.import_utils.
    if has_scipy():
        from scipy.sparse import isspmatrix as scipy_sparse_isspmatrix
    else:
        from cuml.common.import_utils import dummy_function_always_false \
            as scipy_sparse_isspmatrix

    if scipy_sparse_isspmatrix(arr):
        ret = cupyx.scipy.sparse.csr_matrix(arr.tocsr())
    elif cupyx.scipy.sparse.isspmatrix(arr):
        # Already a device sparse matrix: passed through without conversion.
        ret = arr
    elif isinstance(arr, cudf.DataFrame):
        ret = _conv_df_to_sparse(arr)
    elif isinstance(arr, np.ndarray):
        # Host array: copy to the device first, then build the CSR matrix.
        cupy_ary = rmm_cupy_ary(cp.asarray, arr, dtype=arr.dtype)
        ret = cupyx.scipy.sparse.csr_matrix(cupy_ary)
    elif isinstance(arr, cp.ndarray):
        # Use the public ``cp.ndarray`` alias rather than the private
        # ``cp.core.core.ndarray`` path, which is not stable across CuPy
        # releases.
        ret = cupyx.scipy.sparse.csr_matrix(arr)
    else:
        raise ValueError("Unexpected input type %s" % type(arr))
    return ret
def predict(self, X) -> CumlArray:
    """
    Classify each row of the test matrix X.

    The predicted class of a row is the one with the largest value in
    ``self._joint_log_likelihood(X)``, translated back to the entries of
    ``self.classes_``.
    """
    # scipy is optional: without it, the scipy sparsity check is replaced
    # by a stand-in imported from cuml.common.import_utils.
    if not has_scipy():
        from cuml.common.import_utils import dummy_function_always_false \
            as is_scipy_sparse
    else:
        from scipy.sparse import isspmatrix as is_scipy_sparse

    # todo: use a sparse CumlArray style approach when ready
    # https://github.com/rapidsai/cuml/issues/2216
    sparse_input = is_scipy_sparse(X) or cupyx.scipy.sparse.isspmatrix(X)
    if sparse_input:
        X = _convert_x_sparse(X)
    else:
        # Dense input is restricted to the dtypes listed in check_dtype.
        accepted_dtypes = [cp.float32, cp.float64, cp.int32]
        X = input_to_cupy_array(X, order='K',
                                check_dtype=accepted_dtypes).array

    scores = self._joint_log_likelihood(X)
    best = cp.argmax(scores, axis=1).astype(self.classes_.dtype)
    return invert_labels(best, classes=self.classes_)
def _partial_fit(self, X, y, sample_weight=None,
                 _classes=None) -> "MultinomialNB":
    """
    Update the model's counters and log-probabilities with one batch.

    Parameters
    ----------

    X : scipy or cupyx sparse matrix, or input accepted by
        input_to_cupy_array
        Training samples.
    y : input accepted by input_to_cupy_array
        Labels for the rows of X.
    sample_weight : optional
        Not used by this method.
    _classes : optional
        Explicit class list; only consulted on the first call, i.e. while
        ``self.fit_called_`` is still false.

    Returns
    -------

    self
    """
    # scipy is optional: without it, the scipy sparsity check is replaced
    # by a stand-in imported from cuml.common.import_utils.
    if not has_scipy():
        from cuml.common.import_utils import dummy_function_always_false \
            as is_scipy_sparse
    else:
        from scipy.sparse import isspmatrix as is_scipy_sparse

    # todo: use a sparse CumlArray style approach when ready
    # https://github.com/rapidsai/cuml/issues/2216
    sparse_input = is_scipy_sparse(X) or cupyx.scipy.sparse.isspmatrix(X)
    if sparse_input:
        # Rebuild the matrix as a cupyx COO matrix from cupy arrays.
        coo = X.tocoo()
        row_idx = cp.asarray(coo.row, dtype=coo.row.dtype)
        col_idx = cp.asarray(coo.col, dtype=coo.col.dtype)
        values = cp.asarray(coo.data, dtype=coo.data.dtype)
        X = cupyx.scipy.sparse.coo_matrix((values, (row_idx, col_idx)),
                                          shape=coo.shape)
    else:
        X = input_to_cupy_array(X, order='K').array

    y = input_to_cupy_array(y).array

    Y, label_classes = make_monotonic(y, copy=True)

    if self.fit_called_:
        # Later batches are checked against the classes already stored.
        check_labels(Y, self.classes_)
    else:
        # First batch: record the classes and set up the counters.
        self.fit_called_ = True
        if _classes is None:
            self.classes_ = label_classes
        else:
            _classes, *_ = input_to_cuml_array(_classes, order='K')
            check_labels(Y, _classes)
            self.classes_ = _classes

        self._n_classes_ = self.classes_.shape[0]
        self._n_features_ = X.shape[1]
        self._init_counters(self._n_classes_, self._n_features_, X.dtype)

    self._count(X, Y)

    self._update_feature_log_prob(self.alpha)
    self._update_class_log_prior(class_prior=self._class_prior_)

    return self
def predict_log_proba(self, X):
    """
    Compute normalized log-probability estimates for the rows of X.

    The joint log-likelihood from ``self._joint_log_likelihood(X)`` is
    normalized per row by subtracting log(sum(exp(.))) of that row. The
    result is wrapped in a CumlArray and converted to the output type
    reported by ``self._get_output_type(X)`` for the original input.
    """
    # Must be resolved before X is replaced by its converted form.
    out_type = self._get_output_type(X)

    # scipy is optional: without it, the scipy sparsity check is replaced
    # by a stand-in imported from cuml.common.import_utils.
    if not has_scipy():
        from cuml.common.import_utils import dummy_function_always_false \
            as is_scipy_sparse
    else:
        from scipy.sparse import isspmatrix as is_scipy_sparse

    # todo: use a sparse CumlArray style approach when ready
    # https://github.com/rapidsai/cuml/issues/2216
    sparse_input = is_scipy_sparse(X) or cupyx.scipy.sparse.isspmatrix(X)
    if sparse_input:
        # Rebuild the matrix as a cupyx COO matrix from cupy arrays.
        coo = X.tocoo()
        row_idx = cp.asarray(coo.row, dtype=coo.row.dtype)
        col_idx = cp.asarray(coo.col, dtype=coo.col.dtype)
        values = cp.asarray(coo.data, dtype=coo.data.dtype)
        X = cupyx.scipy.sparse.coo_matrix((values, (row_idx, col_idx)),
                                          shape=coo.shape)
    else:
        X = input_to_cuml_array(X, order='K').array.to_output('cupy')

    scores = self._joint_log_likelihood(X)

    # normalize by P(X) = P(f_1, ..., f_n)

    # Stable log(sum(exp())): the row maximum is subtracted before
    # exponentiating to prevent inf, then added back afterwards.
    row_max = cp.amax(scores, axis=1, keepdims=True)
    shifted_exp = cp.exp(scores - row_max)
    log_of_sum = cp.log(cp.sum(shifted_exp, axis=1))
    row_max = cp.squeeze(row_max, axis=1)

    log_norm = row_max + log_of_sum

    # Shape the normalizer as a row so that its transpose broadcasts as a
    # column against the score matrix.
    if log_norm.ndim < 2:
        log_norm = log_norm.reshape((1, log_norm.shape[0]))

    normalized = scores - log_norm.T
    return CumlArray(normalized).to_output(out_type)
def predict_log_proba(self, X) -> CumlArray:
    """
    Compute normalized log-probability estimates for the rows of X.

    The joint log-likelihood from ``self._joint_log_likelihood(X)`` is
    normalized per row by subtracting log(sum(exp(.))) of that row.
    """
    # scipy is optional: without it, the scipy sparsity check is replaced
    # by a stand-in imported from cuml.common.import_utils.
    if not has_scipy():
        from cuml.common.import_utils import dummy_function_always_false \
            as is_scipy_sparse
    else:
        from scipy.sparse import isspmatrix as is_scipy_sparse

    # todo: use a sparse CumlArray style approach when ready
    # https://github.com/rapidsai/cuml/issues/2216
    sparse_input = is_scipy_sparse(X) or cupyx.scipy.sparse.isspmatrix(X)
    if sparse_input:
        X = _convert_x_sparse(X)
    else:
        # Dense input is restricted to the dtypes listed in check_dtype.
        accepted_dtypes = [cp.float32, cp.float64, cp.int32]
        X = input_to_cupy_array(X, order='K',
                                check_dtype=accepted_dtypes).array

    scores = self._joint_log_likelihood(X)

    # normalize by P(X) = P(f_1, ..., f_n)

    # Stable log(sum(exp())): the row maximum is subtracted before
    # exponentiating to prevent inf, then added back afterwards.
    row_max = cp.amax(scores, axis=1, keepdims=True)
    shifted_exp = cp.exp(scores - row_max)
    log_of_sum = cp.log(cp.sum(shifted_exp, axis=1))
    row_max = cp.squeeze(row_max, axis=1)

    log_norm = row_max + log_of_sum

    # Shape the normalizer as a row so that its transpose broadcasts as a
    # column against the score matrix.
    if log_norm.ndim < 2:
        log_norm = log_norm.reshape((1, log_norm.shape[0]))

    normalized = scores - log_norm.T
    return normalized
def _partial_fit(self, X, y, sample_weight=None, _classes=None,
                 convert_dtype=True) -> "MultinomialNB":
    """
    Update the model's counters and log-probabilities with one batch.

    Parameters
    ----------

    X : scipy or cupyx sparse matrix, or input accepted by
        input_to_cupy_array
        Training samples. Dense input is checked against float32,
        float64 and int32.
    y : input accepted by input_to_cupy_array
        Labels for the rows of X. The expected dtype is int32 when X is
        float32 or int32, and int64 otherwise.
    sample_weight : optional
        Not used by this method.
    _classes : optional
        Explicit class list; only consulted on the first call, i.e. while
        ``self.fit_called_`` is still false.
    convert_dtype : bool
        When true, ``y`` and ``_classes`` are converted to the expected
        label dtype; when false, no conversion is requested and ``y`` is
        only checked against that dtype.

    Returns
    -------

    self
    """
    # scipy is optional: without it, the scipy sparsity check is replaced
    # by a stand-in imported from cuml.common.import_utils.
    if has_scipy():
        from scipy.sparse import isspmatrix as scipy_sparse_isspmatrix
    else:
        from cuml.common.import_utils import dummy_function_always_false \
            as scipy_sparse_isspmatrix

    # todo: use a sparse CumlArray style approach when ready
    # https://github.com/rapidsai/cuml/issues/2216
    if scipy_sparse_isspmatrix(X) or cupyx.scipy.sparse.isspmatrix(X):
        X = _convert_x_sparse(X)
        # TODO: Expanded this since sparse kernel doesn't
        # actually require the scipy sparse container format.
    else:
        X = input_to_cupy_array(
            X, order='K',
            check_dtype=[cp.float32, cp.float64, cp.int32]).array

    # The label dtype is tied to the dtype of X.
    expected_y_dtype = cp.int32 if X.dtype in [cp.float32,
                                               cp.int32] else cp.int64
    y = input_to_cupy_array(
        y,
        convert_to_dtype=(expected_y_dtype if convert_dtype else False),
        check_dtype=expected_y_dtype).array

    Y, label_classes = make_monotonic(y, copy=True)

    if not self.fit_called_:
        # First batch: record the classes and set up the counters.
        self.fit_called_ = True
        if _classes is not None:
            _classes, *_ = input_to_cuml_array(
                _classes, order='K',
                convert_to_dtype=(expected_y_dtype
                                  if convert_dtype else False))
            check_labels(Y, _classes)
            self.classes_ = _classes
        else:
            self.classes_ = label_classes

        self._n_classes_ = self.classes_.shape[0]
        self._n_features_ = X.shape[1]
        self._init_counters(self._n_classes_, self._n_features_, X.dtype)
    else:
        # Later batches are checked against the classes already stored.
        check_labels(Y, self.classes_)

    # Use the cupyx.scipy.sparse namespace here, matching the check at the
    # top of this method; the ``cp.sparse`` alias is deprecated in CuPy.
    # NOTE(review): the sparse branch reads X.row / X.col, so
    # _convert_x_sparse presumably returns a COO matrix - confirm.
    if cupyx.scipy.sparse.isspmatrix(X):
        self._count_sparse(X.row, X.col, X.data, X.shape, Y)
    else:
        self._count(X, Y)

    self._update_feature_log_prob(self.alpha)
    self._update_class_log_prior(class_prior=self._class_prior_)

    return self