예제 #1
0
def test_monotonic_validate_invert_labels(arr_type, dtype, copy):
    """Exercise make_monotonic, check_labels and invert_labels end to end.

    Parameters
    ----------
    arr_type : str
        ``"cp"`` moves the input labels into a cupy array first; any other
        value leaves them as the numpy array built here.
    dtype : dtype
        Element type used for the labels and for every class array.
    copy : bool
        Forwarded to make_monotonic and invert_labels. The in-place
        expectations are only asserted when the input is a cupy array.
    """
    arr = np.array([0, 15, 10, 50, 20, 50], dtype=dtype)
    original = arr.copy()

    on_device = arr_type == "cp"
    if on_device:
        arr = cp.asarray(arr, dtype=dtype)
        arr_orig = arr.copy()

    monotonic, _mapped_classes = make_monotonic(arr, copy=copy)
    cp.cuda.Stream.null.synchronize()

    expected_monotonic = np.array([0, 2, 1, 4, 3, 4])
    assert array_equal(monotonic.get(), expected_monotonic)

    # In-place behaviour is only of interest when the data is on the device.
    if on_device and copy:
        assert array_equal(arr_orig.get(), arr.get())
    elif on_device:
        assert array_equal(arr.get(), monotonic.get())

    # An incomplete class set must be rejected, the complete one accepted.
    for candidate, should_pass in (([0, 1, 2], False),
                                   ([0, 1, 2, 3, 4], True)):
        classes = cp.asarray(candidate, dtype=dtype)
        val_labels = check_labels(monotonic.get(), classes=classes)
        cp.cuda.Stream.null.synchronize()
        assert bool(val_labels) == should_pass

    if on_device:
        monotonic_copy = monotonic.copy()

    original_classes = cp.asarray([0, 10, 15, 20, 50], dtype=dtype)
    inverted = invert_labels(monotonic, classes=original_classes, copy=copy)
    cp.cuda.Stream.null.synchronize()

    # With copy=True the monotonic input must be untouched; otherwise it is
    # expected to have been overwritten with the original label values.
    if on_device and copy:
        assert array_equal(monotonic_copy.get(), monotonic.get())
    elif on_device:
        assert array_equal(monotonic.get(), arr_orig.get())

    assert array_equal(inverted.get(), original)
예제 #2
0
def _local_cm(inputs, labels, use_sample_weight):
    """Build a dense confusion matrix for one set of inputs.

    Parameters
    ----------
    inputs : sequence
        ``(y_true, y_pred, sample_weight)`` when ``use_sample_weight`` is
        truthy, otherwise ``(y_true, y_pred)``.
    labels : array
        Labels that index the matrix; ``labels.size`` fixes its side length.
    use_sample_weight : bool
        Whether ``inputs`` carries a weight array. When it does not, every
        sample gets weight one (created with the dtype of ``y_true``).

    Returns
    -------
    Dense ``(n_labels, n_labels)`` float64 array after ``cp.nan_to_num``.
    """
    if use_sample_weight:
        y_true, y_pred, sample_weight = inputs
    else:
        y_true, y_pred = inputs
        sample_weight = cp.ones(y_true.shape[0], dtype=y_true.dtype)

    true_idx, _ = make_monotonic(y_true, labels, copy=True)
    pred_idx, _ = make_monotonic(y_pred, labels, copy=True)

    n_labels = labels.size

    # Keep only the pairs whose mapped indices both fall below n_labels,
    # i.e. drop samples whose true or predicted value is not in `labels`.
    keep = cp.logical_and(pred_idx < n_labels, true_idx < n_labels)
    pred_idx = pred_idx[keep]
    true_idx = true_idx[keep]
    weights = sample_weight[keep]

    # Building a COO matrix and densifying it sums the weights of
    # duplicate (true, pred) coordinates.
    counts = cupyx.scipy.sparse.coo_matrix(
        (weights, (true_idx, pred_idx)),
        shape=(n_labels, n_labels),
        dtype=cp.float64)
    return cp.nan_to_num(counts.toarray())
예제 #3
0
    def _partial_fit(self,
                     X,
                     y,
                     sample_weight=None,
                     _classes=None) -> "MultinomialNB":
        """Update the model counters with one batch of samples.

        Parameters
        ----------
        X : dense array-like or scipy / cupyx sparse matrix
            Training samples. Sparse input is rebuilt as a cupyx COO matrix;
            anything else goes through ``input_to_cupy_array``.
        y : array-like
            Labels for ``X``; mapped to monotonic labels before counting.
        sample_weight : optional
            Accepted but not referenced anywhere in this method.
        _classes : array-like, optional
            Explicit class list. Only consulted on the first call, i.e.
            while ``self.fit_called_`` is still falsy.

        Returns
        -------
        self
        """
        if has_scipy():
            from scipy.sparse import isspmatrix as scipy_sparse_isspmatrix
        else:
            from cuml.common.import_utils import dummy_function_always_false \
                as scipy_sparse_isspmatrix

        # todo: use a sparse CumlArray style approach when ready
        # https://github.com/rapidsai/cuml/issues/2216
        is_sparse = (scipy_sparse_isspmatrix(X)
                     or cupyx.scipy.sparse.isspmatrix(X))
        if not is_sparse:
            X = input_to_cupy_array(X, order='K').array
        else:
            # Copy the COO triplets through cp.asarray and rebuild the
            # matrix as a cupyx COO matrix.
            coo = X.tocoo()
            rows = cp.asarray(coo.row, dtype=coo.row.dtype)
            cols = cp.asarray(coo.col, dtype=coo.col.dtype)
            data = cp.asarray(coo.data, dtype=coo.data.dtype)
            X = cupyx.scipy.sparse.coo_matrix((data, (rows, cols)),
                                              shape=coo.shape)

        y = input_to_cupy_array(y).array

        Y, label_classes = make_monotonic(y, copy=True)

        # NOTE(review): the result of check_labels is discarded in both
        # branches below - confirm whether that is intentional.
        if self.fit_called_:
            check_labels(Y, self.classes_)
        else:
            # First batch: fix the class list and allocate the counters.
            self.fit_called_ = True
            if _classes is None:
                self.classes_ = label_classes
            else:
                _classes, *_ = input_to_cuml_array(_classes, order='K')
                check_labels(Y, _classes)
                self.classes_ = _classes

            self._n_classes_ = self.classes_.shape[0]
            self._n_features_ = X.shape[1]
            self._init_counters(self._n_classes_, self._n_features_, X.dtype)

        self._count(X, Y)

        self._update_feature_log_prob(self.alpha)
        self._update_class_log_prior(class_prior=self._class_prior_)

        return self
예제 #4
0
def label_binarize(y,
                   classes,
                   neg_label=0,
                   pos_label=1,
                   sparse_output=False) -> SparseCumlArray:
    """
    Stateless helper that dummy (one-hot) encodes multi-class labels.

    Parameters
    ----------

    y : array-like of size [n_samples,] or [n_samples, n_classes]
        Labels to encode. Must expose ``dtype``; that dtype is reused for
        the row indices, the stored values and the dense output.
    classes : array with a ``dtype`` attribute
        The set of unique classes in the input; one output column each.
    neg_label : integer
        Value written where the dense output is zero. It is not applied
        when ``sparse_output`` is True.
    pos_label : integer
        Value stored at each sample's class column.
    sparse_output : bool
        Return a CSR sparse matrix instead of a dense array.

    Returns
    -------
    A float32 CSR matrix when ``sparse_output`` is True, otherwise a dense
    array of dtype ``y.dtype``, both of shape (n_samples, n_classes).
    NOTE(review): the return annotation only names the sparse case.

    Raises
    ------
    ValueError
        If ``check_labels`` reports that ``y`` is not covered by ``classes``.
    """

    classes = cp.asarray(classes, dtype=classes.dtype)
    labels = cp.asarray(y, dtype=y.dtype)

    if not check_labels(labels, classes):
        raise ValueError("Unseen classes encountered in input")

    n_samples = labels.shape[0]
    row_ind = cp.arange(0, n_samples, 1, dtype=y.dtype)

    # The monotonic label of each sample is used as its column index.
    col_ind, _ = make_monotonic(labels, classes, copy=True)

    # Convert from CumlArray to cupy
    col_ind = cp.asarray(col_ind)

    val = cp.full(row_ind.shape[0], pos_label, dtype=y.dtype)

    n_classes = classes.shape[0]
    sp = cupyx.scipy.sparse.coo_matrix(
        (val, (row_ind, col_ind)),
        shape=(col_ind.shape[0], n_classes),
        dtype=cp.float32)

    cp.cuda.Stream.null.synchronize()

    if sparse_output:
        return sp.tocsr()

    dense = sp.toarray().astype(y.dtype)
    # Every zero cell of the dense matrix is rewritten to neg_label.
    dense[dense == 0] = neg_label
    return dense
예제 #5
0
    def _partial_fit(self, X, y, sample_weight=None, _classes=None):
        """Update the model counters with one batch of samples.

        Parameters
        ----------
        X : numpy / cupy ndarray or scipy / cupy sparse matrix
            Training samples. ndarrays are passed through ``cp.asarray``;
            sparse matrices are rebuilt as cupy COO matrices. Any other
            type is left untouched.
        y : array-like
            Labels for ``X``; numpy / cupy arrays are passed through
            ``cp.asarray`` before being mapped to monotonic labels.
        sample_weight : optional
            Accepted but not referenced anywhere in this method.
        _classes : array-like, optional
            Explicit class list. Only consulted on the first call, i.e.
            while ``self.fit_called_`` is still falsy.

        Returns
        -------
        self
        """
        if isinstance(X, (np.ndarray, cp.ndarray)):
            X = cp.asarray(X, X.dtype)
        elif scipy.sparse.isspmatrix(X) or cp.sparse.isspmatrix(X):
            # Copy the COO triplets through cp.asarray and rebuild the
            # matrix as a cupy COO matrix.
            coo = X.tocoo()
            rows = cp.asarray(coo.row, dtype=coo.row.dtype)
            cols = cp.asarray(coo.col, dtype=coo.col.dtype)
            data = cp.asarray(coo.data, dtype=coo.data.dtype)
            X = cp.sparse.coo_matrix((data, (rows, cols)), shape=coo.shape)

        if isinstance(y, (np.ndarray, cp.ndarray)):
            y = cp.asarray(y, y.dtype)

        Y, label_classes = make_monotonic(y, copy=True)

        # NOTE(review): the result of check_labels is discarded in both
        # branches below - confirm whether that is intentional.
        if self.fit_called_:
            check_labels(Y, self.classes_)
        else:
            # First batch: fix the class list and allocate the counters.
            self.fit_called_ = True
            if _classes is None:
                self.classes_ = label_classes
            else:
                check_labels(Y, _classes)
                self.classes_ = _classes

            self.n_classes_ = self.classes_.shape[0]
            self.n_features_ = X.shape[1]
            self._init_counters(self.n_classes_, self.n_features_, X.dtype)

        self._count(X, Y)

        self._update_feature_log_prob(self.alpha)
        self._update_class_log_prior(class_prior=self.class_prior)

        return self
예제 #6
0
def confusion_matrix(y_true,
                     y_pred,
                     labels=None,
                     sample_weight=None,
                     normalize=None) -> CumlArray:
    """Compute a confusion matrix to evaluate a classification.

    Parameters
    ----------
    y_true : array-like (device or host) shape = (n_samples,)
        or (n_samples, n_outputs)
        Ground truth (correct) target values; int32 or int64.
    y_pred : array-like (device or host) shape = (n_samples,)
        or (n_samples, n_outputs)
        Estimated target values; must match the dtype and shape of y_true.
    labels : array-like (device or host) shape = (n_classes,), optional
        Labels used to index the matrix. May be used to reorder or select a
        subset of labels. When None, the labels appearing at least once in
        y_true or y_pred are used in sorted order.
    sample_weight : array-like (device or host) shape = (n_samples,), optional
        Sample weights. When None, every sample has weight one.
    normalize : string in ['true', 'pred', 'all'], optional
        Normalize over the true (rows) or predicted (columns) conditions, or
        over the whole population. When None, no normalization is applied.

    Returns
    -------
    C : array-like (device or host) shape = (n_classes, n_classes)
        Confusion matrix.

    Raises
    ------
    ValueError
        If ``normalize`` is not one of the accepted values.
    """
    y_true, n_rows, n_cols, dtype = input_to_cuml_array(
        y_true, check_dtype=[cp.int32, cp.int64])

    y_pred, _, _, _ = input_to_cuml_array(
        y_pred, check_dtype=dtype, check_rows=n_rows, check_cols=n_cols)

    if labels is not None:
        labels, n_labels, _, _ = input_to_cupy_array(
            labels, check_dtype=dtype, check_cols=1)
    else:
        labels = sorted_unique_labels(y_true, y_pred)
        n_labels = len(labels)

    if sample_weight is not None:
        sample_weight, _, _, _ = input_to_cupy_array(
            sample_weight,
            check_dtype=[cp.float32, cp.float64, cp.int32, cp.int64],
            check_rows=n_rows, check_cols=n_cols)
    else:
        sample_weight = cp.ones(n_rows, dtype=dtype)

    if normalize not in ('true', 'pred', 'all', None):
        raise ValueError(
            "normalize must be one of "
            f"{{'true', 'pred', 'all', None}}, got {normalize}.")

    with using_output_type("cupy"):
        y_true, _ = make_monotonic(y_true, labels, copy=True)
        y_pred, _ = make_monotonic(y_pred, labels, copy=True)

    # Keep only the pairs whose mapped indices both fall below n_labels,
    # i.e. drop samples whose true or predicted value is not in `labels`.
    keep = cp.logical_and(y_pred < n_labels, y_true < n_labels)
    y_pred = y_pred[keep]
    y_true = y_true[keep]
    sample_weight = sample_weight[keep]

    # Building a COO matrix and densifying it sums the weights of
    # duplicate (true, pred) coordinates.
    sparse_counts = cupyx.scipy.sparse.coo_matrix(
        (sample_weight, (y_true, y_pred)),
        shape=(n_labels, n_labels),
        dtype=np.float64)
    cm = sparse_counts.toarray()

    # Integer-like weights get an int64 accumulator; anything else stays
    # in float64.
    if sample_weight.dtype.kind in ('i', 'u', 'b'):
        cm = cm.astype(np.int64)

    with np.errstate(all='ignore'):
        # Choose the denominator for the requested normalization, if any.
        if normalize == 'true':
            denominator = cm.sum(axis=1, keepdims=True)
        elif normalize == 'pred':
            denominator = cm.sum(axis=0, keepdims=True)
        elif normalize == 'all':
            denominator = cm.sum()
        else:
            denominator = None

        if denominator is not None:
            cm = cp.divide(cm, denominator)
        # Replace the NaNs that a zero denominator produces.
        cm = cp.nan_to_num(cm)

    return cm
예제 #7
0
    def _partial_fit(self,
                     X,
                     y,
                     sample_weight=None,
                     _classes=None,
                     convert_dtype=True) -> "MultinomialNB":
        """Update the model counters with one batch of samples.

        Parameters
        ----------
        X : dense array-like or scipy / cupyx sparse matrix
            Training samples. Sparse input goes through
            ``_convert_x_sparse``; dense input must be float32, float64 or
            int32.
        y : array-like
            Labels for ``X``. Expected dtype is int32 when ``X`` is float32
            or int32, and int64 otherwise.
        sample_weight : optional
            Accepted but not referenced anywhere in this method.
        _classes : array-like, optional
            Explicit class list. Only consulted on the first call, i.e.
            while ``self.fit_called_`` is still falsy.
        convert_dtype : bool, default True
            When True, ``y`` and ``_classes`` are converted to the expected
            label dtype; when False no conversion is requested, and ``y``
            is still checked against that dtype.

        Returns
        -------
        self
        """

        if has_scipy():
            from scipy.sparse import isspmatrix as scipy_sparse_isspmatrix
        else:
            from cuml.common.import_utils import dummy_function_always_false \
                as scipy_sparse_isspmatrix

        # todo: use a sparse CumlArray style approach when ready
        # https://github.com/rapidsai/cuml/issues/2216
        if scipy_sparse_isspmatrix(X) or cupyx.scipy.sparse.isspmatrix(X):
            X = _convert_x_sparse(X)
            # TODO: Expanded this since sparse kernel doesn't
            # actually require the scipy sparse container format.
        else:
            X = input_to_cupy_array(
                X, order='K', check_dtype=[cp.float32, cp.float64,
                                           cp.int32]).array

        # The label dtype is tied to the dtype of X.
        expected_y_dtype = cp.int32 if X.dtype in [cp.float32, cp.int32
                                                   ] else cp.int64
        y = input_to_cupy_array(
            y,
            convert_to_dtype=(expected_y_dtype if convert_dtype else False),
            check_dtype=expected_y_dtype).array

        Y, label_classes = make_monotonic(y, copy=True)

        # NOTE(review): the result of check_labels is discarded in both
        # branches below - confirm whether that is intentional.
        if not self.fit_called_:
            # First batch: fix the class list and allocate the counters.
            self.fit_called_ = True
            if _classes is not None:
                _classes, *_ = input_to_cuml_array(
                    _classes,
                    order='K',
                    convert_to_dtype=(expected_y_dtype
                                      if convert_dtype else False))
                check_labels(Y, _classes)
                self.classes_ = _classes
            else:
                self.classes_ = label_classes

            self._n_classes_ = self.classes_.shape[0]
            self._n_features_ = X.shape[1]
            self._init_counters(self._n_classes_, self._n_features_, X.dtype)
        else:
            check_labels(Y, self.classes_)

        # Use the cupyx.scipy.sparse namespace, as in the input check above;
        # the legacy `cp.sparse` alias is deprecated and absent from newer
        # CuPy releases.
        if cupyx.scipy.sparse.isspmatrix(X):
            self._count_sparse(X.row, X.col, X.data, X.shape, Y)
        else:
            self._count(X, Y)

        self._update_feature_log_prob(self.alpha)
        self._update_class_log_prior(class_prior=self._class_prior_)

        return self