예제 #1
0
    def score(self, X, y, sample_weight=None):
        """
        Return the mean accuracy on the given test data and labels.

        In multi-label classification, this is the subset accuracy which is a
        harsh metric since you require for each sample that each label set be
        correctly predicted.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Test samples.

        y : array-like of shape (n_samples,) or (n_samples, n_outputs)
            True labels for X.

        sample_weight : array-like of shape (n_samples,), default=None
            Sample weights. Currently, sample weight is ignored.

        Returns
        -------
        score : float
            Mean accuracy of self.predict(X) with respect to y.
        """
        y_hat = self.predict(X)
        # Plain sequences (e.g. lists) have no ``dtype`` attribute; falling
        # back to ``dtype=None`` lets CuPy infer the dtype instead of raising
        # AttributeError before any scoring happens.
        y_true = cp.asarray(y, dtype=getattr(y, "dtype", None))
        return accuracy_score(y_hat, y_true)
예제 #2
0
 def get_loss(self, y, y_pred):
     """Return the loss (``1 - score``) for the metric named by ``self.metric``.

     Parameters
     ----------
     y : true labels, passed as the first argument to the metric function.
     y_pred : predictions, passed as the second argument to the metric
         function.

     Returns
     -------
     ``1 - accuracy_score(y, y_pred)`` when ``self.metric == 'error'``, or
     ``1 - roc_auc_score(y, y_pred)`` when ``self.metric == 'auc'``.

     Raises
     ------
     NotImplementedError
         If ``self.metric`` is neither ``'error'`` nor ``'auc'``.
     """
     if self.metric == 'error':
         return 1 - accuracy_score(y, y_pred)
     if self.metric == 'auc':
         # TODO: Add a warning checking if y_pred is all [0, 1]; it should be
         # a probability.
         return 1 - roc_auc_score(y, y_pred)
     # NotImplementedError is still caught by ``except Exception`` handlers,
     # but tells callers precisely what went wrong.
     raise NotImplementedError("Not implemented yet.")
    def score(self, y_test, predictions):
        """Compute the accuracy of ``predictions`` against ``y_test``.

        Both inputs are cast to ``self.hpo_config.dataset_dtype`` before being
        handed to ``accuracy_score``. The result is logged (rounded to five
        decimals), appended to ``self.cv_fold_scores`` and returned unrounded.
        """
        target_dtype = self.hpo_config.dataset_dtype
        truth = y_test.astype(target_dtype)
        guesses = predictions.astype(target_dtype)
        fold_score = accuracy_score(truth, guesses)

        hpo_log.info(f'score = {round(fold_score, 5)}')
        self.cv_fold_scores.append(fold_score)
        return fold_score
예제 #4
0
def train_and_eval(X_param, y_param, max_depth=16, n_estimators=100):
    """Fit a random forest on a train split and return its validation accuracy.

    The data is split with ``train_test_split`` using a fixed
    ``random_state=77``; a ``RandomForestClassifier`` built with the given
    ``max_depth`` and ``n_estimators`` is fitted on the training part and
    scored with ``accuracy_score`` on the validation part.
    """
    split = train_test_split(X_param, y_param, random_state=77)
    X_train, X_valid, y_train, y_valid = split

    model = RandomForestClassifier(max_depth=max_depth,
                                   n_estimators=n_estimators)
    model.fit(X_train, y_train)

    return accuracy_score(y_valid, model.predict(X_valid))
    def score(self, y_test, predictions):
        """Compute the accuracy of ``predictions`` against ``y_test``.

        ``y_test`` is first materialized through its ``compute()`` method, then
        both inputs are cast to ``self.hpo_config.dataset_dtype`` and passed to
        ``accuracy_score``. The score is logged, appended to
        ``self.cv_fold_scores`` and returned.
        """
        hpo_log.info('> score predictions')
        materialized = y_test.compute()
        target_dtype = self.hpo_config.dataset_dtype
        fold_score = accuracy_score(materialized.astype(target_dtype),
                                    predictions.astype(target_dtype))

        hpo_log.info(f'\t score = {fold_score}')
        self.cv_fold_scores.append(fold_score)
        return fold_score
예제 #6
0
    def score(self, X, y, sample_weight=None) -> float:
        """
        Return the mean accuracy on the given test data and labels.

        In multi-label classification, this is the subset accuracy which is a
        harsh metric since you require for each sample that each label set be
        correctly predicted.

        Parameters
        ----------
        X : array-like
            Test samples, passed to ``self.predict``.

        y : array-like
            True labels for X.

        sample_weight : default=None
            Currently, sample weight is ignored.

        Returns
        -------
        score : float
            Accuracy of ``self.predict(X)`` with respect to ``y``.
        """
        y_hat = self.predict(X)
        # Plain sequences (e.g. lists) have no ``dtype`` attribute; falling
        # back to ``dtype=None`` lets CuPy infer the dtype instead of raising
        # AttributeError before any scoring happens.
        y_true = cp.asarray(y, dtype=getattr(y, "dtype", None))
        return accuracy_score(y_hat, y_true)
예제 #7
0
 def cross_val_score(clf, X, y, cv=3, scoring=None):
     """Return per-fold accuracy scores from stratified k-fold validation.

     Parameters
     ----------
     clf : estimator exposing ``fit`` and ``predict``.
     X : feature table supporting positional ``.iloc`` indexing.
     y : labels supporting positional ``.iloc`` indexing; converted with
         ``cupy.asnumpy`` to drive the stratified split.
     cv : int, default=3
         Passed to ``StratifiedKFold`` as its first argument.
     scoring : default=None
         Accepted for signature compatibility; not used by this function,
         which always scores with ``cuml.metrics.accuracy_score``.

     Returns
     -------
     list
         One accuracy score per fold, in split order.
     """
     from sklearn.model_selection import StratifiedKFold
     from cupy import asnumpy
     from cuml.metrics import accuracy_score

     kf = StratifiedKFold(cv)
     acc_scores = []
     for train_index, test_index in kf.split(X, asnumpy(y)):
         X_train, X_test = X.iloc[train_index], X.iloc[test_index]
         y_train, y_test = y.iloc[train_index], y.iloc[test_index]
         try:
             clf.fit(X_train, y_train, convert_dtype=True)
         except Exception:
             # Best-effort fallback for estimators whose ``fit`` does not
             # take ``convert_dtype``. ``Exception`` (rather than a bare
             # ``except``) lets KeyboardInterrupt/SystemExit propagate.
             clf.fit(X_train, y_train)
         acc_scores.append(accuracy_score(y_test, clf.predict(X_test)))
     return acc_scores
예제 #8
0
def _calc_score_cuml(y_true, y_preds, y_proba=None, metrics=('accuracy',), task=const.TASK_BINARY, pos_label=1,
                     classes=None, average=None):
    """Evaluate the requested metrics with the ``cu_metrics`` functions.

    Parameters
    ----------
    y_true : ground-truth targets.
    y_preds : predicted targets, or None when no hard predictions are
        available (``'accuracy'`` is then reported as 0).
    y_proba : predicted probabilities/scores; defaults to ``y_preds`` when
        None. For ``'auc'`` a 2-D ``y_proba`` is reduced to column 1.
    metrics : iterable of metric names (matched case-insensitively) and/or
        callables invoked as ``metric(y_true, y_preds)``.
    task : task constant; for ``const.TASK_REGRESSION`` any ``cudf.Series``
        input is replaced by its ``.values``.
    pos_label, classes, average : accepted for signature compatibility; not
        used by this function.

    Returns
    -------
    dict
        Maps each metric (the name as given, or ``__name__`` for callables)
        to its score. Unknown metric names are logged and skipped.
    """
    if y_proba is None:
        y_proba = y_preds

    # Flatten single-column 2-D inputs. The None guards keep the
    # ``y_preds is None`` case usable: the accuracy branch below handles it
    # explicitly, but an unguarded ``.shape`` access would raise first.
    if y_proba is not None and len(y_proba.shape) == 2 and y_proba.shape[-1] == 1:
        y_proba = y_proba.reshape(-1)
    if y_preds is not None and len(y_preds.shape) == 2 and y_preds.shape[-1] == 1:
        y_preds = y_preds.reshape(-1)

    y_true = _to_dtype(y_true, 'float64')
    if y_preds is not None:
        y_preds = _to_dtype(y_preds, 'float64')
    if y_proba is not None:
        y_proba = _to_dtype(y_proba, 'float64')

    if task == const.TASK_REGRESSION:
        if isinstance(y_true, cudf.Series):
            y_true = y_true.values
        if isinstance(y_preds, cudf.Series):
            y_preds = y_preds.values
        if isinstance(y_proba, cudf.Series):
            y_proba = y_proba.values

    scores = {}
    for metric in metrics:
        if callable(metric):
            scores[metric.__name__] = metric(y_true, y_preds)
            continue

        metric_lower = metric.lower()
        if metric_lower == 'auc':
            # Only the column-1 scores are used for 2-D probabilities; no
            # multiclass AUC is computed here.
            if len(y_proba.shape) == 2:
                s = cu_metrics.roc_auc_score(y_true, y_proba[:, 1])
            else:
                s = cu_metrics.roc_auc_score(y_true, y_proba)
        elif metric_lower == 'accuracy':
            if y_preds is None:
                s = 0
            else:
                s = cu_metrics.accuracy_score(y_true, y_preds)
        # NOTE: 'recall', 'precision' and 'f1' are not handled here and fall
        # through to the "unknown metric" warning.
        elif metric_lower == 'mse':
            s = cu_metrics.mean_squared_error(y_true, y_preds)
        elif metric_lower == 'mae':
            s = cu_metrics.mean_absolute_error(y_true, y_preds)
        elif metric_lower == 'msle':
            s = cu_metrics.mean_squared_log_error(y_true, y_preds)
        elif metric_lower in {'rmse', 'rootmeansquarederror', 'root_mean_squared_error'}:
            s = cu_metrics.mean_squared_error(y_true, y_preds, squared=False)
        elif metric_lower == 'r2':
            s = cu_metrics.r2_score(y_true, y_preds)
        elif metric_lower in {'logloss', 'log_loss'}:
            s = cu_metrics.log_loss(y_true, y_proba)
        else:
            logger.warning(f'unknown metric: {metric}')
            continue

        # Convert CuPy array results to a plain Python float.
        if isinstance(s, cp.ndarray):
            s = float(cp.asnumpy(s))
        scores[metric] = s
    return scores
예제 #9
0
def test_qn(loss, dtype, penalty, l1_strength, l2_strength, fit_intercept):
    """Exercise the ``cuQN`` solver for the softmax and sigmoid losses.

    softmax: fits on a generated 4-class problem and requires the accuracy to
    beat a constant "most frequent class" baseline and to reach at least 0.50.

    sigmoid: fits on the precomputed data set and, for the penalty/strength
    combinations listed below, compares the final objective and coefficients
    with stored reference values.
    """
    if penalty == "none" and (l1_strength > 0 or l2_strength > 0):
        pytest.skip("`none` penalty does not take l1/l2_strength")

    tol = 1e-6

    qn = cuQN(loss=loss,
              fit_intercept=fit_intercept,
              l1_strength=l1_strength,
              l2_strength=l2_strength,
              tol=1e-8,
              output_type="cupy")

    if loss == 'softmax':
        X, y = make_classification(n_samples=5000,
                                   n_informative=10,
                                   n_features=20,
                                   n_classes=4,
                                   dtype=dtype)

        strata = y.astype(dtype)
        X_train, X_test, y_train, y_test = train_test_split(X.astype(dtype),
                                                            y.astype(dtype),
                                                            stratify=strata)

        # Baseline: always predict the most frequent class.
        majority = cp.unique(y)[cp.argmax(cp.bincount(y))]
        constant_guess = cp.array([majority] * y_test.shape[0], dtype=dtype)
        baseline_score = accuracy_score(y_test, constant_guess)

        fitted = qn.fit(X_train, y_train)
        cuml_score = accuracy_score(y_test, fitted.predict(X_test))

        assert (cuml_score > baseline_score)
        assert (cuml_score >= 0.50)
        return

    if loss != 'sigmoid':
        return

    X = np.array(precomputed_X, dtype=dtype)
    y = np.array(precomputed_y_log, dtype=dtype)
    qn.fit(X, y)
    print(qn.objective)
    print(qn.coef_)

    l1_is_zero = bool(l1_strength == 0.0)
    l2_is_zero = bool(l2_strength == 0.0)

    # Reference values are only checked for these combinations; anything
    # else (e.g. 'l1' with a non-zero l2_strength) is fitted but not verified.
    if penalty == 'none':
        verified = l1_is_zero and l2_is_zero
    elif penalty == 'l1':
        verified = l2_is_zero
    elif penalty == 'l2':
        verified = l1_is_zero
    else:
        verified = penalty == 'elasticnet'
    if not verified:
        return

    # (fit_intercept, l1 is zero, l2 is zero) -> (objective, coefficients)
    reference = {
        (True, True, True): (0.40263831615448,
                             [[-2.1088872], [2.4812558]]),
        (True, False, True): (0.44295936822891235,
                              [[-1.6899368], [1.9021575]]),
        (True, True, False): (0.43780848383903503,
                              [[-1.5337948], [1.678699]]),
        (True, False, False): (0.467987984418869,
                               [[-1.3727235], [1.4639963]]),
        (False, True, True): (0.4317452311515808,
                              [[-2.120777], [3.056865]]),
        (False, False, True): (0.4769895672798157,
                               [[-1.6214856], [2.3650239]]),
        (False, True, False): (0.4750209450721741,
                               [[-1.3931049], [2.0140104]]),
        (False, False, False): (0.5067970156669617,
                                [[-1.2102532], [1.752459]]),
    }
    case = (bool(fit_intercept), l1_is_zero, l2_is_zero)
    expected_objective, expected_coef = reference[case]

    # NOTE(review): the objective comparison is one-sided (no abs()), so an
    # objective below the reference also passes.
    assert (qn.objective - expected_objective) < tol
    cp.testing.assert_array_almost_equal(qn.coef_,
                                         np.array(expected_coef),
                                         decimal=3)

    if penalty == 'elasticnet' and not fit_intercept:
        print()
예제 #10
0
 def _accuracy(actuals, preds):
     """Return ``accuracy_score`` of ``preds`` against ``actuals``.

     Thin adapter that maps positional ``(actuals, preds)`` onto the
     ``ground_truth`` / ``predictions`` keyword arguments.
     """
     keyword_args = {"ground_truth": actuals, "predictions": preds}
     return accuracy_score(**keyword_args)