Example #1
def binary_classifier_quality(model, X_test, Y_test):
    """
    Evaluation helper for binary classification.
    If `model` is a GridSearchCV, its cross-validation results are printed and its best estimator is used.
    """
    if isinstance(model, GridSearchCV):
        result = pd.DataFrame(
            {k: model.cv_results_[k] for k in \
                 ['params', 'mean_test_score', 'std_test_score', 'rank_test_score']}
            )
        print(result)
        print()
        print(f"best params: {model.best_params_}")
        print("best score: {:f}".format(model.best_score_))
        print()

    Y_hat = model.predict(X_test)
    print("Confusion matrix (true x pred):")
    print(confusion_matrix(Y_test, Y_hat))
    print("Sensitivity: {:f}".format( sum(Y_hat[Y_test==1]) / sum(Y_test) ))
    print("Specificity: {:f}".format( sum(1 - Y_hat[Y_test==0]) / sum(Y_test==0)))
    print("Accuracy score on test data: {:f}".format( accuracy_score(Y_test, Y_hat) ))
    print("F1 score on test data: {:f}".format( f1_score(Y_test, Y_hat) ))

    #print(confusion_matrix(grid.predict(X_test), y_test))
    ConfusionMatrixDisplay.from_predictions(Y_test, Y_hat)
    RocCurveDisplay.from_estimator(model, X_test, Y_test)
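
# A minimal usage sketch for the helper above (not part of the original source):
# the dataset, grid, and imports below are illustrative assumptions, added because
# the excerpt omits the imports its body relies on.
import pandas as pd
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (accuracy_score, f1_score, confusion_matrix,
                             ConfusionMatrixDisplay, RocCurveDisplay)
from sklearn.model_selection import GridSearchCV, train_test_split

# illustrative data and a small hyper-parameter grid
X, y = make_classification(n_samples=500, random_state=0)
X_train, X_test, Y_train, Y_test = train_test_split(X, y, random_state=0)
grid = GridSearchCV(LogisticRegression(max_iter=1000),
                    {'C': [0.1, 1.0, 10.0]}, cv=5)
grid.fit(X_train, Y_train)

binary_classifier_quality(grid, X_test, Y_test)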
Example #2
def evaluate_roc(y_true, y_pred, method, plot=True):
    '''A quick helper for ad-hoc ROC analysis; a more thorough comparison is done later in R.'''
    fpr, tpr, thresholds = roc_curve(y_true, y_pred)

    roc_auc = auc(fpr, tpr)
    # best point on the ROC curve --> Youden's J
    J = tpr - fpr
    best_ind = np.argmax(J)
    best_threshold = thresholds[best_ind]

    print(f'Best threshold: < {np.round(best_threshold,3)} --> negative')

    # compute precision and recall at that threshold
    binarized = (y_pred >= best_threshold).astype(int)
    recall = recall_score(y_true, binarized)
    precision = precision_score(y_true, binarized)

    print(
        f'Recall = {np.round(recall,3)}, Precision = {np.round(precision,3)}')
    if plot:
        viz = RocCurveDisplay(fpr=fpr,
                              tpr=tpr,
                              roc_auc=roc_auc,
                              estimator_name=method)

        viz.plot()
        plt.show()

    print(f'AUC: {np.round(roc_auc,3)}')

    return best_threshold
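
# A minimal usage sketch for evaluate_roc (not from the original source): the
# classifier and imports are illustrative assumptions, added because the excerpt
# omits the imports the function body needs.
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (roc_curve, auc, recall_score, precision_score,
                             RocCurveDisplay)
from sklearn.model_selection import train_test_split

X, y = make_classification(n_samples=300, random_state=1)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)
clf = LogisticRegression(max_iter=1000).fit(X_train, y_train)

# y_pred must be a continuous score, e.g. the positive-class probability
best_thr = evaluate_roc(y_test, clf.predict_proba(X_test)[:, 1],
                        method='logistic regression', plot=True)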
Example #3
def loo_roc_curve_plot(clf, x, y):
    from functools import partial

    def loo_proba(i, x, y, clf):
        idx = list(range(len(y)))
        idx.pop(i)
        clf.fit(x[idx, :], y[idx])
        return clf.predict_proba(x[[i], :])[0, 1]

    func_ = partial(loo_proba, x=x, y=y, clf=clf)
    y_proba = [func_(i) for i in range(len(y))]
    fpr, tpr, _ = roc_curve(y, y_proba)
    roc_auc = auc(fpr, tpr)
    name = clf.__class__.__name__
    ax = plt.figure().gca()
    ax.plot([0, 1], [0, 1],
            linestyle='--',
            lw=2,
            color='r',
            label='Chance',
            alpha=.8)
    viz = RocCurveDisplay(fpr=fpr,
                          tpr=tpr,
                          roc_auc=roc_auc,
                          estimator_name=name)
    return viz.plot(name=name, ax=ax)
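
# A minimal usage sketch for the leave-one-out helper above (illustrative only,
# with the imports the excerpt omits); a small dataset keeps the LOO loop fast.
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_curve, auc, RocCurveDisplay

X, y = make_classification(n_samples=60, n_features=5, random_state=0)
loo_roc_curve_plot(LogisticRegression(max_iter=1000), X, y)
plt.show()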
Example #4
class RocAucCurve(CurveFabric):
    def __init__(self, col_score, col_target, name=None, **kwargs):
        super().__init__(col_score, col_target, name=name)
        self.fpr = None
        self.tpr = None
        self.roc_auc = None

    def fit(self, df):
        self.fpr, self.tpr, _ = roc_curve(df[self.col_target],
                                          df[self.col_score])
        self.roc_auc = auc(self.fpr, self.tpr)
        return self

    def plot(self, ax=None, title=None, **kwargs):
        if ax is None:
            fig, ax = plt.subplots()
        self.ax = ax

        self.viz = RocCurveDisplay(fpr=self.fpr,
                                   tpr=self.tpr,
                                   roc_auc=self.roc_auc * 100,
                                   estimator_name=self.name)

        if title:
            ax.set_title(title, fontsize=14, fontweight='bold')

        self.viz.plot(ax=ax, name=self.name, **kwargs)
        return self
Example #5
def sklearn_visualizations():
    import matplotlib.pyplot as plt
    from sklearn.model_selection import train_test_split
    from sklearn.svm import SVC
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.metrics import RocCurveDisplay
    from sklearn import datasets

    # data
    X, y = datasets.load_wine(return_X_y=True)
    y = y == 2
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=rng)

    # svm
    svc = SVC(random_state=rng)
    svc.fit(X_train, y_train)
    svc_disp = RocCurveDisplay.from_estimator(svc, X_test, y_test)
    plt.show()

    # random forest
    rfc = RandomForestClassifier(random_state=rng)
    rfc.fit(X_train, y_train)
    ax = plt.gca()
    rfc_disp = RocCurveDisplay.from_estimator(rfc,
                                              X_test,
                                              y_test,
                                              ax=ax,
                                              alpha=0.8)
    svc_disp.plot(ax=ax, alpha=0.8)
    plt.show()
Example #6
 def evaluate(self, x_test, y_test):
     y_pred = self.model.predict(x_test)
     y_pred = [1 * (x[0] >= 0.5) for x in y_pred]
     print('MLP performance on test for', self.feature_name)
     print('Accuracy:', accuracy_score(y_test, y_pred), 'Precision:',
           precision_score(y_test, y_pred), 'Recall:',
           recall_score(y_test, y_pred))
     # Confusion matrix
     cm = confusion_matrix(y_test, y_pred)
     cm_display = ConfusionMatrixDisplay(cm)
     # Precision recall
     precision, recall, _ = precision_recall_curve(y_test, y_pred)
     pr_display = PrecisionRecallDisplay(precision=precision, recall=recall)
     # Roc
     fpr, tpr, _ = roc_curve(y_test, y_pred)
     roc_display = RocCurveDisplay(fpr=fpr, tpr=tpr)
     # Figure
     figure: Figure = plt.figure(1, figsize=(15, 6))
     figure.suptitle('MLP on {}'.format(self.feature_name), fontsize=20)
     (ax1, ax2, ax3) = figure.subplots(1, 3)
     ax1.set_title('Confusion matrix')
     cm_display.plot(ax=ax1)
     ax2.set_title('Precision recall')
     pr_display.plot(ax=ax2)
     ax3.set_title('Roc curve')
     roc_display.plot(ax=ax3)
     file_name = '{}-mlp.png'.format(self.feature_name)
     figure.savefig(
         os.path.join(get_folder_path_from_root('images'), file_name))
     plt.show()
Example #7
def roc_curve_plot(clf, x, y):
    y_prob = clf.predict_proba(x)[:, 1]
    fpr, tpr, _ = roc_curve(y, y_prob)
    roc_auc = auc(fpr, tpr)
    name = clf.__class__.__name__
    viz = RocCurveDisplay(fpr=fpr,
                          tpr=tpr,
                          roc_auc=roc_auc,
                          estimator_name=name)
    return viz.plot(name=name)
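
# A minimal usage sketch for roc_curve_plot (illustrative, not from the original
# source): the classifier must already be fitted and expose predict_proba;
# imports the excerpt omits are added here.
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_curve, auc, RocCurveDisplay

X, y = make_classification(n_samples=200, random_state=0)
clf = RandomForestClassifier(random_state=0).fit(X, y)
roc_curve_plot(clf, X, y)  # in-sample ROC, for illustration only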
Example #8
def plot_roc_curves(train_roc_auc, split_roc_auc, valid_roc_auc):
    fig, ax = plt.subplots()
    for name, (roc_auc, auc) in [
        ('train', train_roc_auc),
        ('split', split_roc_auc),
        ('valid', valid_roc_auc),
    ]:
        viz = RocCurveDisplay(fpr=[x[0] for x in auc],
                              tpr=[x[1] for x in auc],
                              roc_auc=roc_auc,
                              estimator_name=name,
                              pos_label=1.0)
        viz.plot(ax=ax, name=name)
    return fig
Example #9
    def get_roc_curve(self, gt_index=0, pred_index=1, display=True, model_name="autopilot-model") :
            
        y = self._y()
        yh = self._yh()
        
        fpr, tpr, thresholds = roc_curve(y, yh)
        roc_auc = auc(fpr, tpr)

        viz = RocCurveDisplay(fpr=fpr, tpr=tpr, roc_auc=roc_auc, estimator_name=model_name) 

        if display :
            viz.plot()
            
        return viz, roc_auc, fpr, tpr, thresholds
Example #10
    def plot(self, ax=None, title=None, **kwargs):
        if ax is None:
            fig, ax = plt.subplots()
        self.ax = ax

        self.viz = RocCurveDisplay(fpr=self.fpr,
                                   tpr=self.tpr,
                                   roc_auc=self.roc_auc * 100,
                                   estimator_name=self.name)

        if title:
            ax.set_title(title, fontsize=14, fontweight='bold')

        self.viz.plot(ax=ax, name=self.name, **kwargs)
        return self
Example #11
def plot_roc_curve(pred: torch.Tensor, label: torch.Tensor, name="example estimator"):
    pred, label = pred.detach().cpu().numpy().flatten(), label.detach().cpu().numpy().flatten()
    fpr, tpr, thresholds = roc_curve(label, pred)
    roc_auc = auc(fpr, tpr)
    display = RocCurveDisplay(fpr=fpr, tpr=tpr, roc_auc=roc_auc,
                              estimator_name=name)

    # calc optimal threshold
    idx = np.arange(len(tpr))
    roc = pd.DataFrame({'tf': pd.Series(tpr - (1 - fpr), index=idx), 'threshold': pd.Series(thresholds, index=idx)})
    roc_t = roc.iloc[(roc.tf - 0).abs().argsort()[:1]]

    display.plot()
    plt.show()

    return list(roc_t['threshold'])[0]
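
# A minimal usage sketch for the PyTorch variant above (illustrative only):
# toy scores and labels, plus the imports the excerpt omits.
import numpy as np
import pandas as pd
import torch
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc, RocCurveDisplay

scores = torch.tensor([0.1, 0.4, 0.35, 0.8, 0.7, 0.2])
labels = torch.tensor([0, 0, 1, 1, 1, 0])
best_thr = plot_roc_curve(scores, labels, name="toy scores")
print("chosen threshold:", best_thr)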
Example #12
    def plot(self, data_original_test):
        """Plot ROC-AUC curves of both original and synthetic data in a single figure."""
        X_test, y_test = self._split_xy(data_original_test)

        fig, ax = plt.subplots(1, 2, figsize=(14, 6))
        sns.despine()
        # roc curve
        RocCurveDisplay.from_estimator(self.stats_original_,
                                       X_test,
                                       y_test,
                                       name=self.labels[0],
                                       color=COLOR_PALETTE[0],
                                       ax=ax[0])
        RocCurveDisplay.from_estimator(self.stats_synthetic_,
                                       X_test,
                                       y_test,
                                       name=self.labels[1],
                                       color=COLOR_PALETTE[1],
                                       ax=ax[0])

        ax[0].plot([0, 1], [0, 1],
                   linestyle="--",
                   lw=1,
                   color="black",
                   alpha=0.7)
        ax[0].set_title('ROC Curve')

        # pr curve
        PrecisionRecallDisplay.from_estimator(self.stats_original_,
                                              X_test,
                                              y_test,
                                              name=self.labels[0],
                                              color=COLOR_PALETTE[0],
                                              ax=ax[1])
        PrecisionRecallDisplay.from_estimator(self.stats_synthetic_,
                                              X_test,
                                              y_test,
                                              name=self.labels[1],
                                              color=COLOR_PALETTE[1],
                                              ax=ax[1])
        no_skill = len(y_test[y_test == 1]) / len(y_test)
        ax[1].plot([0, 1], [no_skill, no_skill],
                   lw=1,
                   linestyle='--',
                   color='black',
                   alpha=0.7)
        ax[1].set_title('Precision-Recall Curve')
Example #13
def test_default_labels(pyplot, roc_auc, estimator_name, expected_label):
    fpr = np.array([0, 0.5, 1])
    tpr = np.array([0, 0.5, 1])
    disp = RocCurveDisplay(fpr=fpr,
                           tpr=tpr,
                           roc_auc=roc_auc,
                           estimator_name=estimator_name).plot()
    assert disp.line_.get_label() == expected_label
Example #14
def plot_roc(y_test_df, y_score, trained_pipeline):
    fpr, tpr, _ = roc_curve(y_test_df,
                            y_score,
                            pos_label=trained_pipeline.classes_[1])
    RocCurveDisplay(fpr=fpr, tpr=tpr).plot()
    plt.title(f"AUC: {roc_auc_score(y_test_df, y_score)}")
    plt.tight_layout()
    plt.savefig(os.path.join(pass_success_model_eval_dir, "roc.png"))
Example #15
    def plot(self, ax=None, figsize=(10, 5)):
        if ax is None:
            fig, ax = plt.subplots(1, 1, figsize=figsize)

        ax.set_title("ROC Curve")
        possible_colors = GeneralUtils.shuffled_colors()
        for class_index, label in enumerate(self.labels):
            fpr, tpr = self._roc_curve[label]['fpr'], self._roc_curve[label][
                'tpr']
            roc_auc = self.auc[label]
            viz = RocCurveDisplay(fpr=fpr,
                                  tpr=tpr,
                                  roc_auc=roc_auc,
                                  estimator_name='Classifier')

            viz.plot(ax=ax, name=label, color=possible_colors[class_index])

        plt.draw()
Example #16
def cv_roc_curve_plot(clf, x, y, cv):
    tprs = []
    aucs = []
    mean_fpr = np.linspace(0, 1, len(y))
    fig, ax = plt.subplots()
    cver = StratifiedKFold(n_splits=cv)
    for i, (train_idx, test_idx) in enumerate(cver.split(x, y)):
        clf.fit(x[train_idx], y[train_idx])
        viz = plot_roc_curve(
            clf,
            x[test_idx],
            y[test_idx],
            ax=ax,
            name=f"ROC fold {i}",
            alpha=.3,
            lw=1,
        )
        interp_tpr = np.interp(mean_fpr, viz.fpr, viz.tpr)
        tprs.append(interp_tpr)
        aucs.append(viz.roc_auc)
    ax.plot([0, 1], [0, 1],
            linestyle='--',
            lw=2,
            color='r',
            label='Chance',
            alpha=.8)
    mean_fpr = np.linspace(0, 1, len(y))
    mean_tpr = np.mean(tprs, axis=0)
    mean_tpr[-1] = 1.0
    mean_auc = auc(mean_fpr, mean_tpr)
    std_auc = np.std(aucs)
    ax.plot(mean_fpr,
            mean_tpr,
            color='b',
            lw=2,
            alpha=.8,
            label=r'Mean ROC (AUC = %0.4f)' % mean_auc)
    std_tpr = np.std(tprs, axis=0)
    tprs_upper = np.minimum(mean_tpr + std_tpr, 1)
    tprs_lower = np.maximum(mean_tpr - std_tpr, 0)
    ax.fill_between(mean_fpr,
                    tprs_lower,
                    tprs_upper,
                    color='grey',
                    alpha=.2,
                    label=r'$\pm$ %0.4f std. dev.' % std_auc)
    ax.set(xlim=[-0.05, 1.05],
           ylim=[-0.05, 1.05],
           title="Receiver operating characteristic")
    ax.legend(loc="lower right")
    name = clf.__class__.__name__
    viz = RocCurveDisplay(fpr=mean_fpr,
                          tpr=mean_tpr,
                          roc_auc=mean_auc,
                          estimator_name=name)
    return viz
Example #17
def test_roc_curve_display_default_labels(pyplot, roc_auc, estimator_name,
                                          expected_label):
    """Check the default labels used in the display."""
    fpr = np.array([0, 0.5, 1])
    tpr = np.array([0, 0.5, 1])
    disp = RocCurveDisplay(fpr=fpr,
                           tpr=tpr,
                           roc_auc=roc_auc,
                           estimator_name=estimator_name).plot()
    assert disp.line_.get_label() == expected_label
Example #18
def test_roc_curve_display_complex_pipeline(pyplot, data_binary, clf,
                                            constructor_name):
    """Check the behaviour with complex pipeline."""
    X, y = data_binary

    if constructor_name == "from_estimator":
        with pytest.raises(NotFittedError):
            RocCurveDisplay.from_estimator(clf, X, y)

    clf.fit(X, y)

    if constructor_name == "from_estimator":
        display = RocCurveDisplay.from_estimator(clf, X, y)
        name = clf.__class__.__name__
    else:
        display = RocCurveDisplay.from_predictions(y, y)
        name = "Classifier"

    assert name in display.line_.get_label()
    assert display.estimator_name == name
Example #19
    def roc_curve(self, test_label=None, plot_type='test'):
        if test_label is not None:
            self.test_label = test_label
        if plot_type == 'test':
            predict = [self.y_pred]
            label = [self.test_label]
        elif plot_type == 'train':
            predict = [self.y_oof]
            label = [self.y_train]

        method_name = ['lgb']

        fig = plt.figure(figsize=(6, 6))
        ax = fig.add_subplot(1, 1, 1)
        for pred, label, method_name in zip(predict, label, method_name):
            fpr, tpr, thresholds = metrics.roc_curve(label, pred)
            auc = metrics.auc(fpr, tpr)
            roc_display = RocCurveDisplay(fpr=fpr,
                                          tpr=tpr,
                                          roc_auc=auc,
                                          estimator_name=method_name)
            roc_display.plot(ax=ax)
            ax.set_title('ROC curve : LightGBM', fontsize=16)
        plt.show()
Example #20
    def plot_roc_curves(self,
                        level: shared.enums.EvaluationLevel,
                        dataset_type: shared.enums.DatasetType,
                        font_size: int = 10):
        """
        Plots roc curves for each class in self._predictor.get_classes().
        """
        #set matplotlib font size globally
        plt.rcParams['font.size'] = font_size

        objs = self.__get_objects_according_to_evaluation_level(
            level=level, dataset_type=dataset_type)
        classes = self._predictor.get_classes()
        for Class in tqdm(classes):
            y_preds_raw = []  # list of the predicted percentages
            y_true = []  # list of True and False
            for obj in objs:
                # If the raw predictions contain NaN values, it is usually because the WSI/case did not
                # contain any tile, so a division by zero during prediction calculation produced NaNs.
                # This is fixed in the latest version of the patient_manager: it now checks for
                # tile summaries that do not contain any top tile.
                if (numpy.isnan(list(obj.predictions_raw.values())).any()):
                    continue
                y_preds_raw.append(obj.predictions_raw[Class])
                y_true.append((Class in obj.get_labels()))

            fpr, tpr, threshold = roc_curve(y_true, y_preds_raw, pos_label=1)
            roc_auc = auc(fpr, tpr)

            #plt.title(f'{Class}')
            #plt.plot(fpr, tpr, 'b', label = 'AUC = %0.2f' % roc_auc)
            #plt.legend(loc = 'lower right')
            #plt.plot([0, 1], [0, 1],'r--')
            #plt.xlim([0, 1])
            #plt.ylim([0, 1])
            #plt.ylabel('True Positive Rate')
            #plt.xlabel('False Positive Rate')
            #plt.show()
            roc_display = RocCurveDisplay(fpr=fpr,
                                          tpr=tpr,
                                          roc_auc=roc_auc,
                                          estimator_name=Class).plot()

        #set matplotlib font size back to default
        plt.rcParams['font.size'] = 10
Example #21
import matplotlib.pyplot as plt
from sklearn.metrics import RocCurveDisplay

fig, ax = plt.subplots()

models = [
    ("RT embedding -> LR", rt_model),
    ("RF", random_forest),
    ("RF embedding -> LR", rf_model),
    ("GBDT", gradient_boosting),
    ("GBDT embedding -> LR", gbdt_model),
]

model_displays = {}
for name, pipeline in models:
    model_displays[name] = RocCurveDisplay.from_estimator(pipeline,
                                                          X_test,
                                                          y_test,
                                                          ax=ax,
                                                          name=name)
_ = ax.set_title("ROC curve")

# %%
fig, ax = plt.subplots()
for name, pipeline in models:
    model_displays[name].plot(ax=ax)

ax.set_xlim(0, 0.2)
ax.set_ylim(0.8, 1)
_ = ax.set_title("ROC curve (zoomed in at top left)")
Example #22
    # Make up some plots
    from sklearn.metrics import plot_roc_curve
    from sklearn.metrics import roc_curve, auc, RocCurveDisplay
    from sklearn.metrics import plot_confusion_matrix
    from sklearn.metrics import confusion_matrix
    from sklearn.metrics import ConfusionMatrixDisplay

    for model_name, model in models.items():
        print("Model: ", model_name)
        # ROC Curve
        if model_name == "elasticnetlinear":

            fpr, tpr, thresholds = roc_curve(model[1], y_test)
            roc_auc = auc(fpr, tpr)
            svc_disp = RocCurveDisplay(fpr=fpr,
                                       tpr=tpr,
                                       roc_auc=roc_auc,
                                       estimator_name='Elastic Net Linear')

            svc_disp.plot()
            plt.title(
                f"ROC Curve of {PREDICTION_PHENO} by {model_name} (Scaled)")
            plt.savefig(
                f"finalplots/scaledroc_{PREDICTION_PHENO}_{model_name}.png",
                dpi=600,
                transparent=True,
                bbox_inches="tight",
                pad_inches=0.3)

        else:
            if model_name == "rbfsvmapprox":
                svc_disp = plot_roc_curve(
Example #23
y_pred_proba = final_model.predict_proba(x_test)[:, 1]
y_pred = final_model.predict(x_test)

fraction_of_positives, mean_predicted_value = calibration_curve(
    np.array(y_test), y_pred_proba, strategy='uniform', n_bins=20)
plt.figure()
plt.plot(mean_predicted_value, fraction_of_positives, "sr-")
plt.title("Calibration")
plt.xlabel("mean_predicted_value")
plt.ylabel("fraction_of_positives")

fpr, tpr, _ = roc_curve(y_test, y_pred_proba)
roc_auc = auc(fpr, tpr)
display = RocCurveDisplay(fpr=fpr,
                          tpr=tpr,
                          roc_auc=roc_auc,
                          estimator_name=None)
display.plot()
plt.title("ROC")

range_class = np.linspace(np.min(y_pred_proba), np.max(y_pred_proba), 100)
range_class = np.delete(range_class, 0)
range_class = np.delete(range_class, -1)
PPV = np.zeros(len(range_class))
NPV = np.zeros(len(range_class))
j = 0
for i in range_class:
    PPV[j] = precision_score(y_test, y_pred_proba > i, pos_label=1)
    NPV[j] = precision_score(y_test, y_pred_proba > i, pos_label=0)
    j += 1
plt.figure()
# `cm` is assumed to be a confusion matrix computed earlier, e.g. confusion_matrix(y_test, y_pred); not shown in this excerpt
cm_display = ConfusionMatrixDisplay(cm).plot()


Example #24
# %%
# Create :class:`RocCurveDisplay`
##############################################################################
# The roc curve requires either the probabilities or the non-thresholded
# decision values from the estimator. Since the logistic regression provides
# a decision function, we will use it to plot the roc curve:
from sklearn.metrics import roc_curve
from sklearn.metrics import RocCurveDisplay

y_score = clf.decision_function(X_test)

fpr, tpr, _ = roc_curve(y_test, y_score, pos_label=clf.classes_[1])
roc_display = RocCurveDisplay(fpr=fpr, tpr=tpr).plot()

# %%
# Create :class:`PrecisionRecallDisplay`
##############################################################################
# Similarly, the precision-recall curve can be plotted using `y_score` from
# the previous sections.
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import PrecisionRecallDisplay

prec, recall, _ = precision_recall_curve(y_test, y_score, pos_label=clf.classes_[1])
pr_display = PrecisionRecallDisplay(precision=prec, recall=recall).plot()

# %%
# Combining the display objects into a single plot
##############################################################################
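# The combining step is truncated in this excerpt; a minimal sketch of what it
# typically looks like, reusing the `roc_display` and `pr_display` objects
# created in the previous cells (an assumption, not the original code):
import matplotlib.pyplot as plt

fig, (ax_roc, ax_pr) = plt.subplots(1, 2, figsize=(12, 5))
roc_display.plot(ax=ax_roc)
pr_display.plot(ax=ax_pr)
plt.show()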
Example #25
        fpr, tpr, thresholds = roc_curve(y[test],
                                         y_proba[:, pos_label_idx],
                                         pos_label=pos_label)
        mean_tpr += np.interp(mean_fpr, fpr, tpr)
        mean_tpr[0] = 0.0

    mean_tpr /= cv.get_n_splits(X, y)
    mean_tpr[-1] = 1.0
    mean_auc = auc(mean_fpr, mean_tpr)

    # Create a display that we will reuse to make the aggregated plots for
    # all methods
    disp.append(
        RocCurveDisplay(
            fpr=mean_fpr,
            tpr=mean_tpr,
            roc_auc=mean_auc,
            estimator_name=f"{model[0].__class__.__name__}",
        ))

# %% [markdown]
# In the previous cell, we created the different mean ROC curves, and we can
# plot them on the same figure.

# %%
fig, ax = plt.subplots(figsize=(9, 9))
for d in disp:
    d.plot(ax=ax, linestyle="--")
ax.plot([0, 1], [0, 1], linestyle="--", color="k")
ax.axis("square")
fig.suptitle("Comparison of over-sampling methods with a 3NN classifier")
ax.set_xlim([0, 1])
Example #26
X, y = load_wine(return_X_y=True)
y = y == 2

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
svc = SVC(random_state=42)
svc.fit(X_train, y_train)

# %%
# Plotting the ROC Curve
# ----------------------
# Next, we plot the ROC curve with a single call to
# :func:`sklearn.metrics.RocCurveDisplay.from_estimator`. The returned
# `svc_disp` object allows us to continue using the already computed ROC curve
# for the SVC in future plots.
svc_disp = RocCurveDisplay.from_estimator(svc, X_test, y_test)
plt.show()

# %%
# Training a Random Forest and Plotting the ROC Curve
# ---------------------------------------------------
# We train a random forest classifier and create a plot comparing it to the SVC
# ROC curve. Notice how `svc_disp` uses
# :func:`~sklearn.metrics.RocCurveDisplay.plot` to plot the SVC ROC curve
# without recomputing the values of the roc curve itself. Furthermore, we
# pass `alpha=0.8` to the plot functions to adjust the alpha values of the
# curves.
rfc = RandomForestClassifier(n_estimators=10, random_state=42)
rfc.fit(X_train, y_train)
ax = plt.gca()
rfc_disp = RocCurveDisplay.from_estimator(rfc,
Example #27
def test_plot_roc_curve_pos_label(pyplot, response_method, constructor_name):
    # check that we can provide the positive label and display the proper
    # statistics
    X, y = load_breast_cancer(return_X_y=True)
    # create a highly imbalanced version of the dataset
    idx_positive = np.flatnonzero(y == 1)
    idx_negative = np.flatnonzero(y == 0)
    idx_selected = np.hstack([idx_negative, idx_positive[:25]])
    X, y = X[idx_selected], y[idx_selected]
    X, y = shuffle(X, y, random_state=42)
    # only use 2 features to make the problem even harder
    X = X[:, :2]
    y = np.array(["cancer" if c == 1 else "not cancer" for c in y],
                 dtype=object)
    X_train, X_test, y_train, y_test = train_test_split(
        X,
        y,
        stratify=y,
        random_state=0,
    )

    classifier = LogisticRegression()
    classifier.fit(X_train, y_train)

    # sanity check to be sure the positive class is classes_[0] and that we
    # are betrayed by the class imbalance
    assert classifier.classes_.tolist() == ["cancer", "not cancer"]

    y_pred = getattr(classifier, response_method)(X_test)
    # we select the corresponding probability column, or reverse the decision
    # function otherwise
    y_pred_cancer = -1 * y_pred if y_pred.ndim == 1 else y_pred[:, 0]
    y_pred_not_cancer = y_pred if y_pred.ndim == 1 else y_pred[:, 1]

    if constructor_name == "from_estimator":
        display = RocCurveDisplay.from_estimator(
            classifier,
            X_test,
            y_test,
            pos_label="cancer",
            response_method=response_method,
        )
    else:
        display = RocCurveDisplay.from_predictions(
            y_test,
            y_pred_cancer,
            pos_label="cancer",
        )

    roc_auc_limit = 0.95679

    assert display.roc_auc == pytest.approx(roc_auc_limit)
    assert np.trapz(display.tpr, display.fpr) == pytest.approx(roc_auc_limit)

    if constructor_name == "from_estimator":
        display = RocCurveDisplay.from_estimator(
            classifier,
            X_test,
            y_test,
            response_method=response_method,
            pos_label="not cancer",
        )
    else:
        display = RocCurveDisplay.from_predictions(
            y_test,
            y_pred_not_cancer,
            pos_label="not cancer",
        )

    assert display.roc_auc == pytest.approx(roc_auc_limit)
    assert np.trapz(display.tpr, display.fpr) == pytest.approx(roc_auc_limit)
Example #28
import numpy as np
y = np.array([0, 1, 1, 0, 1, 0, 1, 0, 0, 1])
y_score = np.array([0.7, 0.8, 0.3, 0.4, 0.9, 0.6, 0.99, 0.1, 0.2, 0.5])

from sklearn.metrics import roc_curve, RocCurveDisplay, auc

my_fpr, my_tpr, _ = roc_curve(y_true=y, y_score=y_score, pos_label=1)
RocCurveDisplay(fpr=my_fpr, tpr=my_tpr).plot()

import matplotlib.pyplot as plt
plt.savefig('10-p-roc.pdf')
Example #29
def test_roc_curve_display_plotting(
    pyplot,
    response_method,
    data_binary,
    with_sample_weight,
    drop_intermediate,
    with_strings,
    constructor_name,
    default_name,
):
    """Check the overall plotting behaviour."""
    X, y = data_binary

    pos_label = None
    if with_strings:
        y = np.array(["c", "b"])[y]
        pos_label = "c"

    if with_sample_weight:
        rng = np.random.RandomState(42)
        sample_weight = rng.randint(1, 4, size=(X.shape[0]))
    else:
        sample_weight = None

    lr = LogisticRegression()
    lr.fit(X, y)

    y_pred = getattr(lr, response_method)(X)
    y_pred = y_pred if y_pred.ndim == 1 else y_pred[:, 1]

    if constructor_name == "from_estimator":
        display = RocCurveDisplay.from_estimator(
            lr,
            X,
            y,
            sample_weight=sample_weight,
            drop_intermediate=drop_intermediate,
            pos_label=pos_label,
            alpha=0.8,
        )
    else:
        display = RocCurveDisplay.from_predictions(
            y,
            y_pred,
            sample_weight=sample_weight,
            drop_intermediate=drop_intermediate,
            pos_label=pos_label,
            alpha=0.8,
        )

    fpr, tpr, _ = roc_curve(
        y,
        y_pred,
        sample_weight=sample_weight,
        drop_intermediate=drop_intermediate,
        pos_label=pos_label,
    )

    assert_allclose(display.roc_auc, auc(fpr, tpr))
    assert_allclose(display.fpr, fpr)
    assert_allclose(display.tpr, tpr)

    assert display.estimator_name == default_name

    import matplotlib as mpl  # noqa

    assert isinstance(display.line_, mpl.lines.Line2D)
    assert display.line_.get_alpha() == 0.8
    assert isinstance(display.ax_, mpl.axes.Axes)
    assert isinstance(display.figure_, mpl.figure.Figure)

    expected_label = f"{default_name} (AUC = {display.roc_auc:.2f})"
    assert display.line_.get_label() == expected_label

    expected_pos_label = 1 if pos_label is None else pos_label
    expected_ylabel = f"True Positive Rate (Positive label: {expected_pos_label})"
    expected_xlabel = f"False Positive Rate (Positive label: {expected_pos_label})"

    assert display.ax_.get_ylabel() == expected_ylabel
    assert display.ax_.get_xlabel() == expected_xlabel
Example #30
import pandas as pd
from sklearn import tree
from sklearn.metrics import roc_curve, RocCurveDisplay, auc
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder

my_url = ('https://raw.githubusercontent.com'
          '/taroyabuki/fromzero/master/data/titanic.csv')
my_data = pd.read_csv(my_url)

X, y = my_data.iloc[:, 0:3], my_data.Survived

my_pipeline = Pipeline([
    ('ohe', OneHotEncoder(drop='first')),
    ('tree',
     tree.DecisionTreeClassifier(max_depth=2, min_impurity_decrease=0.01))
])
my_pipeline.fit(X, y)

tmp = pd.DataFrame(my_pipeline.predict_proba(X), columns=my_pipeline.classes_)
y_score = tmp.Yes

my_fpr, my_tpr, _ = roc_curve(y_true=y, y_score=y_score, pos_label='Yes')
my_auc = auc(x=my_fpr, y=my_tpr)
RocCurveDisplay(fpr=my_fpr, tpr=my_tpr, roc_auc=my_auc).plot()

import matplotlib.pyplot as plt
plt.savefig('10-p-titanic-roc.pdf')