Example #1
 def evaluate(self, x_test, y_test):
     y_pred = self.model.predict(x_test)
     # threshold the sigmoid outputs at 0.5 to get hard class labels
     y_pred = [1 * (x[0] >= 0.5) for x in y_pred]
     print('MLP performance on test for', self.feature_name)
     print('Accuracy:', accuracy_score(y_test, y_pred), 'Precision:',
           precision_score(y_test, y_pred), 'Recall:',
           recall_score(y_test, y_pred))
     # Confusion matrix
     cm = confusion_matrix(y_test, y_pred)
     cm_display = ConfusionMatrixDisplay(cm)
     # Precision recall
     precision, recall, _ = precision_recall_curve(y_test, y_pred)
     pr_display = PrecisionRecallDisplay(precision=precision, recall=recall)
     # ROC
     fpr, tpr, _ = roc_curve(y_test, y_pred)
     roc_display = RocCurveDisplay(fpr=fpr, tpr=tpr)
     # Figure
     figure: Figure = plt.figure(1, figsize=(15, 6))
     figure.suptitle('MLP on {}'.format(self.feature_name), fontsize=20)
     (ax1, ax2, ax3) = figure.subplots(1, 3)
     ax1.set_title('Confusion matrix')
     cm_display.plot(ax=ax1)
     ax2.set_title('Precision recall')
     pr_display.plot(ax=ax2)
     ax3.set_title('ROC curve')
     roc_display.plot(ax=ax3)
     file_name = '{}-mlp.png'.format(self.feature_name)
     figure.savefig(
         os.path.join(get_folder_path_from_root('images'), file_name))
     plt.show()
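A minimal sketch of the imports this method relies on; get_folder_path_from_root is assumed to be a project-specific helper that resolves an output directory:

import os
import matplotlib.pyplot as plt
from matplotlib.figure import Figure
from sklearn.metrics import (
    ConfusionMatrixDisplay, PrecisionRecallDisplay, RocCurveDisplay,
    accuracy_score, confusion_matrix, precision_recall_curve,
    precision_score, recall_score, roc_curve)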
Example #2
class PrecisionRecallCurve(CurveFabric):
    def __init__(self, col_score, col_target, name=None, **kwargs):
        super().__init__(col_score, col_target, name=name)
        self.precision = None
        self.recall = None
        self.average_precision = None

    def fit(self, df):
        self.precision, self.recall, _ = precision_recall_curve(
            df[self.col_target], df[self.col_score])
        self.average_precision = average_precision_score(
            df[self.col_target], df[self.col_score])
        return self

    def plot(self, ax=None, title=None, **kwargs):

        if ax is None:
            fig, ax = plt.subplots()
        self.ax = ax

        self.viz = PrecisionRecallDisplay(
            precision=self.precision,
            recall=self.recall,
            average_precision=self.average_precision,
            estimator_name=self.name)

        if title:
            ax.set_title(title, fontsize=14, fontweight='bold')

        self.viz.plot(ax=ax, name=self.name, **kwargs)
        return self
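A hedged usage sketch, assuming a DataFrame with a score column and a binary target column (CurveFabric is the project's own base class, assumed here to simply store the column names):

import pandas as pd

df = pd.DataFrame({'score': [0.9, 0.2, 0.7, 0.4],
                   'target': [1, 0, 1, 0]})
curve = PrecisionRecallCurve('score', 'target', name='demo')
curve.fit(df).plot(title='Precision-Recall')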
Example #3
def test_precision_recall_display_string_labels(pyplot):
    # regression test #15738
    cancer = load_breast_cancer()
    X, y = cancer.data, cancer.target_names[cancer.target]

    lr = make_pipeline(StandardScaler(), LogisticRegression())
    lr.fit(X, y)
    for klass in cancer.target_names:
        assert klass in lr.classes_
    display = PrecisionRecallDisplay.from_estimator(lr, X, y)

    y_pred = lr.predict_proba(X)[:, 1]
    avg_prec = average_precision_score(y, y_pred, pos_label=lr.classes_[1])

    assert display.average_precision == pytest.approx(avg_prec)
    assert display.estimator_name == lr.__class__.__name__

    err_msg = r"y_true takes value in {'benign', 'malignant'}"
    with pytest.raises(ValueError, match=err_msg):
        PrecisionRecallDisplay.from_predictions(y, y_pred)

    display = PrecisionRecallDisplay.from_predictions(y,
                                                      y_pred,
                                                      pos_label=lr.classes_[1])
    assert display.average_precision == pytest.approx(avg_prec)
Example #4
def test_precision_recall_display_name(pyplot, constructor_name,
                                       default_label):
    """Check the behaviour of the name parameters"""
    X, y = make_classification(n_classes=2, n_samples=100, random_state=0)
    pos_label = 1

    classifier = LogisticRegression().fit(X, y)

    y_pred = classifier.predict_proba(X)[:, pos_label]

    # safe guard for the binary if/else construction
    assert constructor_name in ("from_estimator", "from_predictions")

    if constructor_name == "from_estimator":
        display = PrecisionRecallDisplay.from_estimator(classifier, X, y)
    else:
        display = PrecisionRecallDisplay.from_predictions(y,
                                                          y_pred,
                                                          pos_label=pos_label)

    average_precision = average_precision_score(y, y_pred, pos_label=pos_label)

    # check that the default name is used
    assert display.line_.get_label() == default_label.format(average_precision)

    # check that the name can be set
    display.plot(name="MySpecialEstimator")
    assert (display.line_.get_label() ==
            f"MySpecialEstimator (AP = {average_precision:.2f})")
Example #5
def test_precision_recall_display_pipeline(pyplot, clf):
    X, y = make_classification(n_classes=2, n_samples=50, random_state=0)
    with pytest.raises(NotFittedError):
        PrecisionRecallDisplay.from_estimator(clf, X, y)
    clf.fit(X, y)
    display = PrecisionRecallDisplay.from_estimator(clf, X, y)
    assert display.estimator_name == clf.__class__.__name__
Example #6
 def plotPrecisionRecallCurve(self, titleAdd: str = None):
     from sklearn.metrics import PrecisionRecallDisplay  # only supported by newer versions of sklearn
     if not self._probabilitiesAvailable:
         raise Exception("Precision-recall curve requires probabilities")
     if not self.isBinary:
         raise Exception(
             "Precision-recall curve is not applicable to non-binary classification"
         )
     probabilities = self.y_predictedClassProbabilities[
         self.binaryPositiveLabel]
     precision, recall, thresholds = precision_recall_curve(
         y_true=self.y_true,
         probas_pred=probabilities,
         pos_label=self.binaryPositiveLabel)
     disp = PrecisionRecallDisplay(precision, recall)
     disp.plot()
     ax: plt.Axes = disp.ax_
     ax.set_xlabel("recall")
     ax.set_ylabel("precision")
     title = "Precision-Recall Curve"
     if titleAdd is not None:
         title += "\n" + titleAdd
     ax.set_title(title)
     ax.xaxis.set_major_locator(plticker.MultipleLocator(base=0.1))
     ax.yaxis.set_major_locator(plticker.MultipleLocator(base=0.1))
     return disp.figure_
Example #7
def test_default_labels(pyplot, average_precision, estimator_name, expected_label):
    prec = np.array([1, 0.5, 0])
    recall = np.array([0, 0.5, 1])
    disp = PrecisionRecallDisplay(
        prec, recall, average_precision=average_precision, estimator_name=estimator_name
    )
    disp.plot()
    assert disp.line_.get_label() == expected_label
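The pytest parametrization is not shown; a plausible sketch, matching the labels PrecisionRecallDisplay builds from estimator_name and average_precision:

import pytest

@pytest.mark.parametrize(
    "average_precision, estimator_name, expected_label",
    [
        (0.9, None, "AP = 0.90"),
        (None, "my_est", "my_est"),
        (0.8, "my_est2", "my_est2 (AP = 0.80)"),
    ],
)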
Example #8
def plot_ap(y_true, y_pred_proba):
    # AP curve
    aps = average_precision_score(y_true, y_pred_proba)
    precision, recall, _ = precision_recall_curve(y_true, y_pred_proba)
    disp = PrecisionRecallDisplay(precision=precision,
                                  recall=recall,
                                  average_precision=aps,
                                  estimator_name=None)
    disp.plot()
    return disp.figure_
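A quick usage sketch with toy labels and scores (the sklearn and matplotlib imports are assumed):

import numpy as np

y_true = np.array([0, 1, 1, 0, 1])
y_proba = np.array([0.2, 0.8, 0.6, 0.3, 0.9])
fig = plot_ap(y_true, y_proba)
fig.savefig('ap_curve.png')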
Example #9
def gold_scores(alignment):
    pairs = read(alignment.l2_fn.name)

    for score in alignment.scores:
        gold_df = score['df'].copy()

        gold_df[gold_df.columns] = 0

        for x, y in pairs:
            gold_df.loc[x, y] = 1

        true_values = gold_df.to_numpy().flatten()
        pred_values = score['df'].to_numpy().flatten()

        print('-------')
        print(score['id'])
        print('spearman')
        rho, p_value = stats.spearmanr(pred_values.argsort(),
                                       true_values.argsort())
        print(rho)
        print(p_value)
        print('kendall tau')
        tau, p_value = stats.kendalltau(pred_values.argsort(),
                                        true_values.argsort())
        print(tau)
        print(p_value)
        print('-------')

        precision, recall, _ = metrics.precision_recall_curve(
            true_values, pred_values)
        disp = PrecisionRecallDisplay(precision, recall, 0, score['id'])
        disp.plot()
        a = score['id']
        plt.savefig(f'plots/{a}.png')
        plt.close()

        # roc_auc = metrics.auc(precision, recall)

        # plt.title('Precision-recall curve')
        # plt.plot(precision, recall, 'b', label = 'AUC = %0.2f' % roc_auc)
        # plt.legend(loc = 'lower right')
        # plt.plot([0, 1], [0, 1],'r--')
        # plt.xlim([0, 1])
        # plt.ylim([0, 1])
        # plt.ylabel('Recall')
        # plt.xlabel('Precision')
        # a = score['id']
        # plt.savefig(f'{a}.png')
        # plt.close()

        print('boa')


# print(read('salsa'))
Example #10
def test_default_labels(pyplot, average_precision, estimator_name,
                        expected_label):
    """Check the default labels used in the display."""
    precision = np.array([1, 0.5, 0])
    recall = np.array([0, 0.5, 1])
    display = PrecisionRecallDisplay(
        precision,
        recall,
        average_precision=average_precision,
        estimator_name=estimator_name,
    )
    display.plot()
    assert display.line_.get_label() == expected_label
Example #11
def plot_auc(y_true, y_score, ax, color, label=''):
    lw = 2
    roc_auc = roc_auc_score(y_true=y_true,
                            y_score=y_score)

    pr_auc = average_precision_score(y_true=y_true,
                                     y_score=y_score)

    fpr, tpr, thresholds = roc_curve(y_true=y_true,
                                     y_score=y_score, pos_label=1)

    prec, recall, _ = precision_recall_curve(y_true=y_true,
                                             probas_pred=y_score, pos_label=1)

    ax[0].plot(fpr,
               tpr,
               lw=lw,
               label='{label} ROC curve (area = {roc_auc:0.4f})'.format(label=label, roc_auc=roc_auc),
               color=color)
    ax[0].legend()
    pr_label = '{label} PR AUC curve (area = {pr_auc:0.4f})'
    PrecisionRecallDisplay(precision=prec, recall=recall).plot(ax=ax[1],
                                                               color=color,
                                                               label=pr_label.format(
                                                                   label=label,
                                                                   pr_auc=pr_auc))
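A usage sketch, assuming a pair of axes for the ROC and PR panels:

import numpy as np
import matplotlib.pyplot as plt

rng = np.random.RandomState(0)
y_true = rng.randint(0, 2, size=200)
# scores loosely correlated with the labels
y_score = np.clip(0.6 * y_true + 0.5 * rng.rand(200), 0, 1)

fig, ax = plt.subplots(1, 2, figsize=(12, 5))
plot_auc(y_true, y_score, ax=ax, color='tab:blue', label='demo')
plt.show()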
Example #12
    def plot(self, data_original_test):
        """Plot ROC and PR curves of both the original and synthetic models in a single figure."""
        X_test, y_test = self._split_xy(data_original_test)

        fig, ax = plt.subplots(1, 2, figsize=(14, 6))
        sns.despine()
        # roc curve
        RocCurveDisplay.from_estimator(self.stats_original_,
                                       X_test,
                                       y_test,
                                       name=self.labels[0],
                                       color=COLOR_PALETTE[0],
                                       ax=ax[0])
        RocCurveDisplay.from_estimator(self.stats_synthetic_,
                                       X_test,
                                       y_test,
                                       name=self.labels[1],
                                       color=COLOR_PALETTE[1],
                                       ax=ax[0])

        ax[0].plot([0, 1], [0, 1],
                   linestyle="--",
                   lw=1,
                   color="black",
                   alpha=0.7)
        ax[0].set_title('ROC Curve')

        # pr curve
        PrecisionRecallDisplay.from_estimator(self.stats_original_,
                                              X_test,
                                              y_test,
                                              name=self.labels[0],
                                              color=COLOR_PALETTE[0],
                                              ax=ax[1])
        PrecisionRecallDisplay.from_estimator(self.stats_synthetic_,
                                              X_test,
                                              y_test,
                                              name=self.labels[1],
                                              color=COLOR_PALETTE[1],
                                              ax=ax[1])
        no_skill = len(y_test[y_test == 1]) / len(y_test)
        ax[1].plot([0, 1], [no_skill, no_skill],
                   lw=1,
                   linestyle='--',
                   color='black',
                   alpha=0.7)
        ax[1].set_title('Precision-Recall Curve')
Example #13
def test_precision_recall_display_plotting(pyplot, constructor_name,
                                           response_method):
    """Check the overall plotting rendering."""
    X, y = make_classification(n_classes=2, n_samples=50, random_state=0)
    pos_label = 1

    classifier = LogisticRegression().fit(X, y)

    y_pred = getattr(classifier, response_method)(X)
    y_pred = y_pred if y_pred.ndim == 1 else y_pred[:, pos_label]

    # safe guard for the binary if/else construction
    assert constructor_name in ("from_estimator", "from_predictions")

    if constructor_name == "from_estimator":
        display = PrecisionRecallDisplay.from_estimator(
            classifier, X, y, response_method=response_method)
    else:
        display = PrecisionRecallDisplay.from_predictions(y,
                                                          y_pred,
                                                          pos_label=pos_label)

    precision, recall, _ = precision_recall_curve(y,
                                                  y_pred,
                                                  pos_label=pos_label)
    average_precision = average_precision_score(y, y_pred, pos_label=pos_label)

    np.testing.assert_allclose(display.precision, precision)
    np.testing.assert_allclose(display.recall, recall)
    assert display.average_precision == pytest.approx(average_precision)

    import matplotlib as mpl

    assert isinstance(display.line_, mpl.lines.Line2D)
    assert isinstance(display.ax_, mpl.axes.Axes)
    assert isinstance(display.figure_, mpl.figure.Figure)

    assert display.ax_.get_xlabel() == "Recall (Positive label: 1)"
    assert display.ax_.get_ylabel() == "Precision (Positive label: 1)"

    # plotting passing some new parameters
    display.plot(alpha=0.8, name="MySpecialEstimator")
    expected_label = f"MySpecialEstimator (AP = {average_precision:0.2f})"
    assert display.line_.get_label() == expected_label
    assert display.line_.get_alpha() == pytest.approx(0.8)
Example #14
    def plot(self, ax=None, figsize=(10, 5)):
        if ax is None:
            fig, ax = plt.subplots(1, 1, figsize=figsize)

        ax.set_title("Precision Recall Curve")
        possible_colors = GeneralUtils.shuffled_colors()
        for class_index, label in enumerate(self.labels):
            precision = self._recall_precision_curve[label]['precision']
            recall = self._recall_precision_curve[label]['recall']
            average_precision = self._average_precision[label]

            viz = PrecisionRecallDisplay(precision=precision,
                                         recall=recall,
                                         average_precision=average_precision,
                                         estimator_name='Classifier')

            viz.plot(ax=ax, name=label, color=possible_colors[class_index])
Example #16
 def precision_recall_curve(self, fig_name):
     precision, recall, thresholds = precision_recall_curve(
         self.label, self.pred)
     max_f1 = 0
     max_f2 = 0
     max_threshold = 0
     for p, r, tr in zip(precision, recall, thresholds):
         f1 = self.f1_score(p, r)
         f2 = self.f2_score(p, r)
         if f1 >= max_f1:
             max_f1 = f1
             max_threshold = tr
         if f2 >= max_f2:
             max_f2 = f2
     viz = PrecisionRecallDisplay(precision=precision, recall=recall)
     viz.plot()
     if os.path.isdir(self.output_dir):
         fig_path = os.path.join(self.output_dir, fig_name)
         plt.savefig(fig_path)
         plt.close()
     detail = self.f1_details(max_threshold)
     return round(max_f1, 3), round(max_f2, 3), detail, max_threshold
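f1_score and f2_score are helpers on the same class and are not shown; a minimal sketch, assuming the standard F-beta formula:

def f1_score(self, precision, recall):
    # harmonic mean of precision and recall
    denom = precision + recall
    return 2 * precision * recall / denom if denom else 0.0

def f2_score(self, precision, recall):
    # F-beta with beta=2 weights recall four times as much as precision
    denom = 4 * precision + recall
    return 5 * precision * recall / denom if denom else 0.0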
Example #17
def save_pr_curve(filename, y_true, y_score):
    """
    filename: output file name
    y_true: ground-truth labels (two classes, 0 or 1)
    y_score: predicted probability of class 1 (0.0-1.0)
    """
    prec, recall, thresholds = precision_recall_curve(y_true, y_score)
    auc_score = auc(recall, prec)
    pr_display = PrecisionRecallDisplay(
        precision=prec,
        recall=recall).plot(name="PR curve (area = %0.3f)" % auc_score)
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.savefig(filename)
    plt.clf()
    return auc_score
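Usage sketch with toy data (precision_recall_curve, PrecisionRecallDisplay, auc and matplotlib.pyplot are assumed to be imported, as in the function above):

import numpy as np

y_true = np.array([0, 1, 1, 0, 1, 0])
y_score = np.array([0.1, 0.9, 0.7, 0.4, 0.6, 0.3])
pr_auc = save_pr_curve('pr_curve.png', y_true, y_score)
print('PR AUC:', pr_auc)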
Example #18
    def plot_precision_recall_curves(self,
                                     level: shared.enums.EvaluationLevel,
                                     dataset_type: shared.enums.DatasetType,
                                     font_size: int = 10):
        """
        Plots precision-recall curves for each class in self._predictor.get_classes().
        """

        #set matplotlib font size globally
        plt.rcParams['font.size'] = font_size

        objs = self.__get_objects_according_to_evaluation_level(
            level=level, dataset_type=dataset_type)
        classes = self._predictor.get_classes()
        for Class in tqdm(classes):
            y_preds_raw = []  # list of the predicted percentages
            y_true = []  # list of True and False
            for obj in objs:
                # if the raw predictions contain NaN values, it is usually because the
                # wsi/case did not contain any tile, so a division by zero during
                # prediction produced NaNs. The latest patient_manager version fixes
                # this by checking for tilesummaries that contain no top tile.
                if (numpy.isnan(list(obj.predictions_raw.values())).any()):
                    continue
                y_preds_raw.append(obj.predictions_raw[Class])
                y_true.append((Class in obj.get_labels()))

            #print(f'{Class}')
            #print(f'y_true: {y_true}')
            #print(f'y_preds_raw: {y_preds_raw}')
            #print('')
            #print('------------------------------------------------------------------------')
            #print('')

            precision, recall, thresholds = precision_recall_curve(y_true,
                                                                   y_preds_raw,
                                                                   pos_label=1)
            average_precision = average_precision_score(y_true, y_preds_raw)

            pr_display = PrecisionRecallDisplay(
                precision=precision,
                recall=recall,
                average_precision=average_precision,
                estimator_name=Class).plot()
        #set matplotlib font size back to default
        plt.rcParams['font.size'] = 10
Example #19
def plot_pr_curve(stats, outfile):
    print("Plotting Stat Curves")
    for c in ['prc']:
        #         plt.figure()
        for s in stats:
            if s[c] is not None:
                PrecisionRecallDisplay(
                    precision=s[c][0],
                    recall=s[c][1],
                    average_precision=s['ap']).plot(label=s['name'] + '_' + c)
            break
        plt.title(
            outfile.split('/')[-1] + '_' + c + ' || AP : ' +
            str(np.round(s['ap'], 3)))
        plt.legend(loc=3 if c == 'prc' else 4)
        plt.ylabel('precision' if c == 'prc' else '1-spec.')
        plt.xlabel('recall')
        plt.xticks(np.arange(0, 1.1, step=0.1))
        plt.yticks(np.arange(0, 1.1, step=0.1))
        plt.savefig(outfile + 'x_' + c + '.png')
        plt.close()
Example #20
def eval_cnns(model, val_data):
    model.eval()

    val_features = val_data.features_input.to(device)
    val_times = val_data.times_input.to(device)
    val_words_labels = val_data.words_labels_input.to(device)

    val_preds = model(val_features, val_times)

    utts = val_words_labels.shape[0]
    words = val_words_labels.shape[1]

    word_sums = torch.zeros(utts, words)

    for idx in range(len(val_times)):
        timetable = val_times[idx]
        for jdx in range(len(timetable)):
            span = timetable[jdx]
            if span[1] != 0. and span[0] != span[1]:
                start = int(span[0]) + 1
                end = int(span[1]) + 1
                word_sums[idx, jdx] = sum(val_preds[idx][start:end])

    act_sig = nn.Sigmoid()
    val_preds = act_sig(word_sums).cpu().detach().numpy().flatten()
    val_preds_bin = np.where(val_preds > 0.5, 1, 0)
    val_labels = np.array(val_words_labels.squeeze(1).cpu().detach().numpy(),
                          dtype=int).flatten()

    with open("eval/cnn_model.report", "w") as reportfile:
        reportfile.write(
            classification_report(val_labels, val_preds_bin, digits=4))

    prec, rec, _ = precision_recall_curve(val_labels, val_preds)
    prec_rec_graph = PrecisionRecallDisplay(prec,
                                            rec,
                                            average_precision=0.5,
                                            estimator_name="CNN").plot()
    plt.savefig("eval/cnn_model_prec_rec_graph.png")
Example #21
import numpy as np
y = np.array([0, 1, 1, 0, 1, 0, 1, 0, 0, 1])
y_score = np.array([0.7, 0.8, 0.3, 0.4, 0.9, 0.6, 0.99, 0.1, 0.2, 0.5])

from sklearn.metrics import precision_recall_curve, PrecisionRecallDisplay, auc

my_precision, my_recall, _ = precision_recall_curve(y_true=y,
                                                    probas_pred=y_score,
                                                    pos_label=1)
PrecisionRecallDisplay(precision=my_precision, recall=my_recall).plot()

import matplotlib.pyplot as plt
plt.savefig('10-p-pr.pdf')
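The auc import above goes unused; the PR AUC can be computed from the same arrays (recall is monotonically decreasing, which auc accepts):

pr_auc = auc(my_recall, my_precision)
print(f'PR AUC = {pr_auc:.3f}')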
Example #22
                                  f"{img_path}{img_name}"].to_numpy()[0][-1])
        y_preds.append(0)
        # If there was an object and no bounding box was found, it is a false negative
        if test_images.loc[test_images['PATH'] == f"{img_path}{img_name}",
                           ['class']].iloc[0].to_numpy()[0] == 1:

            false_negatives += 1
            all += 1
        else:
            true_negatives += 1
            all += 1

    print(f"False Negatives {false_negatives} \n"
          f"True  Negatives {true_negatives} \n"
          f"True  Positives {true_positives} \n"
          f"False Positives {false_positives}")
    #cv2.imshow('img', img)
    #cv2.waitKey(0)
    cv2.imwrite('./results_imgs/{}.png'.format(idx), img)

#Calculate mAP
mAP = average_precision_score(y_true, y_preds)

print(f"Model's mAP: {mAP}")

precision, recall, thresholds = precision_recall_curve(y_true, y_preds)

disp = PrecisionRecallDisplay(precision=precision, recall=recall)
disp.plot()
plt.show()
Example #23
def test_plot_precision_recall_pos_label(pyplot, constructor_name,
                                         response_method):
    # check that we can provide the positive label and display the proper
    # statistics
    X, y = load_breast_cancer(return_X_y=True)
    # create a highly imbalanced version of the breast cancer dataset
    idx_positive = np.flatnonzero(y == 1)
    idx_negative = np.flatnonzero(y == 0)
    idx_selected = np.hstack([idx_negative, idx_positive[:25]])
    X, y = X[idx_selected], y[idx_selected]
    X, y = shuffle(X, y, random_state=42)
    # only use 2 features to make the problem even harder
    X = X[:, :2]
    y = np.array(["cancer" if c == 1 else "not cancer" for c in y],
                 dtype=object)
    X_train, X_test, y_train, y_test = train_test_split(
        X,
        y,
        stratify=y,
        random_state=0,
    )

    classifier = LogisticRegression()
    classifier.fit(X_train, y_train)

    # sanity check: the positive class is classes_[0], so relying on the
    # default pos_label would be misled by the class imbalance
    assert classifier.classes_.tolist() == ["cancer", "not cancer"]

    y_pred = getattr(classifier, response_method)(X_test)
    # we select the corresponding probability column, or reverse the
    # decision function otherwise
    y_pred_cancer = -1 * y_pred if y_pred.ndim == 1 else y_pred[:, 0]
    y_pred_not_cancer = y_pred if y_pred.ndim == 1 else y_pred[:, 1]

    if constructor_name == "from_estimator":
        display = PrecisionRecallDisplay.from_estimator(
            classifier,
            X_test,
            y_test,
            pos_label="cancer",
            response_method=response_method,
        )
    else:
        display = PrecisionRecallDisplay.from_predictions(
            y_test,
            y_pred_cancer,
            pos_label="cancer",
        )
    # we should obtain the statistics of the "cancer" class
    avg_prec_limit = 0.65
    assert display.average_precision < avg_prec_limit
    assert -np.trapz(display.precision, display.recall) < avg_prec_limit

    # otherwise we should obtain the statistics of the "not cancer" class
    if constructor_name == "from_estimator":
        display = PrecisionRecallDisplay.from_estimator(
            classifier,
            X_test,
            y_test,
            response_method=response_method,
            pos_label="not cancer",
        )
    else:
        display = PrecisionRecallDisplay.from_predictions(
            y_test,
            y_pred_not_cancer,
            pos_label="not cancer",
        )
    avg_prec_limit = 0.95
    assert display.average_precision > avg_prec_limit
    assert -np.trapz(display.precision, display.recall) > avg_prec_limit
Example #24
y_score = clf.decision_function(X_test)

fpr, tpr, _ = roc_curve(y_test, y_score, pos_label=clf.classes_[1])
roc_display = RocCurveDisplay(fpr=fpr, tpr=tpr).plot()

# %%
# Create :class:`PrecisionRecallDisplay`
##############################################################################
# Similarly, the precision recall curve can be plotted using `y_score` from
# the previous section.
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import PrecisionRecallDisplay

prec, recall, _ = precision_recall_curve(y_test, y_score, pos_label=clf.classes_[1])
pr_display = PrecisionRecallDisplay(precision=prec, recall=recall).plot()

# %%
# Combining the display objects into a single plot
##############################################################################
# The display objects store the computed values that were passed as arguments.
# This allows for the visualizations to be easily combined using matplotlib's
# API. In the following example, we place the displays next to each other in a
# row.

# sphinx_gallery_thumbnail_number = 4
import matplotlib.pyplot as plt

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 8))

roc_display.plot(ax=ax1)
pr_display.plot(ax=ax2)
Example #25
# Plot the Precision-Recall curve
# ...............................
#
# To plot the precision-recall curve, you should use
# :class:`~sklearn.metrics.PrecisionRecallDisplay`. There are two
# methods available, depending on whether you have already computed the
# predictions of the classifier.
#
# Let's first plot the precision-recall curve without the classifier
# predictions. We use
# :func:`~sklearn.metrics.PrecisionRecallDisplay.from_estimator` that
# computes the predictions for us before plotting the curve.
from sklearn.metrics import PrecisionRecallDisplay

display = PrecisionRecallDisplay.from_estimator(classifier,
                                                X_test,
                                                y_test,
                                                name="LinearSVC")
_ = display.ax_.set_title("2-class Precision-Recall curve")

# %%
# If we already got the estimated probabilities or scores for
# our model, then we can use
# :func:`~sklearn.metrics.PrecisionRecallDisplay.from_predictions`.
y_score = classifier.decision_function(X_test)

display = PrecisionRecallDisplay.from_predictions(y_test,
                                                  y_score,
                                                  name="LinearSVC")
_ = display.ax_.set_title("2-class Precision-Recall curve")

# %%
Example #26
if __name__ == '__main__':
    x_train, x_test, y_train, y_test = get_dataset()
    model = LogisticRegression(multi_class='ovr')
    model.fit(x_train, y_train)
    b_y = label_binarize(y_test, classes=[0, 1, 2])
    y_scores = model.predict_proba(x_test)
    print(y_scores)
    for i in range(len(np.unique(y_test))):
        precision, recall, _ = p_r_curve(b_y[:, i], y_scores[:, i])
        ap = compute_ap(recall, precision)
        plt.plot(recall,
                 precision,
                 drawstyle="steps-post",
                 label=f'Precision-recall for class {i} (AP = {ap})')
    plt.xlabel("Recall")
    plt.ylabel("Precision")
    plt.title("Precision-Recall curve by ours")
    plt.legend(loc="lower left")
    # plot the same curves using the sklearn method

    _, ax = plt.subplots()
    for i in range(len(np.unique(y_test))):
        precision, recall, _ = p_r_curve(b_y[:, i], y_scores[:, i])
        ap = compute_ap(recall, precision)
        display = PrecisionRecallDisplay(recall=recall,
                                         precision=precision,
                                         average_precision=ap)
        display.plot(ax=ax, name=f"Precision-recall for class {i}")
    ax.set_title("Precision-Recall curve by sklearn")
    plt.show()
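p_r_curve and compute_ap are the author's own re-implementations and are not shown; a minimal sketch of compute_ap, assuming the step-wise average-precision sum AP = sum((R_n - R_{n-1}) * P_n) that average_precision_score also uses:

import numpy as np

def compute_ap(recall, precision):
    # precision_recall_curve-style inputs: recall is decreasing, so the
    # step widths fall out of -np.diff(recall)
    recall = np.asarray(recall)
    precision = np.asarray(precision)
    return -np.sum(np.diff(recall) * precision[:-1])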
Example #27
def LSTM_trainer(input_dim, hidden_dim, layer_dim, output_dim, seq_dim,
                error, lr, optimizer, num_epochs, train_loader, test_loader):    
    
    model = LSTMModel(input_dim, hidden_dim, layer_dim, output_dim)

    if error == 'BCELogit':
        error = nn.BCEWithLogitsLoss() 
    else:    
        error = nn.BCEWithLogitsLoss()
        
    if optimizer == 'Adadelta':
        optimizer = torch.optim.Adadelta(model.parameters(), lr=lr)
    else:
        optimizer = torch.optim.Adadelta(model.parameters(), lr=lr)
             
    
    loss_list_test = []
    loss_list_train = []
    iteration_list = []
    accuracy_list = []
    count = 0
    
    for epoch in range(num_epochs):
        for i, (images, labels) in enumerate(train_loader):
            # Load images as a torch tensor with gradient accumulation abilities
            images = images.view(-1, seq_dim, input_dim).requires_grad_()

            # Clear gradients w.r.t. parameters
            optimizer.zero_grad()

            # Forward pass to get output/logits
            # outputs.size 100, 10
            outputs = model(images)

            # Calculate Loss: softmax --> cross entropy loss
            labels = labels.unsqueeze(1)

            loss_train = error(outputs, labels.float())

            # Getting gradients
            loss_train.backward()

            # Updating parameters
            optimizer.step()


            count += 1

            if count % 500 == 0:
                # Calculate Accuracy
                predictions_list = []
                labels_list = []
                outputs_list = []

                correct = 0
                total = 0
                size = 0
                loss_test_total = 0.0
                for images, labels in test_loader:
                    images = images.view(-1, seq_dim, input_dim)
                    # Forward pass only to get logits/output
                    outputs = model(images)
                    labels = labels.unsqueeze(1)
                    loss_test = error(outputs, labels.float())
                    # accumulate the per-batch loss, weighted by batch size
                    loss_test_total += loss_test.data.item() * labels.shape[0]
                    size += labels.shape[0]

                    predictions = torch.round(torch.sigmoid(outputs))
                    predictions_list.append(predictions.detach().numpy())
                    labels_list.append(labels.detach().numpy())
                    outputs_list.append(outputs.detach().numpy())

                loss_list_test.append(loss_test_total / size)
                outputs_list = np.vstack(outputs_list)
                predictions_list = np.vstack(predictions_list)
                labels_list = np.concatenate(labels_list)

                prec, recall, thresholds = precision_recall_curve(labels_list, outputs_list)
                pr_display = PrecisionRecallDisplay(precision=prec, recall=recall,
                                                    average_precision=0,
                                                    estimator_name='LSTM').plot()

                fpr, tpr, thresholds = roc_curve(labels_list, outputs_list)
                plt.plot(fpr, tpr, label=roc_auc_score(labels_list, outputs_list))

                target_spec = 0.90
                spec = [1-i for i in fpr]
                idx_spec = min(enumerate(spec), key=lambda x: abs(x[1]-target_spec))


                #print('Specificity:' , spec[idx_spec[0]-2], 'Sensitivity:' , tpr[idx_spec[0]-2]) 
                print('Specificity:' , spec[idx_spec[0]-1], 'Sensitivity:' , tpr[idx_spec[0]-1])    
                print('Specificity:' , spec[idx_spec[0]], 'Sensitivity:' , tpr[idx_spec[0]])
                print('Specificity:' , spec[idx_spec[0]+1], 'Sensitivity:' , tpr[idx_spec[0]+1])
                #print('Specificity:' , spec[idx_spec[0]+2], 'Sensitivity:' , tpr[idx_spec[0]+2]) 

                print(classification_report(labels_list, predictions_list))
                print('Matthews_corrcoef' , matthews_corrcoef(labels_list, predictions_list))
                print('roc_auc_score' , roc_auc_score(labels_list, predictions_list))

                loss_list_train.append(loss_train.data.item())
                iteration_list.append(count)

                # Print Loss
                print('Iteration: {}. Loss_train: {}. Loss_test {}.'.format(count, loss_train.data.item(), loss_test_total / size))
Example #28
pos_label = clf.classes_[1]
fpr, tpr, _ = roc_curve(y_test, y_score, pos_label=pos_label)
AUC = auc(fpr, tpr)
roc_display = RocCurveDisplay(fpr=fpr,
                              tpr=tpr,
                              roc_auc=AUC,
                              estimator_name='demo').plot()

#%% Create PR
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import average_precision_score
from sklearn.metrics import PrecisionRecallDisplay

pos_label = clf.classes_[1]
prec, recall, _ = precision_recall_curve(y_test, y_score, pos_label=pos_label)
# alternative AUCpr (~AP), with a different computing method
AP = average_precision_score(y_test, y_score, pos_label=pos_label)
pr_display = PrecisionRecallDisplay(precision=prec,
                                    recall=recall,
                                    average_precision=AP,
                                    estimator_name='demo').plot()
AUCpr = auc(x=recall, y=prec)

#%% Combining the display objects (ROC and PR) into a single plot
import matplotlib.pyplot as plt
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 8))

roc_display.plot(ax=ax1)
pr_display.plot(ax=ax2)
plt.show()
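The two PR summaries differ by construction: average_precision_score uses the step-wise sum AP = sum((R_n - R_{n-1}) * P_n), while auc(recall, prec) applies the trapezoidal rule, whose linear interpolation can be slightly optimistic on PR curves. A quick comparison:

print(f'AP (step-wise)       = {AP:.4f}')
print(f'AUC-PR (trapezoidal) = {AUCpr:.4f}')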
Example #29
 def precision_recall(self, y_true, y_pred, average_precision):
     precisions, recalls, _ = precision_recall_curve(y_true, y_pred, pos_label=5)
     PrecisionRecallDisplay(precision=precisions, recall=recalls,
                            average_precision=average_precision,
                            estimator_name="AP").plot()
     plt.title('Precision & Recall metric')
     plt.savefig('static/images/outputs/metriquePR.png', dpi=100)
     plt.clf()
Example #30
roc_display = RocCurveDisplay(fpr=fpr,
                              tpr=tpr,
                              roc_auc=AUC,
                              estimator_name='logistic regression').plot()
'''
Here we apply the precision-recall curve to see how much both variables change
as we vary the threshold
'''
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import PrecisionRecallDisplay
from sklearn.metrics import average_precision_score

average_precision = average_precision_score(y_test, y_score, pos_label='1')

prec, recall, _ = precision_recall_curve(y_test,
                                         y_score,
                                         pos_label=pipeline1.classes_[1])
pr_display = PrecisionRecallDisplay(
    precision=prec,
    recall=recall,
    average_precision=average_precision,
    estimator_name='logistic regression').plot()

#%%
'''
Here we would like to take advantage of the classification_report function in sklearn
'''

from sklearn.metrics import classification_report

class_matrix = classification_report(y_test, y_pred)
#%%