Ejemplo n.º 1
0
def log_ks_statistic(y_true,
                     y_pred,
                     experiment=None,
                     channel_name='metric_charts',
                     prefix=''):
    """Creates and logs KS statistics curve and KS statistics score to Neptune.

    Kolmogorov-Smirnov statistics chart can be calculated for true positive rates (TPR) and true negative rates (TNR)
    for each threshold and plotted on a chart.
    The maximum distance from TPR to TNR can be treated as performance metric.

    The score is logged as ``prefix + 'ks_statistic'`` and the chart is sent to the
    ``prefix + channel_name`` channel.

    Args:
        y_true (array-like, shape (n_samples)): Ground truth (correct) target values.
        y_pred (array-like, shape (n_samples, 2)): Predictions for classes 0 and 1 with values from 0 to 1.
            Nested lists are accepted; the input is converted with ``numpy.asarray``.
        experiment(`neptune.experiments.Experiment`): Neptune experiment. Default is None,
            in which case the ``neptune`` module itself is used for logging.
        channel_name(str): name of the neptune channel. Default is 'metric_charts'.
        prefix(str): Prefix that will be added before metric name when logged to Neptune.

    Raises:
        AssertionError: If ``y_pred`` is not two-dimensional.

    Examples:
        Train the model and make predictions on test::

            from sklearn.datasets import make_classification
            from sklearn.ensemble import RandomForestClassifier
            from sklearn.model_selection import train_test_split
            from sklearn.metrics import classification_report

            X, y = make_classification(n_samples=2000)
            X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

            model = RandomForestClassifier()
            model.fit(X_train, y_train)

            y_test_pred = model.predict_proba(X_test)

        Create and log KS statistics curve and KS statistics score to Neptune::

            import neptune
            from neptunecontrib.monitoring.metrics import log_ks_statistic

            neptune.init()
            with neptune.create_experiment():
                log_ks_statistic(y_test, y_test_pred)

        Check out this experiment https://ui.neptune.ai/o/neptune-ai/org/binary-classification-metrics/e/BIN-101/logs.

    """
    # Imported locally so this function does not rely on a module-level
    # numpy import that may or may not exist in the file.
    import numpy as np

    # The docstring promises array-like input, but plain nested lists have
    # neither `.shape` nor 2-D slicing; normalise to an ndarray up front.
    y_pred = np.asarray(y_pred)

    # Raised explicitly (rather than via `assert`) so the check survives
    # `python -O`; the exception type and message are unchanged.
    if y_pred.ndim != 2:
        raise AssertionError(
            'y_pred needs to be (n_samples, 2), use expand_prediction helper to format it'
        )

    _exp = experiment if experiment else neptune

    # Column 1 holds the positive-class scores; the value logged as the KS
    # score is the fourth element of the curve result.
    res = binary_ks_curve(y_true, y_pred[:, 1])
    ks_stat = res[3]
    _exp.log_metric(prefix + 'ks_statistic', ks_stat)

    fig, ax = plt.subplots()
    try:
        plt_metrics.plot_ks_statistic(y_true, y_pred, ax=ax)
        send_figure(fig, channel_name=prefix + channel_name, experiment=_exp)
    finally:
        # Close this specific figure (not whatever happens to be current),
        # and do so even when plotting or uploading fails.
        plt.close(fig)
Ejemplo n.º 2
0
 def test_ax(self):
     """Check how ``plot_ks_statistic`` treats the optional ``ax`` argument.

     Without ``ax`` the returned axes must differ from the pre-made one;
     with ``ax`` supplied the very same object must be returned.
     """
     np.random.seed(0)
     model = LogisticRegression()
     model.fit(self.X, self.y)
     scores = model.predict_proba(self.X)

     # Only the axes half of the pair is needed for the identity checks.
     _, given_ax = plt.subplots(1, 1)

     returned_ax = plot_ks_statistic(self.y, scores)
     assert given_ax is not returned_ax

     returned_ax = plot_ks_statistic(self.y, scores, ax=given_ax)
     assert given_ax is returned_ax
Ejemplo n.º 3
0
def plot_analysis(combine,
                  test_name,
                  y_true,
                  y_pred,
                  y_proba,
                  labels,
                  verbose,
                  library,
                  save=True,
                  show=True,
                  sessionid="testing",
                  prefix=""):
    """Draw the classification diagnostic charts for one run.

    The confusion matrix, ROC curves and precision-recall curves are always
    drawn. The KS statistic, cumulative gain and lift curve are drawn only
    when ``labels`` has fewer than three entries.

    With ``combine`` false, every chart is handed to ``save_show`` right
    after it is drawn, under ``library + "/" + prefix``. With ``combine``
    true, a ``plt.subplot(2, 4, k)`` slot is selected after each chart and a
    single ``save_show`` call is made at the end, with ``test_name`` as the
    figure suptitle.

    Args:
        combine: Selects between per-chart output and one combined figure.
        test_name: Suptitle used for the combined figure.
        y_true: Ground-truth labels forwarded to every plotting call.
        y_pred: Predicted labels, used for the confusion matrix.
        y_proba: Predicted probabilities, used for all curve charts.
        labels: Only its length is inspected (fewer than three enables the
            KS, cumulative gain and lift charts).
        verbose: Not referenced in this function.
        library: Output location component forwarded to ``save_show``.
        save: Forwarded to ``save_show``.
        show: Forwarded to ``save_show``.
        sessionid: Forwarded to ``save_show``.
        prefix: Appended to ``library`` (after a ``/``) for per-chart output.
    """

    def _finish_chart(chart_name, slot):
        # After a chart has been drawn: either emit it on its own, or move
        # on to the next cell of the 2x4 grid.
        if combine:
            plt.subplot(2, 4, slot + 1)
        else:
            save_show(plt, library + "/" + prefix, sessionid, chart_name,
                      show, save, False, True, True, False)

    def _shorten_legend(substitutions):
        # Apply each (old, new) replacement, in order, to every legend entry
        # of the current axes.
        for entry in plt.gca().legend_.get_texts():
            for old, new in substitutions:
                entry.set_text(entry.get_text().replace(old, new))

    slot = 0
    plt.rcParams.update({'font.size': 14})
    # TODO: Find a way to do this better
    pltmetrics.plot_confusion_matrix(y_true, y_pred)
    _finish_chart("confusion_matrix", slot)
    slot += 1

    plt.rcParams.update({'font.size': 12})
    pltmetrics.plot_roc_curve(y_true, y_proba)
    _shorten_legend((
        ("ROC curve of class", "class"),
        ("area =", "AUC: "),
        ("micro-average ROC curve", "micro-avg"),
        ("macro-average ROC curve", "macro-avg"),
    ))
    _finish_chart("roc_curves", slot)
    slot += 1

    few_labels = len(labels) < 3

    if few_labels:
        pltmetrics.plot_ks_statistic(y_true, y_proba)
        _finish_chart("ks_statistics", slot)
        slot += 1

    pltmetrics.plot_precision_recall_curve(y_true, y_proba)
    _shorten_legend((
        ("Precision-recall curve of class", "class"),
        ("area =", "AUC: "),
        ("micro-average Precision-recall curve", "micro-avg"),
        ("macro-average Precision-recall", "macro-avg"),
    ))
    _finish_chart("precision_recall_curve", slot)
    slot += 1

    if few_labels:
        pltmetrics.plot_cumulative_gain(y_true, y_proba)
        _finish_chart("cumulative_gain", slot)
        slot += 1

        pltmetrics.plot_lift_curve(y_true, y_proba)
        _finish_chart("lift_curve", slot)
        slot += 1

    if not combine:
        return

    plt.suptitle(test_name)
    plt.tight_layout(rect=[0, 0.03, 1, 0.95])
    # NOTE(review): `figname` is not assigned anywhere in this function; it
    # must come from module scope, otherwise this call raises NameError.
    save_show(plt,
              library,
              sessionid,
              figname,
              show,
              save,
              True,
              analysis=True)
Ejemplo n.º 4
0
 def test_array_like(self):
     """Call ``plot_ks_statistic`` with plain lists and mixed label types.

     Covers integer labels, mixed integer/string labels and string-only
     labels; the test passes as long as none of the calls raises.
     """
     for y_true in ([0, 1], [0, 'a'], ['b', 'a']):
         # A fresh probability list per call, as in three separate calls.
         plot_ks_statistic(y_true, [[0.8, 0.2], [0.2, 0.8]])
Ejemplo n.º 5
0
 def test_string_classes(self):
     """Fit on string-converted labels and plot the KS statistic for them.

     The test passes as long as fitting and plotting do not raise.
     """
     np.random.seed(0)
     model = LogisticRegression()
     train_labels = convert_labels_into_string(self.y)
     model.fit(self.X, train_labels)
     scores = model.predict_proba(self.X)
     # Converted again rather than reused, matching the two separate calls.
     plot_labels = convert_labels_into_string(self.y)
     plot_ks_statistic(plot_labels, scores)