Example #1
def test_ax(self):
    np.random.seed(0)
    clf = LogisticRegression()
    clf.fit(self.X, self.y)
    probas = clf.predict_proba(self.X)
    fig, ax = plt.subplots(1, 1)
    out_ax = plot_lift_curve(self.y, probas)
    assert ax is not out_ax
    out_ax = plot_lift_curve(self.y, probas, ax=ax)
    assert ax is out_ax
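The test pins down how `plot_lift_curve` handles Axes: called without `ax=` it creates its own, and called with `ax=` it draws on and returns the object you passed. A minimal self-contained sketch of that behaviour, assuming scikit-plot's `scikitplot.metrics.plot_lift_curve`:

import matplotlib.pyplot as plt
from scikitplot.metrics import plot_lift_curve
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression

X, y = make_classification(n_samples=500, random_state=0)
probas = LogisticRegression(max_iter=1000).fit(X, y).predict_proba(X)

# Without ax=, plot_lift_curve creates and returns a fresh Axes.
out_ax = plot_lift_curve(y, probas)

# With ax=, it draws on the Axes you pass in and returns that same object.
fig, ax = plt.subplots()
assert plot_lift_curve(y, probas, ax=ax) is ax
plt.show()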
Example #2
def log_lift_curve(y_true,
                   y_pred,
                   experiment=None,
                   channel_name='metric_charts',
                   prefix=''):
    """Creates cumulative gain chart and logs it to Neptune.

    Args:
        y_true (array-like, shape (n_samples)): Ground truth (correct) target values.
        y_pred (array-like, shape (n_samples, 2)): Predictions for classes 0 and 1 with values from 0 to 1.
        experiment (`neptune.experiments.Experiment`): Neptune experiment. Default is None.
        channel_name (str): Name of the Neptune channel. Default is 'metric_charts'.
        prefix (str): Prefix that will be added before the metric name when logged to Neptune.

    Examples:
        Train the model and make predictions on the test set::

            from sklearn.datasets import make_classification
            from sklearn.ensemble import RandomForestClassifier
            from sklearn.model_selection import train_test_split
            from sklearn.metrics import classification_report

            X, y = make_classification(n_samples=2000)
            X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

            model = RandomForestClassifier()
            model.fit(X_train, y_train)

            y_test_pred = model.predict_proba(X_test)

        Create and log lift curve chart to Neptune::

            import neptune
            from neptunecontrib.monitoring.metrics import log_lift_curve

            neptune.init()
            with neptune.create_experiment():
                log_lift_curve(y_test, y_test_pred)

        Check out this experiment https://ui.neptune.ai/o/neptune-ai/org/binary-classification-metrics/e/BIN-101/logs.

    """
    assert len(y_pred.shape) == 2, \
        'y_pred needs to be (n_samples, 2), use expand_prediction helper to format it'

    _exp = experiment if experiment else neptune

    expect_not_a_run(_exp)

    fig, ax = plt.subplots()
    plt_metrics.plot_lift_curve(y_true, y_pred, ax=ax)
    send_figure(fig, channel_name=prefix + channel_name, experiment=_exp)
    plt.close()
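The assert above enforces an (n_samples, 2) prediction array and points to an `expand_prediction` helper. If you only have positive-class probabilities, plain NumPy achieves the same shape (a sketch; `y_pred_pos` is a hypothetical name):

import numpy as np

# Positive-class probabilities, shape (n_samples,).
y_pred_pos = np.array([0.1, 0.9, 0.4])

# Stack P(class 0) and P(class 1) into the (n_samples, 2) layout
# that log_lift_curve expects.
y_pred = np.column_stack([1 - y_pred_pos, y_pred_pos])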
Example #3
    def lift_gain_curves(self):
        # Both plots expect an (n_samples, 2) array of class probabilities,
        # so stack the negative- and positive-class probabilities.
        y_pred_proba_both_classes = np.column_stack(
            [1 - self.y_pred_proba, self.y_pred_proba])
        gain = plot_cumulative_gain(self.y_test,
                                    y_pred_proba_both_classes,
                                    title='Cumulative Gains Curve')
        plt.show()

        lift = plot_lift_curve(self.y_test,
                               y_pred_proba_both_classes,
                               title='Lift curve')
        plt.show()
Example #4
def plot_analysis(combine,
                  test_name,
                  y_true,
                  y_pred,
                  y_proba,
                  labels,
                  verbose,
                  library,
                  save=True,
                  show=True,
                  sessionid="testing",
                  prefix=""):

    met_index = 0
    plt.rcParams.update({'font.size': 14})
    # TODO: Find a way to do this better
    pltmetrics.plot_confusion_matrix(y_true, y_pred)
    if not combine:
        #plt.gcf().set_size_inches(3.65,3.65)
        save_show(plt, library + "/" + prefix, sessionid, "confusion_matrix",
                  show, save, False, True, True, False)
    else:
        plt.subplot(2, 4, met_index + 1)
    met_index += 1

    plt.rcParams.update({'font.size': 12})
    pltmetrics.plot_roc_curve(y_true, y_proba)
    for text in plt.gca().legend_.get_texts():
        text.set_text(text.get_text().replace("ROC curve of class", "class"))
        text.set_text(text.get_text().replace("area =", "AUC: "))
        text.set_text(text.get_text().replace("micro-average ROC curve",
                                              "micro-avg"))
        text.set_text(text.get_text().replace("macro-average ROC curve",
                                              "macro-avg"))
    if not combine:
        #plt.gcf().set_size_inches(3.65,3.65)
        save_show(plt, library + "/" + prefix, sessionid, "roc_curves", show,
                  save, False, True, True, False)
    else:
        plt.subplot(2, 4, met_index + 1)
    met_index += 1

    if len(labels) < 3:
        pltmetrics.plot_ks_statistic(y_true, y_proba)
        if not combine:
            #plt.gcf().set_size_inches(3.65,3.65)
            save_show(plt, library + "/" + prefix, sessionid, "ks_statistics",
                      show, save, False, True, True, False)
        else:
            plt.subplot(2, 4, met_index + 1)
        met_index += 1

    pltmetrics.plot_precision_recall_curve(y_true, y_proba)
    for text in plt.gca().legend_.get_texts():
        text.set_text(text.get_text().replace(
            "Precision-recall curve of class", "class"))
        text.set_text(text.get_text().replace("area =", "AUC: "))
        text.set_text(text.get_text().replace(
            "micro-average Precision-recall curve", "micro-avg"))
        text.set_text(text.get_text().replace("macro-average Precision-recall",
                                              "macro-avg"))
    if not combine:
        #plt.gcf().set_size_inches(3.65,3.65)
        save_show(plt, library + "/" + prefix, sessionid,
                  "precision_recall_curve", show, save, False, True, True,
                  False)
    else:
        plt.subplot(2, 4, met_index + 1)
    met_index += 1

    if len(labels) < 3:
        pltmetrics.plot_cumulative_gain(y_true, y_proba)
        if not combine:
            #plt.gcf().set_size_inches(3.65,3.65)
            save_show(plt, library + "/" + prefix, sessionid,
                      "cumulative_gain", show, save, False, True, True, False)
        else:
            plt.subplot(2, 4, met_index + 1)
        met_index += 1

    if len(labels) < 3:
        pltmetrics.plot_lift_curve(y_true, y_proba)
        if not combine:
            #plt.gcf().set_size_inches(3.65,3.65)
            save_show(plt, library + "/" + prefix, sessionid, "lift_curve",
                      show, save, False, True, True, False)
        else:
            plt.subplot(2, 4, met_index + 1)
        met_index += 1

    if combine:
        plt.suptitle(test_name)
        plt.tight_layout(rect=[0, 0.03, 1, 0.95])
        # NOTE: figname is not defined in this function; it must come from
        # the enclosing module scope.
        save_show(plt,
                  library,
                  sessionid,
                  figname,
                  show,
                  save,
                  True,
                  analysis=True)
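When `combine` is set, each chart lands in a cell of a 2x4 grid via `plt.subplot`. The same layout can be built more explicitly, since scikit-plot's plotting functions accept an `ax=` keyword; a sketch assuming `y_true`, `y_pred`, and `y_proba` are defined as in the function above (`plot_roc` is the non-deprecated counterpart of `plot_roc_curve`):

import matplotlib.pyplot as plt
import scikitplot.metrics as pltmetrics

# Draw each chart into one cell of a shared grid via the ax= keyword.
fig, axes = plt.subplots(2, 2, figsize=(12, 10))
pltmetrics.plot_confusion_matrix(y_true, y_pred, ax=axes[0, 0])
pltmetrics.plot_roc(y_true, y_proba, ax=axes[0, 1])
pltmetrics.plot_cumulative_gain(y_true, y_proba, ax=axes[1, 0])
pltmetrics.plot_lift_curve(y_true, y_proba, ax=axes[1, 1])
fig.suptitle("combined metrics")
fig.tight_layout(rect=[0, 0.03, 1, 0.95])
plt.show()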
Example #5

##############################################################################
### making ROC Curve
##############################################################################

def plot_roc_curve(fpr, tpr, label=None):
    plt.plot(fpr, tpr, linewidth=2, label=label)
    plt.plot([0, 1], [0, 1], "k--")  # diagonal = random classifier
    plt.axis([0, 1, 0, 1])
    plt.xlabel("False Positive Rate")
    plt.ylabel("True Positive Rate")

plot_roc_curve(fpr, tpr)
plt.show()
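The `fpr` and `tpr` used above are assumed to exist already; they would typically come from scikit-learn's `roc_curve`. A sketch using the `clf`, `test_xn`, and `test_y` names that appear below (an assumption about how they were computed):

from sklearn.metrics import roc_curve

# Scores for the positive class; clf, test_xn, test_y are defined elsewhere.
y_scores = clf.predict_proba(test_xn)[:, 1]
fpr, tpr, thresholds = roc_curve(test_y, y_scores)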


# ### Our high recall translates into a fairly nice ROC curve.

##############################################################################
### SVM Lift Curve
##############################################################################

y_probas = clf.predict_proba(test_xn)
plot_lift_curve(test_y, y_probas)
plt.show()

Example #6
def test_array_like(self):
    plot_lift_curve([0, 1], [[0.8, 0.2], [0.2, 0.8]])
    plot_lift_curve([0, 'a'], [[0.8, 0.2], [0.2, 0.8]])
    plot_lift_curve(['b', 'a'], [[0.8, 0.2], [0.2, 0.8]])
Example #7
def test_string_classes(self):
    np.random.seed(0)
    clf = LogisticRegression()
    clf.fit(self.X, convert_labels_into_string(self.y))
    probas = clf.predict_proba(self.X)
    plot_lift_curve(convert_labels_into_string(self.y), probas)
Example #8
# Plot ROC curve and compute area under curve
plt.figure()
false_positive_rate, recall, thresholds = roc_curve(target_bi1_valid, predictions_lgbm_prob)
roc_auc = auc(false_positive_rate, recall)
plt.title('Receiver Operating Characteristic (ROC)')
plt.plot(false_positive_rate, recall, 'b', label='AUC = %0.3f' % roc_auc)
plt.legend(loc='lower right')
plt.plot([0, 1], [0, 1], 'r--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.0])
plt.ylabel('Recall')
plt.xlabel('Fall-out (1-Specificity)')
plt.show()

print('AUC score:', roc_auc)

# Plot confusion matrix
cm = confusion_matrix(target_bi1_valid, predictions_lgbm_01)
labels = ['0', '1']
plt.figure(figsize=(8, 6))
sns.heatmap(cm, xticklabels=labels, yticklabels=labels, annot=True, fmt='d', cmap="Blues", vmin=0.2)
plt.title('Confusion Matrix')
plt.ylabel('True Class')
plt.xlabel('Predicted Class')
plt.savefig('CM_LightGBM.png', dpi=300)
plt.show()

# Lift curve (built here from one-hot encoded hard class predictions)
plot_lift_curve(target_bi1_valid, pd.get_dummies(y_pred).to_numpy())
plt.show()
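Note that `pd.get_dummies(y_pred)` one-hot encodes hard class predictions, so every predicted positive is tied at probability 1 and the ranking a lift curve relies on is largely lost. Lift is normally computed from predicted probabilities; a sketch reusing `predictions_lgbm_prob` from the ROC block above, assuming it holds positive-class probabilities:

import numpy as np

# Build the (n_samples, 2) probability matrix that plot_lift_curve expects.
probas_2col = np.column_stack([1 - predictions_lgbm_prob, predictions_lgbm_prob])
plot_lift_curve(target_bi1_valid, probas_2col)
plt.show()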