def log_ks_statistic(y_true, y_pred, experiment=None, channel_name='metric_charts', prefix=''):
    """Creates and logs KS statistics curve and KS statistics score to Neptune.

    Kolmogorov-Smirnov statistics chart can be calculated for true positive rates (TPR) and
    true negative rates (TNR) for each threshold and plotted on a chart.
    The maximum distance from TPR to TNR can be treated as performance metric.

    Args:
        y_true (array-like, shape (n_samples)): Ground truth (correct) target values.
        y_pred (array-like, shape (n_samples, 2)): Predictions for classes 0 and 1 with values
            from 0 to 1. Must expose ``.shape`` and support ``y_pred[:, 1]`` slicing
            (e.g. a numpy array).
        experiment(`neptune.experiments.Experiment`): Neptune experiment. Default is None,
            in which case the module-level ``neptune`` object is used for logging.
        channel_name(str): name of the neptune channel. Default is 'metric_charts'.
        prefix(str): Prefix that will be added before metric name when logged to Neptune.
            It is also prepended to ``channel_name`` for the chart.

    Raises:
        AssertionError: If ``y_pred`` is not two-dimensional.

    Examples:
        Train the model and make predictions on test::

            from sklearn.datasets import make_classification
            from sklearn.ensemble import RandomForestClassifier
            from sklearn.model_selection import train_test_split

            X, y = make_classification(n_samples=2000)
            X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

            model = RandomForestClassifier()
            model.fit(X_train, y_train)

            y_test_pred = model.predict_proba(X_test)

        Create and log KS statistics curve and KS statistics score to Neptune::

            import neptune

            from neptunecontrib.monitoring.metrics import log_ks_statistic

            neptune.init()
            with neptune.create_experiment():
                log_ks_statistic(y_test, y_test_pred)

        Check out this experiment https://ui.neptune.ai/o/neptune-ai/org/binary-classification-metrics/e/BIN-101/logs.

    """
    # Kept as an assert so existing callers still observe AssertionError on bad input.
    assert len(y_pred.shape) == 2, 'y_pred needs to be (n_samples, 2), use expand_prediction helper to format it'

    # Identity check: only an omitted experiment falls back to the neptune module.
    _exp = experiment if experiment is not None else neptune

    # The KS score is the fourth element of the tuple returned by binary_ks_curve.
    res = binary_ks_curve(y_true, y_pred[:, 1])
    ks_stat = res[3]
    _exp.log_metric(prefix + 'ks_statistic', ks_stat)

    fig, ax = plt.subplots()
    try:
        plt_metrics.plot_ks_statistic(y_true, y_pred, ax=ax)
        send_figure(fig, channel_name=prefix + channel_name, experiment=_exp)
    finally:
        # Close exactly the figure created here, even when plotting or sending fails,
        # instead of whichever figure happens to be current.
        plt.close(fig)
def test_ax(self):
    """Check that plot_ks_statistic draws on a supplied axes and otherwise returns a different one."""
    np.random.seed(0)
    model = LogisticRegression()
    model.fit(self.X, self.y)
    class_probabilities = model.predict_proba(self.X)

    _, supplied_ax = plt.subplots(1, 1)

    # Without an explicit axes the returned axes must not be the one created above.
    default_ax = plot_ks_statistic(self.y, class_probabilities)
    assert default_ax is not supplied_ax

    # With an explicit axes the very same object must come back.
    returned_ax = plot_ks_statistic(self.y, class_probabilities, ax=supplied_ax)
    assert returned_ax is supplied_ax
def _shorten_legend_labels(replacements):
    """Rewrite the legend entries of the current axes using ordered substring replacements.

    Args:
        replacements: Sequence of ``(old, new)`` pairs, applied in order to each legend label.
    """
    legend = plt.gca().get_legend()
    if legend is None:
        # Nothing to relabel when the plot drew no legend.
        return
    for entry in legend.get_texts():
        label = entry.get_text()
        for old, new in replacements:
            label = label.replace(old, new)
        entry.set_text(label)


def _save_or_advance(combine, met_index, figure_name, library, prefix, sessionid, show, save):
    """Finish one chart: hand it to ``save_show`` or select the next cell of the 2x4 grid.

    Returns:
        The subplot index to use next; unchanged when ``combine`` is false.
    """
    if not combine:
        save_show(plt, library + "/" + prefix, sessionid, figure_name, show, save, False, True, True, False)
        return met_index
    plt.subplot(2, 4, met_index + 1)
    return met_index + 1


def plot_analysis(combine, test_name, y_true, y_pred, y_proba, labels, verbose, library, save=True, show=True,
                  sessionid="testing", prefix=""):
    """Draw the classification diagnostic charts for one test run.

    Charts are produced in this order: confusion matrix, ROC curves, KS statistic,
    precision-recall curve, cumulative gain and lift curve. The KS, cumulative-gain and
    lift charts are only drawn when ``labels`` has fewer than three entries.

    Args:
        combine: When false, every chart is passed to ``save_show`` individually under
            ``library + "/" + prefix``. When true, ``plt.subplot(2, 4, ...)`` is called
            after each chart and ``save_show`` is called once at the end.
        test_name: Figure super-title used in combined mode.
        y_true: Ground-truth labels forwarded to the plotting functions.
        y_pred: Predicted labels, used for the confusion matrix.
        y_proba: Predicted probabilities, used for all other charts.
        labels: Class labels; only its length is inspected.
        verbose: Not used by this function.
        library: Name forwarded to ``save_show`` (as a path prefix in non-combined mode).
        save: Forwarded to ``save_show``.
        show: Forwarded to ``save_show``.
        sessionid: Forwarded to ``save_show``.
        prefix: Appended to ``library`` after a ``/`` in non-combined mode.
    """
    output_args = (library, prefix, sessionid, show, save)
    met_index = 0

    plt.rcParams.update({'font.size': 14})
    # TODO: Find a way to do this better
    pltmetrics.plot_confusion_matrix(y_true, y_pred)
    met_index = _save_or_advance(combine, met_index, "confusion_matrix", *output_args)

    # The remaining charts use a smaller font than the confusion matrix.
    plt.rcParams.update({'font.size': 12})
    pltmetrics.plot_roc_curve(y_true, y_proba)
    _shorten_legend_labels((
        ("ROC curve of class", "class"),
        ("area =", "AUC: "),
        ("micro-average ROC curve", "micro-avg"),
        ("macro-average ROC curve", "macro-avg"),
    ))
    met_index = _save_or_advance(combine, met_index, "roc_curves", *output_args)

    if len(labels) < 3:
        pltmetrics.plot_ks_statistic(y_true, y_proba)
        met_index = _save_or_advance(combine, met_index, "ks_statistics", *output_args)

    pltmetrics.plot_precision_recall_curve(y_true, y_proba)
    _shorten_legend_labels((
        ("Precision-recall curve of class", "class"),
        ("area =", "AUC: "),
        ("micro-average Precision-recall curve", "micro-avg"),
        ("macro-average Precision-recall", "macro-avg"),
    ))
    met_index = _save_or_advance(combine, met_index, "precision_recall_curve", *output_args)

    if len(labels) < 3:
        pltmetrics.plot_cumulative_gain(y_true, y_proba)
        met_index = _save_or_advance(combine, met_index, "cumulative_gain", *output_args)

    if len(labels) < 3:
        pltmetrics.plot_lift_curve(y_true, y_proba)
        met_index = _save_or_advance(combine, met_index, "lift_curve", *output_args)

    if combine:
        plt.suptitle(test_name)
        # Leave room at the top of the figure for the super-title.
        plt.tight_layout(rect=[0, 0.03, 1, 0.95])
        # NOTE(review): `figname` is neither a parameter nor a local of this function; it must
        # resolve at module level or this call raises NameError — confirm the intended name.
        save_show(plt, library, sessionid, figname, show, save, True, analysis=True)
def test_array_like(self):
    """Call plot_ks_statistic with plain lists using int, mixed and string labels."""
    label_variants = ([0, 1], [0, 'a'], ['b', 'a'])
    for labels in label_variants:
        # A fresh probability list per call, as each original call passed its own literal.
        plot_ks_statistic(labels, [[0.8, 0.2], [0.2, 0.8]])
def test_string_classes(self):
    """Fit on string-converted labels and plot the KS statistic against those labels."""
    np.random.seed(0)
    model = LogisticRegression()
    training_labels = convert_labels_into_string(self.y)
    model.fit(self.X, training_labels)
    class_probabilities = model.predict_proba(self.X)

    # Labels are converted again for plotting, independently of the training copy.
    plotting_labels = convert_labels_into_string(self.y)
    plot_ks_statistic(plotting_labels, class_probabilities)