import pandas as pd
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import (accuracy_score, confusion_matrix, f1_score,
                             ConfusionMatrixDisplay, RocCurveDisplay)


def binary_classifier_quality(model, X_test, Y_test):
    """Report quality metrics for a binary classifier.

    If `model` is a GridSearchCV, predictions come from its best estimator.
    """
    if isinstance(model, GridSearchCV):
        result = pd.DataFrame(
            {k: model.cv_results_[k]
             for k in ['params', 'mean_test_score',
                       'std_test_score', 'rank_test_score']})
        print(result)
        print()
        print(f"best params: {model.best_params_}")
        print("best score: {:f}".format(model.best_score_))
        print()
    Y_hat = model.predict(X_test)
    print("Confusion matrix (true x pred):")
    print(confusion_matrix(Y_test, Y_hat))
    # Sensitivity = TP / P, specificity = TN / N.
    print("Sensitivity: {:f}".format(sum(Y_hat[Y_test == 1]) / sum(Y_test)))
    print("Specificity: {:f}".format(
        sum(1 - Y_hat[Y_test == 0]) / sum(Y_test == 0)))
    print("Accuracy score on test data: {:f}".format(
        accuracy_score(Y_test, Y_hat)))
    print("F1 score on test data: {:f}".format(f1_score(Y_test, Y_hat)))
    ConfusionMatrixDisplay.from_predictions(Y_test, Y_hat)
    RocCurveDisplay.from_estimator(model, X_test, Y_test)
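# A minimal, hypothetical usage sketch for binary_classifier_quality: the
# dataset, estimator, and grid below are illustrative assumptions, not part
# of the original code.
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV, train_test_split

X, y = make_classification(n_samples=300, random_state=0)
X_tr, X_te, y_tr, y_te = train_test_split(X, y, random_state=0)
grid = GridSearchCV(LogisticRegression(max_iter=1000), {'C': [0.1, 1, 10]})
grid.fit(X_tr, y_tr)
binary_classifier_quality(grid, X_te, y_te)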
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import (roc_curve, auc, precision_score, recall_score,
                             RocCurveDisplay)


def evaluate_roc(y_true, y_pred, method, plot=True):
    """A quick helper for ad-hoc ROC analysis; a more thorough comparison
    is done later in R."""
    fpr, tpr, thresholds = roc_curve(y_true, y_pred)
    roc_auc = auc(fpr, tpr)
    # Best point on the ROC curve via Youden's J statistic (J = TPR - FPR).
    J = tpr - fpr
    best_ind = np.argmax(J)
    best_threshold = thresholds[best_ind]
    print(f'Best threshold: < {np.round(best_threshold, 3)} --> negative')
    # Compute precision and recall at that threshold.
    binarized = (y_pred >= best_threshold).astype(int)
    recall = recall_score(y_true, binarized)
    precision = precision_score(y_true, binarized)
    print(f'Recall = {np.round(recall, 3)}, '
          f'Precision = {np.round(precision, 3)}')
    if plot:
        viz = RocCurveDisplay(fpr=fpr, tpr=tpr, roc_auc=roc_auc,
                              estimator_name=method)
        viz.plot()
        plt.show()
    print(f'AUC: {np.round(roc_auc, 3)}')
    return best_threshold
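# A minimal, hypothetical call to evaluate_roc with synthetic scores; the
# labels, scores, and method name are illustrative assumptions.
rng_demo = np.random.RandomState(0)
y_true_demo = rng_demo.randint(0, 2, size=200)
y_score_demo = np.clip(0.4 * y_true_demo + 0.6 * rng_demo.rand(200), 0, 1)
best_thr = evaluate_roc(y_true_demo, y_score_demo, method='demo-model')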
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc, RocCurveDisplay


def loo_roc_curve_plot(clf, x, y):
    """Plot a ROC curve from leave-one-out predicted probabilities."""
    from functools import partial

    def loo_proba(i, x, y, clf):
        # Fit on all samples except i, then predict the probability for i.
        idx = list(range(len(y)))
        idx.pop(i)
        clf.fit(x[idx, :], y[idx])
        return clf.predict_proba(x[[i], :])[0, 1]

    func_ = partial(loo_proba, x=x, y=y, clf=clf)
    y_proba = [func_(i) for i in range(len(y))]
    fpr, tpr, _ = roc_curve(y, y_proba)
    roc_auc = auc(fpr, tpr)
    name = clf.__class__.__name__
    ax = plt.figure().gca()
    ax.plot([0, 1], [0, 1], linestyle='--', lw=2, color='r',
            label='Chance', alpha=.8)
    viz = RocCurveDisplay(fpr=fpr, tpr=tpr, roc_auc=roc_auc,
                          estimator_name=name)
    return viz.plot(name=name, ax=ax)
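# A hypothetical usage sketch for loo_roc_curve_plot: leave-one-out refits
# the classifier once per sample, so the synthetic problem is kept small.
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression

X_demo, y_demo = make_classification(n_samples=40, random_state=0)
loo_roc_curve_plot(LogisticRegression(max_iter=1000), X_demo, y_demo)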
class RocAucCurve(CurveFabric):

    def __init__(self, col_score, col_target, name=None, **kwargs):
        super().__init__(col_score, col_target, name=name)
        self.fpr = None
        self.tpr = None
        self.roc_auc = None

    def fit(self, df):
        self.fpr, self.tpr, _ = roc_curve(df[self.col_target],
                                          df[self.col_score])
        self.roc_auc = auc(self.fpr, self.tpr)
        return self

    def plot(self, ax=None, title=None, **kwargs):
        if ax is None:
            fig, ax = plt.subplots()
        self.ax = ax
        # AUC is scaled by 100 so the legend shows it as a percentage.
        self.viz = RocCurveDisplay(fpr=self.fpr, tpr=self.tpr,
                                   roc_auc=self.roc_auc * 100,
                                   estimator_name=self.name)
        if title:
            ax.set_title(title, fontsize=14, fontweight='bold')
        self.viz.plot(ax=ax, name=self.name, **kwargs)
        return self
def sklearn_visualizations():
    import matplotlib.pyplot as plt
    from sklearn.model_selection import train_test_split
    from sklearn.svm import SVC
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.metrics import RocCurveDisplay
    from sklearn import datasets

    rng = 42  # seed; `rng` was an undefined module-level global originally

    # data
    X, y = datasets.load_wine(return_X_y=True)
    y = y == 2
    X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                        random_state=rng)

    # svm
    svc = SVC(random_state=rng)
    svc.fit(X_train, y_train)
    svc_disp = RocCurveDisplay.from_estimator(svc, X_test, y_test)
    plt.show()

    # random forest
    rfc = RandomForestClassifier(random_state=rng)
    rfc.fit(X_train, y_train)
    ax = plt.gca()
    rfc_disp = RocCurveDisplay.from_estimator(rfc, X_test, y_test,
                                              ax=ax, alpha=0.8)
    svc_disp.plot(ax=ax, alpha=0.8)
    plt.show()
def evaluate(self, x_test, y_test):
    y_score = self.model.predict(x_test)[:, 0]  # raw sigmoid outputs
    y_pred = [1 * (s >= 0.5) for s in y_score]  # binarize at 0.5
    print('MLP performance on test for', self.feature_name)
    print('Accuracy:', accuracy_score(y_test, y_pred),
          'Precision:', precision_score(y_test, y_pred),
          'Recall:', recall_score(y_test, y_pred))
    # Confusion matrix
    cm = confusion_matrix(y_test, y_pred)
    cm_display = ConfusionMatrixDisplay(cm)
    # Precision-recall: curves need the raw scores, not the 0/1 labels.
    precision, recall, _ = precision_recall_curve(y_test, y_score)
    pr_display = PrecisionRecallDisplay(precision=precision, recall=recall)
    # ROC
    fpr, tpr, _ = roc_curve(y_test, y_score)
    roc_display = RocCurveDisplay(fpr=fpr, tpr=tpr)
    # Figure
    figure: Figure = plt.figure(1, figsize=(15, 6))
    figure.suptitle('MLP on {}'.format(self.feature_name), fontsize=20)
    (ax1, ax2, ax3) = figure.subplots(1, 3)
    ax1.set_title('Confusion matrix')
    cm_display.plot(ax=ax1)
    ax2.set_title('Precision recall')
    pr_display.plot(ax=ax2)
    ax3.set_title('Roc curve')
    roc_display.plot(ax=ax3)
    file_name = '{}-mlp.png'.format(self.feature_name)
    figure.savefig(
        os.path.join(get_folder_path_from_root('images'), file_name))
    plt.show()
from sklearn.metrics import roc_curve, auc, RocCurveDisplay


def roc_curve_plot(clf, x, y):
    # Use the predicted probability of the positive class as the score.
    y_prob = clf.predict_proba(x)[:, 1]
    fpr, tpr, _ = roc_curve(y, y_prob)
    roc_auc = auc(fpr, tpr)
    name = clf.__class__.__name__
    viz = RocCurveDisplay(fpr=fpr, tpr=tpr, roc_auc=roc_auc,
                          estimator_name=name)
    return viz.plot(name=name)
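# A hypothetical usage sketch for roc_curve_plot; it expects an already
# fitted classifier that exposes predict_proba. Evaluating on the training
# set here is for illustration only.
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier

X_demo, y_demo = make_classification(n_samples=200, random_state=1)
clf_demo = RandomForestClassifier(random_state=1).fit(X_demo, y_demo)
roc_curve_plot(clf_demo, X_demo, y_demo)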
import matplotlib.pyplot as plt
from sklearn.metrics import RocCurveDisplay


def plot_roc_curves(train_roc_auc, split_roc_auc, valid_roc_auc):
    """Each argument is a pair (auc_value, points), where `points` is a
    sequence of (fpr, tpr) tuples."""
    fig, ax = plt.subplots()
    # `points` was originally named `auc`, which shadows sklearn.metrics.auc.
    for name, (roc_auc, points) in [
        ('train', train_roc_auc),
        ('split', split_roc_auc),
        ('valid', valid_roc_auc),
    ]:
        viz = RocCurveDisplay(fpr=[p[0] for p in points],
                              tpr=[p[1] for p in points],
                              roc_auc=roc_auc,
                              estimator_name=name,
                              pos_label=1.0)
        viz.plot(ax=ax, name=name)
    return fig
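# A hypothetical input sketch for plot_roc_curves: each argument is a pair
# (auc_value, points) with points given as (fpr, tpr) tuples. The values
# below are made up for illustration.
demo_curve = (0.75, [(0.0, 0.0), (0.2, 0.6), (1.0, 1.0)])
fig = plot_roc_curves(demo_curve, demo_curve, demo_curve)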
def get_roc_curve(self, gt_index=0, pred_index=1, display=True,
                  model_name="autopilot-model"):
    y = self._y()
    yh = self._yh()
    fpr, tpr, thresholds = roc_curve(y, yh)
    roc_auc = auc(fpr, tpr)
    viz = RocCurveDisplay(fpr=fpr, tpr=tpr, roc_auc=roc_auc,
                          estimator_name=model_name)
    if display:
        viz.plot()
    return viz, roc_auc, fpr, tpr, thresholds
import numpy as np
import pandas as pd
import torch
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc, RocCurveDisplay


def plot_roc_curve(pred: torch.Tensor, label: torch.Tensor,
                   name="example estimator"):
    pred = pred.detach().cpu().numpy().flatten()
    label = label.detach().cpu().numpy().flatten()
    fpr, tpr, thresholds = roc_curve(label, pred)
    roc_auc = auc(fpr, tpr)
    display = RocCurveDisplay(fpr=fpr, tpr=tpr, roc_auc=roc_auc,
                              estimator_name=name)
    # Optimal threshold: the point where tpr is closest to 1 - fpr,
    # i.e. where sensitivity and specificity are balanced.
    idx = np.arange(len(tpr))
    roc = pd.DataFrame({'tf': pd.Series(tpr - (1 - fpr), index=idx),
                        'threshold': pd.Series(thresholds, index=idx)})
    roc_t = roc.iloc[(roc.tf - 0).abs().argsort()[:1]]
    display.plot()
    plt.show()
    return list(roc_t['threshold'])[0]
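# A hypothetical usage sketch for the torch variant of plot_roc_curve;
# the tensors below are illustrative assumptions.
demo_label = torch.tensor([0., 1., 1., 0., 1., 0.])
demo_pred = torch.tensor([0.2, 0.8, 0.6, 0.4, 0.9, 0.1])
best_threshold = plot_roc_curve(demo_pred, demo_label, name="demo")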
def plot(self, data_original_test):
    """Plot ROC and precision-recall curves of the original and synthetic
    models in a single figure."""
    X_test, y_test = self._split_xy(data_original_test)
    fig, ax = plt.subplots(1, 2, figsize=(14, 6))
    sns.despine()

    # ROC curve
    RocCurveDisplay.from_estimator(self.stats_original_, X_test, y_test,
                                   name=self.labels[0],
                                   color=COLOR_PALETTE[0], ax=ax[0])
    RocCurveDisplay.from_estimator(self.stats_synthetic_, X_test, y_test,
                                   name=self.labels[1],
                                   color=COLOR_PALETTE[1], ax=ax[0])
    ax[0].plot([0, 1], [0, 1], linestyle="--", lw=1, color="black",
               alpha=0.7)
    ax[0].set_title('ROC Curve')

    # PR curve
    PrecisionRecallDisplay.from_estimator(self.stats_original_, X_test,
                                          y_test, name=self.labels[0],
                                          color=COLOR_PALETTE[0], ax=ax[1])
    PrecisionRecallDisplay.from_estimator(self.stats_synthetic_, X_test,
                                          y_test, name=self.labels[1],
                                          color=COLOR_PALETTE[1], ax=ax[1])
    # Baseline for PR: a no-skill classifier scores the positive-class rate.
    no_skill = len(y_test[y_test == 1]) / len(y_test)
    ax[1].plot([0, 1], [no_skill, no_skill], lw=1, linestyle='--',
               color='black', alpha=0.7)
    ax[1].set_title('Precision-Recall Curve')
def test_default_labels(pyplot, roc_auc, estimator_name, expected_label):
    fpr = np.array([0, 0.5, 1])
    tpr = np.array([0, 0.5, 1])
    disp = RocCurveDisplay(fpr=fpr, tpr=tpr, roc_auc=roc_auc,
                           estimator_name=estimator_name).plot()
    assert disp.line_.get_label() == expected_label
import os

import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, roc_auc_score, RocCurveDisplay


def plot_roc(y_test_df, y_score, trained_pipeline):
    fpr, tpr, _ = roc_curve(y_test_df, y_score,
                            pos_label=trained_pipeline.classes_[1])
    RocCurveDisplay(fpr=fpr, tpr=tpr).plot()
    plt.title(f"AUC: {roc_auc_score(y_test_df, y_score)}")
    plt.tight_layout()
    plt.savefig(os.path.join(pass_success_model_eval_dir, "roc.png"))
def plot(self, ax=None, figsize=(10, 5)):
    if ax is None:
        fig, ax = plt.subplots(1, 1, figsize=figsize)
    ax.set_title("ROC Curve")
    possible_colors = GeneralUtils.shuffled_colors()
    for class_index, label in enumerate(self.labels):
        fpr = self._roc_curve[label]['fpr']
        tpr = self._roc_curve[label]['tpr']
        roc_auc = self.auc[label]
        viz = RocCurveDisplay(fpr=fpr, tpr=tpr, roc_auc=roc_auc,
                              estimator_name='Classifier')
        viz.plot(ax=ax, name=label, color=possible_colors[class_index])
    plt.draw()
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import auc, RocCurveDisplay


def cv_roc_curve_plot(clf, x, y, cv):
    tprs = []
    aucs = []
    mean_fpr = np.linspace(0, 1, len(y))
    fig, ax = plt.subplots()
    cver = StratifiedKFold(n_splits=cv)
    for i, (train_idx, test_idx) in enumerate(cver.split(x, y)):
        clf.fit(x[train_idx], y[train_idx])
        # plot_roc_curve was removed in scikit-learn 1.2; the Display API
        # is its replacement.
        viz = RocCurveDisplay.from_estimator(
            clf, x[test_idx], y[test_idx],
            ax=ax, name=f"ROC fold {i}", alpha=.3, lw=1,
        )
        interp_tpr = np.interp(mean_fpr, viz.fpr, viz.tpr)
        tprs.append(interp_tpr)
        aucs.append(viz.roc_auc)
    ax.plot([0, 1], [0, 1], linestyle='--', lw=2, color='r',
            label='Chance', alpha=.8)
    mean_tpr = np.mean(tprs, axis=0)
    mean_tpr[-1] = 1.0
    mean_auc = auc(mean_fpr, mean_tpr)
    std_auc = np.std(aucs)
    ax.plot(mean_fpr, mean_tpr, color='b', lw=2, alpha=.8,
            label=r'Mean ROC (AUC = %0.4f)' % mean_auc)
    std_tpr = np.std(tprs, axis=0)
    tprs_upper = np.minimum(mean_tpr + std_tpr, 1)
    tprs_lower = np.maximum(mean_tpr - std_tpr, 0)
    ax.fill_between(mean_fpr, tprs_lower, tprs_upper, color='grey', alpha=.2,
                    label=r'$\pm$ %0.4f std. dev.' % std_auc)
    ax.set(xlim=[-0.05, 1.05], ylim=[-0.05, 1.05],
           title="Receiver operating characteristic")
    ax.legend(loc="lower right")
    name = clf.__class__.__name__
    viz = RocCurveDisplay(fpr=mean_fpr, tpr=mean_tpr, roc_auc=mean_auc,
                          estimator_name=name)
    return viz
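# A hypothetical usage sketch for cv_roc_curve_plot; x and y must be numpy
# arrays because the folds are indexed positionally.
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression

X_demo, y_demo = make_classification(n_samples=150, random_state=0)
mean_viz = cv_roc_curve_plot(LogisticRegression(max_iter=1000),
                             X_demo, y_demo, cv=5)
mean_viz.plot()  # re-plot the aggregated mean curve on its own axes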
def test_roc_curve_display_default_labels(pyplot, roc_auc, estimator_name,
                                          expected_label):
    """Check the default labels used in the display."""
    fpr = np.array([0, 0.5, 1])
    tpr = np.array([0, 0.5, 1])
    disp = RocCurveDisplay(fpr=fpr, tpr=tpr, roc_auc=roc_auc,
                           estimator_name=estimator_name).plot()
    assert disp.line_.get_label() == expected_label
def test_roc_curve_display_complex_pipeline(pyplot, data_binary, clf,
                                            constructor_name):
    """Check the behaviour with complex pipeline."""
    X, y = data_binary

    if constructor_name == "from_estimator":
        with pytest.raises(NotFittedError):
            RocCurveDisplay.from_estimator(clf, X, y)

    clf.fit(X, y)

    if constructor_name == "from_estimator":
        display = RocCurveDisplay.from_estimator(clf, X, y)
        name = clf.__class__.__name__
    else:
        display = RocCurveDisplay.from_predictions(y, y)
        name = "Classifier"

    assert name in display.line_.get_label()
    assert display.estimator_name == name
def roc_curve(self, test_label=None, plot_type='test'):
    if test_label is not None:
        self.test_label = test_label
    if plot_type == 'test':
        predicts = [self.y_pred]
        labels = [self.test_label]
    elif plot_type == 'train':
        predicts = [self.y_oof]
        labels = [self.y_train]
    else:
        raise ValueError(f"unknown plot_type: {plot_type}")
    method_names = ['lgb']
    fig = plt.figure(figsize=(6, 6))
    ax = fig.add_subplot(1, 1, 1)
    # Loop variables renamed so they no longer shadow the lists they
    # iterate over (or sklearn's auc).
    for pred, label, method_name in zip(predicts, labels, method_names):
        fpr, tpr, thresholds = metrics.roc_curve(label, pred)
        roc_auc = metrics.auc(fpr, tpr)
        roc_display = RocCurveDisplay(fpr=fpr, tpr=tpr, roc_auc=roc_auc,
                                      estimator_name=method_name)
        roc_display.plot(ax=ax)
    ax.set_title('ROC curve : LightGBM', fontsize=16)
    plt.show()
def plot_roc_curves(self,
                    level: shared.enums.EvaluationLevel,
                    dataset_type: shared.enums.DatasetType,
                    font_size: int = 10):
    """
    Plots a ROC curve for each class in self._predictor.get_classes().
    """
    # set matplotlib font size globally
    plt.rcParams['font.size'] = font_size
    objs = self.__get_objects_according_to_evaluation_level(
        level=level, dataset_type=dataset_type)
    classes = self._predictor.get_classes()
    for Class in tqdm(classes):
        y_preds_raw = []  # predicted probabilities
        y_true = []       # True/False per object
        for obj in objs:
            # If the raw predictions contain NaN values, the WSI/case
            # usually did not contain any tile, so the prediction
            # calculation divided by zero. The latest version of the
            # patient_manager fixes this by checking for tilesummaries
            # that do not contain any top tile.
            if numpy.isnan(list(obj.predictions_raw.values())).any():
                continue
            y_preds_raw.append(obj.predictions_raw[Class])
            y_true.append(Class in obj.get_labels())
        fpr, tpr, threshold = roc_curve(y_true, y_preds_raw, pos_label=1)
        roc_auc = auc(fpr, tpr)
        roc_display = RocCurveDisplay(fpr=fpr, tpr=tpr, roc_auc=roc_auc,
                                      estimator_name=Class).plot()
    # set matplotlib font size back to default
    plt.rcParams['font.size'] = 10
import matplotlib.pyplot as plt
from sklearn.metrics import RocCurveDisplay

fig, ax = plt.subplots()

models = [
    ("RT embedding -> LR", rt_model),
    ("RF", random_forest),
    ("RF embedding -> LR", rf_model),
    ("GBDT", gradient_boosting),
    ("GBDT embedding -> LR", gbdt_model),
]

model_displays = {}
for name, pipeline in models:
    model_displays[name] = RocCurveDisplay.from_estimator(
        pipeline, X_test, y_test, ax=ax, name=name)
_ = ax.set_title("ROC curve")

# %%
fig, ax = plt.subplots()
for name, pipeline in models:
    model_displays[name].plot(ax=ax)

ax.set_xlim(0, 0.2)
ax.set_ylim(0.8, 1)
_ = ax.set_title("ROC curve (zoomed in at top left)")
# Make up some plots
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc, RocCurveDisplay
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
# Note: plot_roc_curve and plot_confusion_matrix were removed in
# scikit-learn 1.2 in favour of the Display classes imported above.
from sklearn.metrics import plot_roc_curve
from sklearn.metrics import plot_confusion_matrix

for model_name, model in models.items():
    print("Model: ", model_name)
    # ROC Curve
    if model_name == "elasticnetlinear":
        # roc_curve expects (y_true, y_score); the original call passed the
        # arguments in the reverse order.
        fpr, tpr, thresholds = roc_curve(y_test, model[1])
        roc_auc = auc(fpr, tpr)
        svc_disp = RocCurveDisplay(fpr=fpr, tpr=tpr, roc_auc=roc_auc,
                                   estimator_name='Elastic Net Linear')
        svc_disp.plot()
        plt.title(
            f"ROC Curve of {PREDICTION_PHENO} by {model_name} (Scaled)")
        plt.savefig(
            f"finalplots/scaledroc_{PREDICTION_PHENO}_{model_name}.png",
            dpi=600, transparent=True, bbox_inches="tight",
            pad_inches=0.3)
    else:
        if model_name == "rbfsvmapprox":
            svc_disp = plot_roc_curve(
import numpy as np
import matplotlib.pyplot as plt
from sklearn.calibration import calibration_curve
from sklearn.metrics import roc_curve, auc, precision_score, RocCurveDisplay

y_pred_proba = final_model.predict_proba(x_test)[:, 1]
y_pred = final_model.predict(x_test)

# Calibration curve
fraction_of_positives, mean_predicted_value = calibration_curve(
    np.array(y_test), y_pred_proba, strategy='uniform', n_bins=20)
plt.figure()
plt.plot(mean_predicted_value, fraction_of_positives, "sr-")
plt.title("Calibration")
plt.xlabel("mean_predicted_value")
plt.ylabel("fraction_of_positives")

# ROC curve
fpr, tpr, _ = roc_curve(y_test, y_pred_proba)
roc_auc = auc(fpr, tpr)
display = RocCurveDisplay(fpr=fpr, tpr=tpr, roc_auc=roc_auc,
                          estimator_name=None)
display.plot()
plt.title("ROC")

# PPV/NPV across the range of candidate thresholds (endpoints dropped)
range_class = np.linspace(np.min(y_pred_proba), np.max(y_pred_proba), 100)
range_class = np.delete(range_class, 0)
range_class = np.delete(range_class, -1)
PPV = np.zeros(len(range_class))
NPV = np.zeros(len(range_class))
j = 0
for i in range_class:
    PPV[j] = precision_score(y_test, y_pred_proba > i, pos_label=1)
    NPV[j] = precision_score(y_test, y_pred_proba > i, pos_label=0)
    j += 1
plt.figure()
cm_display = ConfusionMatrixDisplay(cm).plot()

# %%
# Create :class:`RocCurveDisplay`
##############################################################################
# The roc curve requires either the probabilities or the non-thresholded
# decision values from the estimator. Since the logistic regression provides
# a decision function, we will use it to plot the roc curve:
from sklearn.metrics import roc_curve
from sklearn.metrics import RocCurveDisplay

y_score = clf.decision_function(X_test)

fpr, tpr, _ = roc_curve(y_test, y_score, pos_label=clf.classes_[1])
roc_display = RocCurveDisplay(fpr=fpr, tpr=tpr).plot()

# %%
# Create :class:`PrecisionRecallDisplay`
##############################################################################
# Similarly, the precision-recall curve can be plotted using `y_score` from
# the previous section.
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import PrecisionRecallDisplay

prec, recall, _ = precision_recall_curve(y_test, y_score,
                                         pos_label=clf.classes_[1])
pr_display = PrecisionRecallDisplay(precision=prec, recall=recall).plot()

# %%
# Combining the display objects into a single plot
##############################################################################
fpr, tpr, thresholds = roc_curve(y[test], y_proba[:, pos_label_idx],
                                 pos_label=pos_label)
mean_tpr += np.interp(mean_fpr, fpr, tpr)
mean_tpr[0] = 0.0

mean_tpr /= cv.get_n_splits(X, y)
mean_tpr[-1] = 1.0
mean_auc = auc(mean_fpr, mean_tpr)

# Create a display that we will reuse to make the aggregated plots for
# all methods
disp.append(
    RocCurveDisplay(
        fpr=mean_fpr,
        tpr=mean_tpr,
        roc_auc=mean_auc,
        estimator_name=f"{model[0].__class__.__name__}",
    ))

# %% [markdown]
# In the previous cell, we created the different mean ROC curves and we can
# plot them on the same plot.

# %%
fig, ax = plt.subplots(figsize=(9, 9))
for d in disp:
    d.plot(ax=ax, linestyle="--")
ax.plot([0, 1], [0, 1], linestyle="--", color="k")
ax.axis("square")
fig.suptitle("Comparison of over-sampling methods with a 3NN classifier")
ax.set_xlim([0, 1])
import matplotlib.pyplot as plt
from sklearn.datasets import load_wine
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import RocCurveDisplay
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

X, y = load_wine(return_X_y=True)
y = y == 2

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
svc = SVC(random_state=42)
svc.fit(X_train, y_train)

# %%
# Plotting the ROC Curve
# ----------------------
# Next, we plot the ROC curve with a single call to
# :func:`sklearn.metrics.RocCurveDisplay.from_estimator`. The returned
# `svc_disp` object allows us to continue using the already computed ROC
# curve for the SVC in future plots.
svc_disp = RocCurveDisplay.from_estimator(svc, X_test, y_test)
plt.show()

# %%
# Training a Random Forest and Plotting the ROC Curve
# ----------------------------------------------------
# We train a random forest classifier and create a plot comparing it to the
# SVC ROC curve. Notice how `svc_disp` uses
# :func:`~sklearn.metrics.RocCurveDisplay.plot` to plot the SVC ROC curve
# without recomputing the values of the roc curve itself. Furthermore, we
# pass `alpha=0.8` to the plot functions to adjust the alpha values of the
# curves.
rfc = RandomForestClassifier(n_estimators=10, random_state=42)
rfc.fit(X_train, y_train)
ax = plt.gca()
rfc_disp = RocCurveDisplay.from_estimator(rfc,
def test_plot_roc_curve_pos_label(pyplot, response_method, constructor_name):
    # check that we can provide the positive label and display the proper
    # statistics
    X, y = load_breast_cancer(return_X_y=True)
    # create a highly imbalanced dataset
    idx_positive = np.flatnonzero(y == 1)
    idx_negative = np.flatnonzero(y == 0)
    idx_selected = np.hstack([idx_negative, idx_positive[:25]])
    X, y = X[idx_selected], y[idx_selected]
    X, y = shuffle(X, y, random_state=42)
    # only use 2 features to make the problem even harder
    X = X[:, :2]
    y = np.array(["cancer" if c == 1 else "not cancer" for c in y],
                 dtype=object)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, stratify=y, random_state=0,
    )

    classifier = LogisticRegression()
    classifier.fit(X_train, y_train)

    # sanity check to be sure the positive class is classes_[0] and that we
    # are betrayed by the class imbalance
    assert classifier.classes_.tolist() == ["cancer", "not cancer"]

    y_pred = getattr(classifier, response_method)(X_test)
    # we select the corresponding probability columns or reverse the
    # decision function otherwise
    y_pred_cancer = -1 * y_pred if y_pred.ndim == 1 else y_pred[:, 0]
    y_pred_not_cancer = y_pred if y_pred.ndim == 1 else y_pred[:, 1]

    if constructor_name == "from_estimator":
        display = RocCurveDisplay.from_estimator(
            classifier,
            X_test,
            y_test,
            pos_label="cancer",
            response_method=response_method,
        )
    else:
        display = RocCurveDisplay.from_predictions(
            y_test,
            y_pred_cancer,
            pos_label="cancer",
        )

    roc_auc_limit = 0.95679

    assert display.roc_auc == pytest.approx(roc_auc_limit)
    assert np.trapz(display.tpr, display.fpr) == pytest.approx(roc_auc_limit)

    if constructor_name == "from_estimator":
        display = RocCurveDisplay.from_estimator(
            classifier,
            X_test,
            y_test,
            response_method=response_method,
            pos_label="not cancer",
        )
    else:
        display = RocCurveDisplay.from_predictions(
            y_test,
            y_pred_not_cancer,
            pos_label="not cancer",
        )

    assert display.roc_auc == pytest.approx(roc_auc_limit)
    assert np.trapz(display.tpr, display.fpr) == pytest.approx(roc_auc_limit)
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, RocCurveDisplay, auc

y = np.array([0, 1, 1, 0, 1, 0, 1, 0, 0, 1])
y_score = np.array([0.7, 0.8, 0.3, 0.4, 0.9, 0.6, 0.99, 0.1, 0.2, 0.5])

my_fpr, my_tpr, _ = roc_curve(y_true=y, y_score=y_score, pos_label=1)
RocCurveDisplay(fpr=my_fpr, tpr=my_tpr).plot()

plt.savefig('10-p-roc.pdf')
def test_roc_curve_display_plotting(
    pyplot,
    response_method,
    data_binary,
    with_sample_weight,
    drop_intermediate,
    with_strings,
    constructor_name,
    default_name,
):
    """Check the overall plotting behaviour."""
    X, y = data_binary

    pos_label = None
    if with_strings:
        y = np.array(["c", "b"])[y]
        pos_label = "c"

    if with_sample_weight:
        rng = np.random.RandomState(42)
        sample_weight = rng.randint(1, 4, size=(X.shape[0]))
    else:
        sample_weight = None

    lr = LogisticRegression()
    lr.fit(X, y)

    y_pred = getattr(lr, response_method)(X)
    y_pred = y_pred if y_pred.ndim == 1 else y_pred[:, 1]

    if constructor_name == "from_estimator":
        display = RocCurveDisplay.from_estimator(
            lr,
            X,
            y,
            sample_weight=sample_weight,
            drop_intermediate=drop_intermediate,
            pos_label=pos_label,
            alpha=0.8,
        )
    else:
        display = RocCurveDisplay.from_predictions(
            y,
            y_pred,
            sample_weight=sample_weight,
            drop_intermediate=drop_intermediate,
            pos_label=pos_label,
            alpha=0.8,
        )

    fpr, tpr, _ = roc_curve(
        y,
        y_pred,
        sample_weight=sample_weight,
        drop_intermediate=drop_intermediate,
        pos_label=pos_label,
    )

    assert_allclose(display.roc_auc, auc(fpr, tpr))
    assert_allclose(display.fpr, fpr)
    assert_allclose(display.tpr, tpr)

    assert display.estimator_name == default_name

    import matplotlib as mpl  # noqa

    assert isinstance(display.line_, mpl.lines.Line2D)
    assert display.line_.get_alpha() == 0.8
    assert isinstance(display.ax_, mpl.axes.Axes)
    assert isinstance(display.figure_, mpl.figure.Figure)

    expected_label = f"{default_name} (AUC = {display.roc_auc:.2f})"
    assert display.line_.get_label() == expected_label

    expected_pos_label = 1 if pos_label is None else pos_label
    expected_ylabel = (
        f"True Positive Rate (Positive label: {expected_pos_label})"
    )
    expected_xlabel = (
        f"False Positive Rate (Positive label: {expected_pos_label})"
    )
    assert display.ax_.get_ylabel() == expected_ylabel
    assert display.ax_.get_xlabel() == expected_xlabel
import matplotlib.pyplot as plt
import pandas as pd
from sklearn import tree
from sklearn.metrics import roc_curve, RocCurveDisplay, auc
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder

my_url = ('https://raw.githubusercontent.com'
          '/taroyabuki/fromzero/master/data/titanic.csv')
my_data = pd.read_csv(my_url)

X, y = my_data.iloc[:, 0:3], my_data.Survived

my_pipeline = Pipeline([
    ('ohe', OneHotEncoder(drop='first')),
    ('tree', tree.DecisionTreeClassifier(max_depth=2,
                                         min_impurity_decrease=0.01))])
my_pipeline.fit(X, y)

tmp = pd.DataFrame(my_pipeline.predict_proba(X),
                   columns=my_pipeline.classes_)
y_score = tmp.Yes

my_fpr, my_tpr, _ = roc_curve(y_true=y, y_score=y_score, pos_label='Yes')
my_auc = auc(x=my_fpr, y=my_tpr)
RocCurveDisplay(fpr=my_fpr, tpr=my_tpr, roc_auc=my_auc).plot()

plt.savefig('10-p-titanic-roc.pdf')