def test_plot_roc_curve(pyplot, response_method, data_binary, with_sample_weight, drop_intermediate): X, y = data_binary if with_sample_weight: rng = np.random.RandomState(42) sample_weight = rng.randint(1, 4, size=(X.shape[0])) else: sample_weight = None lr = LogisticRegression() lr.fit(X, y) viz = plot_roc_curve(lr, X, y, alpha=0.8, sample_weight=sample_weight, drop_intermediate=drop_intermediate) y_pred = getattr(lr, response_method)(X) if y_pred.ndim == 2: y_pred = y_pred[:, 1] fpr, tpr, _ = roc_curve(y, y_pred, sample_weight=sample_weight, drop_intermediate=drop_intermediate) assert_allclose(viz.roc_auc, auc(fpr, tpr)) assert_allclose(viz.fpr, fpr) assert_allclose(viz.tpr, tpr) assert viz.estimator_name == "LogisticRegression" # cannot fail thanks to pyplot fixture import matplotlib as mpl # noqal assert isinstance(viz.line_, mpl.lines.Line2D) assert viz.line_.get_alpha() == 0.8 assert isinstance(viz.ax_, mpl.axes.Axes) assert isinstance(viz.figure_, mpl.figure.Figure) expected_label = "LogisticRegression (AUC = {:0.2f})".format(viz.roc_auc) assert viz.line_.get_label() == expected_label assert viz.ax_.get_ylabel() == "True Positive Rate" assert viz.ax_.get_xlabel() == "False Positive Rate"
ax=ax) interp_tpr = interp(mean_fpr, viz.fpr, viz.tpr) interp_tpr[0] = 0.0 tprs.append(interp_tpr) aucs.append(viz.roc_auc) ax.plot([0, 1], [0, 1], linestyle='--', lw=2, color='r', label='Chance', alpha=.8) mean_tpr = np.mean(tprs, axis=0) mean_tpr[-1] = 1.0 mean_auc = auc(mean_fpr, mean_tpr) std_auc = np.std(aucs) ax.plot(mean_fpr, mean_tpr, color='b', label=r'Mean ROC (AUC = %0.2f $\pm$ %0.2f)' % (mean_auc, std_auc), lw=2, alpha=.8) std_tpr = np.std(tprs, axis=0) tprs_upper = np.minimum(mean_tpr + std_tpr, 1) tprs_lower = np.maximum(mean_tpr - std_tpr, 0) ax.fill_between(mean_fpr, tprs_lower, tprs_upper, color='grey',
x1 = lb.fit_transform(X[:, 1].astype(str)) x2 = lb.fit_transform(X[:, 2].astype(str)) x3 = lb.fit_transform(X[:, 3].astype(str)) X = np.c_[X[:, :1], x1, x2, x3, X[:, 4:]] y = (y != b'normal.').astype(int) if dataset_name == 'http' or dataset_name == 'smtp': y = (y != b'normal.').astype(int) X = X.astype(float) print('LocalOutlierFactor processing...') model = LocalOutlierFactor(n_neighbors=20) tstart = time() model.fit(X) fit_time = time() - tstart scoring = -model.negative_outlier_factor_ # the lower, the more normal fpr, tpr, thresholds = roc_curve(y, scoring) AUC = auc(fpr, tpr) plt.plot(fpr, tpr, lw=1, label=('ROC for %s (area = %0.3f, train-time: %0.2fs)' % (dataset_name, AUC, fit_time))) plt.xlim([-0.05, 1.05]) plt.ylim([-0.05, 1.05]) plt.xlabel('False Positive Rate') plt.ylabel('True Positive Rate') plt.title('Receiver operating characteristic') plt.legend(loc="lower right") plt.show()
y, test_size=.5, random_state=0) # Learn to predict each class against the other classifier = OneVsRestClassifier( svm.SVC(kernel='linear', probability=True, random_state=random_state)) y_score = classifier.fit(X_train, y_train).decision_function(X_test) # Compute ROC curve and ROC area for each class fpr = dict() tpr = dict() roc_auc = dict() for i in range(n_classes): fpr[i], tpr[i], _ = roc_curve(y_test[:, i], y_score[:, i]) roc_auc[i] = auc(fpr[i], tpr[i]) # Compute micro-average ROC curve and ROC area fpr["micro"], tpr["micro"], _ = roc_curve(y_test.ravel(), y_score.ravel()) roc_auc["micro"] = auc(fpr["micro"], tpr["micro"]) ############################################################################## # Plot of a ROC curve for a specific class plt.figure() lw = 2 plt.plot(fpr[2], tpr[2], color='darkorange', lw=lw, label='ROC curve (area = %0.2f)' % roc_auc[2]) plt.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
def plot_species_distribution(species=("bradypus_variegatus_0", "microryzomys_minutus_0")): """ Plot the species distribution. """ if len(species) > 2: print("Note: when more than two species are provided," " only the first two will be used") t0 = time() # Load the compressed data data = fetch_species_distributions() # Set up the data grid xgrid, ygrid = construct_grids(data) # The grid in x,y coordinates X, Y = np.meshgrid(xgrid, ygrid[::-1]) # create a bunch for each species BV_bunch = create_species_bunch(species[0], data.train, data.test, data.coverages, xgrid, ygrid) MM_bunch = create_species_bunch(species[1], data.train, data.test, data.coverages, xgrid, ygrid) # background points (grid coordinates) for evaluation np.random.seed(13) background_points = np.c_[np.random.randint(low=0, high=data.Ny, size=10000), np.random.randint(low=0, high=data.Nx, size=10000)].T # We'll make use of the fact that coverages[6] has measurements at all # land points. This will help us decide between land and water. land_reference = data.coverages[6] # Fit, predict, and plot for each species. for i, species in enumerate([BV_bunch, MM_bunch]): print("_" * 80) print("Modeling distribution of species '%s'" % species.name) # Standardize features mean = species.cov_train.mean(axis=0) std = species.cov_train.std(axis=0) train_cover_std = (species.cov_train - mean) / std # Fit OneClassSVM print(" - fit OneClassSVM ... ", end='') clf = svm.OneClassSVM(nu=0.1, kernel="rbf", gamma=0.5) clf.fit(train_cover_std) print("done.") # Plot map of South America plt.subplot(1, 2, i + 1) if basemap: print(" - plot coastlines using basemap") m = Basemap(projection='cyl', llcrnrlat=Y.min(), urcrnrlat=Y.max(), llcrnrlon=X.min(), urcrnrlon=X.max(), resolution='c') m.drawcoastlines() m.drawcountries() else: print(" - plot coastlines from coverage") plt.contour(X, Y, land_reference, levels=[-9998], colors="k", linestyles="solid") plt.xticks([]) plt.yticks([]) print(" - predict species distribution") # Predict species distribution using the training data Z = np.ones((data.Ny, data.Nx), dtype=np.float64) # We'll predict only for the land points. idx = np.where(land_reference > -9999) coverages_land = data.coverages[:, idx[0], idx[1]].T pred = clf.decision_function((coverages_land - mean) / std) Z *= pred.min() Z[idx[0], idx[1]] = pred levels = np.linspace(Z.min(), Z.max(), 25) Z[land_reference == -9999] = -9999 # plot contours of the prediction plt.contourf(X, Y, Z, levels=levels, cmap=plt.cm.Reds) plt.colorbar(format='%.2f') # scatter training/testing points plt.scatter(species.pts_train['dd long'], species.pts_train['dd lat'], s=2 ** 2, c='black', marker='^', label='train') plt.scatter(species.pts_test['dd long'], species.pts_test['dd lat'], s=2 ** 2, c='black', marker='x', label='test') plt.legend() plt.title(species.name) plt.axis('equal') # Compute AUC with regards to background points pred_background = Z[background_points[0], background_points[1]] pred_test = clf.decision_function((species.cov_test - mean) / std) scores = np.r_[pred_test, pred_background] y = np.r_[np.ones(pred_test.shape), np.zeros(pred_background.shape)] fpr, tpr, thresholds = metrics.roc_curve(y, scores) roc_auc = metrics.auc(fpr, tpr) plt.text(-35, -70, "AUC: %.3f" % roc_auc, ha="right") print("\n Area under the ROC curve : %f" % roc_auc) print("\ntime elapsed: %.2fs" % (time() - t0))
print("--- Preparing the plot elements...") if with_decision_function_histograms: fig, ax = plt.subplots(3, sharex=True, sharey=True) bins = np.linspace(-0.5, 0.5, 200) ax[0].hist(scoring, bins, color='black') ax[0].set_title('Decision function for %s dataset' % dat) ax[1].hist(scoring[y_test == 0], bins, color='b', label='normal data') ax[1].legend(loc="lower right") ax[2].hist(scoring[y_test == 1], bins, color='r', label='outliers') ax[2].legend(loc="lower right") # Show ROC Curves predict_time = time() - tstart fpr, tpr, thresholds = roc_curve(y_test, scoring) auc_score = auc(fpr, tpr) label = ('%s (AUC: %0.3f, train_time= %0.2fs, ' 'test_time= %0.2fs)' % (dat, auc_score, fit_time, predict_time)) # Print AUC score and train/test time: print(label) ax_roc.plot(fpr, tpr, lw=1, label=label) ax_roc.set_xlim([-0.05, 1.05]) ax_roc.set_ylim([-0.05, 1.05]) ax_roc.set_xlabel('False Positive Rate') ax_roc.set_ylabel('True Positive Rate') ax_roc.set_title('Receiver operating characteristic (ROC) curves') ax_roc.legend(loc="lower right") fig_roc.tight_layout() plt.show()