# Filepath and plot axes
folder_path = os.path.dirname(os.path.realpath(__file__))
if os.path.basename(os.path.normpath(folder_path)) == "pycalib":
    folder_path += "/figures/latent_functions"
folder_path += "/plots/latent_maps/"
if not os.path.exists(folder_path):
    os.makedirs(folder_path)
file_suffix = "_probs"
if use_logits:
    file_suffix = "_logits"
filename = folder_path + clf_comb_name + file_suffix
fig, axes = texfig.subplots(width=7 / 3 * len(clf_names),
                            ratio=.25 * 3 / len(clf_names),
                            nrows=1, ncols=len(clf_names), w_pad=1)

# Iterate through data sets, calibrate and plot latent functions
for (i_clf, (Z, y, info_dict)) in enumerate(benchmark.data_gen()):

    # Train, test split
    cal_ind, test_ind = next(benchmark.cross_validator.split(Z, y))
    Z_cal = Z[cal_ind, :]
    y_cal = y[cal_ind]
    Z_test = Z[test_ind, :]
    y_test = y[test_ind]
    hist_data = Z_cal.flatten()

    # Calibrate
"ECE_calib": ece_calib, "acc_calib": acc_calib, "NLL_calib": nll_calib }, open(os.path.join(dir_path, "metrics_calib.txt"), 'w')) # Load data from file metrics_train = json.load(open(os.path.join(dir_path, "metrics_train.txt"))) metrics_test = json.load(open(os.path.join(dir_path, "metrics_test.txt"))) metrics_calib = json.load(open(os.path.join(dir_path, "metrics_calib.txt"))) # Plot accuracy, ECE, logloss var_list = [("err", "error"), ("NLL", "NLL"), ("ECE", "$\\textup{ECE}_1$")] fig, axes = texfig.subplots(width=7, ratio=.2, nrows=1, ncols=3, w_pad=1) for i, (m, lab) in enumerate(var_list): axes[i].plot(metrics_train["iter"][2::], metrics_train[m][2::], label="train") axes[i].plot(metrics_test["iter"][2::], metrics_test[m][2::], label="test") axes[i].set_xlabel("epoch") axes[i].set_ylabel(lab) for j, metric_calib in enumerate([ 1 - metrics_calib["acc_calib"], metrics_calib["NLL_calib"], metrics_calib["ECE_calib"] ]):
def plot(self, file, metrics_list, width=None, height=None, scatter=False, confidence=True):
    """
    Plots the given list of metrics for the active learning experiment.

    Parameters
    ----------
    file : str
        Filename of the resulting plot.
    metrics_list : list
        List of names of metrics to plot.
    width : float, optional
        Width of the plot. Defaults to 3.25 per subplot.
    height : float, optional
        Height of each subplot. Defaults to 1.7.
    scatter : bool, optional, default=False
        Whether to scatter the raw metric values in addition to the fitted GP mean.
    confidence : bool, optional, default=True
        Whether to plot a 95% confidence band around the GP mean.
    """
    # Initialization
    n_subplots = len(metrics_list)

    # Generate curves for the given metrics
    if width is None:
        width = 3.25 * n_subplots
    if height is None:
        height = 1.7
    fig, axes = texfig.subplots(nrows=n_subplots, width=width, ratio=height * 1.0 / width,
                                w_pad=1, sharex=True)
    cmap = get_cmap("tab10")  # https://matplotlib.org/gallery/color/colormap_reference.html

    # Allow for one metric only
    if n_subplots == 1:
        axes = [axes]

    for ax_ind, metric_name in enumerate(metrics_list):

        # Plot calibration ranges
        for cp in self.calib_points:
            axes[ax_ind].axvspan(cp, cp + self.calib_size, alpha=0.3, color='gray',
                                 linewidth=0.0)

        # Plot metric curves
        for i_calmethod, calib_method in enumerate(
                np.unique(self.result_df["calibration_method"])):

            # Extract relevant data
            df_tmp = self.result_df.loc[self.result_df["calibration_method"] == calib_method]
            xdata = np.array(df_tmp["n_samples_queried"], dtype="float64")
            ydata = np.array(df_tmp[metric_name], dtype="float64")

            # Compute average samples queried and restrict plot
            grouped = df_tmp.groupby(["calibration_method", "cv_run"])
            summ_stats = grouped.agg(np.max)
            mean_n_samples_queried = np.mean(summ_stats["n_samples_queried"])
            std_n_samples_queried = np.std(summ_stats["n_samples_queried"])
            print("{} samples queried: {} +- {}".format(calib_method,
                                                        mean_n_samples_queried,
                                                        std_n_samples_queried))

            # Fit GP to result
            k = gpflow.kernels.Matern52(input_dim=1, lengthscales=1000, variance=.1) + \
                gpflow.kernels.White(input_dim=1, variance=.01)
            m = gpflow.models.GPR(xdata.reshape(-1, 1), ydata.reshape(-1, 1), kern=k)
            opt = gpflow.train.ScipyOptimizer()
            opt.minimize(m)

            # Predict GP mean up to the mean of n_samples_queried
            xx = np.linspace(np.min(xdata), mean_n_samples_queried, 1000).reshape(-1, 1)
            mean, var = m.predict_f(xx)
            axes[ax_ind].plot(xx, mean, color=cmap.colors[i_calmethod], zorder=10,
                              label="MF entropy " + calib_method)
            if scatter:
                axes[ax_ind].scatter(xdata, ydata, color=cmap.colors[i_calmethod],
                                     s=4, alpha=0.3, edgecolors='none')
            if confidence:
                axes[ax_ind].fill_between(xx[:, 0],
                                          mean[:, 0] - 1.96 * np.sqrt(var[:, 0]),
                                          mean[:, 0] + 1.96 * np.sqrt(var[:, 0]),
                                          alpha=0.3, linewidth=0.0)

        # Set labels and legend
        if ax_ind + 1 == len(axes):
            axes[ax_ind].set_xlabel("queried samples")
        axes[ax_ind].set_ylabel(metric_name)
        axes[ax_ind].legend(prop={'size': 9}, labelspacing=0.2)

    # Save plot to file
    texfig.savefig(os.path.join(file), bbox_inches='tight', pad_inches=0)
    plt.close("all")
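# Example usage (a minimal, hypothetical sketch): `experiment` stands for an instance of
# the enclosing active-learning experiment class (name not shown in this section) whose
# `result_df`, `calib_points` and `calib_size` attributes were filled by a finished run.
#
#   experiment.plot(file="figures/active_learning/metrics",
#                   metrics_list=["accuracy", "ECE"],
#                   scatter=True, confidence=True)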
def plot_feature_space_level_set(seed, dir_out='pycalib/out/synthetic_data/'):
    import sklearn.datasets
    from sklearn.neural_network import MLPClassifier
    import matplotlib.colors

    # Setup
    train_size = 1000
    cal_size = 100
    noise = .25
    contour_levels = 10

    # Generate 2D classification dataset
    np.random.seed(seed)
    X, y = sklearn.datasets.make_circles(n_samples=train_size, noise=noise)

    # Train classifier
    clf = MLPClassifier(hidden_layer_sizes=[10, 10], alpha=1, max_iter=200)
    clf.fit(X, y)

    # Scatter plot, dots colored by class value
    df = pd.DataFrame(dict(x=X[:, 0], y=X[:, 1], label=y))
    markers = {0: 'x', 1: '.'}
    fig, ax = texfig.subplots(width=8, ratio=.3, nrows=1, ncols=3, sharex=True, sharey=True)
    # grouped = df.groupby('label')
    # for key, group in grouped:
    #     group.plot(ax=ax[0], kind='scatter', x='x', y='y', label=key,
    #                marker=markers[key], color='gray', alpha=.75)

    # Put the result into a color plot
    x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
    y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
    h = .02  # step size in the mesh
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))

    # Plot the decision boundary. For that, we will assign a color to each
    # point in the mesh [x_min, x_max]x[y_min, y_max].
    if hasattr(clf, "decision_function"):
        Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])
        p_pred = clf.predict_proba(np.c_[xx.ravel(), yy.ravel()])
    else:
        p_pred = clf.predict_proba(np.c_[xx.ravel(), yy.ravel()])
        Z = p_pred[:, 1]
    Z0 = Z.reshape(xx.shape)

    # Colormaps. Named `cmap` to avoid shadowing the calibration-methods module referenced
    # as `cm` below (assumed to be imported at module level).
    cmap = plt.cm.RdBu_r
    cm_bright = matplotlib.colors.ListedColormap(['#FF0000', '#0000FF'])
    cont0 = ax[0].contourf(xx, yy, Z0, cmap=cmap, alpha=.8, levels=contour_levels,
                           vmin=0, vmax=1)
    ax[0].set_title("Classification Uncertainty")

    # Calibrate
    X_cal, y_cal = sklearn.datasets.make_circles(n_samples=cal_size, noise=noise)
    p_cal = clf.predict_proba(X_cal)
    clf_cal = cm.GPCalibration(SVGP=True)
    clf_cal.fit(p_cal, y_cal)

    # Calibrated contour plot
    Z1 = clf_cal.predict_proba(p_pred)[:, 1].reshape(xx.shape)
    cont1 = ax[1].contourf(xx, yy, Z1, cmap=cmap, alpha=.8, levels=contour_levels,
                           vmin=0, vmax=1)
    ax[1].set_title("Calibrated Uncertainty")

    # Difference plot
    cm_diff = plt.cm.viridis_r  # colormap
    cont2 = ax[2].contourf(xx, yy, Z1 - Z0, cmap=cm_diff, alpha=.8)
    ax[2].set_title("Uncertainty Difference")

    # Color bar
    # fig.subplots_adjust(right=0.8)
    # cbar_ax = fig.add_axes([.96, 0.15, 0.05, 0.7])
    # cbar = fig.colorbar(cont1, cax=cbar_ax)

    # Contour labels
    # ax[0].clabel(cont0, inline=1, fontsize=8)
    # ax[1].clabel(cont1, inline=1, fontsize=8)

    texfig.savefig(dir_out + '/plots/' + 'level_sets')
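# Example usage (hypothetical sketch): reproduce the level-set figure for a fixed seed.
# The output directory is the function's default; the seed value is arbitrary.
#
#   plot_feature_space_level_set(seed=42, dir_out='pycalib/out/synthetic_data/')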
def plot_calibration_comparison(cal_metrics_df, cal_methods, alpha, beta, beta_name,
                                statistic, miscal_func, miscal_func_name, cal_size, dir_out):

    # -- Plot confidence distribution -- #
    xx = np.linspace(0.5, 1, 1000)
    fig, ax = texfig.subplots()
    for bp in np.column_stack([alpha, beta]):
        ax.plot(xx, scipy.stats.beta.pdf(2 * xx - 1, a=bp[0], b=bp[1]),
                label="alpha={:.1f}, beta={:.1f}".format(bp[0], bp[1]),
                color="blue")
    ax.set_ylim([0, 5])
    ax.set_xlabel("confidence")
    ax.set_ylabel("density")
    ax.set_title("confidence histogram")
    texfig.savefig(filename=dir_out + '/plots/' + 'conf_dist_' + beta_name)

    # -- Plot miscalibration function -- #
    fig, ax = texfig.subplots(ratio=1)
    ax.plot(xx, xx, linestyle="dashed", color="gray")
    ax.plot(xx, miscal_func(xx), label=miscal_func_name, color="red")
    ax.legend()
    ax.axis('equal')
    ax.set_xlabel('confidence')
    ax.set_ylabel('accuracy')
    ax.set_title("reliability diagram")
    texfig.savefig(filename=dir_out + '/plots/' + 'miscal_' + miscal_func_name)

    # -- Plot given statistic -- #
    if len(np.unique(alpha)) <= len(np.unique(beta)):
        slice_param = np.unique(alpha)
        slice_param_name = 'alpha'
        x_param = beta
        x_param_name = 'beta'
    else:
        slice_param = np.unique(beta)
        slice_param_name = 'beta'
        x_param = alpha
        x_param_name = 'alpha'

    fig, ax = texfig.subplots(width=6, ratio=0.4, sharey='row')
    for i, sl_param in enumerate(slice_param):
        for calm in cal_methods:
            df_tmp = cal_metrics_df.loc[
                (cal_metrics_df['model'] == calm) &
                (cal_metrics_df['miscalibration_func'] == miscal_func_name) &
                (cal_metrics_df['size'] == cal_size) &
                (cal_metrics_df[slice_param_name] == sl_param) &
                cal_metrics_df[x_param_name].isin(x_param)]
            ax.plot(df_tmp[x_param_name], np.abs(df_tmp[statistic + '.mean']), label=calm)
            ax.fill_between(df_tmp[x_param_name],
                            np.abs(df_tmp[statistic + '.mean']) + 2 * df_tmp[statistic + '.std'],
                            np.abs(df_tmp[statistic + '.mean']) - 2 * df_tmp[statistic + '.std'],
                            alpha=0.15)
        ax.set_title('{}={:.2f}'.format(slice_param_name, sl_param))
        ax.set_xlabel(x_param_name)
        # ax[i].set_yscale("log", nonposy='clip')
    ax.set_ylabel(statistic)
    ax.set_ylim(bottom=0, auto=True)
    ax.legend(bbox_to_anchor=(1.04, 0.5), loc="center left", borderaxespad=0)

    # Save to file
    if not os.path.exists(dir_out + '/plots/' + statistic):
        os.makedirs(dir_out + '/plots/' + statistic)
    texfig.savefig(dir_out + '/plots/' + statistic + '/' + beta_name + '_' +
                   miscal_func_name + '_' + statistic + '_' + str(cal_size))
    plt.close('all')
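# Example usage (hypothetical sketch): compare two calibration methods for one confidence
# distribution and miscalibration function. All argument values below are illustrative and
# assume `cal_metrics_df` contains the columns accessed above ('model',
# 'miscalibration_func', 'size', 'alpha', 'beta', '<statistic>.mean', '<statistic>.std').
#
#   plot_calibration_comparison(cal_metrics_df=cal_metrics_df,
#                               cal_methods=["platt_scaling", "gpcalib"],
#                               alpha=np.repeat(2.0, 5),
#                               beta=np.linspace(0.5, 2.5, 5),
#                               beta_name="beta_sweep", statistic="ECE",
#                               miscal_func=lambda z: z ** 2, miscal_func_name="square",
#                               cal_size=100, dir_out="pycalib/out/synthetic_data")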
    index_col=0)
plot_classifier = "1layer_NN"
plot_df = results_df.loc[results_df['classifier'] == plot_classifier]
grouped = plot_df.groupby(["cal_method", "classifier", "data"])
summ_stats = grouped.agg([np.mean, np.std])

cal_methods = list(summ_stats.index.levels[0])
mean_time_inf = list(summ_stats["time_inf"]["mean"])
mean_time_pred = list(summ_stats["time_pred"]["mean"])

# Plot
fig, axs = texfig.subplots(nrows=1, ncols=2, width=5, ratio=.5, sharey=True)
axs[0].set_yscale("log")
axs[1].set_yscale("log")
axs[0].scatter(cal_methods, mean_time_inf)
axs[1].scatter(cal_methods, mean_time_pred)
minaxis = np.min(np.concatenate([mean_time_inf, mean_time_pred]))
maxaxis = np.max(np.concatenate([mean_time_inf, mean_time_pred]))
axs[0].set_ylim([minaxis - .0001, maxaxis + 1])
axs[0].title.set_text('Inference')
axs[1].title.set_text('Prediction')
axs[0].set_ylabel("seconds")

# Rotate tick labels
# Meshplot
fig = texfig.figure(width=6)
ax = fig.gca(projection='3d')
ax.plot_wireframe(X=f1, Y=f2, Z=h, label=r"$\log p(y_n \mid f_n)$", color="tab:blue")
ax.plot_wireframe(X=f1, Y=f2, Z=taylor_h, label="Taylor approx.", color="tab:orange")
ax.set_xlabel("$f_n^1$")
ax.set_ylabel("$f_n^2$")
ax.tick_params(axis='both', which='major', labelsize=10)
ax.legend()
# ax.legend(prop={'size': 9}, labelspacing=0.2)
texfig.savefig("figures/gpcalib_illustration/taylor_approx_mesh")

# Contour difference plot
fig, axes = texfig.subplots()
c = axes.contourf(f1, f2, h - taylor_h)
fig.colorbar(c, ax=axes)
axes.set_xlabel("$f_n^1$")
axes.set_ylabel("$f_n^2$")
texfig.savefig("figures/gpcalib_illustration/taylor_approx_contour")

plt.close("all")
def reliability_diagram(y, p_pred, filename, title="Reliability Diagram", n_bins=100,
                        show_ece=False, show_legend=False, model_name=None, xlim=None,
                        plot_height=4, plot_width=4):
    """
    Plot a reliability diagram

    This function plots the reliability diagram [1]_ [2]_ and histograms from the given
    confidence estimates. Reliability diagrams are a visual aid to determine whether a
    classifier is calibrated or not.

    Parameters
    ----------
    y : array, shape = [n_samples]
        Ground truth labels.
    p_pred : array or list
        Array of confidence estimates. If this is a list, multiple reliability diagrams
        will be plotted and arranged side-by-side.
    filename : str
        Path or name of output plot files.
    title : str or list
        Title of plot. If `p_pred` is a list, titles of each individual reliability diagram.
    n_bins : int, optional, default=100
        The number of bins into which the predicted probabilities are partitioned.
    show_ece : bool
        Whether the expected calibration error (ECE) should be displayed in the plot.
    show_legend : bool
        Whether the legend should be displayed in the plot.
    model_name : str
        Name of the model from which the probabilities were generated. Displayed when
        showing the legend.
    xlim : array, shape = (2,), default=None
        X-axis limits. If not provided, inferred from `y`.
    plot_height : float, optional, default=4
        Height of the plot.
    plot_width : float, optional, default=4
        Width of the plot.

    References
    ----------
    .. [1] DeGroot, M. H. & Fienberg, S. E. The Comparison and Evaluation of Forecasters.
           Journal of the Royal Statistical Society. Series D (The Statistician) 32, 12-22.
    .. [2] Niculescu-Mizil, A. & Caruana, R. Predicting good probabilities with supervised
           learning in Proceedings of the 22nd International Conference on Machine
           Learning (2005)
    """
    # Initialization
    if xlim is None:
        n_classes = len(np.unique(y))
        xlim = [1 / n_classes, 1]

    # Check whether multiple reliability diagrams should be plotted
    n_plots = 1
    if isinstance(p_pred, list):
        n_plots = len(p_pred)

    # Define bins
    bins = np.linspace(xlim[0], xlim[1], n_bins + 1)

    # Plot reliability diagram
    fig, axes = texfig.subplots(nrows=2, ncols=n_plots, width=plot_width,
                                ratio=plot_height / plot_width, sharex=True, sharey=True,
                                gridspec_kw={'height_ratios': [2, 1]})
    for i in range(n_plots):
        if n_plots > 1:
            current_plot_axes = axes[:, i]
            y_pred = np.argmax(p_pred[i], axis=1)
            p_max = np.max(p_pred[i], axis=1)
            p_pred_current = p_pred[i]
        else:
            current_plot_axes = axes
            y_pred = np.argmax(p_pred, axis=1)
            p_max = np.max(p_pred, axis=1)
            p_pred_current = p_pred

        # Calibration line
        current_plot_axes[0].plot(xlim, xlim, linestyle='--', color='grey')

        # Compute bin means and empirical accuracy
        bin_means = np.linspace(xlim[0] + xlim[1] / (2 * n_bins),
                                xlim[1] - xlim[1] / (2 * n_bins), n_bins)
        empirical_acc = scipy.stats.binned_statistic(p_max,
                                                     np.equal(y_pred, y).astype(int),
                                                     bins=n_bins, range=xlim)[0]
        empirical_acc[np.isnan(empirical_acc)] = bin_means[np.isnan(empirical_acc)]

        # Plot accuracy
        current_plot_axes[0].step(bins, np.concatenate(([xlim[0]], empirical_acc)), '-',
                                  label=model_name)
        x = np.linspace(xlim[0], xlim[1], 1000)
        bin_ind = np.digitize(x, bins)[1:-1] - 1
        current_plot_axes[0].fill_between(x[1:-1], x[1:-1],
                                          (bin_means + (empirical_acc - bin_means))[bin_ind],
                                          facecolor='k', alpha=0.2)
        if i == 0:
            current_plot_axes[0].set_ylabel('accuracy')
        if show_legend:
            current_plot_axes[0].legend(loc='upper left')
        if title is not None and n_plots > 1:
            current_plot_axes[0].set_title(title[i])
        elif title is not None:
            current_plot_axes[0].set_title(title)

        # Plot histogram
        hist, ex = np.histogram(p_max, bins=bins)
        current_plot_axes[1].fill_between(bins,
                                          np.concatenate(([0], hist / np.sum(hist))),
                                          lw=0.0, step="pre")
        current_plot_axes[1].set_xlabel('confidence $\\hat{\\textnormal{z}}$')
        if i == 0:
            current_plot_axes[1].set_ylabel('sample frac.')
        current_plot_axes[1].set_xlim(xlim)
        fig.align_labels()

        # Add textbox with ECE
        if show_ece:
            ece = pycalib.scoring.expected_calibration_error(y=y, p_pred=p_pred_current,
                                                             n_bins=n_bins)
            anchored_text = AnchoredText("$\\textup{ECE}_1 = " + "{:.3f}$".format(ece),
                                         loc='lower right')
            anchored_text.patch.set_boxstyle("round,pad=0.,rounding_size=0.2")
            anchored_text.patch.set_edgecolor("0.8")
            anchored_text.patch.set_alpha(0.9)
            current_plot_axes[0].add_artist(anchored_text)

    # Save to file
    texfig.savefig(filename=filename, bbox_inches='tight', pad_inches=0)
    plt.close()
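# Example usage (hypothetical sketch): plot a reliability diagram with ECE annotation for a
# fitted classifier `clf` on held-out data `X_test`, `y_test` (names are illustrative).
#
#   p_test = clf.predict_proba(X_test)
#   reliability_diagram(y=y_test, p_pred=p_test,
#                       filename="figures/reliability/clf_test",
#                       n_bins=20, show_ece=True, show_legend=True, model_name="NN")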