Example #1
            # Filepath and plot axes
            folder_path = os.path.dirname(os.path.realpath(__file__))
            if os.path.basename(os.path.normpath(folder_path)) == "pycalib":
                folder_path += "/figures/latent_functions"
            folder_path += "/plots/latent_maps/"
            if not os.path.exists(folder_path):
                os.makedirs(folder_path)
            file_suffix = "_probs"
            if use_logits:
                file_suffix = "_logits"
            filename = folder_path + clf_comb_name + file_suffix

            fig, axes = texfig.subplots(width=7 / 3 * len(clf_names),
                                        ratio=.25 * 3 / len(clf_names),
                                        nrows=1,
                                        ncols=len(clf_names),
                                        w_pad=1)

            # Iterate through data sets, calibrate and plot latent functions
            for (i_clf, (Z, y, info_dict)) in enumerate(benchmark.data_gen()):

                # Train, test split
                cal_ind, test_ind = next(benchmark.cross_validator.split(Z, y))
                Z_cal = Z[cal_ind, :]
                y_cal = y[cal_ind]
                Z_test = Z[test_ind, :]
                y_test = y[test_ind]
                hist_data = Z_cal.flatten()

                # Calibrate
Example #2
            "ECE_calib": ece_calib,
            "acc_calib": acc_calib,
            "NLL_calib": nll_calib
        }, open(os.path.join(dir_path, "metrics_calib.txt"), 'w'))

    # Load data from file
    metrics_train = json.load(open(os.path.join(dir_path,
                                                "metrics_train.txt")))
    metrics_test = json.load(open(os.path.join(dir_path, "metrics_test.txt")))
    metrics_calib = json.load(open(os.path.join(dir_path,
                                                "metrics_calib.txt")))

    # Plot accuracy, ECE, logloss
    var_list = [("err", "error"), ("NLL", "NLL"), ("ECE", "$\\textup{ECE}_1$")]

    fig, axes = texfig.subplots(width=7, ratio=.2, nrows=1, ncols=3, w_pad=1)

    for i, (m, lab) in enumerate(var_list):
        axes[i].plot(metrics_train["iter"][2::],
                     metrics_train[m][2::],
                     label="train")
        axes[i].plot(metrics_test["iter"][2::],
                     metrics_test[m][2::],
                     label="test")
        axes[i].set_xlabel("epoch")
        axes[i].set_ylabel(lab)

    for j, metric_calib in enumerate([
            1 - metrics_calib["acc_calib"], metrics_calib["NLL_calib"],
            metrics_calib["ECE_calib"]
    ]):
Example #3
    def plot(self,
             file,
             metrics_list,
             width=None,
             height=None,
             scatter=False,
             confidence=True):
        """
        Plots the given list of metrics for the active learning experiment.

        Parameters
        ----------
        file : str
            Filename of resulting plot.
        metrics_list : list
            List of names of metrics to plot.
        Returns
        -------

        """
        # Initialization
        n_subplots = len(metrics_list)

        # Generate curves for the given metrics
        if width is None:
            width = 3.25 * n_subplots
        if height is None:
            height = 1.7
        fig, axes = texfig.subplots(nrows=n_subplots,
                                    width=width,
                                    ratio=height * 1.0 / width,
                                    w_pad=1,
                                    sharex=True)
        # Colormap reference: https://matplotlib.org/gallery/color/colormap_reference.html
        cmap = get_cmap("tab10")

        # Allow for one metric only
        if n_subplots == 1:
            axes = [axes]

        for ax_ind, metric_name in enumerate(metrics_list):

            # Plot calibration ranges
            for cp in self.calib_points:
                axes[ax_ind].axvspan(cp,
                                     cp + self.calib_size,
                                     alpha=0.3,
                                     color='gray',
                                     linewidth=0.0)

            # Plot metric curves
            for i_calmethod, calib_method in enumerate(
                    np.unique(self.result_df["calibration_method"])):
                # Extract relevant data
                df_tmp = self.result_df.loc[
                    self.result_df["calibration_method"] == calib_method]
                xdata = np.array(df_tmp["n_samples_queried"], dtype="float64")
                ydata = np.array(df_tmp[metric_name], dtype="float64")

                # Compute average samples queried and restrict plot
                grouped = df_tmp.groupby(["calibration_method", "cv_run"])
                summ_stats = grouped.agg(np.max)
                mean_n_samples_queried = np.mean(
                    summ_stats["n_samples_queried"])
                std_n_samples_queried = np.std(summ_stats["n_samples_queried"])
                print("{} samples queried: {} +- {}".format(
                    calib_method, mean_n_samples_queried,
                    std_n_samples_queried))
                # Fit a GP (Matern 5/2 + white noise kernel) to the results (GPflow 1.x API)
                k = gpflow.kernels.Matern52(
                    input_dim=1, lengthscales=1000,
                    variance=.1) + gpflow.kernels.White(input_dim=1,
                                                        variance=.01)
                m = gpflow.models.GPR(xdata.reshape(-1, 1),
                                      ydata.reshape(-1, 1),
                                      kern=k)
                opt = gpflow.train.ScipyOptimizer()
                opt.minimize(m)

                # Predict the GP mean up to the mean of n_samples_queried
                xx = np.linspace(np.min(xdata), mean_n_samples_queried,
                                 1000).reshape(-1, 1)
                mean, var = m.predict_f(xx)
                axes[ax_ind].plot(xx,
                                  mean,
                                  color=cmap.colors[i_calmethod],
                                  zorder=10,
                                  label="MF entropy " + calib_method)
                if scatter:
                    axes[ax_ind].scatter(xdata,
                                         ydata,
                                         color=cmap.colors[i_calmethod],
                                         s=4,
                                         alpha=0.3,
                                         edgecolors='none')
                if confidence:
                    axes[ax_ind].fill_between(
                        xx[:, 0],
                        mean[:, 0] - 1.96 * np.sqrt(var[:, 0]),
                        mean[:, 0] + 1.96 * np.sqrt(var[:, 0]),
                        alpha=0.3,
                        linewidth=0.0)

            # Set labels and legend
            if ax_ind + 1 == len(axes):
                axes[ax_ind].set_xlabel("queried samples")
            axes[ax_ind].set_ylabel(metric_name)

        axes[ax_ind].legend(prop={'size': 9}, labelspacing=0.2)

        # Save plot to file
        texfig.savefig(os.path.join(file), bbox_inches='tight', pad_inches=0)
        plt.close("all")
Example #4
    def plot_feature_space_level_set(seed,
                                     dir_out='pycalib/out/synthetic_data/'):
        import sklearn.datasets
        from sklearn.neural_network import MLPClassifier
        import matplotlib.colors

        # Setup
        train_size = 1000
        cal_size = 100
        noise = .25
        contour_levels = 10

        # generate 2d classification dataset
        np.random.seed(seed)
        X, y = sklearn.datasets.make_circles(n_samples=train_size, noise=noise)

        # train classifier
        clf = MLPClassifier(hidden_layer_sizes=[10, 10], alpha=1, max_iter=200)
        clf.fit(X, y)

        # scatter plot, dots colored by class value
        df = pd.DataFrame(dict(x=X[:, 0], y=X[:, 1], label=y))
        markers = {0: 'x', 1: '.'}

        fig, ax = texfig.subplots(width=8,
                                  ratio=.3,
                                  nrows=1,
                                  ncols=3,
                                  sharex=True,
                                  sharey=True)
        # grouped = df.groupby('label')
        # for key, group in grouped:
        #     group.plot(ax=ax[0], kind='scatter', x='x', y='y', label=key, marker=markers[key], color='gray', alpha=.75)

        # Put the result into a color plot
        x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
        y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
        h = .02  # step size in the mesh
        xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                             np.arange(y_min, y_max, h))

        # Plot the decision boundary. For that, we will assign a color to each
        # point in the mesh [x_min, x_max]x[y_min, y_max].
        if hasattr(clf, "decision_function"):
            Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])
            p_pred = clf.predict_proba(np.c_[xx.ravel(), yy.ravel()])
        else:
            p_pred = clf.predict_proba(np.c_[xx.ravel(), yy.ravel()])
            Z = p_pred[:, 1]
        Z0 = Z.reshape(xx.shape)
        # Diverging colormap; renamed from `cm` to avoid shadowing the module-level name used below
        cmap_rb = plt.cm.RdBu_r
        cm_bright = matplotlib.colors.ListedColormap(['#FF0000', '#0000FF'])
        cont0 = ax[0].contourf(xx,
                               yy,
                               Z0,
                               cmap=cmap_rb,
                               alpha=.8,
                               levels=contour_levels,
                               vmin=0,
                               vmax=1)
        ax[0].set_title("Classification Uncertainty")

        # calibrate
        X_cal, y_cal = sklearn.datasets.make_circles(n_samples=cal_size,
                                                     noise=noise)
        p_cal = clf.predict_proba(X_cal)
        # `cm` below is assumed to refer to a module-level import of pycalib's calibration
        # methods (e.g. `import pycalib.calibration_methods as cm`), not a colormap
        clf_cal = cm.GPCalibration(SVGP=True)
        clf_cal.fit(p_cal, y_cal)

        # calibrated contour plot
        Z1 = clf_cal.predict_proba(p_pred)[:, 1].reshape(xx.shape)
        cont1 = ax[1].contourf(xx,
                               yy,
                               Z1,
                               cmap=cmap_rb,
                               alpha=.8,
                               levels=contour_levels,
                               vmin=0,
                               vmax=1)
        ax[1].set_title("Calibrated Uncertainty")

        # difference plot
        cm_diff = plt.cm.viridis_r  # colormap
        cont1 = ax[2].contourf(xx, yy, Z1 - Z0, cmap=cm_diff, alpha=.8)
        ax[2].set_title("Uncertainty Difference")

        # color bar
        # fig.subplots_adjust(right=0.8)
        # cbar_ax = fig.add_axes([.96, 0.15, 0.05, 0.7])
        # cbar = fig.colorbar(cont1, cax=cbar_ax)

        # # contour labels
        # ax[0].clabel(cont0, inline=1, fontsize=8)
        # ax[1].clabel(cont1, inline=1, fontsize=8)

        texfig.savefig(dir_out + '/plots/' + 'level_sets')
Example #5
    def plot_calibration_comparison(cal_metrics_df, cal_methods, alpha, beta,
                                    beta_name, statistic, miscal_func,
                                    miscal_func_name, cal_size, dir_out):

        # --Plot confidence distribution-- #
        xx = np.linspace(0.5, 1, 1000)
        fig, ax = texfig.subplots()
        for bp in np.column_stack([alpha, beta]):
            ax.plot(xx,
                    scipy.stats.beta.pdf(2 * xx - 1, a=bp[0], b=bp[1]),
                    label="alpha={:.1f}, beta={:.1f}".format(bp[0], bp[1]),
                    color="blue")
        ax.set_ylim([0, 5])
        ax.set_xlabel("confidence")
        ax.set_ylabel("density")
        ax.set_title("confidence histogram")

        texfig.savefig(filename=dir_out + '/plots/' + 'conf_dist_' + beta_name)

        # --Plot miscalibration function -- #
        fig, ax = texfig.subplots(ratio=1)
        ax.plot(xx, xx, linestyle="dashed", color="gray")
        ax.plot(xx, miscal_func(xx), label=miscal_func_name, color="red")
        ax.legend()
        ax.axis('equal')
        ax.set_xlabel('confidence')
        ax.set_ylabel('accuracy')
        ax.set_title("reliability diagram")

        texfig.savefig(filename=dir_out + '/plots/' + 'miscal_' +
                       miscal_func_name)

        # --Plot given statistic -- #
        if len(np.unique(alpha)) <= len(np.unique(beta)):
            slice_param = np.unique(alpha)
            slice_param_name = 'alpha'
            x_param = beta
            x_param_name = 'beta'
        else:
            slice_param = np.unique(beta)
            slice_param_name = 'beta'
            x_param = alpha
            x_param_name = 'alpha'

        fig, ax = texfig.subplots(width=6, ratio=0.4, sharey='row')

        for i, sl_param in enumerate(slice_param):
            for calm in cal_methods:
                df_tmp = cal_metrics_df.loc[
                    (cal_metrics_df['model'] == calm) &
                    (cal_metrics_df['miscalibration_func'] == miscal_func_name)
                    & (cal_metrics_df['size'] == cal_size) &
                    (cal_metrics_df[slice_param_name] == sl_param) &
                    ([el in x_param for el in cal_metrics_df[x_param_name]])]
                ax.plot(df_tmp[x_param_name],
                        np.abs(df_tmp[statistic + '.mean']),
                        label=calm)
                ax.fill_between(df_tmp[x_param_name],
                                np.abs(df_tmp[statistic + '.mean']) +
                                2 * df_tmp[statistic + '.std'],
                                np.abs(df_tmp[statistic + '.mean']) -
                                2 * df_tmp[statistic + '.std'],
                                alpha=0.15)
                ax.set_title('{}={:.2f}'.format(slice_param_name, sl_param))
                ax.set_xlabel(x_param_name)
                # ax[i].set_yscale("log", nonposy='clip')

        ax.set_ylabel(statistic)
        ax.set_ylim(bottom=0, auto=True)
        ax.legend(bbox_to_anchor=(1.04, 0.5),
                  loc="center left",
                  borderaxespad=0)

        # Save to file
        if not os.path.exists(dir_out + '/plots/' + statistic):
            os.makedirs(dir_out + '/plots/' + statistic)
        texfig.savefig(dir_out + '/plots/' + statistic + '/' + beta_name +
                       '_' + miscal_func_name + '_' + statistic + '_' +
                       str(cal_size))
        plt.close('all')
    # Load benchmark results (the original read_csv call is truncated in this snippet;
    # `results_file` is a placeholder for the elided path)
    results_df = pd.read_csv(results_file, index_col=0)

    plot_classifier = "1layer_NN"
    plot_df = results_df.loc[results_df['classifier'] == plot_classifier]

    grouped = plot_df.groupby(["cal_method", "classifier", "data"])
    summ_stats = grouped.agg([np.mean, np.std])

    cal_methods = list(summ_stats.index.levels[0])
    mean_time_inf = list(summ_stats["time_inf"]["mean"])
    mean_time_pred = list(summ_stats["time_pred"]["mean"])

    # Plot
    fig, axs = texfig.subplots(nrows=1,
                               ncols=2,
                               width=5,
                               ratio=.5,
                               sharey=True)
    axs[0].set_yscale("log")
    axs[1].set_yscale("log")
    axs[0].scatter(cal_methods, mean_time_inf)
    axs[1].scatter(cal_methods, mean_time_pred)
    minaxis = np.min(np.concatenate([mean_time_inf, mean_time_pred]))
    maxaxis = np.max(np.concatenate([mean_time_inf, mean_time_pred]))
    axs[0].set_ylim([minaxis - .0001, maxaxis + 1])

    axs[0].title.set_text('Inference')
    axs[1].title.set_text('Prediction')
    axs[0].set_ylabel("seconds")

    # Rotate tick labels

    # Mesh plot of the log-likelihood and its Taylor approximation
    fig = texfig.figure(width=6)
    ax = fig.gca(projection='3d')
    ax.plot_wireframe(X=f1,
                      Y=f2,
                      Z=h,
                      label="$\log p(y_n \mid f_n)$",
                      color="tab:blue")
    ax.plot_wireframe(X=f1,
                      Y=f2,
                      Z=taylor_h,
                      label="Taylor approx.",
                      color="tab:orange")

    ax.set_xlabel("$f_n^1$")
    ax.set_ylabel("$f_n^2$")
    ax.tick_params(axis='both', which='major', labelsize=10)
    ax.legend()
    # ax.legend(prop={'size': 9}, labelspacing=0.2)
    texfig.savefig("figures/gpcalib_illustration/taylor_approx_mesh")

    # Contour difference plot
    fig, axes = texfig.subplots()
    c = axes.contourf(f1, f2, h - taylor_h)
    fig.colorbar(c, ax=axes)
    axes.set_xlabel("$f_n^1$")
    axes.set_ylabel("$f_n^2$")

    texfig.savefig("figures/gpcalib_illustration/taylor_approx_contour")
    plt.close("all")
Example #8
def reliability_diagram(y, p_pred, filename, title="Reliability Diagram",
                        n_bins=100, show_ece=False, show_legend=False,
                        model_name=None, xlim=None, plot_height=4, plot_width=4):
    """
    Plot a reliability diagram

    This function plots the reliability diagram [1]_ [2]_ and histograms from the given confidence estimates. Reliability diagrams
    are a visual aid to determine whether a classifier is calibrated or not.

    Parameters
    ----------
    y : array, shape = [n_methods, n_samples]
        Ground truth labels.
    y_pred : array, shape = [n_methods, n_samples]
        Predicted labels.
    p_pred : array or list
        Array of confidence estimates. If this is a list, multiple reliability diagrams will be plotted and arranged
        side-by-side.
    filename : str
        Path or name of output plot files.
    title : str or list
        Title of plot. If `p_pred` is a list, titles of each individual reliability diagram.
    n_bins : int, optional, default=20
        The number of bins into which the `y_pred` are partitioned.
    show_ece : bool
        Whether the expected calibration error (ECE) should be displayed in the plot.
    show_legend : bool
        Whether the legend should be displayed in the plot.
    model_name : str
        Name of the model from which the probabilities were generated. Displayed when showing the legend.
    xlim : array, shape = (2,), default=None
        X-axis limits. If note provided inferred from y.

    References
    ----------
    .. [1] DeGroot, M. H. & Fienberg, S. E. The Comparison and Evaluation of Forecasters. Journal of the Royal
           Statistical Society. Series D (The Statistician) 32, 12–22.
    .. [2] Niculescu-Mizil, A. & Caruana, R. Predicting good probabilities with supervised learning in Proceedings of
           the 22nd International Conference on Machine Learning (2005)

    """
    # Initialization
    if xlim is None:
        n_classes = len(np.unique(y))
        xlim = [1 / n_classes, 1]

    # Check whether multiple reliability diagrams should be plotted
    n_plots = 1
    if isinstance(p_pred, list):
        n_plots = len(p_pred)

    # Define bins
    bins = np.linspace(xlim[0], xlim[1], n_bins + 1)

    # Plot reliability diagram
    fig, axes = texfig.subplots(nrows=2, ncols=n_plots, width=plot_width, ratio=plot_height/plot_width,
                                sharex=True, sharey=True,
                                gridspec_kw={'height_ratios': [2, 1]})

    for i in range(n_plots):
        if n_plots > 1:
            current_plot_axes = axes[:, i]
            y_pred = np.argmax(p_pred[i], axis=1)
            p_max = np.max(p_pred[i], axis=1)
            p_pred_current = p_pred[i]
        else:
            current_plot_axes = axes
            y_pred = np.argmax(p_pred, axis=1)
            p_max = np.max(p_pred, axis=1)
            p_pred_current = p_pred

        # Calibration line
        current_plot_axes[0].plot(xlim, xlim, linestyle='--', color='grey')

        # Compute bin means and empirical accuracy
        # Bin centres (half a bin width in from each limit)
        bin_means = np.linspace(xlim[0] + (xlim[1] - xlim[0]) / (2 * n_bins),
                                xlim[1] - (xlim[1] - xlim[0]) / (2 * n_bins), n_bins)
        empirical_acc = scipy.stats.binned_statistic(p_max,
                                                     np.equal(y_pred, y).astype(int),
                                                     bins=n_bins,
                                                     range=xlim)[0]
        empirical_acc[np.isnan(empirical_acc)] = bin_means[np.isnan(empirical_acc)]

        # Plot accuracy
        current_plot_axes[0].step(bins, np.concatenate(([xlim[0]], empirical_acc)), '-', label=model_name)
        x = np.linspace(xlim[0], xlim[1], 1000)
        bin_ind = np.digitize(x, bins)[1:-1] - 1
        # Shade the gap between the diagonal (perfect calibration) and the empirical accuracy
        current_plot_axes[0].fill_between(x[1:-1], x[1:-1], empirical_acc[bin_ind],
                                          facecolor='k', alpha=0.2)
        if i == 0:
            current_plot_axes[0].set_ylabel('accuracy')
        if show_legend:
            current_plot_axes[0].legend(loc='upper left')
        if title is not None and n_plots > 1:
            current_plot_axes[0].set_title(title[i])
        elif title is not None:
            current_plot_axes[0].set_title(title)

        # Plot histogram
        hist, ex = np.histogram(p_max, bins=bins)
        current_plot_axes[1].fill_between(bins, np.concatenate(([0], hist / np.sum(hist))), lw=0.0, step="pre")
        current_plot_axes[1].set_xlabel('confidence $\\hat{\\textnormal{z}}$')
        if i == 0:
            current_plot_axes[1].set_ylabel('sample frac.')
        current_plot_axes[1].set_xlim(xlim)
        fig.align_labels()

        # Add textbox with ECE
        if show_ece:
            ece = pycalib.scoring.expected_calibration_error(y=y, p_pred=p_pred_current, n_bins=n_bins)
            anchored_text = AnchoredText("$\\textup{ECE}_1 = " + "{:.3f}$".format(ece), loc='lower right')
            anchored_text.patch.set_boxstyle("round,pad=0.,rounding_size=0.2")
            anchored_text.patch.set_edgecolor("0.8")
            anchored_text.patch.set_alpha(0.9)
            current_plot_axes[0].add_artist(anchored_text)

    # Save to file
    texfig.savefig(filename=filename, bbox_inches='tight', pad_inches=0)
    plt.close()
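
A minimal sketch of calling reliability_diagram with synthetic data, assuming the module-level imports used above (numpy as np, texfig, pycalib) are available; the labels, scores, output path and model name are placeholders, not results from the original experiments:

# Synthetic binary-classification example (placeholder data).
rng = np.random.RandomState(0)
n_samples = 1000
y_true = rng.randint(0, 2, size=n_samples)
conf = rng.beta(a=5, b=2, size=n_samples)       # confidence assigned to class 1
p_demo = np.column_stack([1 - conf, conf])      # shape (n_samples, n_classes)

reliability_diagram(y=y_true,
                    p_pred=p_demo,
                    filename="out/reliability_demo",  # placeholder output path
                    title="Synthetic classifier",
                    n_bins=20,
                    show_ece=True,
                    model_name="demo")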