예제 #1
0
    def __init__(self,
                 bins: Union[int, Iterable[int]] = 10,
                 equal_intervals: bool = True,
                 detection: bool = False,
                 sample_threshold: int = 1,
                 fmin: Union[float, None] = None,
                 fmax: Union[float, None] = None,
                 metric: str = 'ECE',
                 **kwargs):
        """
        Store the plot configuration and build the internal miscalibration helper.

        Parameters
        ----------
        bins : int or iterable of int, default: 10
            Binning specification. Stored on the instance and forwarded to ``_Miscalibration``.
        equal_intervals : bool, default: True
            Forwarded unchanged to ``_Miscalibration``; it is not stored on the instance.
        detection : bool, default: False
            Stored on the instance and forwarded to ``_Miscalibration``.
        sample_threshold : int, default: 1
            Stored on the instance and forwarded to ``_Miscalibration``.
        fmin : float, optional
            Lower limit of the plotted metric range; ``None`` leaves it unset.
        fmax : float, optional
            Upper limit of the plotted metric range; ``None`` leaves it unset.
        metric : str, default: 'ECE'
            Name of the metric that the plot routines request from the miscalibration helper.
        **kwargs
            Optional settings. Recognized keys are ``feature_names`` (sequence of axis labels)
            and ``title_suffix`` (text appended to the plot title). Other keys are ignored.
        """

        self.bins = bins
        self.detection = detection
        self.sample_threshold = sample_threshold
        self.fmin = fmin
        self.fmax = fmax
        self.metric = metric

        # Always define the optional attributes. The plot routines test them with
        # "is not None", which raises AttributeError if they were never assigned.
        self.feature_names = kwargs.get('feature_names')
        self.title_suffix = kwargs.get('title_suffix')

        self._miscalibration = _Miscalibration(
            bins=bins,
            equal_intervals=equal_intervals,
            detection=detection,
            sample_threshold=sample_threshold)
예제 #2
0
    def __plot_2d(self, X: list, y: list) -> plt.Figure:
        """
        Plot a 2-D miscalibration heatmap averaged over all given batches.

        For every (X, y) pair the metric map is requested from a ``_Miscalibration`` helper,
        bins without samples are marked as NaN and filled by grid interpolation, and the
        resulting maps are averaged element-wise before drawing.

        Parameters
        ----------
        X : list of np.ndarray, each with shape=(n_samples, [n_box_features])
            List of NumPy arrays with confidence values for each prediction and additional
            box features in the last dim. Presumably confidence plus two features - verify
            against the caller.
        y : list of np.ndarray, each with shape=(n_samples, [n_classes])
            List of NumPy arrays with ground truth labels, one entry per array in ``X``.

        Returns
        -------
        matplotlib.pyplot.Figure
            Figure holding the heatmap and its colorbar.
        """

        # NOTE(review): "equal_intervals" given to the constructor is not forwarded to this helper.
        miscalibration = _Miscalibration(bins=self.bins, detection=self.detection,
                                         sample_threshold=self.sample_threshold)

        metric_maps = []
        for batch_X, batch_y in zip(X, y):
            _, batch_metric_map, batch_num_samples_map = miscalibration._measure(
                batch_X, batch_y, metric=self.metric, return_map=True, return_num_samples=True)

            # on 2D (3 dimensions including confidence), use grid interpolation:
            # set missing entries to NaN and interpolate
            batch_metric_map[batch_num_samples_map == 0.0] = np.nan
            metric_maps.append(self.__interpolate_grid(batch_metric_map))

        # Average over all batches. The transpose is necessary because the map is laid out in
        # feature order, whereas imshow expects [rows, columns], i.e. [y, x].
        metric_map = np.mean(np.array(metric_maps), axis=0).T

        # Take the tick extents from the drawn array instead of indexing self.bins, so that an
        # integer "bins" (the constructor default) works as well.
        num_rows, num_cols = metric_map.shape[:2]

        # draw routines (explicit Axes/Figure API, independent of pyplot's current-figure state)
        fig, ax = plt.subplots()
        image = ax.imshow(metric_map, origin='lower', interpolation="gaussian", cmap='jet', aspect=1,
                          vmin=self.fmin, vmax=self.fmax)

        # relabel pixel indices as the normalized feature range [0, 1]
        tick_labels = np.linspace(0., 1., 5)
        ax.set_xticks(np.linspace(0., num_cols - 1, 5))
        ax.set_xticklabels(tick_labels)
        ax.set_yticks(np.linspace(0., num_rows - 1, 5))
        ax.set_yticklabels(tick_labels)
        ax.set_xlim([0.0, num_cols - 1])
        ax.set_ylim([0.0, num_rows - 1])

        # draw feature names on axes if given (the attribute may be missing entirely)
        feature_names = getattr(self, 'feature_names', None)
        if feature_names is not None:
            ax.set_xlabel(feature_names[0])
            ax.set_ylabel(feature_names[1])

        fig.colorbar(image, ax=ax)

        # name the metric that was actually computed instead of a hard-coded "ECE"
        title = "%s depending on cx/cy coordinates" % self.metric
        title_suffix = getattr(self, 'title_suffix', None)
        if title_suffix is not None:
            title += "\n- %s -" % title_suffix
        ax.set_title(title)

        return fig
예제 #3
0
    def __plot_1d(self, X: list, y: list) -> plt.Figure:
        """
        Plot accuracy, confidence and miscalibration w.r.t. one additional feature.

        For every (X, y) pair the last column of X is binned over [0, 1]. The per-bin mean of
        ``y`` (accuracy) and of the first column of X (confidence) are computed together with
        the metric map of a ``_Miscalibration`` helper. All curves are averaged over the
        batches and interpolated onto 1000 points for drawing.

        Parameters
        ----------
        X : list of np.ndarray, each with shape=(n_samples, [n_box_features])
            List of NumPy arrays with the confidence in column 0 and the additional feature
            in the last column.
        y : list of np.ndarray, each with shape=(n_samples, [n_classes])
            List of NumPy arrays with ground truth labels as label vector (1-D).

        Returns
        -------
        matplotlib.pyplot.Figure
            Figure with accuracy/confidence on the left axis and the metric on the right axis.
        """

        # an integer "bins" (the constructor default) is used directly,
        # otherwise the entry belonging to the additional feature is taken
        num_bins = self.bins if isinstance(self.bins, (int, np.integer)) else self.bins[-1]

        # NOTE(review): "equal_intervals" given to the constructor is not forwarded to this helper.
        miscalibration = _Miscalibration(bins=self.bins, detection=self.detection,
                                         sample_threshold=self.sample_threshold)

        # per-batch results, averaged afterwards
        acc, edge_acc, conf, edge_conf, miscalibration_map = [], [], [], [], []

        for batch_X, batch_y in zip(X, y):
            feature = batch_X[:, -1]

            # binned statistic of average accuracy and confidence w.r.t. the additional feature
            batch_acc, batch_edge_acc, _ = binned_statistic(feature, values=batch_y,
                                                            statistic='mean', bins=num_bins,
                                                            range=[[0.0, 1.0]])
            batch_conf, batch_edge_conf, _ = binned_statistic(feature, values=batch_X[:, 0],
                                                              statistic='mean', bins=num_bins,
                                                              range=[[0.0, 1.0]])

            # with return_map=True the second return value is the per-bin metric map
            _, batch_map = miscalibration._measure(batch_X, batch_y, metric=self.metric,
                                                   return_map=True, return_num_samples=False)
            miscalibration_map.append(batch_map)

            # set empty bins to 0
            # TODO: mark those ranges with a gray box
            batch_acc[np.isnan(batch_acc)] = 0.0
            batch_conf[np.isnan(batch_conf)] = 0.0

            # convert bin edges to bin centers
            acc.append(batch_acc)
            edge_acc.append((batch_edge_acc[:-1] + batch_edge_acc[1:]) * 0.5)
            conf.append(batch_conf)
            edge_conf.append((batch_edge_conf[:-1] + batch_edge_conf[1:]) * 0.5)

        # calculate mean over all given instances
        acc = np.mean(np.array(acc), axis=0)
        edge_acc = np.mean(np.array(edge_acc), axis=0)
        conf = np.mean(np.array(conf), axis=0)
        edge_conf = np.mean(np.array(edge_conf), axis=0)
        miscalibration_map = np.mean(np.array(miscalibration_map), axis=0)

        # A cubic spline needs at least 4 support points; fall back to linear interpolation
        # for coarser binnings instead of failing inside interp1d.
        kind = 'cubic' if len(edge_acc) >= 4 else 'linear'

        # interpolate onto a dense grid for smooth curves
        x = np.linspace(0.0, 1.0, 1000)
        acc = interp1d(edge_acc, acc, kind=kind, fill_value='extrapolate')(x)
        conf = interp1d(edge_conf, conf, kind=kind, fill_value='extrapolate')(x)
        miscalibration_map = interp1d(edge_conf, miscalibration_map, kind=kind, fill_value='extrapolate')(x)

        # draw routines
        fig, ax1 = plt.subplots()
        color = 'tab:blue'

        # set name of the additional feature (the attribute may be missing entirely)
        feature_names = getattr(self, 'feature_names', None)
        if feature_names is not None:
            ax1.set_xlabel(feature_names[0])

        ax1.set_xlim([0.0, 1.0])
        ax1.set_ylim([0.0, 1.0])
        ax1.set_ylabel('accuracy/confidence', color=color)

        # draw confidence and accuracy on the same (left) axis
        line1, = ax1.plot(x, acc, '-.', color='black')
        line2, = ax1.plot(x, conf, '--', color=color)
        ax1.tick_params('y', labelcolor=color)

        # create second axis for the miscalibration metric
        ax11 = ax1.twinx()
        color = 'tab:red'
        line3, = ax11.plot(x, miscalibration_map, '-', color=color)

        # label the metric that was actually computed; the default text is kept for ECE
        metric_label = 'Expected Calibration Error (ECE)' if self.metric == 'ECE' else self.metric
        ax11.set_ylabel(metric_label, color=color)
        ax11.tick_params('y', labelcolor=color)

        # Apply each limit independently, as the 2-D plot does with vmin/vmax;
        # a bound of None leaves that side of the axis untouched.
        if self.fmin is not None or self.fmax is not None:
            ax11.set_ylim(bottom=self.fmin, top=self.fmax)

        ax1.legend((line1, line2, line3), ('accuracy', 'confidence', self.metric), loc='lower left')

        title = 'Accuracy, confidence and %s depending on cx coordinate' % self.metric
        title_suffix = getattr(self, 'title_suffix', None)
        if title_suffix is not None:
            title += '\n- %s -' % title_suffix
        ax1.set_title(title)

        ax1.grid(True)

        fig.tight_layout()
        return fig