def __init__(self, bins: Union[int, Iterable[int]] = 10, equal_intervals: bool = True,
             detection: bool = False, sample_threshold: int = 1,
             fmin: Union[float, None] = None, fmax: Union[float, None] = None,
             metric: str = 'ECE', **kwargs):
    """
    Constructor. For detailed parameter documentation view classdocs.

    Parameters
    ----------
    bins : int or iterable of int, default: 10
        Stored as ``self.bins`` and forwarded to the internal ``_Miscalibration`` instance.
    equal_intervals : bool, default: True
        Forwarded to the internal ``_Miscalibration`` instance only (not stored on ``self``).
    detection : bool, default: False
        Stored as ``self.detection`` and forwarded to ``_Miscalibration``.
    sample_threshold : int, default: 1
        Stored as ``self.sample_threshold`` and forwarded to ``_Miscalibration``.
    fmin : float, optional
        Lower limit of the metric scale used by the plot routines (``None`` = not set).
    fmax : float, optional
        Upper limit of the metric scale used by the plot routines (``None`` = not set).
    metric : str, default: 'ECE'
        Name of the metric that the plot routines pass on to ``_Miscalibration._measure``.
    **kwargs
        Optional entries 'feature_names' and 'title_suffix' are stored as attributes
        of the same name. Any other key is ignored.
    """

    self.bins = bins
    self.detection = detection
    self.sample_threshold = sample_threshold
    self.fmin = fmin
    self.fmax = fmax
    self.metric = metric

    # Always define both optional attributes (None when not supplied). The plot routines
    # read 'self.feature_names' and 'self.title_suffix' unconditionally, so leaving them
    # undefined would raise an AttributeError as soon as a diagram is drawn.
    self.feature_names = kwargs.get('feature_names')
    self.title_suffix = kwargs.get('title_suffix')

    self._miscalibration = _Miscalibration(
        bins=bins,
        equal_intervals=equal_intervals,
        detection=detection,
        sample_threshold=sample_threshold,
    )
def __plot_2d(self, X: list, y: list) -> plt.Figure:
    """
    Plot 2D miscalibration reliability diagram heatmap.

    The metric map of every batch is computed with the metric given by ``self.metric``,
    gaps (bins without samples) are interpolated, and the mean map over all batches is drawn.
    ``self.bins`` must be indexable here: entries 1 and 2 are used to scale the x- and y-axis.

    Parameters
    ----------
    X : list of np.ndarray, each with shape=(n_samples, [n_box_features])
        List of NumPy arrays with confidence values for each prediction and
        additional box features in last dim.
    y : list of np.ndarray, each with shape=(n_samples, [n_classes])
        List of NumPy arrays with ground truth labels as label vector (1-D).

    Returns
    -------
    matplotlib.pyplot.Figure
    """

    # both attributes are optional; fall back to None if they have never been set
    # instead of failing with an AttributeError
    feature_names = getattr(self, 'feature_names', None)
    title_suffix = getattr(self, 'title_suffix', None)

    # miscalibration object is used to get metric map
    miscalibration = _Miscalibration(bins=self.bins, detection=self.detection,
                                     sample_threshold=self.sample_threshold)

    metric_map = []
    for batch_X, batch_y in zip(X, y):
        _, batch_metric_map, batch_num_samples_map = miscalibration._measure(
            batch_X, batch_y, metric=self.metric, return_map=True, return_num_samples=True
        )

        # on 2D (3 dimensions including confidence), use grid interpolation
        # set missing entries to NaN and interpolate
        batch_metric_map[batch_num_samples_map == 0.0] = np.nan
        batch_metric_map = self.__interpolate_grid(batch_metric_map)

        metric_map.append(batch_metric_map)

    # calculate mean miscalibration along all metric maps
    metric_map = np.mean(np.array(metric_map), axis=0)

    # transpose is necessary. Miscalibration is calculated in the order given by the features
    # however, imshow expects arrays in format [rows, columns] or [height, width]
    # e.g., miscalibration with additional x/y (in this order) will be drawn [y, x] otherwise
    metric_map = metric_map.T

    # draw routines
    fig, _ = plt.subplots()
    plt.imshow(metric_map, origin='lower', interpolation="gaussian", cmap='jet', aspect=1,
               vmin=self.fmin, vmax=self.fmax)

    # set correct x- and y-ticks: bin indices are relabeled to the range [0, 1]
    plt.xticks(np.linspace(0., self.bins[1] - 1, 5), np.linspace(0., 1., 5))
    plt.yticks(np.linspace(0., self.bins[2] - 1, 5), np.linspace(0., 1., 5))
    plt.xlim([0.0, self.bins[1] - 1])
    plt.ylim([0.0, self.bins[2] - 1])

    # draw feature names on axes if given
    if feature_names is not None:
        plt.xlabel(feature_names[0])
        plt.ylabel(feature_names[1])

    plt.colorbar()

    # name the metric that has actually been computed (identical to the former
    # hard-coded "ECE" for the default metric) and append the title suffix if given
    title = "%s depending on cx/cy coordinates" % self.metric
    if title_suffix is not None:
        title += "\n- %s -" % title_suffix
    plt.title(title)

    return fig
def __plot_1d(self, X: list, y: list) -> plt.Figure:
    """
    Plot 1-D miscalibration w.r.t. one additional feature.

    Accuracy and confidence are averaged per bin of the additional feature (last column of
    each array in X), the metric given by ``self.metric`` is computed per batch, and the
    mean curves over all batches are drawn after cubic interpolation on [0, 1].

    Parameters
    ----------
    X : list of np.ndarray, each with shape=(n_samples, [n_box_features])
        List of NumPy arrays with confidence values for each prediction and
        additional box features in last dim.
    y : list of np.ndarray, each with shape=(n_samples, [n_classes])
        List of NumPy arrays with ground truth labels as label vector (1-D).

    Returns
    -------
    matplotlib.pyplot.Figure
    """

    # both attributes are optional; fall back to None if they have never been set
    # instead of failing with an AttributeError
    feature_names = getattr(self, 'feature_names', None)
    title_suffix = getattr(self, 'title_suffix', None)

    # label the curve with the metric that is actually computed; the long axis label is
    # only known for the default 'ECE', other metrics are labeled by their plain name
    metric_name = self.metric
    metric_axis_label = 'Expected Calibration Error (ECE)' if metric_name == 'ECE' else metric_name

    # iterate over all given models and build mean confidence and accuracy
    acc, edge_acc, conf, edge_conf, miscalibration_map = [], [], [], [], []

    # miscalibration object is used to get metric map
    miscalibration = _Miscalibration(bins=self.bins, detection=self.detection,
                                     sample_threshold=self.sample_threshold)

    for batch_X, batch_y in zip(X, y):

        # get binned statistic of average accuracy and confidence w.r.t. binning by additional feature
        batch_acc, batch_edge_acc, _ = binned_statistic(batch_X[:, -1], values=batch_y, statistic='mean',
                                                        bins=self.bins[-1], range=[[0.0, 1.0]])
        batch_conf, batch_edge_conf, _ = binned_statistic(batch_X[:, -1], values=batch_X[:, 0], statistic='mean',
                                                          bins=self.bins[-1], range=[[0.0, 1.0]])

        # get miscalibration map w.r.t. the given feature (the scalar result is not needed)
        _, batch_miscal = miscalibration._measure(batch_X, batch_y, metric=self.metric,
                                                  return_map=True, return_num_samples=False)
        miscalibration_map.append(batch_miscal)

        # set empty bins to 0
        # TODO: mark those ranges with a gray box
        batch_acc[np.isnan(batch_acc)] = 0.0
        batch_conf[np.isnan(batch_conf)] = 0.0

        # replace the bin edges by the bin centers
        batch_edge_acc = (batch_edge_acc[:-1] + batch_edge_acc[1:]) * 0.5
        batch_edge_conf = (batch_edge_conf[:-1] + batch_edge_conf[1:]) * 0.5

        # append to global variables
        acc.append(batch_acc)
        edge_acc.append(batch_edge_acc)
        conf.append(batch_conf)
        edge_conf.append(batch_edge_conf)

    # calculate mean over all given instances
    acc = np.mean(np.array(acc), axis=0)
    edge_acc = np.mean(np.array(edge_acc), axis=0)
    conf = np.mean(np.array(conf), axis=0)
    edge_conf = np.mean(np.array(edge_conf), axis=0)
    miscalibration_map = np.mean(np.array(miscalibration_map), axis=0)

    # interpolate missing values
    x = np.linspace(0.0, 1.0, 1000)
    acc = interp1d(edge_acc, acc, kind='cubic', fill_value='extrapolate')(x)
    conf = interp1d(edge_conf, conf, kind='cubic', fill_value='extrapolate')(x)
    miscalibration_map = interp1d(edge_conf, miscalibration_map, kind='cubic', fill_value='extrapolate')(x)

    # draw routines
    fig, ax1 = plt.subplots()
    color = 'tab:blue'

    # set name of the additional feature
    if feature_names is not None:
        ax1.set_xlabel(feature_names[0])

    ax1.set_xlim([0.0, 1.0])
    ax1.set_ylim([0.0, 1.0])
    ax1.set_ylabel('accuracy/confidence', color=color)

    # draw confidence and accuracy on the same (left) axis
    line1, = ax1.plot(x, acc, '-.', color='black')
    line2, = ax1.plot(x, conf, '--', color=color)
    ax1.tick_params('y', labelcolor=color)

    # create second axis for the miscalibration metric
    ax11 = ax1.twinx()
    color = 'tab:red'
    line3, = ax11.plot(x, miscalibration_map, '-', color=color)

    ax11.set_ylabel(metric_axis_label, color=color)
    ax11.tick_params('y', labelcolor=color)

    # set metric limits if given; a limit that is None is left untouched by set_ylim,
    # so a single bound is honored as well (as vmin/vmax are in the 2D plot)
    if self.fmin is not None or self.fmax is not None:
        ax11.set_ylim(self.fmin, self.fmax)

    ax1.legend((line1, line2, line3), ('accuracy', 'confidence', metric_name), loc='lower left')

    title = 'Accuracy, confidence and %s depending on cx coordinate' % metric_name
    if title_suffix is not None:
        title += '\n- %s -' % title_suffix
    ax1.set_title(title)

    ax1.grid(True)

    fig.tight_layout()
    return fig