def testRebinWithSparseData(self):
  histogram = [
      calibration_histogram.Bucket(4, 5.0, .25, 5.0),  # pred = .05
      calibration_histogram.Bucket(61, 60.0, 36.0, 60.0),  # pred = .6
      calibration_histogram.Bucket(70, 69.0, 47.61, 69.0),  # pred = .69
      calibration_histogram.Bucket(100, 99.0, 98.01, 99.0)  # pred = .99
  ]
  # [0, 0.1, ..., 0.9, 1.0]
  thresholds = [i * 1.0 / 10 for i in range(0, 11)]
  got = calibration_histogram.rebin(thresholds, histogram, 100)
  expected = [
      calibration_histogram.Bucket(0, 5.0, 0.25, 5.0),
      calibration_histogram.Bucket(1, 0.0, 0.0, 0.0),
      calibration_histogram.Bucket(2, 0.0, 0.0, 0.0),
      calibration_histogram.Bucket(3, 0.0, 0.0, 0.0),
      calibration_histogram.Bucket(4, 0.0, 0.0, 0.0),
      calibration_histogram.Bucket(5, 0.0, 0.0, 0.0),
      calibration_histogram.Bucket(6, 129.0, 83.61, 129.0),
      calibration_histogram.Bucket(7, 0.0, 0.0, 0.0),
      calibration_histogram.Bucket(8, 0.0, 0.0, 0.0),
      calibration_histogram.Bucket(9, 99.0, 98.01, 99.0),
      calibration_histogram.Bucket(10, 0.0, 0.0, 0.0),
  ]
  self.assertLen(got, len(expected))
  for i in range(len(got)):
    self.assertSequenceAlmostEqual(got[i], expected[i])
def testRebin(self):
  # [Bucket(0, -1, -0.01), Bucket(1, 0, 0), ..., Bucket(101, 101, 1.01)]
  histogram = [calibration_histogram.Bucket(0, -1, -.01, 1.0)]
  for i in range(100):
    histogram.append(calibration_histogram.Bucket(i + 1, i, i * .01, 1.0))
  histogram.append(calibration_histogram.Bucket(101, 101, 1.01, 1.0))
  # [-1e-7, 0.0, 0.1, ..., 0.9, 1.0, 1.0+1e-7]
  thresholds = [-1e-7] + [i * 1.0 / 10 for i in range(11)] + [1.0 + 1e-7]
  got = calibration_histogram.rebin(thresholds, histogram, 100)
  # Rebinned bucket i (for i in [1, 10]) sums the original labels 10*(i-1)
  # through 10*(i-1)+9, i.e. 100 * (i-1) + (1 + 2 + ... + 9) = 100 * (i-1) + 45.
  expected = [
      calibration_histogram.Bucket(0, -1, -0.01, 1.0),
      calibration_histogram.Bucket(1, 45.0, 0.45, 10.0),
      calibration_histogram.Bucket(2, 145.0, 1.45, 10.0),
      calibration_histogram.Bucket(3, 245.0, 2.45, 10.0),
      calibration_histogram.Bucket(4, 345.0, 3.45, 10.0),
      calibration_histogram.Bucket(5, 445.0, 4.45, 10.0),
      calibration_histogram.Bucket(6, 545.0, 5.45, 10.0),
      calibration_histogram.Bucket(7, 645.0, 6.45, 10.0),
      calibration_histogram.Bucket(8, 745.0, 7.45, 10.0),
      calibration_histogram.Bucket(9, 845.0, 8.45, 10.0),
      calibration_histogram.Bucket(10, 945.0, 9.45, 10.0),
      calibration_histogram.Bucket(11, 0.0, 0.0, 0.0),
      calibration_histogram.Bucket(12, 101.0, 1.01, 1.0),
  ]
  self.assertLen(got, len(expected))
  for i in range(len(got)):
    self.assertSequenceAlmostEqual(got[i], expected[i])
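# A minimal sketch of the rebinning behavior that the two tests above
# exercise. This is NOT the actual calibration_histogram.rebin implementation;
# it is an illustration that reproduces both expected outputs, assuming the
# Bucket layout used above (bucket_id, weighted_labels, weighted_predictions,
# weighted_examples), that source bucket i spans
# [left + (i-1)*w, left + i*w) with w = (right - left) / num_buckets, and
# that out-of-range ids (underflow 0, overflow num_buckets + 1) are clamped
# into the first/last output bucket via their midpoints.
import bisect
from typing import List, NamedTuple


class Bucket(NamedTuple):
  bucket_id: int
  weighted_labels: float
  weighted_predictions: float
  weighted_examples: float


def rebin_sketch(thresholds: List[float],
                 buckets: List[Bucket],
                 num_buckets: int,
                 left: float = 0.0,
                 right: float = 1.0) -> List[Bucket]:
  """Regroups fine-grained histogram buckets into one bucket per threshold."""
  width = (right - left) / num_buckets
  totals = [[0.0, 0.0, 0.0] for _ in thresholds]
  for b in buckets:
    # Use the midpoint of the source bucket so that underflow (id 0) falls
    # below `left` and overflow (id num_buckets + 1) falls above `right`.
    midpoint = left + (b.bucket_id - 0.5) * width
    # Output bucket i covers [thresholds[i], thresholds[i + 1]).
    i = bisect.bisect_right(thresholds, midpoint) - 1
    i = min(max(i, 0), len(thresholds) - 1)  # Clamp out-of-range ids.
    totals[i][0] += b.weighted_labels
    totals[i][1] += b.weighted_predictions
    totals[i][2] += b.weighted_examples
  return [Bucket(i, *t) for i, t in enumerate(totals)]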
def result(
    metrics: Dict[metric_types.MetricKey, Any]
) -> Dict[metric_types.MetricKey, Matrices]:
  """Returns binary confusion matrices."""
  # Calibration histogram uses intervals of the form [start, end) where the
  # prediction >= start. The confusion matrices want intervals of the form
  # (start, end] where the prediction > start. Add a small epsilon so that >=
  # checks don't match. This correction shouldn't be needed in practice but
  # allows for correctness in small tests.
  if len(thresholds) == 1:
    # When there is only one threshold, we need to make adjustments so that
    # we have proper boundaries around the threshold for <, >= comparisons.
    if thresholds[0] < 0:
      # This case is used when all prediction values are considered matches
      # (e.g. when calculating top_k for precision/recall).
      rebin_thresholds = [thresholds[0], thresholds[0] + _EPSILON]
    else:
      # This case is used for a single threshold within [0, 1] (e.g. 0.5).
      rebin_thresholds = [-_EPSILON, thresholds[0] + _EPSILON, 1.0 + _EPSILON]
  else:
    rebin_thresholds = ([thresholds[0]] +
                        [t + _EPSILON for t in thresholds[1:]])
  histogram = calibration_histogram.rebin(rebin_thresholds,
                                          metrics[histogram_key])
  matrices = _to_binary_confusion_matrices(thresholds, histogram)
  if len(thresholds) == 1:
    # Reset back to 1 bucket.
    matrices = Matrices(
        thresholds,
        tp=[matrices.tp[1]],
        fp=[matrices.fp[1]],
        tn=[matrices.tn[1]],
        fn=[matrices.fn[1]])
  return {key: matrices}
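# A hedged sketch of the kind of computation a helper like
# _to_binary_confusion_matrices could perform. The real helper is not shown
# in this section; this illustration only assumes that rebinned bucket i
# covers [rebin_thresholds[i], rebin_thresholds[i + 1]) and that everything
# in buckets at or above index i counts as "predicted positive" for that
# index (which is why the single-threshold case above reads index 1).
def to_confusion_counts_sketch(histogram):
  """Returns (tp, fp, tn, fn) lists, one entry per rebinned bucket."""
  total_labels = sum(b.weighted_labels for b in histogram)
  total_examples = sum(b.weighted_examples for b in histogram)
  tp, fp, tn, fn = [], [], [], []
  labels_above = total_labels      # Labels in buckets at or above index i.
  examples_above = total_examples  # Examples in buckets at or above index i.
  for b in histogram:
    tp.append(labels_above)
    fp.append(examples_above - labels_above)
    fn.append(total_labels - labels_above)
    # Negatives below the threshold: examples below minus labels below.
    tn.append((total_examples - examples_above) -
              (total_labels - labels_above))
    labels_above -= b.weighted_labels
    examples_above -= b.weighted_examples
  return tp, fp, tn, fn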
def result(
    metrics: Dict[metric_types.MetricKey, Any]
) -> Dict[metric_types.MetricKey, Matrices]:
  """Returns binary confusion matrices."""
  if len(thresholds) == 1 and thresholds[0] < 0:
    # This case is used when all positive prediction values are considered
    # matches (e.g. when calculating top_k for precision/recall where the
    # non-top_k values are expected to have been set to float('-inf')).
    histogram = metrics[histogram_key]
  else:
    # Calibration histogram uses intervals of the form [start, end) where the
    # prediction >= start. The confusion matrices want intervals of the form
    # (start, end] where the prediction > start. Add a small epsilon so that
    # >= checks don't match. This correction shouldn't be needed in practice
    # but allows for correctness in small tests.
    rebin_thresholds = [t + _EPSILON if t != 0 else t for t in thresholds]
    if thresholds[0] >= 0:
      # Add a -epsilon bucket to account for the differences in histogram vs
      # confusion matrix intervals mentioned above. If the epsilon bucket is
      # missing, the false negatives and false positives will be 0 for the
      # first threshold.
      rebin_thresholds = [-_EPSILON] + rebin_thresholds
    if thresholds[-1] < 1.0:
      # If the last threshold < 1.0, then add a fence post at 1.0 + epsilon,
      # otherwise true negatives and true positives will be overcounted.
      rebin_thresholds = rebin_thresholds + [1.0 + _EPSILON]
    histogram = calibration_histogram.rebin(rebin_thresholds,
                                            metrics[histogram_key])
  matrices = _to_binary_confusion_matrices(thresholds, histogram)
  return {key: matrices}
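# Worked example of the rebin_thresholds construction above (illustrative
# input; "eps" stands for whatever small constant _EPSILON holds):
#   thresholds = [0.0, 0.25, 0.5, 0.75]
#   after the list comprehension (exact 0 is kept as-is):
#       [0.0, 0.25 + eps, 0.5 + eps, 0.75 + eps]
#   thresholds[0] >= 0, so the -eps bucket is prepended:
#       [-eps, 0.0, 0.25 + eps, 0.5 + eps, 0.75 + eps]
#   thresholds[-1] < 1.0, so the 1.0 + eps fence post is appended:
#       [-eps, 0.0, 0.25 + eps, 0.5 + eps, 0.75 + eps, 1.0 + eps]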
def result(
    metrics: Dict[metric_types.MetricKey, Any]
) -> Dict[metric_types.MetricKey, Any]:
  # Build num_buckets + 1 evenly spaced thresholds over [left, right], plus a
  # leading -inf bucket to capture predictions below the plot range.
  thresholds = [
      left + i * (right - left) / num_buckets for i in range(num_buckets + 1)
  ]
  thresholds = [float('-inf')] + thresholds
  histogram = calibration_histogram.rebin(
      thresholds, metrics[histogram_key], left=left, right=right)
  return {key: _to_proto(thresholds, histogram)}
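# For example, with left=0.0, right=1.0 and num_buckets=4, the thresholds
# built above are:
#   [float('-inf'), 0.0, 0.25, 0.5, 0.75, 1.0]
# so the rebinned histogram gets one bucket per plot interval plus the
# underflow bucket at the front.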
def result(
    metrics: Dict[metric_types.MetricKey, Any]
) -> Dict[metric_types.MetricKey, Matrices]:
  """Returns binary confusion matrices."""
  # Calibration histogram uses intervals of the form [start, end) where the
  # prediction >= start. The confusion matrices want intervals of the form
  # (start, end] where the prediction > start. Add a small epsilon so that >=
  # checks don't match. This correction shouldn't be needed in practice but
  # allows for correctness in small tests.
  if len(thresholds) == 1:
    # When there is only one threshold, we need to make adjustments so that
    # we have proper boundaries around the threshold for <, >= comparisons.
    if thresholds[0] < 0:
      # This case is used when all prediction values are considered matches
      # (e.g. when calculating top_k for precision/recall).
      rebin_thresholds = [thresholds[0], thresholds[0] + _EPSILON]
    else:
      # This case is used for a single threshold within [0, 1] (e.g. 0.5).
      rebin_thresholds = [-_EPSILON, thresholds[0] + _EPSILON, 1.0 + _EPSILON]
  else:
    rebin_thresholds = [t + _EPSILON if t != 0 else t for t in thresholds]
    if thresholds[0] >= 0:
      # Add a -epsilon bucket to account for the differences in histogram vs
      # confusion matrix intervals mentioned above. If the epsilon bucket is
      # missing, the false negatives and false positives will be 0 for the
      # first threshold.
      rebin_thresholds = [-_EPSILON] + rebin_thresholds
    if thresholds[-1] < 1.0:
      # If the last threshold < 1.0, then add a fence post at 1.0 + epsilon,
      # otherwise true negatives and true positives will be overcounted.
      rebin_thresholds = rebin_thresholds + [1.0 + _EPSILON]
  histogram = calibration_histogram.rebin(rebin_thresholds,
                                          metrics[histogram_key])
  matrices = _to_binary_confusion_matrices(thresholds, histogram)
  # Check whether we need to remove the -epsilon bucket (or reset back to 1
  # bucket).
  start_index = 1 if thresholds[0] >= 0 or len(thresholds) == 1 else 0
  matrices = Matrices(
      thresholds,
      tp=matrices.tp[start_index:start_index + len(thresholds)],
      fp=matrices.fp[start_index:start_index + len(thresholds)],
      tn=matrices.tn[start_index:start_index + len(thresholds)],
      fn=matrices.fn[start_index:start_index + len(thresholds)])
  return {key: matrices}
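# Worked example of the slicing above, under the same assumption as the
# earlier sketch (one confusion-matrix entry per rebinned bucket): for
# thresholds = [0.0, 0.25, 0.5, 0.75], rebin_thresholds has six entries
# ([-eps, 0.0, 0.25 + eps, 0.5 + eps, 0.75 + eps, 1.0 + eps]), so the helper
# yields six counts per list. The leading entry belongs to the synthetic
# -eps bucket where everything is predicted positive; start_index = 1 drops
# it and keeps exactly len(thresholds) == 4 entries, one per real threshold.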