def testEmptyBins(self):
    """Calibration curve over zero examples: every bin empty, ECE of 0."""
    num_bins = 2
    scores = np.array([])
    hits = np.array([])
    (mean_predicted_accuracies, mean_empirical_accuracies, num_examples,
     ece) = calibration_processing.CalibrationCurve(scores, hits, num_bins)
    # Bin centers are reported even when no example falls in a bin.
    self.assertAllEqual(np.array([0.25, 0.75]), mean_predicted_accuracies)
    # An empty bin contributes zero empirical accuracy.
    self.assertAllEqual(np.array([0, 0]), mean_empirical_accuracies)
    self.assertAllEqual(np.zeros(shape=num_bins), num_examples)
    self.assertEqual(ece, 0.0)
def testAllDataInOneBin(self):
    """All examples land in the top bin; ECE equals that bin's gap."""
    num_bins = 2
    scores = np.array([1, 1, 1])
    hits = np.array([0, 0, 0])
    (mean_predicted_accuracies, mean_empirical_accuracies, num_examples,
     ece) = calibration_processing.CalibrationCurve(scores, hits, num_bins)
    self.assertAllEqual(np.array([0.25, 0.75]), mean_predicted_accuracies)
    # No hits anywhere, so both bins report zero empirical accuracy.
    self.assertAllEqual(np.array([0, 0]), mean_empirical_accuracies)
    expected_counts = np.zeros(shape=num_bins)
    expected_counts[-1] = 3
    self.assertAllEqual(expected_counts, num_examples)
    # Full weight on the top bin: |0.75 - 0| * (3 / 3) = 0.75.
    self.assertEqual(ece, 0.75)
def testCalibrationCurvePerfectCalibration(self):
    """Empirical accuracy equals mean predicted accuracy in each bin -> ECE 0."""
    num_bins = 2
    scores = np.array([0.25, 0.25, 0.25, 0.25, 0.75, 0.75, 0.75, 0.75])
    hits = np.array([1, 0, 0, 0, 1, 1, 1, 0])
    (mean_predicted_accuracies, mean_empirical_accuracies, num_examples,
     ece) = calibration_processing.CalibrationCurve(scores, hits, num_bins)
    self.assertAllEqual(np.array([0.25, 0.75]), mean_predicted_accuracies)
    # 1/4 hits in the low bin and 3/4 in the high bin match the scores exactly.
    self.assertAllEqual(np.array([0.25, 0.75]), mean_empirical_accuracies)
    expected_counts = np.zeros(shape=num_bins)
    expected_counts[0] = 4
    expected_counts[-1] = 4
    self.assertAllEqual(expected_counts, num_examples)
    self.assertEqual(ece, 0.0)
def testCalibrationCurveEqualScoresAndHits(self):
    """Edge case: scores are exactly 0 or 1 and double as the hits."""
    num_bins = 2
    scores = np.array([0, 1, 0])
    hits = scores
    (mean_predicted_accuracies, mean_empirical_accuracies, num_examples,
     ece) = calibration_processing.CalibrationCurve(scores, hits, num_bins)
    self.assertAllEqual(np.array([0.25, 0.75]), mean_predicted_accuracies)
    # Low bin holds the two zeros (all misses); high bin holds the single one.
    self.assertAllEqual(np.array([0, 1]), mean_empirical_accuracies)
    expected_counts = np.zeros(shape=num_bins)
    expected_counts[0] = 2
    expected_counts[-1] = 1
    self.assertAllEqual(expected_counts, num_examples)
    # ECE = (2/3) * |0.25 - 0| + (1/3) * |0.75 - 1| = 0.25.
    self.assertEqual(ece, 0.25)