def testEmptyBins(self):
     # Calibration curve for completely empty inputs (no scores, no hits).
     bin_count = 2
     no_scores = np.array([])
     no_hits = np.array([])

     # Expected outputs: the predicted accuracies are still [0.25, 0.75]
     # even without data, while empirical accuracies and per-bin example
     # counts are all zero.
     want_predicted = np.array([0.25, 0.75])
     want_empirical = np.array([0, 0])
     want_counts = np.zeros(shape=bin_count)

     curve = calibration_processing.CalibrationCurve(
         no_scores, no_hits, bin_count)
     got_predicted, got_empirical, got_counts, ece = curve

     self.assertAllEqual(want_predicted, got_predicted)
     self.assertAllEqual(want_empirical, got_empirical)
     self.assertAllEqual(want_counts, got_counts)
     # With nothing to calibrate the reported calibration error must be zero.
     self.assertEqual(ece, 0.0)
 def testAllDataInOneBin(self):
     # Calibration curve when every example lands in the same (last) bin:
     # three examples scored 1, none of which is a hit.
     bin_count = 2
     all_top_scores = np.array([1, 1, 1])
     all_misses = np.array([0, 0, 0])

     want_predicted = np.array([0.25, 0.75])
     want_empirical = np.array([0, 0])
     # First bin stays empty; the last bin holds all three examples.
     want_counts = np.array([0.0, 3.0])

     curve = calibration_processing.CalibrationCurve(
         all_top_scores, all_misses, bin_count)
     got_predicted, got_empirical, got_counts, ece = curve

     self.assertAllEqual(want_predicted, got_predicted)
     self.assertAllEqual(want_empirical, got_empirical)
     self.assertAllEqual(want_counts, got_counts)
     self.assertEqual(ece, 0.75)
 def testCalibrationCurvePerfectCalibration(self):
     # Calibration curve when the fraction of hits in each bin matches the
     # predicted accuracy of that bin: 1 of 4 hits at score 0.25 and
     # 3 of 4 hits at score 0.75.
     bin_count = 2
     scores = np.array([0.25] * 4 + [0.75] * 4)
     hits = np.array([1, 0, 0, 0, 1, 1, 1, 0])

     want_predicted = np.array([0.25, 0.75])
     want_empirical = np.array([0.25, 0.75])
     # Four examples fall into each of the two bins.
     want_counts = np.array([4.0, 4.0])

     curve = calibration_processing.CalibrationCurve(scores, hits, bin_count)
     got_predicted, got_empirical, got_counts, ece = curve

     self.assertAllEqual(want_predicted, got_predicted)
     self.assertAllEqual(want_empirical, got_empirical)
     self.assertAllEqual(want_counts, got_counts)
     # Predicted and empirical accuracies agree, so the error is zero.
     self.assertEqual(ece, 0.0)
 def testCalibrationCurveEqualScoresAndHits(self):
     # Test calibration curve data when scores are equal to hits.
     # This is an edge case where scores are either 0 or 1.
     num_bins = 2
     scores = np.array([0, 1, 0])
     hits = scores
     (mean_predicted_accuracies, mean_empirical_accuracies, num_examples,
      ece) = calibration_processing.CalibrationCurve(scores, hits, num_bins)

     expected_mean_predicted_accuracies = np.array([0.25, 0.75])
     self.assertAllEqual(expected_mean_predicted_accuracies,
                         mean_predicted_accuracies)

     # The two zero scores are misses and the single one score is a hit.
     expected_mean_empirical_accuracies = np.array([0, 1])
     self.assertAllEqual(expected_mean_empirical_accuracies,
                         mean_empirical_accuracies)

     # Two examples in the first bin, one in the last.
     expected_num_examples = np.zeros(shape=num_bins)
     expected_num_examples[0] = 2
     expected_num_examples[num_bins - 1] = 1
     self.assertAllEqual(expected_num_examples, num_examples)

     # Compare with a tolerance instead of bit-exact equality: with bins of
     # 2 and 1 examples the value is presumably a weighted aggregate
     # involving thirds (TODO confirm against CalibrationCurve), so the exact
     # float result would depend on the implementation's summation order.
     self.assertAlmostEqual(ece, 0.25)