Example #1
    def evaluate(self):
        """Computes evaluation result.

        Returns:
          A named tuple with the following fields -
            average_precision: a float number corresponding to average precision.
            precisions: an array of precisions.
            recalls: an array of recalls.
            recall@50: recall computed on 50 top-scoring samples.
            recall@100: recall computed on 100 top-scoring samples.
            median_rank@50: median rank computed on 50 top-scoring samples.
            median_rank@100: median rank computed on 100 top-scoring samples.
        """
        if self._num_gt_instances == 0:
            # logging.warn is a deprecated alias of logging.warning.
            logging.warning('No ground truth instances')

        if not self._scores:
            scores = np.array([], dtype=float)
            tp_fp_labels = np.array([], dtype=bool)
            # Bug fix: must also be defined on this path, otherwise the
            # per-relationship loop below raises NameError when no detections
            # were ever added to the evaluator.
            relation_field_values = np.array([])
        else:
            scores = np.concatenate(self._scores)
            tp_fp_labels = np.concatenate(self._tp_fp_labels)
            relation_field_values = np.concatenate(self._relation_field_values)

        # Per-relationship AP: restrict detections to each relationship type.
        # Iterating the dict directly yields its keys on both Python 2 and 3;
        # the previous .iteritems() call was Python 2 only.
        for relation_field_value in self._num_gt_instances_per_relationship:
            mask = relation_field_values == relation_field_value
            precisions, recalls = metrics.compute_precision_recall(
                scores[mask], tp_fp_labels[mask],
                self._num_gt_instances_per_relationship[relation_field_value])
            self._average_precisions[
                relation_field_value] = metrics.compute_average_precision(
                    precisions, recalls)

        # list(...) is required on Python 3, where dict.values() is a view
        # that np.mean cannot reduce directly.
        self._mean_average_precision = np.mean(
            list(self._average_precisions.values()))

        # Overall (weighted) precision/recall over all relationship types.
        self._precisions, self._recalls = metrics.compute_precision_recall(
            scores, tp_fp_labels, self._num_gt_instances)
        self._weighted_average_precision = metrics.compute_average_precision(
            self._precisions, self._recalls)

        self._recall_50 = (metrics.compute_recall_at_k(self._tp_fp_labels,
                                                       self._num_gt_instances,
                                                       50))
        self._median_rank_50 = (metrics.compute_median_rank_at_k(
            self._tp_fp_labels, 50))
        self._recall_100 = (metrics.compute_recall_at_k(
            self._tp_fp_labels, self._num_gt_instances, 100))
        self._median_rank_100 = (metrics.compute_median_rank_at_k(
            self._tp_fp_labels, 100))

        return VRDDetectionEvalMetrics(
            self._weighted_average_precision, self._mean_average_precision,
            self._average_precisions, self._precisions, self._recalls,
            self._recall_50, self._recall_100, self._median_rank_50,
            self._median_rank_100)
Example #2
  def test_compute_precision_recall(self):
    """Precision/recall must be identical for bool and float 0/1 labels."""
    gt_count = 10
    detection_scores = np.array([0.4, 0.3, 0.6, 0.2, 0.7, 0.1], dtype=float)
    bool_labels = np.array([0, 1, 1, 0, 0, 1], dtype=bool)
    float_labels = np.array([0, 1, 1, 0, 0, 1], dtype=float)

    # Cumulative true positives after sorting by descending score.
    cumulative_tp = np.array([0, 1, 1, 2, 2, 3], dtype=float)
    want_precision = cumulative_tp / np.array([1, 2, 3, 4, 5, 6])
    want_recall = cumulative_tp / gt_count

    for labels in (bool_labels, float_labels):
      got_precision, got_recall = metrics.compute_precision_recall(
          detection_scores, labels, gt_count)
      self.assertAllClose(got_precision, want_precision)
      self.assertAllClose(got_recall, want_recall)
Example #3
 def test_compute_precision_recall_and_ap_no_groundtruth(self):
   """With zero ground truth, precision/recall are None and AP is NaN."""
   num_gt = 0
   scores = np.array([0.4, 0.3, 0.6, 0.2, 0.7, 0.1], dtype=float)
   labels = np.array([0, 0, 0, 0, 0, 0], dtype=bool)
   expected_precision = None
   expected_recall = None
   precision, recall = metrics.compute_precision_recall(scores, labels, num_gt)
   # assertEquals is a deprecated alias of assertEqual, removed in Python 3.12.
   self.assertEqual(precision, expected_precision)
   self.assertEqual(recall, expected_recall)
   ap = metrics.compute_average_precision(precision, recall)
   self.assertTrue(np.isnan(ap))
Example #4
 def test_compute_precision_recall_float(self):
   """Fractional ground-truth labels yield fractional true-positive credit."""
   gt_count = 10
   det_scores = np.array([0.4, 0.3, 0.6, 0.2, 0.7, 0.1], dtype=float)
   frac_labels = np.array([0, 1, 1, 0.5, 0, 1], dtype=float)

   want_precision = np.array(
       [0., 0.5, 0.33333333, 0.5, 0.55555556, 0.63636364], dtype=float)
   want_recall = np.array([0., 0.1, 0.1, 0.2, 0.25, 0.35], dtype=float)

   got_precision, got_recall = metrics.compute_precision_recall(
       det_scores, frac_labels, gt_count)

   self.assertAllClose(want_precision, got_precision)
   self.assertAllClose(want_recall, got_recall)
    def evaluate(self):
        """Compute evaluation result.

        Returns:
          A named tuple with the following fields -
            average_precision: float numpy array of average precision for
                each class.
            mean_ap: mean average precision of all classes, float scalar
            precisions: List of precisions, each precision is a float numpy
                array
            recalls: List of recalls, each recall is a float numpy array
            corloc: numpy float array
            mean_corloc: Mean CorLoc score for each class, float scalar
        """
        if (self.num_gt_instances_per_class == 0).any():
            # logging.warn is a deprecated alias of logging.warning.
            logging.warning(
                'The following classes have no ground truth examples: %s',
                np.squeeze(np.argwhere(self.num_gt_instances_per_class == 0)) +
                self.label_id_offset)

        if self.use_weighted_mean_ap:
            # Accumulators pooling detections across all classes so the mean
            # AP can be weighted by per-class instance counts.
            all_scores = np.array([], dtype=float)
            all_tp_fp_labels = np.array([], dtype=bool)
        for class_index in range(self.num_class):
            # Classes without ground truth contribute nothing (AP undefined).
            if self.num_gt_instances_per_class[class_index] == 0:
                continue
            if not self.scores_per_class[class_index]:
                scores = np.array([], dtype=float)
                # Consistency fix: tp/fp labels are boolean everywhere else
                # in this method (see all_tp_fp_labels above); was dtype=float.
                tp_fp_labels = np.array([], dtype=bool)
            else:
                scores = np.concatenate(self.scores_per_class[class_index])
                tp_fp_labels = np.concatenate(
                    self.tp_fp_labels_per_class[class_index])
            if self.use_weighted_mean_ap:
                all_scores = np.append(all_scores, scores)
                all_tp_fp_labels = np.append(all_tp_fp_labels, tp_fp_labels)
            logging.info('Scores and tpfp per class label: %d', class_index)
            logging.info(tp_fp_labels)
            logging.info(scores)
            precision, recall = metrics.compute_precision_recall(
                scores, tp_fp_labels,
                self.num_gt_instances_per_class[class_index])
            self.precisions_per_class.append(precision)
            self.recalls_per_class.append(recall)
            average_precision = metrics.compute_average_precision(
                precision, recall)
            self.average_precision_per_class[class_index] = average_precision

        self.corloc_per_class = metrics.compute_cor_loc(
            self.num_gt_imgs_per_class,
            self.num_images_correctly_detected_per_class)

        if self.use_weighted_mean_ap:
            # Weighted mAP: a single PR curve over the pooled detections.
            num_gt_instances = np.sum(self.num_gt_instances_per_class)
            precision, recall = metrics.compute_precision_recall(
                all_scores, all_tp_fp_labels, num_gt_instances)
            mean_ap = metrics.compute_average_precision(precision, recall)
        else:
            # nanmean skips classes whose AP stayed NaN (no ground truth).
            mean_ap = np.nanmean(self.average_precision_per_class)
        mean_corloc = np.nanmean(self.corloc_per_class)
        return ObjectDetectionEvalMetrics(self.average_precision_per_class,
                                          mean_ap, self.precisions_per_class,
                                          self.recalls_per_class,
                                          self.corloc_per_class, mean_corloc)