def evaluate(self):
    """Computes evaluation result.

    Returns:
      A named tuple with the following fields -
        weighted_average_precision: average precision computed by pooling
            detections across all relationship triples, a float scalar.
        mean_average_precision: mean of the per-relationship average
            precisions, a float scalar.
        average_precisions: a dict mapping each relationship field value to
            its average precision.
        precisions: an array of precisions.
        recalls: an array of recalls.
        recall_50: recall computed on the 50 top-scoring samples.
        recall_100: recall computed on the 100 top-scoring samples.
        median_rank_50: median rank computed on the 50 top-scoring samples.
        median_rank_100: median rank computed on the 100 top-scoring samples.
    """
    if self._num_gt_instances == 0:
      logging.warning('No ground truth instances')

    if not self._scores:
      scores = np.array([], dtype=float)
      tp_fp_labels = np.array([], dtype=bool)
      relation_field_values = np.array([], dtype=int)
    else:
      scores = np.concatenate(self._scores)
      tp_fp_labels = np.concatenate(self._tp_fp_labels)
      relation_field_values = np.concatenate(self._relation_field_values)

    for relation_field_value in self._num_gt_instances_per_relationship:
      precisions, recalls = metrics.compute_precision_recall(
          scores[relation_field_values == relation_field_value],
          tp_fp_labels[relation_field_values == relation_field_value],
          self._num_gt_instances_per_relationship[relation_field_value])
      self._average_precisions[
          relation_field_value] = metrics.compute_average_precision(
              precisions, recalls)

    self._mean_average_precision = np.mean(
        list(self._average_precisions.values()))

    self._precisions, self._recalls = metrics.compute_precision_recall(
        scores, tp_fp_labels, self._num_gt_instances)
    self._weighted_average_precision = metrics.compute_average_precision(
        self._precisions, self._recalls)

    self._recall_50 = (
        metrics.compute_recall_at_k(self._tp_fp_labels, self._num_gt_instances,
                                    50))
    self._median_rank_50 = (
        metrics.compute_median_rank_at_k(self._tp_fp_labels, 50))
    self._recall_100 = (
        metrics.compute_recall_at_k(self._tp_fp_labels, self._num_gt_instances,
                                    100))
    self._median_rank_100 = (
        metrics.compute_median_rank_at_k(self._tp_fp_labels, 100))

    return VRDDetectionEvalMetrics(
        self._weighted_average_precision, self._mean_average_precision,
        self._average_precisions, self._precisions, self._recalls,
        self._recall_50, self._recall_100, self._median_rank_50,
        self._median_rank_100)
  def test_compute_precision_recall(self):
    num_gt = 10
    scores = np.array([0.4, 0.3, 0.6, 0.2, 0.7, 0.1], dtype=float)
    labels = np.array([0, 1, 1, 0, 0, 1], dtype=bool)
    accumulated_tp_count = np.array([0, 1, 1, 2, 2, 3], dtype=float)
    expected_precision = accumulated_tp_count / np.array([1, 2, 3, 4, 5, 6])
    expected_recall = accumulated_tp_count / num_gt
    precision, recall = metrics.compute_precision_recall(scores, labels, num_gt)
    self.assertAllClose(precision, expected_precision)
    self.assertAllClose(recall, expected_recall)

  def test_compute_precision_recall_and_ap_no_groundtruth(self):
    num_gt = 0
    scores = np.array([0.4, 0.3, 0.6, 0.2, 0.7, 0.1], dtype=float)
    labels = np.array([0, 0, 0, 0, 0, 0], dtype=bool)
    precision, recall = metrics.compute_precision_recall(scores, labels, num_gt)
    self.assertIsNone(precision)
    self.assertIsNone(recall)
    ap = metrics.compute_average_precision(precision, recall)
    self.assertTrue(np.isnan(ap))
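
# --- Illustrative sketch (not library code) ---
# The NaN assertion above reflects average precision being undefined without
# ground truth. Below is a sketch of the standard Pascal VOC-style
# interpolated AP computation these tests assume: pad the curve, enforce a
# monotonically non-increasing precision envelope, then integrate precision
# over the recall steps. The function name is hypothetical.
import numpy as np


def compute_average_precision_sketch(precision, recall):
  if precision is None or recall is None:
    return np.nan
  recall = np.concatenate([[0.0], recall, [1.0]])
  precision = np.concatenate([[0.0], precision, [0.0]])
  # Walk right to left so each point holds the best precision at >= recall.
  for i in range(len(precision) - 2, -1, -1):
    precision[i] = max(precision[i], precision[i + 1])
  # Integrate precision only over the points where recall increases.
  indices = np.where(recall[1:] != recall[:-1])[0] + 1
  return np.sum((recall[indices] - recall[indices - 1]) * precision[indices])
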
    def evaluate(self):
        """Compute evaluation result.

        Returns:
          average_precision_per_class: float numpy array of average precision
              for each class.
          mean_ap: mean average precision over all classes, a float scalar.
          precisions_per_class: list of per-class precisions, each a float
              numpy array.
          recalls_per_class: list of per-class recalls, each a float numpy
              array.
          corloc_per_class: float numpy array of CorLoc scores for each class.
          mean_corloc: mean CorLoc score over all classes, a float scalar.
        """
        if (self.num_gt_instances_per_class == 0).any():
            logging.warning(
                'The following classes have no ground truth examples: %s',
                np.squeeze(np.argwhere(self.num_gt_instances_per_class == 0)))
        for class_index in range(self.num_class):
            if self.num_gt_instances_per_class[class_index] == 0:
                continue
            scores = np.concatenate(self.scores_per_class[class_index])
            tp_fp_labels = np.concatenate(
                self.tp_fp_labels_per_class[class_index])
            precision, recall = metrics.compute_precision_recall(
                scores, tp_fp_labels,
                self.num_gt_instances_per_class[class_index])
            self.precisions_per_class.append(precision)
            self.recalls_per_class.append(recall)
            average_precision = metrics.compute_average_precision(
                precision, recall)
            self.average_precision_per_class[class_index] = average_precision

        self.corloc_per_class = metrics.compute_cor_loc(
            self.num_gt_imgs_per_class,
            self.num_images_correctly_detected_per_class)

        mean_ap = np.nanmean(self.average_precision_per_class)
        mean_corloc = np.nanmean(self.corloc_per_class)
        return (self.average_precision_per_class, mean_ap,
                self.precisions_per_class, self.recalls_per_class,
                self.corloc_per_class, mean_corloc)
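
# --- Illustrative sketch (not library code) ---
# CorLoc, as used by evaluate() above, is the fraction of images containing a
# class in which at least one instance was correctly localized. A minimal
# sketch of the per-class computation, returning NaN for classes with no
# ground truth images so that np.nanmean skips them. Hypothetical helper;
# see metrics.compute_cor_loc for the actual implementation.
import numpy as np


def compute_cor_loc_sketch(num_gt_imgs_per_class,
                           num_images_correctly_detected_per_class):
  num_gt_imgs = num_gt_imgs_per_class.astype(float)
  with np.errstate(invalid='ignore', divide='ignore'):
    corloc = num_images_correctly_detected_per_class / num_gt_imgs
  return np.where(num_gt_imgs == 0, np.nan, corloc)
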
    def evaluate(self):
        """Compute evaluation result.

        Returns:
          A named tuple with the following fields -
            average_precision: float numpy array of average precision for
                each class.
            mean_ap: mean average precision of all classes, float scalar
            precisions: List of precisions, each precision is a float numpy
                array
            recalls: List of recalls, each recall is a float numpy array
            corloc: float numpy array of CorLoc scores for each class
            mean_corloc: mean CorLoc score over all classes, float scalar
        """
        if (self.num_gt_instances_per_class == 0).any():
            logging.warning(
                'The following classes have no ground truth examples: %s',
                np.squeeze(np.argwhere(self.num_gt_instances_per_class == 0)) +
                self.label_id_offset)

        if self.use_weighted_mean_ap:
            all_scores = np.array([], dtype=float)
            all_tp_fp_labels = np.array([], dtype=bool)
        for class_index in range(self.num_class):
            if self.num_gt_instances_per_class[class_index] == 0:
                continue
            if not self.scores_per_class[class_index]:
                scores = np.array([], dtype=float)
                tp_fp_labels = np.array([], dtype=bool)
            else:
                scores = np.concatenate(self.scores_per_class[class_index])
                tp_fp_labels = np.concatenate(
                    self.tp_fp_labels_per_class[class_index])
            if self.use_weighted_mean_ap:
                all_scores = np.append(all_scores, scores)
                all_tp_fp_labels = np.append(all_tp_fp_labels, tp_fp_labels)
            logging.info('Scores and tp/fp labels for class %d: %s %s',
                         class_index, scores, tp_fp_labels)
            precision, recall = metrics.compute_precision_recall(
                scores, tp_fp_labels,
                self.num_gt_instances_per_class[class_index])
            self.precisions_per_class.append(precision)
            self.recalls_per_class.append(recall)
            average_precision = metrics.compute_average_precision(
                precision, recall)
            self.average_precision_per_class[class_index] = average_precision

        self.corloc_per_class = metrics.compute_cor_loc(
            self.num_gt_imgs_per_class,
            self.num_images_correctly_detected_per_class)

        if self.use_weighted_mean_ap:
            num_gt_instances = np.sum(self.num_gt_instances_per_class)
            precision, recall = metrics.compute_precision_recall(
                all_scores, all_tp_fp_labels, num_gt_instances)
            mean_ap = metrics.compute_average_precision(precision, recall)
        else:
            mean_ap = np.nanmean(self.average_precision_per_class)
        mean_corloc = np.nanmean(self.corloc_per_class)
        return ObjectDetectionEvalMetrics(self.average_precision_per_class,
                                          mean_ap, self.precisions_per_class,
                                          self.recalls_per_class,
                                          self.corloc_per_class, mean_corloc)
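
# --- Illustrative sketch (not library code) ---
# Toy illustration of the use_weighted_mean_ap branch above, using the
# hypothetical sketch helpers defined earlier. Macro mAP averages per-class
# APs with equal weight; the weighted variant pools all detections and all
# ground truth into a single AP, so classes with more instances dominate.
import numpy as np

scores_a = np.array([0.9])                 # class A: 1 detection, correct
labels_a = np.array([True])                # class A has 1 gt instance
scores_b = np.array([0.8, 0.7, 0.6])       # class B: 3 detections
labels_b = np.array([True, False, False])  # class B has 9 gt instances

# Macro mAP: each class contributes equally regardless of instance count.
ap_a = compute_average_precision_sketch(
    *compute_precision_recall_sketch(scores_a, labels_a, num_gt=1))
ap_b = compute_average_precision_sketch(
    *compute_precision_recall_sketch(scores_b, labels_b, num_gt=9))
macro_map = np.nanmean([ap_a, ap_b])       # (1.0 + 1/9) / 2 ~= 0.556

# Weighted mAP: pool all detections and use the total ground truth count.
all_scores = np.concatenate([scores_a, scores_b])
all_labels = np.concatenate([labels_a, labels_b])
weighted_map = compute_average_precision_sketch(
    *compute_precision_recall_sketch(all_scores, all_labels, num_gt=10))
# weighted_map == 0.2: class B's many missed instances pull the pooled AP down.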