Esempio n. 1
0
    def evaluate(self):
        """Computes evaluation result from the accumulated detections.

        Per-relationship average precisions are written into
        `self._average_precisions` (keyed by relation field value) and every
        computed metric is also cached on the instance before being returned.

        Returns:
          A `VRDDetectionEvalMetrics` built positionally from, in order -
            weighted average precision: average precision computed from the
              precisions/recalls over all detections pooled together.
            mean average precision: mean of the values stored in
              `self._average_precisions` (NaN when that mapping is empty).
            average precisions: the `self._average_precisions` mapping.
            precisions: an array of precisions.
            recalls: an array of recalls.
            recall@50: recall computed on 50 top-scoring samples.
            recall@100: recall computed on 100 top-scoring samples.
            median_rank@50: median rank computed on 50 top-scoring samples.
            median_rank@100: median rank computed on 100 top-scoring samples.
        """
        if self._num_gt_instances == 0:
            logging.warning('No ground truth instances')

        if not self._scores:
            scores = np.array([], dtype=float)
            tp_fp_labels = np.array([], dtype=bool)
            # Must be bound even without detections: the per-relationship
            # loop below still runs for every ground-truth relationship.
            # dtype=object keeps `==` element-wise for any key type.
            relation_field_values = np.array([], dtype=object)
        else:
            scores = np.concatenate(self._scores)
            tp_fp_labels = np.concatenate(self._tp_fp_labels)
            relation_field_values = np.concatenate(self._relation_field_values)

        for relation_field_value, num_gt in six.iteritems(
                self._num_gt_instances_per_relationship):
            # Select only the detections belonging to this relationship.
            selected = relation_field_values == relation_field_value
            precisions, recalls = metrics.compute_precision_recall(
                scores[selected], tp_fp_labels[selected], num_gt)
            self._average_precisions[relation_field_value] = (
                metrics.compute_average_precision(precisions, recalls))

        self._mean_average_precision = np.mean(
            list(self._average_precisions.values()))

        self._precisions, self._recalls = metrics.compute_precision_recall(
            scores, tp_fp_labels, self._num_gt_instances)
        self._weighted_average_precision = metrics.compute_average_precision(
            self._precisions, self._recalls)

        # The @k metrics receive the un-concatenated `self._tp_fp_labels`,
        # not the flattened `tp_fp_labels` array used above.
        self._recall_50 = metrics.compute_recall_at_k(
            self._tp_fp_labels, self._num_gt_instances, 50)
        self._median_rank_50 = metrics.compute_median_rank_at_k(
            self._tp_fp_labels, 50)
        self._recall_100 = metrics.compute_recall_at_k(
            self._tp_fp_labels, self._num_gt_instances, 100)
        self._median_rank_100 = metrics.compute_median_rank_at_k(
            self._tp_fp_labels, 100)

        return VRDDetectionEvalMetrics(
            self._weighted_average_precision, self._mean_average_precision,
            self._average_precisions, self._precisions, self._recalls,
            self._recall_50, self._recall_100, self._median_rank_50,
            self._median_rank_100)
Esempio n. 2
0
  def evaluate(self):
    """Computes evaluation result from the accumulated detections.

    Per-relationship average precisions are written into
    `self._average_precisions` (keyed by relation field value) and every
    computed metric is also cached on the instance before being returned.

    Returns:
      A `VRDDetectionEvalMetrics` built positionally from, in order -
        weighted average precision: average precision computed from the
          precisions/recalls over all detections pooled together.
        mean average precision: mean of the values stored in
          `self._average_precisions` (NaN when that mapping is empty).
        average precisions: the `self._average_precisions` mapping.
        precisions: an array of precisions.
        recalls: an array of recalls.
        recall@50: recall computed on 50 top-scoring samples.
        recall@100: recall computed on 100 top-scoring samples.
        median_rank@50: median rank computed on 50 top-scoring samples.
        median_rank@100: median rank computed on 100 top-scoring samples.
    """
    if self._num_gt_instances == 0:
      # `logging.warn` is a deprecated alias of `logging.warning`.
      logging.warning('No ground truth instances')

    if not self._scores:
      scores = np.array([], dtype=float)
      tp_fp_labels = np.array([], dtype=bool)
      # Must be bound even without detections: the per-relationship loop
      # below still runs for every ground-truth relationship.
      # dtype=object keeps `==` element-wise for any key type.
      relation_field_values = np.array([], dtype=object)
    else:
      scores = np.concatenate(self._scores)
      tp_fp_labels = np.concatenate(self._tp_fp_labels)
      relation_field_values = np.concatenate(self._relation_field_values)

    # `.items()` works on both Python 2 and 3; `.iteritems()` is Python 2 only.
    for relation_field_value, num_gt in (
        self._num_gt_instances_per_relationship.items()):
      # Select only the detections belonging to this relationship.
      selected = relation_field_values == relation_field_value
      precisions, recalls = metrics.compute_precision_recall(
          scores[selected], tp_fp_labels[selected], num_gt)
      self._average_precisions[relation_field_value] = (
          metrics.compute_average_precision(precisions, recalls))

    # Materialize the values: on Python 3 `dict.values()` is a view object,
    # which `np.mean` cannot average.
    self._mean_average_precision = np.mean(
        list(self._average_precisions.values()))

    self._precisions, self._recalls = metrics.compute_precision_recall(
        scores, tp_fp_labels, self._num_gt_instances)
    self._weighted_average_precision = metrics.compute_average_precision(
        self._precisions, self._recalls)

    # The @k metrics receive the un-concatenated `self._tp_fp_labels`, not
    # the flattened `tp_fp_labels` array used above.
    self._recall_50 = metrics.compute_recall_at_k(
        self._tp_fp_labels, self._num_gt_instances, 50)
    self._median_rank_50 = metrics.compute_median_rank_at_k(
        self._tp_fp_labels, 50)
    self._recall_100 = metrics.compute_recall_at_k(
        self._tp_fp_labels, self._num_gt_instances, 100)
    self._median_rank_100 = metrics.compute_median_rank_at_k(
        self._tp_fp_labels, 100)

    return VRDDetectionEvalMetrics(
        self._weighted_average_precision, self._mean_average_precision,
        self._average_precisions, self._precisions, self._recalls,
        self._recall_50, self._recall_100, self._median_rank_50,
        self._median_rank_100)
Esempio n. 3
0
  def test_compute_median_rank_at_k(self):
    """Checks `metrics.compute_median_rank_at_k` on float and bool labels."""
    # Non-zero entries sit at index 0 of the first array and index 1 of the
    # second; the third array has none.
    tp_fp = [
        np.array([1, 0, 0], dtype=float),
        np.array([0, 0.1], dtype=float),
        np.array([0, 0, 0, 0, 0], dtype=float)
    ]
    # Same positive positions as `tp_fp`, expressed as genuine boolean
    # arrays so the boolean input path is exercised end to end.
    tp_fp_bool = [
        np.array([True, False, False], dtype=bool),
        np.array([False, True], dtype=bool),
        np.array([False, False, False, False, False], dtype=bool)
    ]

    median_ranks_1 = metrics.compute_median_rank_at_k(tp_fp, 1)
    median_ranks_3 = metrics.compute_median_rank_at_k(tp_fp, 3)
    median_ranks_5 = metrics.compute_median_rank_at_k(tp_fp, 5)
    median_ranks_3_bool = metrics.compute_median_rank_at_k(tp_fp_bool, 3)

    # `assertEquals` is a deprecated alias that was removed in Python 3.12.
    self.assertEqual(median_ranks_1, 0)
    self.assertEqual(median_ranks_3, 0.5)
    self.assertEqual(median_ranks_3_bool, 0.5)
    self.assertEqual(median_ranks_5, 0.5)
Esempio n. 4
0
  def test_compute_median_rank_at_k(self):
    """Checks `metrics.compute_median_rank_at_k` on float and bool labels."""
    # Non-zero entries sit at index 0 of the first array and index 1 of the
    # second; the third array has none.
    tp_fp = [
        np.array([1, 0, 0], dtype=float),
        np.array([0, 0.1], dtype=float),
        np.array([0, 0, 0, 0, 0], dtype=float)
    ]
    # Same positive positions as `tp_fp`, expressed as genuine boolean
    # arrays so the boolean input path is exercised end to end.
    tp_fp_bool = [
        np.array([True, False, False], dtype=bool),
        np.array([False, True], dtype=bool),
        np.array([False, False, False, False, False], dtype=bool)
    ]

    median_ranks_1 = metrics.compute_median_rank_at_k(tp_fp, 1)
    median_ranks_3 = metrics.compute_median_rank_at_k(tp_fp, 3)
    median_ranks_5 = metrics.compute_median_rank_at_k(tp_fp, 5)
    median_ranks_3_bool = metrics.compute_median_rank_at_k(tp_fp_bool, 3)

    # `assertEquals` is a deprecated alias that was removed in Python 3.12.
    self.assertEqual(median_ranks_1, 0)
    self.assertEqual(median_ranks_3, 0.5)
    self.assertEqual(median_ranks_3_bool, 0.5)
    self.assertEqual(median_ranks_5, 0.5)