    def test_merge_internal_state(self):
        # Test that merging the internal state into a fresh evaluator yields
        # the same evaluation results.
        od_eval_state = self.od_eval.get_internal_state()
        copy_od_eval = object_detection_evaluation.ObjectDetectionEvaluation(
            self.od_eval.num_class)
        copy_od_eval.merge_internal_state(od_eval_state)

        (average_precision_per_class, mean_ap, precisions_per_class,
         recalls_per_class, corloc_per_class,
         mean_corloc) = self.od_eval.evaluate()

        (copy_average_precision_per_class, copy_mean_ap,
         copy_precisions_per_class, copy_recalls_per_class,
         copy_corloc_per_class, copy_mean_corloc) = copy_od_eval.evaluate()

        for i in range(self.od_eval.num_class):
            self.assertTrue(
                np.allclose(copy_precisions_per_class[i],
                            precisions_per_class[i]))
            self.assertTrue(
                np.allclose(copy_recalls_per_class[i], recalls_per_class[i]))
        self.assertTrue(
            np.allclose(copy_average_precision_per_class,
                        average_precision_per_class))
        self.assertTrue(np.allclose(copy_corloc_per_class, corloc_per_class))
        self.assertAlmostEqual(copy_mean_ap, mean_ap)
        self.assertAlmostEqual(copy_mean_corloc, mean_corloc)
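For context, a minimal sketch of the aggregation pattern this test exercises: state accumulated by several evaluators is merged and evaluated once. This is an illustration only, assuming the get_internal_state()/merge_internal_state() API used in the test above and that states from multiple evaluators can be merged in sequence; the boxes, labels, and image keys are made up.

import numpy as np
from object_detection.utils import object_detection_evaluation

num_classes = 3

# Each shard accumulates its own subset of images (hypothetical data).
shard_a = object_detection_evaluation.ObjectDetectionEvaluation(num_classes)
shard_a.add_single_ground_truth_image_info(
    "img_a", np.array([[0, 0, 1, 1]], dtype=float), np.array([0], dtype=int))
shard_a.add_single_detected_image_info(
    "img_a", np.array([[0, 0, 1, 1]], dtype=float),
    np.array([0.9], dtype=float), np.array([0], dtype=int))

shard_b = object_detection_evaluation.ObjectDetectionEvaluation(num_classes)
shard_b.add_single_ground_truth_image_info(
    "img_b", np.array([[0, 0, 2, 2]], dtype=float), np.array([2], dtype=int))

# Merge both shard states into a fresh evaluator and evaluate once over all images.
aggregate = object_detection_evaluation.ObjectDetectionEvaluation(num_classes)
aggregate.merge_internal_state(shard_a.get_internal_state())
aggregate.merge_internal_state(shard_b.get_internal_state())
metrics = aggregate.evaluate()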
Example #2
    def setUp(self):
        num_groundtruth_classes = 3
        self.od_eval = object_detection_evaluation.ObjectDetectionEvaluation(
            num_groundtruth_classes)

        image_key1 = "img1"
        groundtruth_boxes1 = np.array([[0, 0, 1, 1], [0, 0, 2, 2], [0, 0, 3, 3]],
                                      dtype=float)
        groundtruth_class_labels1 = np.array([0, 2, 0], dtype=int)
        self.od_eval.add_single_ground_truth_image_info(
            image_key1, groundtruth_boxes1, groundtruth_class_labels1)
        image_key2 = "img2"
        groundtruth_boxes2 = np.array([[10, 10, 11, 11], [500, 500, 510, 510],
                                       [10, 10, 12, 12]], dtype=float)
        groundtruth_class_labels2 = np.array([0, 0, 2], dtype=int)
        groundtruth_is_difficult_list2 = np.array([False, True, False], dtype=bool)
        self.od_eval.add_single_ground_truth_image_info(
            image_key2, groundtruth_boxes2, groundtruth_class_labels2,
            groundtruth_is_difficult_list2)
        image_key3 = "img3"
        groundtruth_boxes3 = np.array([[0, 0, 1, 1]], dtype=float)
        groundtruth_class_labels3 = np.array([1], dtype=int)
        self.od_eval.add_single_ground_truth_image_info(
            image_key3, groundtruth_boxes3, groundtruth_class_labels3)

        image_key = "img2"
        detected_boxes = np.array(
            [[10, 10, 11, 11], [100, 100, 120, 120], [100, 100, 220, 220]],
            dtype=float)
        detected_class_labels = np.array([0, 0, 2], dtype=int)
        detected_scores = np.array([0.7, 0.8, 0.9], dtype=float)
        self.od_eval.add_single_detected_image_info(
            image_key, detected_boxes, detected_scores, detected_class_labels)
Example #3
    def _build_od_evaluation(self):
        # Per-image evaluation is not done inside the accumulator, so a stub class is supplied.
        def _per_image_eval_class(**kwargs):  # pylint: disable=unused-argument
            return None

        self._od_evaluation = od_eval.ObjectDetectionEvaluation(
            num_groundtruth_classes=self.num_classes,
            label_id_offset=1,
            use_weighted_mean_ap=self.use_weighted_mean_ap,
            per_image_eval_class=_per_image_eval_class)
Example #4
    def compute_od_eval(ground_truth_labels, prediction_labels, nb_classes):
        # Lazy import of TFOD
        from object_detection.utils import object_detection_evaluation

        matching_iou_threshold = 0.5
        od_eval = object_detection_evaluation.ObjectDetectionEvaluation(
            nb_classes, matching_iou_threshold=matching_iou_threshold)
        image_key = 'image'
        od_eval.add_single_ground_truth_image_info(
            image_key, ground_truth_labels.get_npboxes(),
            ground_truth_labels.get_class_ids() - 1)
        od_eval.add_single_detected_image_info(
            image_key, prediction_labels.get_npboxes(),
            prediction_labels.get_scores(),
            prediction_labels.get_class_ids() - 1)
        od_eval.evaluate()
        return od_eval
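For reference, a self-contained sketch of driving the snippet above, assuming compute_od_eval is available at module level as in its original project. The _Labels class is a hypothetical stand-in for whatever label object provides the get_npboxes()/get_scores()/get_class_ids() accessors that compute_od_eval expects; the box, class, and score values are made up.

import numpy as np

class _Labels:
    # Hypothetical stand-in exposing only the accessors compute_od_eval needs.
    def __init__(self, npboxes, class_ids, scores=None):
        self._npboxes = npboxes
        self._class_ids = class_ids
        self._scores = scores

    def get_npboxes(self):
        return self._npboxes

    def get_class_ids(self):
        return self._class_ids

    def get_scores(self):
        return self._scores

gt = _Labels(np.array([[0., 0., 1., 1.]], dtype=float),
             np.array([1], dtype=int))
pred = _Labels(np.array([[0., 0., 1., 1.]], dtype=float),
               np.array([1], dtype=int),
               np.array([0.9], dtype=float))

od_eval = compute_od_eval(gt, pred, nb_classes=2)
# evaluate() has already run inside compute_od_eval; the six-element result
# seen in the first example can be unpacked from the returned evaluator.
(per_class_ap, mean_ap, _, _, per_class_corloc, mean_corloc) = od_eval.evaluate()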
Example #5
def evaluate_detection_results_pascal_voc(result_lists,
                                          categories,
                                          label_id_offset=0,
                                          iou_thres=0.5,
                                          corloc_summary=False):
    """Computes Pascal VOC detection metrics given groundtruth and detections.

  This function takes detections and groundtruth boxes encoded in result_lists,
  computes Pascal VOC metrics, and returns them as a dictionary whose entries
  can be written to tf summaries for viewing on TensorBoard.

  Args:
    result_lists: a dictionary holding lists of groundtruth and detection
      data corresponding to each image being evaluated.  The following keys
      are required:
        'image_id': a list of string ids
        'detection_boxes': a list of float32 numpy arrays of shape [N, 4]
        'detection_scores': a list of float32 numpy arrays of shape [N]
        'detection_classes': a list of int32 numpy arrays of shape [N]
        'groundtruth_boxes': a list of float32 numpy arrays of shape [M, 4]
        'groundtruth_classes': a list of int32 numpy arrays of shape [M]
      and the remaining fields below are optional:
        'difficult': a list of boolean arrays of shape [M] indicating whether each
          groundtruth box is difficult. Some datasets such as PASCAL VOC provide
          this information, and difficult examples are excluded from evaluation
          so that models are not penalized on them.
      Note that it is okay to have additional fields in result_lists --- they
      are simply ignored.
    categories: a list of dictionaries representing all possible categories.
      Each dict in this list has the following keys:
          'id': (required) an integer id uniquely identifying this category
          'name': (required) string representing category name
            e.g., 'cat', 'dog', 'pizza'
    label_id_offset: an integer offset for the label space.
    iou_thres: float determining the IoU threshold at which a box is considered
        correct. Defaults to the standard 0.5.
    corloc_summary: boolean. If True, also outputs CorLoc metrics.

  Returns:
    A dictionary of metric names to scalar values.

  Raises:
    ValueError: if the set of keys in result_lists is not a superset of the
      expected list of keys.  Unexpected keys are ignored.
    ValueError: if the lists in result_lists have inconsistent sizes.
  """
    # check for expected keys in result_lists
    expected_keys = [
        'detection_boxes', 'detection_scores', 'detection_classes', 'image_id'
    ]
    expected_keys += ['groundtruth_boxes', 'groundtruth_classes']
    if not set(expected_keys).issubset(set(result_lists.keys())):
        raise ValueError('result_lists does not have expected key set.')
    num_results = len(result_lists[expected_keys[0]])
    for key in expected_keys:
        if len(result_lists[key]) != num_results:
            raise ValueError('Inconsistent list sizes in result_lists')

    # Pascal VOC evaluator assumes foreground index starts from zero.
    categories = copy.deepcopy(categories)
    for idx in range(len(categories)):
        categories[idx]['id'] -= label_id_offset

    # Infer the number of classes from the largest (offset-adjusted) category id.
    num_classes = max([cat['id'] for cat in categories]) + 1
    logging.info('Computing Pascal VOC metrics on results.')
    if all(image_id.isdigit() for image_id in result_lists['image_id']):
        image_ids = [int(image_id) for image_id in result_lists['image_id']]
    else:
        image_ids = range(num_results)

    evaluator = object_detection_evaluation.ObjectDetectionEvaluation(
        num_classes, matching_iou_threshold=iou_thres)

    difficult_lists = None
    if 'difficult' in result_lists and result_lists['difficult']:
        difficult_lists = result_lists['difficult']
    for idx, image_id in enumerate(image_ids):
        difficult = None
        if difficult_lists is not None and difficult_lists[idx].size:
            difficult = difficult_lists[idx].astype(bool)
        evaluator.add_single_ground_truth_image_info(
            image_id, result_lists['groundtruth_boxes'][idx],
            result_lists['groundtruth_classes'][idx] - label_id_offset,
            difficult)
        evaluator.add_single_detected_image_info(
            image_id, result_lists['detection_boxes'][idx],
            result_lists['detection_scores'][idx],
            result_lists['detection_classes'][idx] - label_id_offset)
    per_class_ap, mean_ap, _, _, per_class_corloc, mean_corloc = (
        evaluator.evaluate())

    metrics = {'Precision/mAP@{}IOU'.format(iou_thres): mean_ap}
    category_index = label_map_util.create_category_index(categories)
    for idx in range(per_class_ap.size):
        if idx in category_index:
            display_name = ('PerformanceByCategory/mAP@{}IOU/{}'.format(
                iou_thres, category_index[idx]['name']))
            metrics[display_name] = per_class_ap[idx]

    if corloc_summary:
        metrics['CorLoc/CorLoc@{}IOU'.format(iou_thres)] = mean_corloc
        for idx in range(per_class_corloc.size):
            if idx in category_index:
                display_name = ('PerformanceByCategory/CorLoc@{}IOU/{}'.format(
                    iou_thres, category_index[idx]['name']))
                metrics[display_name] = per_class_corloc[idx]
    return metrics
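For illustration, a minimal call to the function above with a single hypothetical image. The boxes, scores, and category names are made up; the snippet assumes numpy, copy, logging, object_detection_evaluation, and label_map_util are importable in the enclosing module, as the function itself requires.

import numpy as np

result_lists = {
    'image_id': ['0'],
    'detection_boxes': [np.array([[0., 0., 1., 1.]], dtype=np.float32)],
    'detection_scores': [np.array([0.9], dtype=np.float32)],
    'detection_classes': [np.array([1], dtype=np.int32)],
    'groundtruth_boxes': [np.array([[0., 0., 1., 1.]], dtype=np.float32)],
    'groundtruth_classes': [np.array([1], dtype=np.int32)],
}
categories = [{'id': 1, 'name': 'cat'}, {'id': 2, 'name': 'dog'}]

metrics = evaluate_detection_results_pascal_voc(
    result_lists, categories, label_id_offset=1, corloc_summary=True)
# Expected keys include 'Precision/mAP@0.5IOU', 'CorLoc/CorLoc@0.5IOU', and
# per-category 'PerformanceByCategory/...' entries.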
Example #6
    def test_value_error_on_zero_classes(self):
        with self.assertRaises(ValueError):
            object_detection_evaluation.ObjectDetectionEvaluation(
                num_groundtruth_classes=0)