def evaluate(self):
        """Evaluates the detection boxes and returns a dictionary of coco metrics.

    Returns:
      A dictionary holding -

      1. summary_metrics:
      'DetectionBoxes_Precision/mAP': mean average precision over classes
        averaged over IOU thresholds ranging from .5 to .95 with .05
        increments.
      'DetectionBoxes_Precision/[email protected]': mean average precision at 50% IOU
      'DetectionBoxes_Precision/[email protected]': mean average precision at 75% IOU
      'DetectionBoxes_Precision/mAP (small)': mean average precision for small
        objects (area < 32^2 pixels).
      'DetectionBoxes_Precision/mAP (medium)': mean average precision for
        medium sized objects (32^2 pixels < area < 96^2 pixels).
      'DetectionBoxes_Precision/mAP (large)': mean average precision for large
        objects (96^2 pixels < area < 10000^2 pixels).
      'DetectionBoxes_Recall/AR@1': average recall with 1 detection.
      'DetectionBoxes_Recall/AR@10': average recall with 10 detections.
      'DetectionBoxes_Recall/AR@100': average recall with 100 detections.
      'DetectionBoxes_Recall/AR@100 (small)': average recall for small objects
        with 100 detections.
      'DetectionBoxes_Recall/AR@100 (medium)': average recall for medium
        objects with 100 detections.
      'DetectionBoxes_Recall/AR@100 (large)': average recall for large objects
        with 100 detections.

      2. per_category_ap: if include_metrics_per_category is True, category
      specific results with keys of the form:
      'Precision mAP ByCategory/category' (without the supercategory part if
      no supercategories exist). For backward compatibility
      'PerformanceByCategory' is included in the output regardless of
      all_metrics_per_category.
    """
        # Assemble the accumulated groundtruth into the COCO-format dict
        # expected by COCOWrapper: annotations, image records, categories.
        groundtruth_dict = {
            'annotations': self._groundtruth_list,
            'images': [{
                'id': image_id
            } for image_id in self._image_ids],
            'categories': self._categories
        }
        coco_wrapped_groundtruth = coco_tools.COCOWrapper(groundtruth_dict)
        # Detections are loaded against the groundtruth wrapper so they share
        # image ids and category ids.
        coco_wrapped_detections = coco_wrapped_groundtruth.LoadAnnotations(
            self._detection_boxes_list)
        box_evaluator = coco_tools.COCOEvalWrapper(coco_wrapped_groundtruth,
                                                   coco_wrapped_detections,
                                                   agnostic_mode=False)
        box_metrics, box_per_category_ap = box_evaluator.ComputeMetrics(
            include_metrics_per_category=self._include_metrics_per_category,
            all_metrics_per_category=self._all_metrics_per_category)
        box_metrics.update(box_per_category_ap)
        # Namespace every metric key so box metrics cannot collide with mask
        # metrics when both evaluators' results are merged. (The redundant
        # iter() wrapper around .items() was dropped; .items() is directly
        # iterable in Python 3.)
        box_metrics = {
            'DetectionBoxes_' + key: value
            for key, value in box_metrics.items()
        }
        return box_metrics
  def evaluate(self):
    """Evaluates the detection masks and returns a dictionary of coco metrics.

    Returns:
      A dictionary holding -

      1. summary_metrics:
      'DetectionMasks_Precision/mAP': mean average precision over classes
        averaged over IOU thresholds ranging from .5 to .95 with .05 increments
      'DetectionMasks_Precision/[email protected]': mean average precision at 50% IOU
      'DetectionMasks_Precision/[email protected]': mean average precision at 75% IOU
      'DetectionMasks_Precision/mAP (small)': mean average precision for small
                      objects (area < 32^2 pixels)
      'DetectionMasks_Precision/mAP (medium)': mean average precision for
                      medium sized objects (32^2 pixels < area < 96^2 pixels)
      'DetectionMasks_Precision/mAP (large)': mean average precision for large
                      objects (96^2 pixels < area < 10000^2 pixels)
      'DetectionMasks_Recall/AR@1': average recall with 1 detection
      'DetectionMasks_Recall/AR@10': average recall with 10 detections
      'DetectionMasks_Recall/AR@100': average recall with 100 detections
      'DetectionMasks_Recall/AR@100 (small)': average recall for small objects
        with 100 detections
      'DetectionMasks_Recall/AR@100 (medium)': average recall for medium
        objects with 100 detections
      'DetectionMasks_Recall/AR@100 (large)': average recall for large objects
        with 100 detections

      2. per_category_ap: if include_metrics_per_category is True, category
      specific results with keys of the form:
      'Precision mAP ByCategory/category' (without the supercategory part if
      no supercategories exist). For backward compatibility
      'PerformanceByCategory' is included in the output regardless of
      all_metrics_per_category.
    """
    # Image height/width are required for mask (RLE) evaluation; shape is
    # assumed to be (num_masks, height, width), so indices 1 and 2 are the
    # spatial dimensions. NOTE: dict.iteritems() was Python 2 only and raised
    # AttributeError under Python 3 — replaced with .items().
    groundtruth_dict = {
        'annotations': self._groundtruth_list,
        'images': [{'id': image_id, 'height': shape[1], 'width': shape[2]}
                   for image_id, shape in
                   self._image_id_to_mask_shape_map.items()],
        'categories': self._categories
    }
    coco_wrapped_groundtruth = coco_tools.COCOWrapper(
        groundtruth_dict, detection_type='segmentation')
    coco_wrapped_detection_masks = coco_wrapped_groundtruth.LoadAnnotations(
        self._detection_masks_list)
    # iou_type='segm' makes COCOeval score mask overlap rather than box IOU.
    mask_evaluator = coco_tools.COCOEvalWrapper(
        coco_wrapped_groundtruth, coco_wrapped_detection_masks,
        agnostic_mode=False, iou_type='segm')
    mask_metrics, mask_per_category_ap = mask_evaluator.ComputeMetrics(
        include_metrics_per_category=self._include_metrics_per_category)
    mask_metrics.update(mask_per_category_ap)
    # Namespace keys so mask metrics do not collide with box metrics.
    mask_metrics = {'DetectionMasks_' + key: value
                    for key, value in mask_metrics.items()}
    return mask_metrics
 def testCocoWrappers(self):
     """Verifies the COCO wrappers score the fixture detections at mAP 1.0."""
     gt_wrapper = coco_tools.COCOWrapper(self._groundtruth_dict)
     loaded_detections = gt_wrapper.LoadAnnotations(self._detections_list)
     wrapper_evaluator = coco_tools.COCOEvalWrapper(gt_wrapper,
                                                    loaded_detections)
     metrics, _ = wrapper_evaluator.ComputeMetrics()
     self.assertAlmostEqual(1.0, metrics['Precision/mAP'])