Ejemplo n.º 1
0
def ExportSegmentsToCOCO(image_ids,
                         detection_masks,
                         detection_scores,
                         detection_classes,
                         categories,
                         output_path=None):
  """Export segmentation masks in numpy arrays to COCO API.

  This function converts a set of predicted instance masks represented
  as numpy arrays to dictionaries that can be ingested by the COCO API.
  Inputs to this function are lists, consisting of segments, scores and
  classes, respectively, corresponding to each image for which detections
  have been produced.

  Note this function is recommended for small datasets only. For large
  datasets it should be used together with a merge function (e.g. in a
  map-reduce), otherwise the memory consumption is large.

  We assume that for each image, masks, scores and classes are in
  correspondence --- that is: detection_masks[i, :, :, :], detection_scores[i]
  and detection_classes[i] are associated with the same detection.

  Args:
    image_ids: list of image ids (typically ints or strings)
    detection_masks: list of numpy arrays with shape [num_detection, h, w, 1]
      and type uint8. The height and width should match the shape of
      corresponding image.
    detection_scores: list of numpy arrays (float) with shape
      [num_detection]. Note that num_detection can be different
      for each entry in the list.
    detection_classes: list of numpy arrays (int) with shape
      [num_detection]. Note that num_detection can be different
      for each entry in the list.
    categories: a list of dictionaries representing all possible categories.
      Each dict in this list must have an integer 'id' key uniquely identifying
      this category.
    output_path: (optional) path for exporting result to JSON

  Returns:
    list of dictionaries that can be read by COCO API, where each entry
    corresponds to a single detection and has keys from:
    ['image_id', 'category_id', 'segmentation', 'score'].

  Raises:
    ValueError: if the input lists do not all have the same length, if an
      entry of detection_classes or detection_scores is not rank 1, if an
      entry of detection_masks is not rank 4, or if the entries for one image
      disagree on the number of detections.
  """
  if not (len(image_ids) == len(detection_masks) == len(detection_scores) ==
          len(detection_classes)):
    raise ValueError('Input lists must have the same length')

  # The set of valid category ids does not depend on the image, so build it
  # once instead of once per loop iteration.
  category_id_set = {cat['id'] for cat in categories}

  segment_export_list = []
  for image_id, masks, scores, classes in zip(image_ids, detection_masks,
                                              detection_scores,
                                              detection_classes):

    if len(classes.shape) != 1 or len(scores.shape) != 1:
      raise ValueError('All entries in detection_classes and detection_scores '
                       'expected to be of rank 1.')
    if len(masks.shape) != 4:
      raise ValueError('All entries in masks expected to be of '
                       'rank 4. Given {}'.format(masks.shape))

    num_boxes = classes.shape[0]
    if not num_boxes == masks.shape[0] == scores.shape[0]:
      raise ValueError('Corresponding entries in detection_classes, '
                       'detection_scores and detection_masks should have '
                       'compatible shapes (i.e., agree on the 0th dimension).')

    # Drop the trailing singleton channel axis before handing the masks to
    # the per-image exporter.
    segment_export_list.extend(ExportSingleImageDetectionMasksToCoco(
        image_id, category_id_set, np.squeeze(masks, axis=3), scores, classes))

  if output_path:
    with tf.gfile.GFile(output_path, 'w') as fid:
      json_utils.Dump(segment_export_list, fid, float_digits=4, indent=2)
  return segment_export_list
Ejemplo n.º 2
0
def ExportKeypointsToCOCO(image_ids,
                          detection_keypoints,
                          detection_scores,
                          detection_classes,
                          categories,
                          output_path=None):
  """Exports keypoints in numpy arrays to COCO API.

  This function converts a set of predicted keypoints represented
  as numpy arrays to dictionaries that can be ingested by the COCO API.
  Inputs to this function are lists, consisting of keypoints, scores and
  classes, respectively, corresponding to each image for which detections
  have been produced.

  We assume that for each image, keypoints, scores and classes are in
  correspondence --- that is: detection_keypoints[i, :, :],
  detection_scores[i] and detection_classes[i] are associated with the same
  detection.

  Detections whose category has no 'num_keypoints' entry are silently skipped.
  Exported keypoint coordinates are truncated to integers and each keypoint is
  followed by a visibility flag of 1.

  Args:
    image_ids: list of image ids (typically ints or strings)
    detection_keypoints: list of numpy arrays with shape
      [num_detection, num_keypoints, 2] and type float32 in absolute
      x-y coordinates.
    detection_scores: list of numpy arrays (float) with shape
      [num_detection]. Note that num_detection can be different
      for each entry in the list.
    detection_classes: list of numpy arrays (int) with shape
      [num_detection]. Note that num_detection can be different
      for each entry in the list.
    categories: a list of dictionaries representing all possible categories.
      Each dict in this list must have an integer 'id' key uniquely identifying
      this category and an integer 'num_keypoints' key specifying the number of
      keypoints the category has.
    output_path: (optional) path for exporting result to JSON

  Returns:
    list of dictionaries that can be read by COCO API, where each entry
    corresponds to a single detection and has keys from:
    ['image_id', 'category_id', 'keypoints', 'score'].

  Raises:
    ValueError: if the input lists do not all have the same length, if an
      entry of detection_classes or detection_scores is not rank 1, if an
      entry of detection_keypoints is not rank 3, if the entries for one
      image disagree on the number of detections, or if a detected class id
      is not among the ids in categories.
  """
  if not (len(image_ids) == len(detection_keypoints) ==
          len(detection_scores) == len(detection_classes)):
    raise ValueError('Input lists must have the same length')

  # Both lookups depend only on `categories`, so build them once rather than
  # once per image.
  category_id_set = {cat['id'] for cat in categories}
  category_id_to_num_keypoints_map = {
      cat['id']: cat['num_keypoints'] for cat in categories
      if 'num_keypoints' in cat}

  keypoints_export_list = []
  for image_id, keypoints, scores, classes in zip(
      image_ids, detection_keypoints, detection_scores, detection_classes):

    if len(classes.shape) != 1 or len(scores.shape) != 1:
      raise ValueError('All entries in detection_classes and detection_scores '
                       'expected to be of rank 1.')
    if len(keypoints.shape) != 3:
      raise ValueError('All entries in keypoints expected to be of '
                       'rank 3. Given {}'.format(keypoints.shape))

    num_boxes = classes.shape[0]
    if not num_boxes == keypoints.shape[0] == scores.shape[0]:
      raise ValueError('Corresponding entries in detection_classes, '
                       'detection_keypoints, and detection_scores should have '
                       'compatible shapes (i.e., agree on the 0th dimension).')

    for i in range(num_boxes):
      class_id = classes[i]
      if class_id not in category_id_set:
        raise ValueError('class id should be in category_id_set\n')

      # Categories without a declared keypoint count produce no output.
      if class_id not in category_id_to_num_keypoints_map:
        continue

      num_keypoints = category_id_to_num_keypoints_map[class_id]
      # Adds extra ones to indicate the visibility for each keypoint as is
      # recommended by MSCOCO.
      visibility = np.ones((num_keypoints, 1))
      instance_keypoints = np.concatenate(
          [keypoints[i, 0:num_keypoints, :], visibility],
          axis=1).astype(int)

      keypoints_export_list.append({
          'image_id': image_id,
          'category_id': int(class_id),
          # Flattened as [x1, y1, v1, x2, y2, v2, ...].
          'keypoints': instance_keypoints.flatten().tolist(),
          'score': float(scores[i])
      })

  if output_path:
    with tf.gfile.GFile(output_path, 'w') as fid:
      json_utils.Dump(keypoints_export_list, fid, float_digits=4, indent=2)
  return keypoints_export_list
Ejemplo n.º 3
0
def ExportDetectionsToCOCO(image_ids,
                           detection_boxes,
                           detection_scores,
                           detection_classes,
                           categories,
                           output_path=None):
  """Convert per-image detection arrays into COCO-style result dicts.

  Each of the four input lists holds one entry per image; entry i of every
  list describes the same image. Within an image, detection_boxes[i, :],
  detection_scores[i] and detection_classes[i] describe the same detection.
  The per-image conversion is delegated to
  ExportSingleImageDetectionBoxesToCoco.

  The image_ids passed here must match the ones given to
  ExportGroundtruthToCOCO for evaluation to work properly.

  Args:
    image_ids: a list of unique image identifiers, each an integer or a
      string.
    detection_boxes: list of numpy arrays with shape [num_detection_boxes, 4].
    detection_scores: list of numpy arrays (float) with shape
      [num_detection_boxes]; num_detection_boxes may differ between entries.
    detection_classes: list of numpy arrays (int) with shape
      [num_detection_boxes]; num_detection_boxes may differ between entries.
    categories: a list of dictionaries representing all possible categories.
      Each dict must have an integer 'id' key uniquely identifying the
      category.
    output_path: (optional) path for exporting the result to JSON.

  Returns:
    list of dictionaries that can be read by COCO API, where each entry
    corresponds to a single detection and has keys from:
    ['image_id', 'category_id', 'bbox', 'score'].

  Raises:
    ValueError: if the four input lists differ in length. The per-image
      helper may raise further errors for malformed entries.
  """
  valid_category_ids = set(cat['id'] for cat in categories)

  same_length = (len(image_ids) == len(detection_boxes) ==
                 len(detection_scores) == len(detection_classes))
  if not same_length:
    raise ValueError('Input lists must have the same length')

  # Flatten the per-image results into one list, preserving image order.
  exported = [
      detection
      for image_id, boxes, scores, classes in zip(
          image_ids, detection_boxes, detection_scores, detection_classes)
      for detection in ExportSingleImageDetectionBoxesToCoco(
          image_id, valid_category_ids, boxes, scores, classes)
  ]

  if not output_path:
    return exported

  with tf.gfile.GFile(output_path, 'w') as json_file:
    json_utils.Dump(exported, json_file, float_digits=4, indent=2)
  return exported
Ejemplo n.º 4
0
def ExportGroundtruthToCOCO(image_ids,
                            groundtruth_boxes,
                            groundtruth_classes,
                            categories,
                            output_path=None):
  """Convert per-image groundtruth arrays into a COCO-style dataset dict.

  The three input lists hold one entry per image: the image id, its
  groundtruth boxes and its groundtruth classes. Within an image,
  groundtruth_boxes[i, :] and groundtruth_classes[i] describe the same
  annotation. The per-image conversion is delegated to
  ExportSingleImageGroundtruthToCoco.

  The image_ids passed here must match the ones given to
  ExportDetectionsToCOCO for evaluation to work properly.

  In the exported result, "area" fields are expected to hold the area of the
  groundtruth bounding box and "iscrowd" fields are expected to be 0 (both
  are filled in by the per-image helper).
  TODO(jonathanhuang): pass in "iscrowd" array for evaluating on COCO dataset.

  Args:
    image_ids: a list of unique image identifiers, each an integer or a
      string.
    groundtruth_boxes: list of numpy arrays with shape [num_gt_boxes, 4]
      (num_gt_boxes can differ between entries).
    groundtruth_classes: list of numpy arrays (int) with shape [num_gt_boxes]
      (num_gt_boxes can differ between entries).
    categories: a list of dictionaries representing all possible categories.
        Each dict in this list has the following keys:
          'id': (required) an integer id uniquely identifying this category
          'name': (required) string representing category name
            e.g., 'cat', 'dog', 'pizza'
          'supercategory': (optional) string representing the supercategory
            e.g., 'animal', 'vehicle', 'food', etc
    output_path: (optional) path for exporting the result to JSON.

  Returns:
    dictionary with keys 'annotations', 'images' and 'categories' that can
    be read by COCO API.

  Raises:
    ValueError: if the three input lists differ in length. The per-image
      helper may raise further errors for malformed entries.
  """
  valid_category_ids = set(cat['id'] for cat in categories)

  if not (len(image_ids) == len(groundtruth_boxes) ==
          len(groundtruth_classes)):
    raise ValueError('Input lists must have the same length')

  images = []
  annotations = []
  # For reasons internal to the COCO API, annotation ids must not be zero,
  # so numbering starts at 1 and runs consecutively across all images.
  next_annotation_id = 1
  for image_id, boxes, classes in zip(image_ids, groundtruth_boxes,
                                      groundtruth_classes):
    images.append({'id': image_id})
    annotations.extend(ExportSingleImageGroundtruthToCoco(
        image_id, next_annotation_id, valid_category_ids, boxes, classes))
    # Reserve one id per groundtruth box of this image.
    next_annotation_id += classes.shape[0]

  coco_groundtruth = {
      'annotations': annotations,
      'images': images,
      'categories': categories
  }
  if not output_path:
    return coco_groundtruth

  with tf.gfile.GFile(output_path, 'w') as json_file:
    json_utils.Dump(coco_groundtruth, json_file, float_digits=4, indent=2)
  return coco_groundtruth
Ejemplo n.º 5
0
 def testDumpUnspecifiedPrecision(self):
     """Dump without float_digits writes the float with all its digits."""
     json_path = os.path.join(tf.test.get_temp_dir(), 'test.json')
     with tf.gfile.GFile(json_path, 'w') as writer:
         json_utils.Dump(1.012345, writer)
     # Read the file back and compare the raw text, not a parsed value.
     with tf.gfile.GFile(json_path, 'r') as reader:
         written = reader.read()
     self.assertEqual(written, '1.012345')
Ejemplo n.º 6
0
 def testDumpZeroPrecision(self):
     """Dump with float_digits=0 writes a float without a fractional part."""
     json_path = os.path.join(tf.test.get_temp_dir(), 'test.json')
     with tf.gfile.GFile(json_path, 'w') as writer:
         json_utils.Dump(1.0, writer, float_digits=0, indent=3)
     # Read the file back and compare the raw text, not a parsed value.
     with tf.gfile.GFile(json_path, 'r') as reader:
         written = reader.read()
     self.assertEqual(written, '1')
Ejemplo n.º 7
0
 def testDumpPassExtraParams(self):
     """Dump honors float_digits and indent together for a list payload."""
     json_path = os.path.join(tf.test.get_temp_dir(), 'test.json')
     with tf.gfile.GFile(json_path, 'w') as writer:
         json_utils.Dump([1.0], writer, float_digits=2, indent=3)
     # Expect a three-space indent and exactly two decimal places.
     with tf.gfile.GFile(json_path, 'r') as reader:
         written = reader.read()
     self.assertEqual(written, '[\n   1.00\n]')