Example #1
  def extract_images_and_targets(read_data):
    """Extract images and targets from the input dict."""
    image = read_data[fields.InputDataFields.image]
    key = ''
    if fields.InputDataFields.source_id in read_data:
      key = read_data[fields.InputDataFields.source_id]
    location_gt = read_data[fields.InputDataFields.groundtruth_boxes]
    classes_gt = tf.cast(read_data[fields.InputDataFields.groundtruth_classes],
                         tf.int32)
    classes_gt -= label_id_offset

    if merge_multiple_label_boxes and use_multiclass_scores:
      raise ValueError(
          'Using both merge_multiple_label_boxes and use_multiclass_scores is '
          'not supported'
      )

    if merge_multiple_label_boxes:
      location_gt, classes_gt, _ = util_ops.merge_boxes_with_multiple_labels(
          location_gt, classes_gt, num_classes)
    elif use_multiclass_scores:
      classes_gt = tf.cast(read_data[fields.InputDataFields.multiclass_scores],
                           tf.float32)
    else:
      classes_gt = util_ops.padded_one_hot_encoding(
          indices=classes_gt, depth=num_classes, left_pad=0)
    masks_gt = read_data.get(fields.InputDataFields.groundtruth_instance_masks)
    keypoints_gt = read_data.get(fields.InputDataFields.groundtruth_keypoints)
    if (merge_multiple_label_boxes and (
        masks_gt is not None or keypoints_gt is not None)):
      raise NotImplementedError('Multi-label support is only for boxes.')
    weights_gt = read_data.get(
        fields.InputDataFields.groundtruth_weights)
    return (image, key, location_gt, classes_gt, masks_gt, keypoints_gt,
            weights_gt)
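In all of these examples, the `merge_multiple_label_boxes` branch relies on `util_ops.merge_boxes_with_multiple_labels` to collapse duplicate boxes into a single row with a k-hot class vector, while the `padded_one_hot_encoding` branch keeps one row per box. A minimal NumPy sketch of the merge semantics; `merge_duplicate_boxes` is a hypothetical toy helper, not the library op, and it omits the merged box indices the real op also returns:

import numpy as np

def merge_duplicate_boxes(boxes, class_indices, num_classes):
    # Toy re-implementation of the merge semantics: rows with identical box
    # coordinates collapse into one row whose class vector is k-hot.
    merged = {}
    for box, cls in zip(map(tuple, boxes), class_indices):
        if box not in merged:
            merged[box] = np.zeros(num_classes, dtype=np.int32)
        merged[box][cls] = 1
    merged_boxes = np.array(list(merged.keys()), dtype=np.float32)
    merged_classes = np.stack(list(merged.values()))
    return merged_boxes, merged_classes

boxes = np.array([[0.25, 0.25, 0.75, 0.75],
                  [0.0, 0.0, 0.5, 0.75],
                  [0.25, 0.25, 0.75, 0.75]], dtype=np.float32)
merged_boxes, merged_classes = merge_duplicate_boxes(boxes, [0, 4, 2], 5)
# merged_boxes has two rows; the duplicated box carries the k-hot class
# vector [1, 0, 1, 0, 0], matching the expectation in Example #11 below.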
Example #2
 def extract_images_and_targets(read_data):
     """Extract images and targets from the input dict."""
     image = read_data[fields.InputDataFields.image]
     key = ''
     if fields.InputDataFields.source_id in read_data:
         key = read_data[fields.InputDataFields.source_id]
     location_gt = read_data[fields.InputDataFields.groundtruth_boxes]
     classes_gt = tf.cast(
         read_data[fields.InputDataFields.groundtruth_classes], tf.int32)
     classes_gt -= label_id_offset
     if merge_multiple_label_boxes:
         location_gt, classes_gt, _ = util_ops.merge_boxes_with_multiple_labels(
             location_gt, classes_gt, num_classes)
     else:
         classes_gt = util_ops.padded_one_hot_encoding(indices=classes_gt,
                                                       depth=num_classes,
                                                       left_pad=0)
     masks_gt = read_data.get(
         fields.InputDataFields.groundtruth_instance_masks)
     keypoints_gt = read_data.get(
         fields.InputDataFields.groundtruth_keypoints)
     if (merge_multiple_label_boxes
             and (masks_gt is not None or keypoints_gt is not None)):
         raise NotImplementedError('Multi-label support is only for boxes.')
     return image, key, location_gt, classes_gt, masks_gt, keypoints_gt
Example #3
def transform_input_data(tensor_dict,
                         model_preprocess_fn,
                         image_resizer_fn,
                         num_classes,
                         data_augmentation_fn=None,
                         merge_multiple_boxes=False,
                         retain_original_image=False):
    # if fields.InputDataFields.groundtruth_boxes in tensor_dict:
    #   tensor_dict = util_ops.filter_groundtruth_with_nan_box_coordinates(
    #     tensor_dict)
    if fields.InputDataFields.image_additional_channels in tensor_dict:
        channels = tensor_dict[
            fields.InputDataFields.image_additional_channels]
        tensor_dict[fields.InputDataFields.image] = tf.concat(
            [tensor_dict[fields.InputDataFields.image], channels], axis=2)

    # Apply data augmentation ops.
    if data_augmentation_fn is not None:
        tensor_dict = data_augmentation_fn(tensor_dict)

    # Apply model preprocessing ops and resize instance masks.
    query = tensor_dict['query']
    preprocessed_resized_image, true_image_shape = resize_image(
        query, new_height=FLAGS.im_size, new_width=FLAGS.im_size)
    tensor_dict['query'] = preprocessed_resized_image
    tensor_dict['query_shape'] = true_image_shape

    ref = tensor_dict['ref']
    preprocessed_resized_image, true_image_shape = resize_image(
        ref, new_height=FLAGS.im_size, new_width=FLAGS.im_size)
    tensor_dict['ref'] = preprocessed_resized_image
    tensor_dict[fields.InputDataFields.true_image_shape] = true_image_shape

    if retain_original_image:
        tensor_dict[fields.InputDataFields.
                    original_image] = tf.image.convert_image_dtype(
                        tensor_dict['ref'][0] / 2 + 0.5, tf.uint8)

    # Transform groundtruth classes to one hot encodings.
    zero_indexed_groundtruth_classes = tensor_dict[
        fields.InputDataFields.groundtruth_classes]
    tensor_dict[fields.InputDataFields.groundtruth_classes] = tf.one_hot(
        zero_indexed_groundtruth_classes, num_classes)

    if merge_multiple_boxes:
        merged_boxes, merged_classes, _ = util_ops.merge_boxes_with_multiple_labels(
            tensor_dict[fields.InputDataFields.groundtruth_boxes],
            zero_indexed_groundtruth_classes, num_classes)
        merged_classes = tf.cast(merged_classes, tf.float32)
        tensor_dict[fields.InputDataFields.groundtruth_boxes] = merged_boxes
        tensor_dict[
            fields.InputDataFields.groundtruth_classes] = merged_classes

    return tensor_dict
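Example #3 calls a `resize_image` helper and a `FLAGS.im_size` flag that are not shown in the snippet. A minimal sketch of what such a helper might look like, assuming plain bilinear resizing to a fixed square size (hypothetical stand-in, not the original helper):

import tensorflow as tf

def resize_image(image, new_height, new_width):
    # Hypothetical stand-in: bilinear-resize a 3-D HxWxC image and report
    # the resulting true shape as [height, width, channels].
    resized = tf.image.resize_images(image, [new_height, new_width])
    true_shape = tf.stack([new_height, new_width, tf.shape(image)[2]])
    return resized, true_shape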
Example #4
 def testMergeBoxesWithEmptyInputs(self):
     boxes = tf.constant([[]])
     class_indices = tf.constant([])
     num_classes = 5
     merged_boxes, merged_classes, merged_box_indices = (
         ops.merge_boxes_with_multiple_labels(boxes, class_indices, num_classes))
     with self.test_session() as sess:
         np_merged_boxes, np_merged_classes, np_merged_box_indices = sess.run(
             [merged_boxes, merged_classes, merged_box_indices])
         self.assertAllEqual(np_merged_boxes.shape, [0, 4])
         self.assertAllEqual(np_merged_classes.shape, [0, 5])
         self.assertAllEqual(np_merged_box_indices.shape, [0])
Example #6
    def extract_images_and_targets(read_data):
        """Extract images and targets from the input dict."""
        suffix = 0

        images = []
        keys = []
        locations = []
        classes = []
        masks = []
        keypoints = []

        while fields.InputDataFields.image + str(suffix) in read_data:
            image = read_data[fields.InputDataFields.image + str(suffix)]
            key = ''
            if fields.InputDataFields.source_id + str(suffix) in read_data:
                key = read_data[fields.InputDataFields.source_id + str(suffix)]
            location_gt = (read_data[fields.InputDataFields.groundtruth_boxes +
                                     str(suffix)])
            classes_gt = tf.cast(
                read_data[fields.InputDataFields.groundtruth_classes +
                          str(suffix)], tf.int32)
            classes_gt -= label_id_offset
            masks_gt = read_data.get(
                fields.InputDataFields.groundtruth_instance_masks +
                str(suffix))
            keypoints_gt = read_data.get(
                fields.InputDataFields.groundtruth_keypoints + str(suffix))

            if merge_multiple_label_boxes:
                location_gt, classes_gt, _ = util_ops.merge_boxes_with_multiple_labels(
                    location_gt, classes_gt, num_classes)
            else:
                classes_gt = util_ops.padded_one_hot_encoding(
                    indices=classes_gt, depth=num_classes, left_pad=0)

            # Batch the input data and groundtruth. Images, locations, and
            # classes should by default have the same number of items.
            images.append(image)
            keys.append(key)
            locations.append(location_gt)
            classes.append(classes_gt)
            masks.append(masks_gt)
            keypoints.append(keypoints_gt)

            suffix += 1

        return (images, keys, locations, classes, masks, keypoints)
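The example above walks a dict whose keys carry a per-sample numeric suffix ('image0', 'image1', ...). A short sketch of how such a dict could be built; the plain string keys are illustrative stand-ins for the `fields.InputDataFields` constants:

import tensorflow as tf

def make_suffixed_dict(samples):
    # Key each tensor by field name plus numeric suffix, as consumed by the
    # while-loop above (illustrative key names).
    read_data = {}
    for i, (image, boxes, classes) in enumerate(samples):
        read_data['image' + str(i)] = image
        read_data['groundtruth_boxes' + str(i)] = boxes
        read_data['groundtruth_classes' + str(i)] = classes
    return read_data

samples = [(tf.zeros([32, 32, 3]), tf.zeros([0, 4]), tf.zeros([0], tf.int32))]
read_data = make_suffixed_dict(samples)
# The while-loop stops at suffix 1 because 'image1' is absent.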
Example #8
  def extract_images_and_targets(read_data):
    """Extract images and targets from the input dict."""
    image = read_data[fields.InputDataFields.image]
    key = ''
    if fields.InputDataFields.source_id in read_data:
      key = read_data[fields.InputDataFields.source_id]
    location_gt = read_data[fields.InputDataFields.groundtruth_boxes]
    classes_gt = tf.cast(read_data[fields.InputDataFields.groundtruth_classes],
                         tf.int32)
    classes_gt -= label_id_offset
    classes_in_image_level_gt = tf.cast(
        read_data[fields.InputDataFields.groundtruth_image_classes], tf.int32)

    # image-level class does not have background class
    # thus, id starts from 1
    classes_in_image_level_gt -= label_id_offset

    # audio
    audio = read_data[fields.InputDataFields.audio]

    if merge_multiple_label_boxes:
      location_gt, classes_gt, _ = util_ops.merge_boxes_with_multiple_labels(
          location_gt, classes_gt, num_classes)
    else:
      classes_gt = util_ops.padded_one_hot_encoding(
          indices=classes_gt, depth=num_classes, left_pad=0)

      """
      classes_in_image_level_gt = tf.Print(classes_in_image_level_gt,
                                           [classes_in_image_level_gt],
                                           "classes_in_image_level_gt: ")
      """

      # multi-label classification, so we need k-hot encoding
      classes_in_image_level_gt = util_ops.padded_one_hot_encoding(
          indices=classes_in_image_level_gt,
          depth=num_classes_in_image_level,
          left_pad=0)
      classes_in_image_level_gt = tf.reduce_sum(classes_in_image_level_gt, 0)

    masks_gt = read_data.get(fields.InputDataFields.groundtruth_instance_masks)
    keypoints_gt = read_data.get(fields.InputDataFields.groundtruth_keypoints)
    if (merge_multiple_label_boxes and (
        masks_gt is not None or keypoints_gt is not None)):
      raise NotImplementedError('Multi-label support is only for boxes.')
    return (image, audio, key, location_gt, classes_gt,
            classes_in_image_level_gt, masks_gt, keypoints_gt)
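The k-hot step above encodes each image-level label separately and then sums over the label dimension. The same idea with plain `tf.one_hot` (a sketch; the example itself uses `util_ops.padded_one_hot_encoding`):

import tensorflow as tf

# Two image-level labels out of five classes collapse into one k-hot vector.
class_indices = tf.constant([1, 3], dtype=tf.int32)
one_hot = tf.one_hot(class_indices, depth=5)   # shape [2, 5]
k_hot = tf.reduce_sum(one_hot, axis=0)         # [0., 1., 0., 1., 0.]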
Example #9
    def extract_images_and_targets(read_data):
        """Extract images and targets from the input dict."""
        image = read_data[fields.InputDataFields.image]
        key = ''
        if fields.InputDataFields.source_id in read_data:
            key = read_data[fields.InputDataFields.source_id]
        location_gt = read_data[fields.InputDataFields.groundtruth_boxes]
        if cfg.HAS_RPN:
            rpn_boxes = read_data['rpn_boxes']
        else:
            rpn_boxes = location_gt
        rpn_class = tf.ones_like(rpn_boxes, dtype=tf.int32)
        rpn_class = rpn_class[:, :2]
        classes_gt = tf.cast(
            read_data[fields.InputDataFields.groundtruth_classes], tf.int32)
        classes_gt -= label_id_offset

        if merge_multiple_label_boxes and use_multiclass_scores:
            raise ValueError(
                'Using both merge_multiple_label_boxes and use_multiclass_scores is '
                'not supported')

        if merge_multiple_label_boxes:
            location_gt, classes_gt, _ = util_ops.merge_boxes_with_multiple_labels(
                location_gt, classes_gt, num_classes)
            classes_gt = tf.cast(classes_gt, tf.float32)
        elif use_multiclass_scores:
            classes_gt = tf.cast(
                read_data[fields.InputDataFields.multiclass_scores],
                tf.float32)
        else:
            classes_gt = util_ops.padded_one_hot_encoding(indices=classes_gt,
                                                          depth=num_classes,
                                                          left_pad=0)
        masks_gt = read_data.get(
            fields.InputDataFields.groundtruth_instance_masks)
        keypoints_gt = read_data.get(
            fields.InputDataFields.groundtruth_keypoints)
        if (merge_multiple_label_boxes
                and (masks_gt is not None or keypoints_gt is not None)):
            raise NotImplementedError('Multi-label support is only for boxes.')
        weights_gt = read_data.get(fields.InputDataFields.groundtruth_weights)

        return (image, key, location_gt, classes_gt, masks_gt, keypoints_gt,
                weights_gt, rpn_boxes, rpn_class)
Example #10
    def extract_images_and_targets(read_data):
        """Extract images and targets from the input dict."""
        image = read_data[fields.InputDataFields.image]
        area = read_data[fields.InputDataFields.groundtruth_area]
        key = ''
        if fields.InputDataFields.source_id in read_data:
            key = read_data[fields.InputDataFields.source_id]
        location_gt = read_data[fields.InputDataFields.groundtruth_boxes]
        classes_gt = tf.cast(
            read_data[fields.InputDataFields.groundtruth_classes], tf.int32)
        classes_gt -= label_id_offset

        if merge_multiple_label_boxes and use_multiclass_scores:
            raise ValueError(
                'Using both merge_multiple_label_boxes and use_multiclass_scores is '
                'not supported')

        if merge_multiple_label_boxes:
            location_gt, classes_gt, _ = util_ops.merge_boxes_with_multiple_labels(
                location_gt, classes_gt, num_classes)
        elif use_multiclass_scores:
            classes_gt = tf.cast(
                read_data[fields.InputDataFields.multiclass_scores],
                tf.float32)
        else:
            classes_gt = util_ops.padded_one_hot_encoding(indices=classes_gt,
                                                          depth=num_classes,
                                                          left_pad=0)
        masks_gt = read_data.get(
            fields.InputDataFields.groundtruth_instance_masks)
        keypoints_gt = read_data.get(
            fields.InputDataFields.groundtruth_keypoints)
        if (merge_multiple_label_boxes
                and (masks_gt is not None or keypoints_gt is not None)):
            raise NotImplementedError('Multi-label support is only for boxes.')
        weights_gt = read_data.get(fields.InputDataFields.groundtruth_weights)
        return (image, area, key, location_gt, classes_gt, masks_gt,
                keypoints_gt, weights_gt)
Example #11
 def testMergeBoxesWithMultipleLabels(self):
     boxes = tf.constant(
         [[0.25, 0.25, 0.75, 0.75], [0.0, 0.0, 0.5, 0.75],
          [0.25, 0.25, 0.75, 0.75]],
         dtype=tf.float32)
     class_indices = tf.constant([0, 4, 2], dtype=tf.int32)
     num_classes = 5
     merged_boxes, merged_classes, merged_box_indices = (
         ops.merge_boxes_with_multiple_labels(boxes, class_indices, num_classes))
     expected_merged_boxes = np.array(
         [[0.25, 0.25, 0.75, 0.75], [0.0, 0.0, 0.5, 0.75]], dtype=np.float32)
     expected_merged_classes = np.array(
         [[1, 0, 1, 0, 0], [0, 0, 0, 0, 1]], dtype=np.int32)
     expected_merged_box_indices = np.array([0, 1], dtype=np.int32)
     with self.test_session() as sess:
         np_merged_boxes, np_merged_classes, np_merged_box_indices = sess.run(
             [merged_boxes, merged_classes, merged_box_indices])
         if np_merged_classes[0, 0] != 1:
             expected_merged_boxes = expected_merged_boxes[::-1, :]
             expected_merged_classes = expected_merged_classes[::-1, :]
              expected_merged_box_indices = expected_merged_box_indices[::-1]
         self.assertAllClose(np_merged_boxes, expected_merged_boxes)
         self.assertAllClose(np_merged_classes, expected_merged_classes)
         self.assertAllClose(np_merged_box_indices, expected_merged_box_indices)
Example #13
def transform_input_data(tensor_dict,
                         model_preprocess_fn,
                         image_resizer_fn,
                         num_classes,
                         data_augmentation_fn=None,
                         merge_multiple_boxes=False,
                         retain_original_image=False):
  """A single function that is responsible for all input data transformations.

  Data transformation functions are applied in the following order.
  1. If key fields.InputDataFields.image_additional_channels is present in
     tensor_dict, the additional channels will be merged into
     fields.InputDataFields.image.
  2. data_augmentation_fn (optional): applied on tensor_dict.
  3. model_preprocess_fn: applied only on image tensor in tensor_dict.
  4. image_resizer_fn: applied on original image and instance mask tensor in
     tensor_dict.
  5. one_hot_encoding: applied to classes tensor in tensor_dict.
  6. merge_multiple_boxes (optional): when groundtruth boxes are exactly the
     same they can be merged into a single box with an associated k-hot class
     label.

  Args:
    tensor_dict: dictionary containing input tensors keyed by
      fields.InputDataFields.
    model_preprocess_fn: model's preprocess function to apply on image tensor.
      This function must take in a 4-D float tensor and return a 4-D
      preprocessed float tensor and a tensor containing the true image shape.
    image_resizer_fn: image resizer function to apply on groundtruth instance
      masks. This function must take a 3-D float tensor of an image and a 3-D
      tensor of instance masks and return a resized version of these along with
      the true shapes.
    num_classes: number of max classes to one-hot (or k-hot) encode the class
      data.
    data_augmentation_fn: (optional) data augmentation function to apply on
      input `tensor_dict`.
    merge_multiple_boxes: (optional) whether to merge multiple groundtruth boxes
      and classes for a given image if the boxes are exactly the same.
    retain_original_image: (optional) whether to retain original image in the
      output dictionary.

  Returns:
    A dictionary keyed by fields.InputDataFields containing the tensors obtained
    after applying all the transformations.
  """
  if fields.InputDataFields.groundtruth_boxes in tensor_dict:
    tensor_dict = util_ops.filter_groundtruth_with_nan_box_coordinates(
        tensor_dict)
  if fields.InputDataFields.image_additional_channels in tensor_dict:
    channels = tensor_dict[fields.InputDataFields.image_additional_channels]
    tensor_dict[fields.InputDataFields.image] = tf.concat(
        [tensor_dict[fields.InputDataFields.image], channels], axis=2)

  if retain_original_image:
    tensor_dict[fields.InputDataFields.original_image] = tf.cast(
        tensor_dict[fields.InputDataFields.image], tf.uint8)

  # Apply data augmentation ops.
  if data_augmentation_fn is not None:
    tensor_dict = data_augmentation_fn(tensor_dict)

  # Apply model preprocessing ops and resize instance masks.
  image = tensor_dict[fields.InputDataFields.image]
  preprocessed_resized_image, true_image_shape = model_preprocess_fn(
      tf.expand_dims(tf.to_float(image), axis=0))
  tensor_dict[fields.InputDataFields.image] = tf.squeeze(
      preprocessed_resized_image, axis=0)
  tensor_dict[fields.InputDataFields.true_image_shape] = tf.squeeze(
      true_image_shape, axis=0)
  if fields.InputDataFields.groundtruth_instance_masks in tensor_dict:
    masks = tensor_dict[fields.InputDataFields.groundtruth_instance_masks]
    _, resized_masks, _ = image_resizer_fn(image, masks)
    tensor_dict[fields.InputDataFields.
                groundtruth_instance_masks] = resized_masks

  # Transform groundtruth classes to one hot encodings.
  label_offset = 1
  zero_indexed_groundtruth_classes = tensor_dict[
      fields.InputDataFields.groundtruth_classes] - label_offset
  tensor_dict[fields.InputDataFields.groundtruth_classes] = tf.one_hot(
      zero_indexed_groundtruth_classes, num_classes)

  if merge_multiple_boxes:
    merged_boxes, merged_classes, _ = util_ops.merge_boxes_with_multiple_labels(
        tensor_dict[fields.InputDataFields.groundtruth_boxes],
        zero_indexed_groundtruth_classes, num_classes)
    merged_classes = tf.cast(merged_classes, tf.float32)
    tensor_dict[fields.InputDataFields.groundtruth_boxes] = merged_boxes
    tensor_dict[fields.InputDataFields.groundtruth_classes] = merged_classes

  return tensor_dict
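The `label_offset` step exists because class ids in the label map start at 1 (0 is conventionally reserved for background), while `tf.one_hot` expects zero-indexed ids. In isolation:

import tensorflow as tf

groundtruth_classes = tf.constant([1, 3], dtype=tf.int32)  # 1-indexed labels
zero_indexed = groundtruth_classes - 1
one_hot = tf.one_hot(zero_indexed, depth=3)  # rows [1, 0, 0] and [0, 0, 1]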
Example #14
def transform_input_data(tensor_dict,
                         model_preprocess_fn,
                         image_resizer_fn,
                         num_classes,
                         data_augmentation_fn=None,
                         merge_multiple_boxes=False,
                         retain_original_image=False):
  """A single function that is responsible for all input data transformations.

  Data transformation functions are applied in the following order.
  1. data_augmentation_fn (optional): applied on tensor_dict.
  2. model_preprocess_fn: applied only on image tensor in tensor_dict.
  3. image_resizer_fn: applied only on instance mask tensor in tensor_dict.
  4. one_hot_encoding: applied to classes tensor in tensor_dict.
  5. merge_multiple_boxes (optional): when groundtruth boxes are exactly the
     same they can be merged into a single box with an associated k-hot class
     label.

  Args:
    tensor_dict: dictionary containing input tensors keyed by
      fields.InputDataFields.
    model_preprocess_fn: model's preprocess function to apply on image tensor.
      This function must take in a 4-D float tensor and return a 4-D
      preprocessed float tensor and a tensor containing the true image shape.
    image_resizer_fn: image resizer function to apply on groundtruth instance
      masks. This function must take a 4-D float tensor of an image and a 4-D
      tensor of instance masks and return resized versions of these along with
      the true shapes.
    num_classes: number of max classes to one-hot (or k-hot) encode the class
      labels.
    data_augmentation_fn: (optional) data augmentation function to apply on
      input `tensor_dict`.
    merge_multiple_boxes: (optional) whether to merge multiple groundtruth boxes
      and classes for a given image if the boxes are exactly the same.
    retain_original_image: (optional) whether to retain original image in the
      output dictionary.

  Returns:
    A dictionary keyed by fields.InputDataFields containing the tensors obtained
    after applying all the transformations.
  """
  if retain_original_image:
    tensor_dict[fields.InputDataFields.
                original_image] = tensor_dict[fields.InputDataFields.image]

  # Apply data augmentation ops.
  if data_augmentation_fn is not None:
    tensor_dict = data_augmentation_fn(tensor_dict)

  # Apply model preprocessing ops and resize instance masks.
  image = tf.expand_dims(
      tf.to_float(tensor_dict[fields.InputDataFields.image]), axis=0)
  preprocessed_resized_image, true_image_shape = model_preprocess_fn(image)
  tensor_dict[fields.InputDataFields.image] = tf.squeeze(
      preprocessed_resized_image, axis=0)
  tensor_dict[fields.InputDataFields.true_image_shape] = tf.squeeze(
      true_image_shape, axis=0)
  if fields.InputDataFields.groundtruth_instance_masks in tensor_dict:
    masks = tensor_dict[fields.InputDataFields.groundtruth_instance_masks]
    _, resized_masks, _ = image_resizer_fn(image, masks)
    tensor_dict[fields.InputDataFields.
                groundtruth_instance_masks] = resized_masks

  # Transform groundtruth classes to one hot encodings.
  label_offset = 1
  zero_indexed_groundtruth_classes = tensor_dict[
      fields.InputDataFields.groundtruth_classes] - label_offset
  tensor_dict[fields.InputDataFields.groundtruth_classes] = tf.one_hot(
      zero_indexed_groundtruth_classes, num_classes)

  if merge_multiple_boxes:
    merged_boxes, merged_classes, _ = util_ops.merge_boxes_with_multiple_labels(
        tensor_dict[fields.InputDataFields.groundtruth_boxes],
        zero_indexed_groundtruth_classes, num_classes)
    # Cast the k-hot labels to float to match the one-hot encoding above.
    merged_classes = tf.cast(merged_classes, tf.float32)
    tensor_dict[fields.InputDataFields.groundtruth_boxes] = merged_boxes
    tensor_dict[fields.InputDataFields.groundtruth_classes] = merged_classes

  return tensor_dict
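The docstring above pins down the `model_preprocess_fn` contract: a 4-D float batch in, a preprocessed 4-D batch plus a true-image-shape tensor out. A minimal function satisfying that contract, with a hypothetical normalization and a fixed 300x300 size chosen purely for illustration:

import tensorflow as tf

def model_preprocess_fn(images):
    # Hypothetical preprocess: resize the 4-D batch to 300x300 and scale
    # pixel values to [-1, 1].
    resized = tf.image.resize_images(images, [300, 300])
    preprocessed = (2.0 / 255.0) * resized - 1.0
    true_image_shape = tf.tile([[300, 300, 3]], [tf.shape(images)[0], 1])
    return preprocessed, true_image_shape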
Example #15
def transform_input_data(tensor_dict,
                         model_preprocess_fn,
                         image_resizer_fn,
                         num_classes,
                         data_augmentation_fn=None,
                         merge_multiple_boxes=False,
                         retain_original_image=False,
                         use_multiclass_scores=False):
  """A single function that is responsible for all input data transformations.

  Data transformation functions are applied in the following order.
  1. If key fields.InputDataFields.image_additional_channels is present in
     tensor_dict, the additional channels will be merged into
     fields.InputDataFields.image.
  2. data_augmentation_fn (optional): applied on tensor_dict.
  3. model_preprocess_fn: applied only on image tensor in tensor_dict.
  4. image_resizer_fn: applied on original image and instance mask tensor in
     tensor_dict.
  5. one_hot_encoding: applied to classes tensor in tensor_dict.
  6. merge_multiple_boxes (optional): when groundtruth boxes are exactly the
     same they can be merged into a single box with an associated k-hot class
     label.

  Args:
    tensor_dict: dictionary containing input tensors keyed by
      fields.InputDataFields.
    model_preprocess_fn: model's preprocess function to apply on image tensor.
      This function must take in a 4-D float tensor and return a 4-D
      preprocessed float tensor and a tensor containing the true image shape.
    image_resizer_fn: image resizer function to apply on groundtruth instance
      masks. This function must take a 3-D float tensor of an image and a 3-D
      tensor of instance masks and return a resized version of these along with
      the true shapes.
    num_classes: number of max classes to one-hot (or k-hot) encode the class
      labels.
    data_augmentation_fn: (optional) data augmentation function to apply on
      input `tensor_dict`.
    merge_multiple_boxes: (optional) whether to merge multiple groundtruth boxes
      and classes for a given image if the boxes are exactly the same.
    retain_original_image: (optional) whether to retain original image in the
      output dictionary.
    use_multiclass_scores: whether to use multiclass scores as
      class targets instead of one-hot encoding of `groundtruth_classes`.

  Returns:
    A dictionary keyed by fields.InputDataFields containing the tensors obtained
    after applying all the transformations.
  """
  # Reshape flattened multiclass scores tensor into a 2D tensor of shape
  # [num_boxes, num_classes].
  if fields.InputDataFields.multiclass_scores in tensor_dict:
    tensor_dict[fields.InputDataFields.multiclass_scores] = tf.reshape(
        tensor_dict[fields.InputDataFields.multiclass_scores], [
            tf.shape(tensor_dict[fields.InputDataFields.groundtruth_boxes])[0],
            num_classes
        ])
  if fields.InputDataFields.groundtruth_boxes in tensor_dict:
    tensor_dict = util_ops.filter_groundtruth_with_nan_box_coordinates(
        tensor_dict)
    tensor_dict = util_ops.filter_unrecognized_classes(tensor_dict)

  if retain_original_image:
    tensor_dict[fields.InputDataFields.original_image] = tf.cast(
        image_resizer_fn(tensor_dict[fields.InputDataFields.image])[0],
        tf.uint8)

  if fields.InputDataFields.image_additional_channels in tensor_dict:
    channels = tensor_dict[fields.InputDataFields.image_additional_channels]
    tensor_dict[fields.InputDataFields.image] = tf.concat(
        [tensor_dict[fields.InputDataFields.image], channels], axis=2)


  # Apply data augmentation ops.
  if data_augmentation_fn is not None:
    tensor_dict = data_augmentation_fn(tensor_dict)

  # Apply model preprocessing ops and resize instance masks.
  image = tensor_dict[fields.InputDataFields.image]
  preprocessed_resized_image, true_image_shape = model_preprocess_fn(
      tf.expand_dims(tf.cast(image, dtype=tf.float32), axis=0))
  tensor_dict[fields.InputDataFields.image] = tf.squeeze(
      preprocessed_resized_image, axis=0)
  tensor_dict[fields.InputDataFields.true_image_shape] = tf.squeeze(
      true_image_shape, axis=0)

  # Resize each auxiliary groundtruth map with the same resizer used for the
  # image; each map is HxWx1, so squeeze/expand around the resizer call.
  evidential_map_fields = [
      fields.InputDataFields.groundtruth_bel_F,
      fields.InputDataFields.groundtruth_bel_O,
      fields.InputDataFields.groundtruth_z_max_detections,
      fields.InputDataFields.groundtruth_z_min_observations,
      fields.InputDataFields.groundtruth_bel_U,
      fields.InputDataFields.groundtruth_z_min_detections,
      fields.InputDataFields.groundtruth_detections_drivingCorridor,
      fields.InputDataFields.groundtruth_intensity,
  ]
  for map_field in evidential_map_fields:
    groundtruth_map = tf.expand_dims(
        tf.squeeze(tensor_dict[map_field], axis=2), axis=0)
    _, resized_map, _ = image_resizer_fn(image, groundtruth_map)
    tensor_dict[map_field] = tf.expand_dims(
        tf.squeeze(resized_map, axis=0), axis=2)

  # Transform groundtruth classes to one hot encodings.
  label_offset = 1
  zero_indexed_groundtruth_classes = tensor_dict[
      fields.InputDataFields.groundtruth_classes] - label_offset
  tensor_dict[fields.InputDataFields.groundtruth_classes] = tf.one_hot(
      zero_indexed_groundtruth_classes, num_classes)

  if use_multiclass_scores:
    tensor_dict[fields.InputDataFields.groundtruth_classes] = tensor_dict[
        fields.InputDataFields.multiclass_scores]
  tensor_dict.pop(fields.InputDataFields.multiclass_scores, None)

  if fields.InputDataFields.groundtruth_confidences in tensor_dict:
    groundtruth_confidences = tensor_dict[
        fields.InputDataFields.groundtruth_confidences]
    # Map the confidences to the one-hot encoding of classes
    tensor_dict[fields.InputDataFields.groundtruth_confidences] = (
        tf.reshape(groundtruth_confidences, [-1, 1]) *
        tensor_dict[fields.InputDataFields.groundtruth_classes])
  else:
    groundtruth_confidences = tf.ones_like(
        zero_indexed_groundtruth_classes, dtype=tf.float32)
    tensor_dict[fields.InputDataFields.groundtruth_confidences] = (
        tensor_dict[fields.InputDataFields.groundtruth_classes])

  if merge_multiple_boxes:
    merged_boxes, merged_classes, merged_confidences, _ = (
        util_ops.merge_boxes_with_multiple_labels(
            tensor_dict[fields.InputDataFields.groundtruth_boxes],
            zero_indexed_groundtruth_classes,
            groundtruth_confidences,
            num_classes))
    merged_classes = tf.cast(merged_classes, tf.float32)
    tensor_dict[fields.InputDataFields.groundtruth_boxes] = merged_boxes
    tensor_dict[fields.InputDataFields.groundtruth_classes] = merged_classes
    tensor_dict[fields.InputDataFields.groundtruth_confidences] = (
        merged_confidences)
  if fields.InputDataFields.groundtruth_boxes in tensor_dict:
    tensor_dict[fields.InputDataFields.num_groundtruth_boxes] = tf.shape(
        tensor_dict[fields.InputDataFields.groundtruth_boxes])[0]

  return tensor_dict
Example #16
def transform_input_data(tensor_dict,
                         model_preprocess_fn,
                         image_resizer_fn,
                         num_classes,
                         data_augmentation_fn=None,
                         merge_multiple_boxes=False,
                         retain_original_image=False,
                         use_multiclass_scores=False,
                         use_bfloat16=False,
                         retain_original_image_additional_channels=False):
  """A single function that is responsible for all input data transformations.

  Data transformation functions are applied in the following order.
  1. If key fields.InputDataFields.image_additional_channels is present in
     tensor_dict, the additional channels will be merged into
     fields.InputDataFields.image.
  2. data_augmentation_fn (optional): applied on tensor_dict.
  3. model_preprocess_fn: applied only on image tensor in tensor_dict.
  4. image_resizer_fn: applied on original image and instance mask tensor in
     tensor_dict.
  5. one_hot_encoding: applied to classes tensor in tensor_dict.
  6. merge_multiple_boxes (optional): when groundtruth boxes are exactly the
     same they can be merged into a single box with an associated k-hot class
     label.

  Args:
    tensor_dict: dictionary containing input tensors keyed by
      fields.InputDataFields.
    model_preprocess_fn: model's preprocess function to apply on image tensor.
      This function must take in a 4-D float tensor and return a 4-D
      preprocessed float tensor and a tensor containing the true image shape.
    image_resizer_fn: image resizer function to apply on groundtruth instance
      masks. This function must take a 3-D float tensor of an image and a 3-D
      tensor of instance masks and return a resized version of these along with
      the true shapes.
    num_classes: number of max classes to one-hot (or k-hot) encode the class
      labels.
    data_augmentation_fn: (optional) data augmentation function to apply on
      input `tensor_dict`.
    merge_multiple_boxes: (optional) whether to merge multiple groundtruth boxes
      and classes for a given image if the boxes are exactly the same.
    retain_original_image: (optional) whether to retain original image in the
      output dictionary.
    use_multiclass_scores: whether to use multiclass scores as class targets
      instead of one-hot encoding of `groundtruth_classes`. When
      this is True and multiclass_scores is empty, one-hot encoding of
      `groundtruth_classes` is used as a fallback.
    use_bfloat16: (optional) a bool, whether to use bfloat16 in training.
    retain_original_image_additional_channels: (optional) Whether to retain
      original image additional channels in the output dictionary.

  Returns:
    A dictionary keyed by fields.InputDataFields containing the tensors obtained
    after applying all the transformations.
  """
  out_tensor_dict = tensor_dict.copy()
  if fields.InputDataFields.multiclass_scores in out_tensor_dict:
    out_tensor_dict[
        fields.InputDataFields
        .multiclass_scores] = _multiclass_scores_or_one_hot_labels(
            out_tensor_dict[fields.InputDataFields.multiclass_scores],
            out_tensor_dict[fields.InputDataFields.groundtruth_boxes],
            out_tensor_dict[fields.InputDataFields.groundtruth_classes],
            num_classes)

  if fields.InputDataFields.groundtruth_boxes in out_tensor_dict:
    out_tensor_dict = util_ops.filter_groundtruth_with_nan_box_coordinates(
        out_tensor_dict)
    out_tensor_dict = util_ops.filter_unrecognized_classes(out_tensor_dict)

  if retain_original_image:
    out_tensor_dict[fields.InputDataFields.original_image] = tf.cast(
        image_resizer_fn(out_tensor_dict[fields.InputDataFields.image],
                         None)[0], tf.uint8)

  if fields.InputDataFields.image_additional_channels in out_tensor_dict:
    channels = out_tensor_dict[
        fields.InputDataFields.image_additional_channels]
    out_tensor_dict[fields.InputDataFields.image] = tf.concat(
        [out_tensor_dict[fields.InputDataFields.image], channels], axis=2)
    if retain_original_image_additional_channels:
      out_tensor_dict[
          fields.InputDataFields.image_additional_channels] = tf.cast(
              image_resizer_fn(channels, None)[0], tf.uint8)

  # Apply data augmentation ops.
  if data_augmentation_fn is not None:
    out_tensor_dict = data_augmentation_fn(out_tensor_dict)

  # Apply model preprocessing ops and resize instance masks.
  image = out_tensor_dict[fields.InputDataFields.image]
  preprocessed_resized_image, true_image_shape = model_preprocess_fn(
      tf.expand_dims(tf.cast(image, dtype=tf.float32), axis=0))
  if use_bfloat16:
    preprocessed_resized_image = tf.cast(
        preprocessed_resized_image, tf.bfloat16)
  out_tensor_dict[fields.InputDataFields.image] = tf.squeeze(
      preprocessed_resized_image, axis=0)
  out_tensor_dict[fields.InputDataFields.true_image_shape] = tf.squeeze(
      true_image_shape, axis=0)
  if fields.InputDataFields.groundtruth_instance_masks in out_tensor_dict:
    masks = out_tensor_dict[fields.InputDataFields.groundtruth_instance_masks]
    _, resized_masks, _ = image_resizer_fn(image, masks)
    if use_bfloat16:
      resized_masks = tf.cast(resized_masks, tf.bfloat16)
    out_tensor_dict[
        fields.InputDataFields.groundtruth_instance_masks] = resized_masks

  label_offset = 1
  zero_indexed_groundtruth_classes = out_tensor_dict[
      fields.InputDataFields.groundtruth_classes] - label_offset
  if use_multiclass_scores:
    out_tensor_dict[
        fields.InputDataFields.groundtruth_classes] = out_tensor_dict[
            fields.InputDataFields.multiclass_scores]
  else:
    out_tensor_dict[fields.InputDataFields.groundtruth_classes] = tf.one_hot(
        zero_indexed_groundtruth_classes, num_classes)
  out_tensor_dict.pop(fields.InputDataFields.multiclass_scores, None)

  if fields.InputDataFields.groundtruth_confidences in out_tensor_dict:
    groundtruth_confidences = out_tensor_dict[
        fields.InputDataFields.groundtruth_confidences]
    # Map the confidences to the one-hot encoding of classes
    out_tensor_dict[fields.InputDataFields.groundtruth_confidences] = (
        tf.reshape(groundtruth_confidences, [-1, 1]) *
        out_tensor_dict[fields.InputDataFields.groundtruth_classes])
  else:
    groundtruth_confidences = tf.ones_like(
        zero_indexed_groundtruth_classes, dtype=tf.float32)
    out_tensor_dict[fields.InputDataFields.groundtruth_confidences] = (
        out_tensor_dict[fields.InputDataFields.groundtruth_classes])

  if merge_multiple_boxes:
    merged_boxes, merged_classes, merged_confidences, _ = (
        util_ops.merge_boxes_with_multiple_labels(
            out_tensor_dict[fields.InputDataFields.groundtruth_boxes],
            zero_indexed_groundtruth_classes,
            groundtruth_confidences,
            num_classes))
    merged_classes = tf.cast(merged_classes, tf.float32)
    out_tensor_dict[fields.InputDataFields.groundtruth_boxes] = merged_boxes
    out_tensor_dict[fields.InputDataFields.groundtruth_classes] = merged_classes
    out_tensor_dict[fields.InputDataFields.groundtruth_confidences] = (
        merged_confidences)
  if fields.InputDataFields.groundtruth_boxes in out_tensor_dict:
    out_tensor_dict[fields.InputDataFields.num_groundtruth_boxes] = tf.shape(
        out_tensor_dict[fields.InputDataFields.groundtruth_boxes])[0]

  return out_tensor_dict
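This variant delegates to a `_multiclass_scores_or_one_hot_labels` helper that is not shown. A sketch consistent with the fallback the docstring describes (use the flattened scores when non-empty, otherwise one-hot encode the 1-indexed classes); treat it as an approximation rather than the exact library helper:

import tensorflow as tf

def _multiclass_scores_or_one_hot_labels(multiclass_scores, groundtruth_boxes,
                                         groundtruth_classes, num_classes):
  def use_scores():
    # Reshape the flattened scores to [num_boxes, num_classes].
    return tf.reshape(multiclass_scores,
                      [tf.shape(groundtruth_boxes)[0], num_classes])
  def use_one_hot():
    # groundtruth_classes is 1-indexed; shift before encoding.
    return tf.one_hot(groundtruth_classes - 1, num_classes)
  return tf.cond(tf.size(multiclass_scores) > 0, use_scores, use_one_hot)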
Example #17
def transform_input_data(tensor_dict,
                         model_preprocess_fn,
                         image_resizer_fn,
                         num_classes,
                         data_augmentation_fn=None,
                         merge_multiple_boxes=False,
                         retain_original_image=False,
                         use_multiclass_scores=False,
                         use_bfloat16=False,
                         retain_original_image_additional_channels=False,
                         keypoint_type_weight=None):
    """A single function that is responsible for all input data transformations.

  Data transformation functions are applied in the following order.
  1. If key fields.InputDataFields.image_additional_channels is present in
     tensor_dict, the additional channels will be merged into
     fields.InputDataFields.image.
  2. data_augmentation_fn (optional): applied on tensor_dict.
  3. model_preprocess_fn: applied only on image tensor in tensor_dict.
  4. keypoint_type_weight (optional): If groundtruth keypoints are in
     the tensor dictionary, per-keypoint weights are produced. These weights are
     initialized by `keypoint_type_weight` (or ones if left None).
     Then, for all keypoints that are not visible, the weights are set to 0 (to
     avoid penalizing the model in a loss function).
  5. image_resizer_fn: applied on original image and instance mask tensor in
     tensor_dict.
  6. one_hot_encoding: applied to classes tensor in tensor_dict.
  7. merge_multiple_boxes (optional): when groundtruth boxes are exactly the
     same they can be merged into a single box with an associated k-hot class
     label.

  Args:
    tensor_dict: dictionary containing input tensors keyed by
      fields.InputDataFields.
    model_preprocess_fn: model's preprocess function to apply on image tensor.
      This function must take in a 4-D float tensor and return a 4-D
      preprocessed float tensor and a tensor containing the true image shape.
    image_resizer_fn: image resizer function to apply on groundtruth instance
      masks. This function must take a 3-D float tensor of an image and a 3-D
      tensor of instance masks and return a resized version of these along with
      the true shapes.
    num_classes: number of max classes to one-hot (or k-hot) encode the class
      labels.
    data_augmentation_fn: (optional) data augmentation function to apply on
      input `tensor_dict`.
    merge_multiple_boxes: (optional) whether to merge multiple groundtruth boxes
      and classes for a given image if the boxes are exactly the same.
    retain_original_image: (optional) whether to retain original image in the
      output dictionary.
    use_multiclass_scores: whether to use multiclass scores as class targets
      instead of one-hot encoding of `groundtruth_classes`. When
      this is True and multiclass_scores is empty, one-hot encoding of
      `groundtruth_classes` is used as a fallback.
    use_bfloat16: (optional) a bool, whether to use bfloat16 in training.
    retain_original_image_additional_channels: (optional) Whether to retain
      original image additional channels in the output dictionary.
    keypoint_type_weight: A list (of length num_keypoints) containing
      groundtruth loss weights to use for each keypoint. If None, will use a
      weight of 1.

  Returns:
    A dictionary keyed by fields.InputDataFields containing the tensors obtained
    after applying all the transformations.
  """
    out_tensor_dict = tensor_dict.copy()
    if fields.InputDataFields.multiclass_scores in out_tensor_dict:
        out_tensor_dict[
            fields.InputDataFields.
            multiclass_scores] = _multiclass_scores_or_one_hot_labels(
                out_tensor_dict[fields.InputDataFields.multiclass_scores],
                out_tensor_dict[fields.InputDataFields.groundtruth_boxes],
                out_tensor_dict[fields.InputDataFields.groundtruth_classes],
                num_classes)

    if fields.InputDataFields.groundtruth_boxes in out_tensor_dict:
        out_tensor_dict = util_ops.filter_groundtruth_with_nan_box_coordinates(
            out_tensor_dict)
        out_tensor_dict = util_ops.filter_unrecognized_classes(out_tensor_dict)

    if retain_original_image:
        out_tensor_dict[fields.InputDataFields.original_image] = tf.cast(
            image_resizer_fn(out_tensor_dict[fields.InputDataFields.image],
                             None)[0], tf.uint8)

    if fields.InputDataFields.image_additional_channels in out_tensor_dict:
        channels = out_tensor_dict[
            fields.InputDataFields.image_additional_channels]
        out_tensor_dict[fields.InputDataFields.image] = tf.concat(
            [out_tensor_dict[fields.InputDataFields.image], channels], axis=2)
        if retain_original_image_additional_channels:
            out_tensor_dict[
                fields.InputDataFields.image_additional_channels] = tf.cast(
                    image_resizer_fn(channels, None)[0], tf.uint8)

    # Apply data augmentation ops.
    if data_augmentation_fn is not None:
        out_tensor_dict = data_augmentation_fn(out_tensor_dict)

    # Apply model preprocessing ops and resize instance masks.
    image = out_tensor_dict[fields.InputDataFields.image]
    preprocessed_resized_image, true_image_shape = model_preprocess_fn(
        tf.expand_dims(tf.cast(image, dtype=tf.float32), axis=0))

    preprocessed_shape = tf.shape(preprocessed_resized_image)
    new_height, new_width = preprocessed_shape[1], preprocessed_shape[2]

    im_box = tf.stack([
        0.0, 0.0,
        tf.to_float(new_height) / tf.to_float(true_image_shape[0, 0]),
        tf.to_float(new_width) / tf.to_float(true_image_shape[0, 1])
    ])

    if fields.InputDataFields.groundtruth_boxes in tensor_dict:
        bboxes = out_tensor_dict[fields.InputDataFields.groundtruth_boxes]
        boxlist = box_list.BoxList(bboxes)
        realigned_bboxes = box_list_ops.change_coordinate_frame(
            boxlist, im_box)
        out_tensor_dict[
            fields.InputDataFields.groundtruth_boxes] = realigned_bboxes.get()

    if fields.InputDataFields.groundtruth_keypoints in tensor_dict:
        keypoints = out_tensor_dict[
            fields.InputDataFields.groundtruth_keypoints]
        realigned_keypoints = keypoint_ops.change_coordinate_frame(
            keypoints, im_box)
        out_tensor_dict[
            fields.InputDataFields.groundtruth_keypoints] = realigned_keypoints
        flds_gt_kpt = fields.InputDataFields.groundtruth_keypoints
        flds_gt_kpt_vis = fields.InputDataFields.groundtruth_keypoint_visibilities
        flds_gt_kpt_weights = fields.InputDataFields.groundtruth_keypoint_weights
        if flds_gt_kpt_vis not in out_tensor_dict:
            out_tensor_dict[flds_gt_kpt_vis] = tf.ones_like(
                out_tensor_dict[flds_gt_kpt][:, :, 0], dtype=tf.bool)
        out_tensor_dict[flds_gt_kpt_weights] = (
            keypoint_ops.keypoint_weights_from_visibilities(
                out_tensor_dict[flds_gt_kpt_vis], keypoint_type_weight))

    if use_bfloat16:
        preprocessed_resized_image = tf.cast(preprocessed_resized_image,
                                             tf.bfloat16)
    out_tensor_dict[fields.InputDataFields.image] = tf.squeeze(
        preprocessed_resized_image, axis=0)
    out_tensor_dict[fields.InputDataFields.true_image_shape] = tf.squeeze(
        true_image_shape, axis=0)
    if fields.InputDataFields.groundtruth_instance_masks in out_tensor_dict:
        masks = out_tensor_dict[
            fields.InputDataFields.groundtruth_instance_masks]
        _, resized_masks, _ = image_resizer_fn(image, masks)
        if use_bfloat16:
            resized_masks = tf.cast(resized_masks, tf.bfloat16)
        out_tensor_dict[
            fields.InputDataFields.groundtruth_instance_masks] = resized_masks

    label_offset = 1
    zero_indexed_groundtruth_classes = out_tensor_dict[
        fields.InputDataFields.groundtruth_classes] - label_offset
    if use_multiclass_scores:
        out_tensor_dict[
            fields.InputDataFields.groundtruth_classes] = out_tensor_dict[
                fields.InputDataFields.multiclass_scores]
    else:
        out_tensor_dict[
            fields.InputDataFields.groundtruth_classes] = tf.one_hot(
                zero_indexed_groundtruth_classes, num_classes)
    out_tensor_dict.pop(fields.InputDataFields.multiclass_scores, None)

    if fields.InputDataFields.groundtruth_confidences in out_tensor_dict:
        groundtruth_confidences = out_tensor_dict[
            fields.InputDataFields.groundtruth_confidences]
        # Map the confidences to the one-hot encoding of classes
        out_tensor_dict[fields.InputDataFields.groundtruth_confidences] = (
            tf.reshape(groundtruth_confidences, [-1, 1]) *
            out_tensor_dict[fields.InputDataFields.groundtruth_classes])
    else:
        groundtruth_confidences = tf.ones_like(
            zero_indexed_groundtruth_classes, dtype=tf.float32)
        out_tensor_dict[fields.InputDataFields.groundtruth_confidences] = (
            out_tensor_dict[fields.InputDataFields.groundtruth_classes])

    if merge_multiple_boxes:
        merged_boxes, merged_classes, merged_confidences, _ = (
            util_ops.merge_boxes_with_multiple_labels(
                out_tensor_dict[fields.InputDataFields.groundtruth_boxes],
                zero_indexed_groundtruth_classes, groundtruth_confidences,
                num_classes))
        merged_classes = tf.cast(merged_classes, tf.float32)
        out_tensor_dict[
            fields.InputDataFields.groundtruth_boxes] = merged_boxes
        out_tensor_dict[
            fields.InputDataFields.groundtruth_classes] = merged_classes
        out_tensor_dict[fields.InputDataFields.groundtruth_confidences] = (
            merged_confidences)
    if fields.InputDataFields.groundtruth_boxes in out_tensor_dict:
        out_tensor_dict[
            fields.InputDataFields.num_groundtruth_boxes] = tf.shape(
                out_tensor_dict[fields.InputDataFields.groundtruth_boxes])[0]

    return out_tensor_dict
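
To make the coordinate-frame realignment above concrete, here is a minimal, self-contained sketch (assuming the TF Object Detection API's box_list and box_list_ops modules, as used in the snippet): a box given in the original image's normalized coordinates is re-expressed relative to a crop window.

import tensorflow as tf
from object_detection.core import box_list
from object_detection.core import box_list_ops

# One box in the original image's normalized [ymin, xmin, ymax, xmax] frame.
boxes = box_list.BoxList(tf.constant([[0.25, 0.25, 0.75, 0.75]]))
# Crop window, also normalized to the original image.
window = tf.constant([0.25, 0.25, 1.0, 1.0])
realigned = box_list_ops.change_coordinate_frame(boxes, window)
# The box is now relative to the window: [[0.0, 0.0, 2/3, 2/3]].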
Example #18
def transform_input_data(tensor_dict,
                         model_preprocess_fn,
                         image_resizer_fn,
                         num_classes,
                         data_augmentation_fn=None,
                         merge_multiple_boxes=False,
                         retain_original_image=False,
                         use_bfloat16=False):
  """A single function that is responsible for all input data transformations.

  Data transformation functions are applied in the following order.
  1. If key fields.InputDataFields.image_additional_channels is present in
     tensor_dict, the additional channels will be merged into
     fields.InputDataFields.image.
  2. data_augmentation_fn (optional): applied on tensor_dict.
  3. model_preprocess_fn: applied only on image tensor in tensor_dict.
  4. image_resizer_fn: applied on original image and instance mask tensor in
     tensor_dict.
  5. one_hot_encoding: applied to classes tensor in tensor_dict.
  6. merge_multiple_boxes (optional): when groundtruth boxes are exactly the
     same, they can be merged into a single box with an associated k-hot class
     label.

  Args:
    tensor_dict: dictionary containing input tensors keyed by
      fields.InputDataFields.
    model_preprocess_fn: model's preprocess function to apply on image tensor.
      This function must take in a 4-D float tensor and return a 4-D
      preprocessed float tensor and a tensor containing the true image shape.
    image_resizer_fn: image resizer function to apply on groundtruth instance
      masks. This function must take a 3-D float tensor of an image and a 3-D
      tensor of instance masks and return a resized version of these along with
      the true shapes.
    num_classes: maximum number of classes used to one-hot (or k-hot) encode
      the class labels.
    data_augmentation_fn: (optional) data augmentation function to apply on
      input `tensor_dict`.
    merge_multiple_boxes: (optional) whether to merge multiple groundtruth boxes
      and classes for a given image if the boxes are exactly the same.
    retain_original_image: (optional) whether to retain original image in the
      output dictionary.
    use_bfloat16: (optional) a bool, whether to use bfloat16 in training.

  Returns:
    A dictionary keyed by fields.InputDataFields containing the tensors obtained
    after applying all the transformations.
  """
  if fields.InputDataFields.groundtruth_boxes in tensor_dict:
    tensor_dict = util_ops.filter_groundtruth_with_nan_box_coordinates(
        tensor_dict)
  if fields.InputDataFields.image_additional_channels in tensor_dict:
    channels = tensor_dict[fields.InputDataFields.image_additional_channels]
    tensor_dict[fields.InputDataFields.image] = tf.concat(
        [tensor_dict[fields.InputDataFields.image], channels], axis=2)

  if retain_original_image:
    tensor_dict[fields.InputDataFields.original_image] = tf.cast(
        image_resizer_fn(tensor_dict[fields.InputDataFields.image], None)[0],
        tf.uint8)

  # Apply data augmentation ops.
  if data_augmentation_fn is not None:
    tensor_dict = data_augmentation_fn(tensor_dict)

  # Apply model preprocessing ops and resize instance masks.
  image = tensor_dict[fields.InputDataFields.image]
  preprocessed_resized_image, true_image_shape = model_preprocess_fn(
      tf.expand_dims(tf.to_float(image), axis=0))
  if use_bfloat16:
    preprocessed_resized_image = tf.cast(
        preprocessed_resized_image, tf.bfloat16)
  tensor_dict[fields.InputDataFields.image] = tf.squeeze(
      preprocessed_resized_image, axis=0)
  tensor_dict[fields.InputDataFields.true_image_shape] = tf.squeeze(
      true_image_shape, axis=0)
  if fields.InputDataFields.groundtruth_instance_masks in tensor_dict:
    masks = tensor_dict[fields.InputDataFields.groundtruth_instance_masks]
    _, resized_masks, _ = image_resizer_fn(image, masks)
    if use_bfloat16:
      resized_masks = tf.cast(resized_masks, tf.bfloat16)
    tensor_dict[
        fields.InputDataFields.groundtruth_instance_masks] = resized_masks

  # Transform groundtruth classes to one hot encodings.
  label_offset = 1
  zero_indexed_groundtruth_classes = tensor_dict[
      fields.InputDataFields.groundtruth_classes] - label_offset
  tensor_dict[fields.InputDataFields.groundtruth_classes] = tf.one_hot(
      zero_indexed_groundtruth_classes, num_classes)

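  # Scatter per-box confidences into a dense [num_classes] vector keyed by
  # each box's zero-indexed class (tf.sparse_to_dense is deprecated in recent
  # TF versions); default confidences to the one-hot labels when absent.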
  if fields.InputDataFields.groundtruth_confidences in tensor_dict:
    groundtruth_confidences = tensor_dict[
        fields.InputDataFields.groundtruth_confidences]
    tensor_dict[fields.InputDataFields.groundtruth_confidences] = (
        tf.sparse_to_dense(
            zero_indexed_groundtruth_classes,
            [num_classes],
            groundtruth_confidences,
            validate_indices=False))
  else:
    groundtruth_confidences = tf.ones_like(
        zero_indexed_groundtruth_classes, dtype=tf.float32)
    tensor_dict[fields.InputDataFields.groundtruth_confidences] = (
        tensor_dict[fields.InputDataFields.groundtruth_classes])

  if merge_multiple_boxes:
    merged_boxes, merged_classes, merged_confidences, _ = (
        util_ops.merge_boxes_with_multiple_labels(
            tensor_dict[fields.InputDataFields.groundtruth_boxes],
            zero_indexed_groundtruth_classes,
            groundtruth_confidences,
            num_classes))
    merged_classes = tf.cast(merged_classes, tf.float32)
    tensor_dict[fields.InputDataFields.groundtruth_boxes] = merged_boxes
    tensor_dict[fields.InputDataFields.groundtruth_classes] = merged_classes
    tensor_dict[fields.InputDataFields.groundtruth_confidences] = (
        merged_confidences)
  if fields.InputDataFields.groundtruth_boxes in tensor_dict:
    tensor_dict[fields.InputDataFields.num_groundtruth_boxes] = tf.shape(
        tensor_dict[fields.InputDataFields.groundtruth_boxes])[0]

  return tensor_dict
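
As a usage sketch for the variant above: the function only needs a model preprocess function and a resizer with matching calling conventions. toy_preprocess_fn and toy_resizer_fn below are inventions for illustration (they do no real resizing), not part of the API.

import tensorflow as tf
from object_detection.core import standard_fields as fields

def toy_preprocess_fn(images):
  # Normalize pixels to [-1, 1]; report a static true shape per image.
  true_shape = tf.tile([[480, 640, 3]], [tf.shape(images)[0], 1])
  return (2.0 / 255.0) * images - 1.0, true_shape

def toy_resizer_fn(image, masks=None):
  # Identity "resizer" that mimics the expected return structure.
  if masks is None:
    return [image, tf.shape(image)]
  return [image, masks, tf.shape(image)]

tensor_dict = {
    fields.InputDataFields.image: tf.zeros([480, 640, 3], tf.float32),
    fields.InputDataFields.groundtruth_boxes:
        tf.constant([[0.1, 0.1, 0.5, 0.5]]),
    fields.InputDataFields.groundtruth_classes: tf.constant([1]),
}
out = transform_input_data(
    tensor_dict, toy_preprocess_fn, toy_resizer_fn, num_classes=3)
# out[fields.InputDataFields.groundtruth_classes] is a one-hot [1, 3] tensor.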
Example #19
def transform_input_data(tensor_dict,
                         model_preprocess_fn,
                         image_resizer_fn,
                         num_classes,
                         data_augmentation_fn=None,
                         merge_multiple_boxes=False,
                         retain_original_image=False,
                         use_multiclass_scores=False,
                         use_bfloat16=False):
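    """Applies all input-data transformations (cf. the variant above).

    Unlike the previous variant, this one operates on a copy of tensor_dict,
    optionally substitutes precomputed multiclass scores for one-hot labels,
    and filters out groundtruth with unrecognized class labels.
    """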

    out_tensor_dict = tensor_dict.copy()
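    # Normalize the multiclass-scores field: fall back to one-hot labels
    # when no scores were provided.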
    if fields.InputDataFields.multiclass_scores in out_tensor_dict:
        out_tensor_dict[
            fields.InputDataFields.
            multiclass_scores] = _multiclass_scores_or_one_hot_labels(
                out_tensor_dict[fields.InputDataFields.multiclass_scores],
                out_tensor_dict[fields.InputDataFields.groundtruth_boxes],
                out_tensor_dict[fields.InputDataFields.groundtruth_classes],
                num_classes)

    if fields.InputDataFields.groundtruth_boxes in out_tensor_dict:
        out_tensor_dict = util_ops.filter_groundtruth_with_nan_box_coordinates(
            out_tensor_dict)
        out_tensor_dict = util_ops.filter_unrecognized_classes(out_tensor_dict)

    if retain_original_image:
        out_tensor_dict[fields.InputDataFields.original_image] = tf.cast(
            image_resizer_fn(out_tensor_dict[fields.InputDataFields.image],
                             None)[0], tf.uint8)

    if fields.InputDataFields.image_additional_channels in out_tensor_dict:
        channels = out_tensor_dict[
            fields.InputDataFields.image_additional_channels]
        out_tensor_dict[fields.InputDataFields.image] = tf.concat(
            [out_tensor_dict[fields.InputDataFields.image], channels], axis=2)

    # Apply data augmentation ops.
    if data_augmentation_fn is not None:
        out_tensor_dict = data_augmentation_fn(out_tensor_dict)

    # Apply model preprocessing ops and resize instance masks.
    image = out_tensor_dict[fields.InputDataFields.image]
    preprocessed_resized_image, true_image_shape = model_preprocess_fn(
        tf.expand_dims(tf.cast(image, dtype=tf.float32), axis=0))
    if use_bfloat16:
        preprocessed_resized_image = tf.cast(preprocessed_resized_image,
                                             tf.bfloat16)
    out_tensor_dict[fields.InputDataFields.image] = tf.squeeze(
        preprocessed_resized_image, axis=0)
    out_tensor_dict[fields.InputDataFields.true_image_shape] = tf.squeeze(
        true_image_shape, axis=0)
    if fields.InputDataFields.groundtruth_instance_masks in out_tensor_dict:
        masks = out_tensor_dict[
            fields.InputDataFields.groundtruth_instance_masks]
        _, resized_masks, _ = image_resizer_fn(image, masks)
        if use_bfloat16:
            resized_masks = tf.cast(resized_masks, tf.bfloat16)
        out_tensor_dict[
            fields.InputDataFields.groundtruth_instance_masks] = resized_masks

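    # Transform groundtruth classes to one-hot encodings, or substitute the
    # precomputed multiclass scores when requested.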
    label_offset = 1
    zero_indexed_groundtruth_classes = out_tensor_dict[
        fields.InputDataFields.groundtruth_classes] - label_offset
    if use_multiclass_scores:
        out_tensor_dict[
            fields.InputDataFields.groundtruth_classes] = out_tensor_dict[
                fields.InputDataFields.multiclass_scores]
    else:
        out_tensor_dict[
            fields.InputDataFields.groundtruth_classes] = tf.one_hot(
                zero_indexed_groundtruth_classes, num_classes)
    out_tensor_dict.pop(fields.InputDataFields.multiclass_scores, None)

    if fields.InputDataFields.groundtruth_confidences in out_tensor_dict:
        groundtruth_confidences = out_tensor_dict[
            fields.InputDataFields.groundtruth_confidences]
        # Map the confidences to the one-hot encoding of classes
        out_tensor_dict[fields.InputDataFields.groundtruth_confidences] = (
            tf.reshape(groundtruth_confidences, [-1, 1]) *
            out_tensor_dict[fields.InputDataFields.groundtruth_classes])
    else:
        groundtruth_confidences = tf.ones_like(
            zero_indexed_groundtruth_classes, dtype=tf.float32)
        out_tensor_dict[fields.InputDataFields.groundtruth_confidences] = (
            out_tensor_dict[fields.InputDataFields.groundtruth_classes])

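    # Optionally merge exactly-identical boxes into single boxes with k-hot
    # class labels and merged confidences.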
    if merge_multiple_boxes:
        merged_boxes, merged_classes, merged_confidences, _ = (
            util_ops.merge_boxes_with_multiple_labels(
                out_tensor_dict[fields.InputDataFields.groundtruth_boxes],
                zero_indexed_groundtruth_classes, groundtruth_confidences,
                num_classes))
        merged_classes = tf.cast(merged_classes, tf.float32)
        out_tensor_dict[
            fields.InputDataFields.groundtruth_boxes] = merged_boxes
        out_tensor_dict[
            fields.InputDataFields.groundtruth_classes] = merged_classes
        out_tensor_dict[fields.InputDataFields.groundtruth_confidences] = (
            merged_confidences)
    if fields.InputDataFields.groundtruth_boxes in out_tensor_dict:
        out_tensor_dict[
            fields.InputDataFields.num_groundtruth_boxes] = tf.shape(
                out_tensor_dict[fields.InputDataFields.groundtruth_boxes])[0]

    return out_tensor_dict
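
The confidences branch above broadcasts each box's scalar confidence over its k-hot class row. A small worked sketch in plain TensorFlow (no API assumptions):

import tensorflow as tf

# Two boxes with confidences 0.9 and 0.4; classes one-hot over 3 classes.
confidences = tf.constant([0.9, 0.4])
classes_one_hot = tf.constant([[1., 0., 0.],
                               [0., 0., 1.]])
# The [-1, 1] reshape makes confidences broadcast across the class dimension.
per_class = tf.reshape(confidences, [-1, 1]) * classes_one_hot
# per_class == [[0.9, 0.0, 0.0],
#               [0.0, 0.0, 0.4]]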