Example #1
def _parse_single_example(example, options):
    """Parses a single tf.Example proto.

  Args:
    example: An Example proto.
    options: An instance of reader_pb2.Reader.

  Returns:
    A dictionary indexed by tensor name.
  """
    # Initialize `keys_to_features`.
    keys_to_features = {
        TFExampleFields.img_id: tf.io.FixedLenFeature([], tf.string),
        TFExampleFields.annot_id: tf.io.FixedLenFeature([], tf.string),
        TFExampleFields.answer_label: tf.io.FixedLenFeature([], tf.int64),
        TFExampleFields.img_bbox_label: tf.io.VarLenFeature(tf.string),
        TFExampleFields.img_bbox_score: tf.io.VarLenFeature(tf.float32),
        TFExampleFields.img_bbox_feature: tf.io.VarLenFeature(tf.float32),
        TFExampleFields.question: tf.io.VarLenFeature(tf.string),
        TFExampleFields.question_tag: tf.io.VarLenFeature(tf.int64),
    }
    # Each box coordinate is stored under its own key, built by joining the
    # bbox scope with the coordinate key (i.e. `<img_bbox_scope>/<bbox_key>`).
    for bbox_key in TFExampleFields.img_bbox_field_keys:
        bbox_field = os.path.join(TFExampleFields.img_bbox_scope, bbox_key)
        keys_to_features[bbox_field] = tf.io.VarLenFeature(tf.float32)
    # Per-choice features are stored with 1-based suffixes ('_1', '_2', ...).
    for i in range(1, 1 + NUM_CHOICES):
        keys_to_features.update({
            TFExampleFields.cls_bert + '_%i' % i:
            tf.io.VarLenFeature(tf.float32),
            TFExampleFields.question_bert + '_%i' % i:
            tf.io.VarLenFeature(tf.float32),
            TFExampleFields.answer_choice + '_%i' % i:
            tf.io.VarLenFeature(tf.string),
            TFExampleFields.answer_choice_tag + '_%i' % i:
            tf.io.VarLenFeature(tf.int64),
            TFExampleFields.answer_choice_bert + '_%i' % i:
            tf.io.VarLenFeature(tf.float32)
        })

    # Initialize `items_to_handlers`.
    items_to_handlers = {
        InputFields.img_id:
        tfexample_decoder.Tensor(tensor_key=TFExampleFields.img_id,
                                 default_value=''),
        InputFields.annot_id:
        tfexample_decoder.Tensor(tensor_key=TFExampleFields.annot_id,
                                 default_value=''),
        InputFields.answer_label:
        tfexample_decoder.Tensor(tensor_key=TFExampleFields.answer_label,
                                 default_value=-1),
        InputFields.object_bboxes:
        tfexample_decoder.BoundingBox(keys=TFExampleFields.img_bbox_field_keys,
                                      prefix=TFExampleFields.img_bbox_scope),
        InputFields.object_labels:
        tfexample_decoder.Tensor(tensor_key=TFExampleFields.img_bbox_label,
                                 default_value=''),
        InputFields.object_scores:
        tfexample_decoder.Tensor(tensor_key=TFExampleFields.img_bbox_score,
                                 default_value=0),
        InputFields.question:
        tfexample_decoder.Tensor(tensor_key=TFExampleFields.question,
                                 default_value=PAD),
        InputFields.question_tag:
        tfexample_decoder.Tensor(tensor_key=TFExampleFields.question_tag,
                                 default_value=-1),
        TFExampleFields.img_bbox_feature:
        tfexample_decoder.Tensor(tensor_key=TFExampleFields.img_bbox_feature,
                                 default_value=0),
    }

    for i in range(1, 1 + NUM_CHOICES):
        tensor_key = TFExampleFields.cls_bert + '_%i' % i
        items_to_handlers[tensor_key] = tfexample_decoder.Tensor(
            tensor_key=tensor_key, default_value=0)
        tensor_key = TFExampleFields.question_bert + '_%i' % i
        items_to_handlers[tensor_key] = tfexample_decoder.Tensor(
            tensor_key=tensor_key, default_value=0)
        tensor_key = TFExampleFields.answer_choice + '_%i' % i
        items_to_handlers[tensor_key] = tfexample_decoder.Tensor(
            tensor_key=tensor_key, default_value=PAD)
        tensor_key = TFExampleFields.answer_choice_tag + '_%i' % i
        items_to_handlers[tensor_key] = tfexample_decoder.Tensor(
            tensor_key=tensor_key, default_value=-1)
        tensor_key = TFExampleFields.answer_choice_bert + '_%i' % i
        items_to_handlers[tensor_key] = tfexample_decoder.Tensor(
            tensor_key=tensor_key, default_value=0)
    if options.decode_jpeg:
        keys_to_features.update({
            TFExampleFields.img_encoded:
            tf.io.FixedLenFeature([], tf.string),
            TFExampleFields.img_format:
            tf.io.FixedLenFeature([], tf.string),
        })
        items_to_handlers.update({
            InputFields.img_data:
            tfexample_decoder.Image(image_key=TFExampleFields.img_encoded,
                                    format_key=TFExampleFields.img_format,
                                    shape=None)
        })

    # Decode example.
    example_decoder = tfexample_decoder.TFExampleDecoder(
        keys_to_features, items_to_handlers)

    output_keys = example_decoder.list_items()
    output_tensors = example_decoder.decode(example)
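    # tf.Example stores integers as int64; cast to the int32 used downstream.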
    output_tensors = [
        x if x.dtype != tf.int64 else tf.cast(x, tf.int32)
        for x in output_tensors
    ]
    decoded_example = dict(zip(output_keys, output_tensors))
    return _update_decoded_example(decoded_example, options)
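
A minimal sketch of how a parser like this is typically wired into a tf.data
pipeline, assuming a TFRecord file of serialized tf.Example protos and an
already-constructed `reader_pb2.Reader` options message (`create_dataset`, the
file path, and the batch size below are hypothetical):

import tensorflow as tf

def create_dataset(tfrecord_path, options, batch_size=32):
    """Returns a dataset of decoded-example dictionaries."""
    dataset = tf.data.TFRecordDataset([tfrecord_path])
    # Decode each serialized proto into a dict of named tensors.
    dataset = dataset.map(
        lambda example: _parse_single_example(example, options),
        num_parallel_calls=tf.data.experimental.AUTOTUNE)
    # Variable-length fields (question tokens, choices) must be padded
    # before batching.
    dataset = dataset.padded_batch(batch_size)
    return dataset.prefetch(tf.data.experimental.AUTOTUNE)
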
Example #2
  def __init__(self,
               load_instance_masks=False,
               instance_mask_type=input_reader_pb2.NUMERICAL_MASKS,
               label_map_proto_file=None,
               use_display_name=False,
               dct_method='',
               num_keypoints=0,
               num_additional_channels=0,
               load_multiclass_scores=False,
               load_context_features=False,
               expand_hierarchy_labels=False,
               load_dense_pose=False):
    """Constructor sets keys_to_features and items_to_handlers.

    Args:
      load_instance_masks: whether or not to load and handle instance masks.
      instance_mask_type: type of instance masks. Options are provided in
        input_reader.proto. This is only used if `load_instance_masks` is True.
      label_map_proto_file: a file path to a
        object_detection.protos.StringIntLabelMap proto. If provided, then the
        mapped IDs of 'image/object/class/text' will take precedence over the
        existing 'image/object/class/label' ID.  Also, if provided, it is
        assumed that 'image/object/class/text' will be in the data.
      use_display_name: whether or not to use the `display_name` for label
        mapping (instead of `name`).  Only used if label_map_proto_file is
        provided.
      dct_method: An optional string; defaults to the empty string. It only
        takes effect when the image format is jpeg, and specifies a hint about
        the algorithm to use for jpeg decompression. Currently valid values
        are ['INTEGER_FAST', 'INTEGER_ACCURATE']. The hint may be ignored, for
        example, if the jpeg library does not have that specific option.
      num_keypoints: the number of keypoints per object.
      num_additional_channels: how many additional channels to use.
      load_multiclass_scores: Whether to load multiclass scores associated with
        boxes.
      load_context_features: Whether to load information from context_features,
        to provide additional context to a detection model for training and/or
        inference.
      expand_hierarchy_labels: Whether to expand the object and image labels
        using the hierarchy provided in the label_map_proto_file. For positive
        classes, the labels are expanded to ancestors. For negative classes,
        the labels are expanded to descendants.
      load_dense_pose: Whether to load DensePose annotations.

    Raises:
      ValueError: If `instance_mask_type` option is not one of
        input_reader_pb2.DEFAULT, input_reader_pb2.NUMERICAL_MASKS, or
        input_reader_pb2.PNG_MASKS.
      ValueError: If `expand_hierarchy_labels` is True, but the
        `label_map_proto_file` is not provided.
    """
    # TODO(rathodv): delete unused `use_display_name` argument once we change
    # other decoders to handle label maps similarly.
    del use_display_name
    self.keys_to_features = {
        'image/encoded':
            tf.FixedLenFeature((), tf.string, default_value=''),
        'image/format':
            tf.FixedLenFeature((), tf.string, default_value='jpeg'),
        'image/filename':
            tf.FixedLenFeature((), tf.string, default_value=''),
        'image/key/sha256':
            tf.FixedLenFeature((), tf.string, default_value=''),
        'image/source_id':
            tf.FixedLenFeature((), tf.string, default_value=''),
        'image/height':
            tf.FixedLenFeature((), tf.int64, default_value=1),
        'image/width':
            tf.FixedLenFeature((), tf.int64, default_value=1),
        # Image-level labels.
        'image/class/text':
            tf.VarLenFeature(tf.string),
        'image/class/label':
            tf.VarLenFeature(tf.int64),
        'image/class/confidence':
            tf.VarLenFeature(tf.float32),
        # Object boxes and classes.
        'image/object/bbox/xmin':
            tf.VarLenFeature(tf.float32),
        'image/object/bbox/xmax':
            tf.VarLenFeature(tf.float32),
        'image/object/bbox/ymin':
            tf.VarLenFeature(tf.float32),
        'image/object/bbox/ymax':
            tf.VarLenFeature(tf.float32),
        'image/object/class/label':
            tf.VarLenFeature(tf.int64),
        'image/object/class/text':
            tf.VarLenFeature(tf.string),
        'image/object/area':
            tf.VarLenFeature(tf.float32),
        'image/object/is_crowd':
            tf.VarLenFeature(tf.int64),
        'image/object/difficult':
            tf.VarLenFeature(tf.int64),
        'image/object/group_of':
            tf.VarLenFeature(tf.int64),
        'image/object/weight':
            tf.VarLenFeature(tf.float32),
    }
    # We are checking `dct_method` instead of passing it directly in order to
    # ensure TF version 1.6 compatibility.
    if dct_method:
      image = slim_example_decoder.Image(
          image_key='image/encoded',
          format_key='image/format',
          channels=3,
          dct_method=dct_method)
      additional_channel_image = slim_example_decoder.Image(
          image_key='image/additional_channels/encoded',
          format_key='image/format',
          channels=1,
          repeated=True,
          dct_method=dct_method)
    else:
      image = slim_example_decoder.Image(
          image_key='image/encoded', format_key='image/format', channels=3)
      additional_channel_image = slim_example_decoder.Image(
          image_key='image/additional_channels/encoded',
          format_key='image/format',
          channels=1,
          repeated=True)
    self.items_to_handlers = {
        fields.InputDataFields.image:
            image,
        fields.InputDataFields.source_id: (
            slim_example_decoder.Tensor('image/source_id')),
        fields.InputDataFields.key: (
            slim_example_decoder.Tensor('image/key/sha256')),
        fields.InputDataFields.filename: (
            slim_example_decoder.Tensor('image/filename')),
        # Image-level labels.
        fields.InputDataFields.groundtruth_image_confidences: (
            slim_example_decoder.Tensor('image/class/confidence')),
        # Object boxes and classes.
        fields.InputDataFields.groundtruth_boxes: (
            slim_example_decoder.BoundingBox(['ymin', 'xmin', 'ymax', 'xmax'],
                                             'image/object/bbox/')),
        fields.InputDataFields.groundtruth_area:
            slim_example_decoder.Tensor('image/object/area'),
        fields.InputDataFields.groundtruth_is_crowd: (
            slim_example_decoder.Tensor('image/object/is_crowd')),
        fields.InputDataFields.groundtruth_difficult: (
            slim_example_decoder.Tensor('image/object/difficult')),
        fields.InputDataFields.groundtruth_group_of: (
            slim_example_decoder.Tensor('image/object/group_of')),
        fields.InputDataFields.groundtruth_weights: (
            slim_example_decoder.Tensor('image/object/weight')),
    }
    if load_multiclass_scores:
      self.keys_to_features[
          'image/object/class/multiclass_scores'] = tf.VarLenFeature(tf.float32)
      self.items_to_handlers[fields.InputDataFields.multiclass_scores] = (
          slim_example_decoder.Tensor('image/object/class/multiclass_scores'))

    if load_context_features:
      self.keys_to_features[
          'image/context_features'] = tf.VarLenFeature(tf.float32)
      self.items_to_handlers[fields.InputDataFields.context_features] = (
          slim_example_decoder.ItemHandlerCallback(
              ['image/context_features', 'image/context_feature_length'],
              self._reshape_context_features))

      self.keys_to_features[
          'image/context_feature_length'] = tf.FixedLenFeature((), tf.int64)
      self.items_to_handlers[fields.InputDataFields.context_feature_length] = (
          slim_example_decoder.Tensor('image/context_feature_length'))

    if num_additional_channels > 0:
      self.keys_to_features[
          'image/additional_channels/encoded'] = tf.FixedLenFeature(
              (num_additional_channels,), tf.string)
      self.items_to_handlers[
          fields.InputDataFields.
          image_additional_channels] = additional_channel_image
    self._num_keypoints = num_keypoints
    if num_keypoints > 0:
      self.keys_to_features['image/object/keypoint/x'] = (
          tf.VarLenFeature(tf.float32))
      self.keys_to_features['image/object/keypoint/y'] = (
          tf.VarLenFeature(tf.float32))
      self.keys_to_features['image/object/keypoint/visibility'] = (
          tf.VarLenFeature(tf.int64))
      self.items_to_handlers[fields.InputDataFields.groundtruth_keypoints] = (
          slim_example_decoder.ItemHandlerCallback(
              ['image/object/keypoint/y', 'image/object/keypoint/x'],
              self._reshape_keypoints))
      kpt_vis_field = fields.InputDataFields.groundtruth_keypoint_visibilities
      self.items_to_handlers[kpt_vis_field] = (
          slim_example_decoder.ItemHandlerCallback(
              ['image/object/keypoint/x', 'image/object/keypoint/visibility'],
              self._reshape_keypoint_visibilities))
    if load_instance_masks:
      if instance_mask_type in (input_reader_pb2.DEFAULT,
                                input_reader_pb2.NUMERICAL_MASKS):
        self.keys_to_features['image/object/mask'] = (
            tf.VarLenFeature(tf.float32))
        self.items_to_handlers[
            fields.InputDataFields.groundtruth_instance_masks] = (
                slim_example_decoder.ItemHandlerCallback(
                    ['image/object/mask', 'image/height', 'image/width'],
                    self._reshape_instance_masks))
      elif instance_mask_type == input_reader_pb2.PNG_MASKS:
        self.keys_to_features['image/object/mask'] = tf.VarLenFeature(tf.string)
        self.items_to_handlers[
            fields.InputDataFields.groundtruth_instance_masks] = (
                slim_example_decoder.ItemHandlerCallback(
                    ['image/object/mask', 'image/height', 'image/width'],
                    self._decode_png_instance_masks))
      else:
        raise ValueError('Did not recognize the `instance_mask_type` option.')
    if load_dense_pose:
      self.keys_to_features['image/object/densepose/num'] = (
          tf.VarLenFeature(tf.int64))
      self.keys_to_features['image/object/densepose/part_index'] = (
          tf.VarLenFeature(tf.int64))
      self.keys_to_features['image/object/densepose/x'] = (
          tf.VarLenFeature(tf.float32))
      self.keys_to_features['image/object/densepose/y'] = (
          tf.VarLenFeature(tf.float32))
      self.keys_to_features['image/object/densepose/u'] = (
          tf.VarLenFeature(tf.float32))
      self.keys_to_features['image/object/densepose/v'] = (
          tf.VarLenFeature(tf.float32))
      self.items_to_handlers[
          fields.InputDataFields.groundtruth_dp_num_points] = (
              slim_example_decoder.Tensor('image/object/densepose/num'))
      self.items_to_handlers[fields.InputDataFields.groundtruth_dp_part_ids] = (
          slim_example_decoder.ItemHandlerCallback(
              ['image/object/densepose/part_index',
               'image/object/densepose/num'], self._dense_pose_part_indices))
      self.items_to_handlers[
          fields.InputDataFields.groundtruth_dp_surface_coords] = (
              slim_example_decoder.ItemHandlerCallback(
                  ['image/object/densepose/x', 'image/object/densepose/y',
                   'image/object/densepose/u', 'image/object/densepose/v',
                   'image/object/densepose/num'],
                  self._dense_pose_surface_coordinates))

    if label_map_proto_file:
      # If the label_map_proto is provided, try to use it in conjunction with
      # the class text, and fall back to a materialized ID.
      label_handler = slim_example_decoder.BackupHandler(
          _ClassTensorHandler(
              'image/object/class/text', label_map_proto_file,
              default_value=''),
          slim_example_decoder.Tensor('image/object/class/label'))
      image_label_handler = slim_example_decoder.BackupHandler(
          _ClassTensorHandler(
              fields.TfExampleFields.image_class_text,
              label_map_proto_file,
              default_value=''),
          slim_example_decoder.Tensor(fields.TfExampleFields.image_class_label))
    else:
      label_handler = slim_example_decoder.Tensor('image/object/class/label')
      image_label_handler = slim_example_decoder.Tensor(
          fields.TfExampleFields.image_class_label)
    self.items_to_handlers[
        fields.InputDataFields.groundtruth_classes] = label_handler
    self.items_to_handlers[
        fields.InputDataFields.groundtruth_image_classes] = image_label_handler

    self._expand_hierarchy_labels = expand_hierarchy_labels
    self._ancestors_lut = None
    self._descendants_lut = None
    if expand_hierarchy_labels:
      if label_map_proto_file:
        ancestors_lut, descendants_lut = (
            label_map_util.get_label_map_hierarchy_lut(label_map_proto_file,
                                                       True))
        self._ancestors_lut = tf.constant(ancestors_lut, dtype=tf.int64)
        self._descendants_lut = tf.constant(descendants_lut, dtype=tf.int64)
      else:
        raise ValueError('In order to expand labels, the label_map_proto_file '
                         'has to be provided.')
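
A hedged sketch of how a decoder built with this constructor is typically
used, assuming the enclosing class is the TF Object Detection API's
TfExampleDecoder, whose decode() takes a scalar string tensor holding one
serialized tf.Example; 'label_map.pbtxt' is a hypothetical path:

decoder = TfExampleDecoder(
    load_instance_masks=True,
    instance_mask_type=input_reader_pb2.PNG_MASKS,
    label_map_proto_file='label_map.pbtxt',  # hypothetical path
    num_keypoints=17)

# `serialized` is a scalar string tensor with one serialized tf.Example.
tensors = decoder.decode(serialized)
boxes = tensors[fields.InputDataFields.groundtruth_boxes]      # [num_boxes, 4]
classes = tensors[fields.InputDataFields.groundtruth_classes]  # [num_boxes]
masks = tensors[fields.InputDataFields.groundtruth_instance_masks]
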
Example #3
def _parse_single_example(example, options):
    """Parses a single tf.Example proto.

  Args:
    example: An Example proto.
    options: An instance of reader_pb2.Reader.

  Returns:
    A dictionary indexed by tensor name.
  """
    ###################################
    # Initialize `keys_to_features`.
    ###################################
    keys_to_features = {
        TFExampleFields.annot_id: tf.io.FixedLenFeature([], tf.string),
        TFExampleFields.img_id: tf.io.FixedLenFeature([], tf.string),
        TFExampleFields.img_encoded: tf.io.FixedLenFeature([], tf.string),
        TFExampleFields.img_format: tf.io.FixedLenFeature([], tf.string),
        TFExampleFields.answer_label: tf.io.FixedLenFeature([], tf.int64),
        TFExampleFields.rationale_label: tf.io.FixedLenFeature([], tf.int64),
        TFExampleFields.detection_classes: tf.io.VarLenFeature(tf.string),
        TFExampleFields.detection_scores: tf.io.VarLenFeature(tf.float32),
        TFExampleFields.detection_boxes_ymin: tf.io.VarLenFeature(tf.float32),
        TFExampleFields.detection_boxes_ymax: tf.io.VarLenFeature(tf.float32),
        TFExampleFields.detection_boxes_xmin: tf.io.VarLenFeature(tf.float32),
        TFExampleFields.detection_boxes_xmax: tf.io.VarLenFeature(tf.float32),
        TFExampleFields.question: tf.io.VarLenFeature(tf.string),
        TFExampleFields.question_tag: tf.io.VarLenFeature(tf.int64),
    }

    # Answer and rationale choices.
    for i in range(NUM_CHOICES):
        keys_to_features.update({
            TFExampleFields.answer_choice + '_%i' % i:
            tf.io.VarLenFeature(tf.string),
            TFExampleFields.answer_choice_tag + '_%i' % i:
            tf.io.VarLenFeature(tf.int64),
            TFExampleFields.rationale_choice + '_%i' % i:
            tf.io.VarLenFeature(tf.string),
            TFExampleFields.rationale_choice_tag + '_%i' % i:
            tf.io.VarLenFeature(tf.int64),
        })

    ###################################
    # Initialize `items_to_handlers`.
    ###################################
    items_to_handlers = {
        'annot_id':
        tfexample_decoder.Tensor(tensor_key=TFExampleFields.annot_id,
                                 default_value=''),
        'img_id':
        tfexample_decoder.Tensor(tensor_key=TFExampleFields.img_id,
                                 default_value=''),
        'img_data':
        tfexample_decoder.Image(image_key=TFExampleFields.img_encoded,
                                format_key=TFExampleFields.img_format,
                                shape=None),
        'answer_label':
        tfexample_decoder.Tensor(tensor_key=TFExampleFields.answer_label,
                                 default_value=-1),
        'rationale_label':
        tfexample_decoder.Tensor(tensor_key=TFExampleFields.rationale_label,
                                 default_value=-1),
        'detection_boxes':
        tfexample_decoder.BoundingBox(
            keys=TFExampleFields.detection_boxes_keys,
            prefix=TFExampleFields.detection_boxes_scope),
        'detection_classes':
        tfexample_decoder.Tensor(tensor_key=TFExampleFields.detection_classes,
                                 default_value=PAD),
        'detection_scores':
        tfexample_decoder.Tensor(tensor_key=TFExampleFields.detection_scores,
                                 default_value=0),
        'question':
        tfexample_decoder.Tensor(tensor_key=TFExampleFields.question,
                                 default_value=PAD),
        'question_tag':
        tfexample_decoder.Tensor(tensor_key=TFExampleFields.question_tag,
                                 default_value=-1),
    }

    # Answer and rationale choices. The literal keys below assume that
    # TFExampleFields.answer_choice == 'answer_choice' (and likewise for the
    # tag and rationale fields), matching the features registered above.
    for i in range(NUM_CHOICES):
        items_to_handlers['answer_choice_%i' % i] = tfexample_decoder.Tensor(
            tensor_key='answer_choice_%i' % i, default_value=PAD)
        items_to_handlers['answer_choice_tag_%i' %
                          i] = tfexample_decoder.Tensor(
                              tensor_key='answer_choice_tag_%i' % i,
                              default_value=-1)

        items_to_handlers['rationale_choice_%i' %
                          i] = tfexample_decoder.Tensor(
                              tensor_key='rationale_choice_%i' % i,
                              default_value=PAD)
        items_to_handlers['rationale_choice_tag_%i' %
                          i] = tfexample_decoder.Tensor(
                              tensor_key='rationale_choice_tag_%i' % i,
                              default_value=-1)

    # Decode example.
    example_decoder = tfexample_decoder.TFExampleDecoder(
        keys_to_features, items_to_handlers)

    output_keys = example_decoder.list_items()
    output_tensors = example_decoder.decode(example)
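    # As in example #1, cast the int64 features stored in the tf.Example
    # down to int32.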
    output_tensors = [
        x if x.dtype != tf.int64 else tf.cast(x, tf.int32)
        for x in output_tensors
    ]
    decoded_example = dict(zip(output_keys, output_tensors))
    return _update_decoded_example(decoded_example, options)
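
For quick inspection, the parser can also be applied to a single record
outside a full pipeline; a sketch assuming eager execution, a hypothetical
TFRecord path, and a pre-built `reader_pb2.Reader` message named `options`:

import tensorflow as tf

raw_dataset = tf.data.TFRecordDataset('vcr_val.tfrecord')  # hypothetical file
for serialized in raw_dataset.take(1):
    decoded = _parse_single_example(serialized, options)
    for name, tensor in decoded.items():
        print(name, tensor.dtype, tensor.shape)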