Code example #1
  def decode(self, tf_example_string_tensor):
    """Decodes serialized tensorflow example and returns a tensor dictionary.

    Args:
      tf_example_string_tensor: a string tensor holding a serialized tensorflow
        example proto.

    Returns:
      A dictionary of the following tensors.
      fields.InputDataFields.image - 3D uint8 tensor of shape [None, None, 3]
        containing image.
      fields.InputDataFields.original_image_spatial_shape - 1D int32 tensor of
        shape [2] containing shape of the image.
      fields.InputDataFields.source_id - string tensor containing original
        image id.
      fields.InputDataFields.key - string tensor with unique sha256 hash key.
      fields.InputDataFields.filename - string tensor with original dataset
        filename.
      fields.InputDataFields.groundtruth_boxes - 2D float32 tensor of shape
        [None, 4] containing box corners.
      fields.InputDataFields.groundtruth_classes - 1D int64 tensor of shape
        [None] containing classes for the boxes.
      fields.InputDataFields.groundtruth_weights - 1D float32 tensor of
        shape [None] indicating the weights of groundtruth boxes.
      fields.InputDataFields.groundtruth_area - 1D float32 tensor of shape
        [None] containing object mask area in pixels squared.
      fields.InputDataFields.groundtruth_is_crowd - 1D bool tensor of shape
        [None] indicating if the boxes enclose a crowd.

    Optional:
      fields.InputDataFields.groundtruth_image_confidences - 1D float tensor of
        shape [None] indicating if a class is present in the image (1.0) or
        a class is not present in the image (0.0).
      fields.InputDataFields.image_additional_channels - 3D uint8 tensor of
        shape [None, None, num_additional_channels]. 1st dim is height; 2nd dim
        is width; 3rd dim is the number of additional channels.
      fields.InputDataFields.groundtruth_difficult - 1D bool tensor of shape
        [None] indicating if the boxes represent `difficult` instances.
      fields.InputDataFields.groundtruth_group_of - 1D bool tensor of shape
        [None] indicating if the boxes represent `group_of` instances.
      fields.InputDataFields.groundtruth_keypoints - 3D float32 tensor of
        shape [None, num_keypoints, 2] containing keypoints, where the
        coordinates of the keypoints are ordered (y, x).
      fields.InputDataFields.groundtruth_keypoint_visibilities - 2D bool
        tensor of shape [None, num_keypoints] containing keypoint visibilities.
      fields.InputDataFields.groundtruth_instance_masks - 3D float32 tensor of
        shape [None, None, None] containing instance masks.
      fields.InputDataFields.groundtruth_image_classes - 1D int64 tensor of
        shape [None] containing image-level class labels.
      fields.InputDataFields.multiclass_scores - 1D float32 tensor of shape
        [None * num_classes] containing flattened multiclass scores for
        groundtruth boxes.
      fields.InputDataFields.context_features - 1D float32 tensor of shape
        [context_feature_length * num_context_features].
      fields.InputDataFields.context_feature_length - int32 tensor specifying
        the length of each feature in context_features.
    """
    serialized_example = tf.reshape(tf_example_string_tensor, shape=[])
    decoder = slim_example_decoder.TFExampleDecoder(self.keys_to_features,
                                                    self.items_to_handlers)
    keys = decoder.list_items()
    tensors = decoder.decode(serialized_example, items=keys)
    tensor_dict = dict(zip(keys, tensors))
    is_crowd = fields.InputDataFields.groundtruth_is_crowd
    tensor_dict[is_crowd] = tf.cast(tensor_dict[is_crowd], dtype=tf.bool)
    tensor_dict[fields.InputDataFields.image].set_shape([None, None, 3])
    tensor_dict[fields.InputDataFields.original_image_spatial_shape] = tf.shape(
        tensor_dict[fields.InputDataFields.image])[:2]

    if fields.InputDataFields.image_additional_channels in tensor_dict:
      channels = tensor_dict[fields.InputDataFields.image_additional_channels]
      channels = tf.squeeze(channels, axis=3)
      channels = tf.transpose(channels, perm=[1, 2, 0])
      tensor_dict[fields.InputDataFields.image_additional_channels] = channels

    def default_groundtruth_weights():
      return tf.ones(
          [tf.shape(tensor_dict[fields.InputDataFields.groundtruth_boxes])[0]],
          dtype=tf.float32)

    # Use the provided weights if any were present, otherwise default to ones.
    tensor_dict[fields.InputDataFields.groundtruth_weights] = tf.cond(
        tf.greater(
            tf.shape(
                tensor_dict[fields.InputDataFields.groundtruth_weights])[0],
            0), lambda: tensor_dict[fields.InputDataFields.groundtruth_weights],
        default_groundtruth_weights)

    if fields.InputDataFields.groundtruth_keypoints in tensor_dict:
      # Set all keypoints that are not labeled to NaN.
      gt_kpt_fld = fields.InputDataFields.groundtruth_keypoints
      gt_kpt_vis_fld = fields.InputDataFields.groundtruth_keypoint_visibilities
      visibilities_tiled = tf.tile(
          tf.expand_dims(tensor_dict[gt_kpt_vis_fld], -1),
          [1, 1, 2])
      tensor_dict[gt_kpt_fld] = tf.where(
          visibilities_tiled,
          tensor_dict[gt_kpt_fld],
          np.nan * tf.ones_like(tensor_dict[gt_kpt_fld]))

    if self._expand_hierarchy_labels:
      input_fields = fields.InputDataFields
      image_classes, image_confidences = self._expand_image_label_hierarchy(
          tensor_dict[input_fields.groundtruth_image_classes],
          tensor_dict[input_fields.groundtruth_image_confidences])
      tensor_dict[input_fields.groundtruth_image_classes] = image_classes
      tensor_dict[input_fields.groundtruth_image_confidences] = (
          image_confidences)

      box_fields = [
          fields.InputDataFields.groundtruth_group_of,
          fields.InputDataFields.groundtruth_is_crowd,
          fields.InputDataFields.groundtruth_difficult,
          fields.InputDataFields.groundtruth_area,
          fields.InputDataFields.groundtruth_boxes,
          fields.InputDataFields.groundtruth_weights,
      ]

      def expand_field(field_name):
        return self._expansion_box_field_labels(
            tensor_dict[input_fields.groundtruth_classes],
            tensor_dict[field_name])

      # pylint: disable=cell-var-from-loop
      for field in box_fields:
        if field in tensor_dict:
          tensor_dict[field] = tf.cond(
              tf.size(tensor_dict[field]) > 0, lambda: expand_field(field),
              lambda: tensor_dict[field])
      # pylint: enable=cell-var-from-loop

      tensor_dict[input_fields.groundtruth_classes] = (
          self._expansion_box_field_labels(
              tensor_dict[input_fields.groundtruth_classes],
              tensor_dict[input_fields.groundtruth_classes], True))

    if fields.InputDataFields.groundtruth_group_of in tensor_dict:
      group_of = fields.InputDataFields.groundtruth_group_of
      tensor_dict[group_of] = tf.cast(tensor_dict[group_of], dtype=tf.bool)

    if fields.InputDataFields.groundtruth_dp_num_points in tensor_dict:
      tensor_dict[fields.InputDataFields.groundtruth_dp_num_points] = tf.cast(
          tensor_dict[fields.InputDataFields.groundtruth_dp_num_points],
          dtype=tf.int32)
      tensor_dict[fields.InputDataFields.groundtruth_dp_part_ids] = tf.cast(
          tensor_dict[fields.InputDataFields.groundtruth_dp_part_ids],
          dtype=tf.int32)

    return tensor_dict
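For reference, here is a minimal usage sketch of how such a decoder is typically driven from a tf.data pipeline. It assumes the TensorFlow Object Detection API is installed (the decode method above matches TfExampleDecoder in object_detection/data_decoders/tf_example_decoder.py); the TFRecord path is a placeholder.

import tensorflow as tf
from object_detection.core import standard_fields as fields
from object_detection.data_decoders import tf_example_decoder

decoder = tf_example_decoder.TfExampleDecoder()

# Each serialized tf.Example string becomes a dictionary of tensors.
dataset = tf.data.TFRecordDataset(['/path/to/examples.tfrecord'])
dataset = dataset.map(decoder.decode, num_parallel_calls=tf.data.AUTOTUNE)

for tensor_dict in dataset.take(1):
    image = tensor_dict[fields.InputDataFields.image]               # [H, W, 3] uint8
    boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes]   # [N, 4] float32
    print(image.shape, boxes.shape)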
Code example #2
def _parse_single_example(example, options):
    """Parses a single tf.Example proto.

  Args:
    example: An Example proto.
    options: An instance of reader_pb2.Reader.

  Returns:
    A dictionary indexed by tensor name.
  """
    # Initialize `keys_to_features`.
    keys_to_features = {
        TFExampleFields.img_id: tf.io.FixedLenFeature([], tf.string),
        TFExampleFields.annot_id: tf.io.FixedLenFeature([], tf.string),
        TFExampleFields.answer_label: tf.io.FixedLenFeature([], tf.int64),
        TFExampleFields.img_bbox_label: tf.io.VarLenFeature(tf.string),
        TFExampleFields.img_bbox_score: tf.io.VarLenFeature(tf.float32),
        TFExampleFields.img_bbox_feature: tf.io.VarLenFeature(tf.float32),
        TFExampleFields.question: tf.io.VarLenFeature(tf.string),
        TFExampleFields.question_tag: tf.io.VarLenFeature(tf.int64),
    }
    # Box coordinate features live under scoped keys, e.g.
    # `<img_bbox_scope>/<bbox_key>`; os.path.join serves as a '/' joiner here.
    for bbox_key in TFExampleFields.img_bbox_field_keys:
        bbox_field = os.path.join(TFExampleFields.img_bbox_scope, bbox_key)
        keys_to_features[bbox_field] = tf.io.VarLenFeature(tf.float32)
    for i in range(1, 1 + NUM_CHOICES):
        keys_to_features.update({
            TFExampleFields.cls_bert + '_%i' % i:
            tf.io.VarLenFeature(tf.float32),
            TFExampleFields.question_bert + '_%i' % i:
            tf.io.VarLenFeature(tf.float32),
            TFExampleFields.answer_choice + '_%i' % i:
            tf.io.VarLenFeature(tf.string),
            TFExampleFields.answer_choice_tag + '_%i' % i:
            tf.io.VarLenFeature(tf.int64),
            TFExampleFields.answer_choice_bert + '_%i' % i:
            tf.io.VarLenFeature(tf.float32)
        })

    # Initialize `items_to_handlers`.
    items_to_handlers = {
        InputFields.img_id:
        tfexample_decoder.Tensor(tensor_key=TFExampleFields.img_id,
                                 default_value=''),
        InputFields.annot_id:
        tfexample_decoder.Tensor(tensor_key=TFExampleFields.annot_id,
                                 default_value=''),
        InputFields.answer_label:
        tfexample_decoder.Tensor(tensor_key=TFExampleFields.answer_label,
                                 default_value=-1),
        InputFields.object_bboxes:
        tfexample_decoder.BoundingBox(keys=TFExampleFields.img_bbox_field_keys,
                                      prefix=TFExampleFields.img_bbox_scope),
        InputFields.object_labels:
        tfexample_decoder.Tensor(tensor_key=TFExampleFields.img_bbox_label,
                                 default_value=''),
        InputFields.object_scores:
        tfexample_decoder.Tensor(tensor_key=TFExampleFields.img_bbox_score,
                                 default_value=0),
        InputFields.question:
        tfexample_decoder.Tensor(tensor_key=TFExampleFields.question,
                                 default_value=PAD),
        InputFields.question_tag:
        tfexample_decoder.Tensor(tensor_key=TFExampleFields.question_tag,
                                 default_value=-1),
        TFExampleFields.img_bbox_feature:
        tfexample_decoder.Tensor(tensor_key=TFExampleFields.img_bbox_feature,
                                 default_value=0),
    }

    for i in range(1, 1 + NUM_CHOICES):
        tensor_key = TFExampleFields.cls_bert + '_%i' % i
        items_to_handlers[tensor_key] = tfexample_decoder.Tensor(
            tensor_key=tensor_key, default_value=0)
        tensor_key = TFExampleFields.question_bert + '_%i' % i
        items_to_handlers[tensor_key] = tfexample_decoder.Tensor(
            tensor_key=tensor_key, default_value=0)
        tensor_key = TFExampleFields.answer_choice + '_%i' % i
        items_to_handlers[tensor_key] = tfexample_decoder.Tensor(
            tensor_key=tensor_key, default_value=PAD)
        tensor_key = TFExampleFields.answer_choice_tag + '_%i' % i
        items_to_handlers[tensor_key] = tfexample_decoder.Tensor(
            tensor_key=tensor_key, default_value=-1)
        tensor_key = TFExampleFields.answer_choice_bert + '_%i' % i
        items_to_handlers[tensor_key] = tfexample_decoder.Tensor(
            tensor_key=tensor_key, default_value=0)
    if options.decode_jpeg:
        keys_to_features.update({
            TFExampleFields.img_encoded:
            tf.io.FixedLenFeature([], tf.string),
            TFExampleFields.img_format:
            tf.io.FixedLenFeature([], tf.string),
        })
        items_to_handlers.update({
            InputFields.img_data:
            tfexample_decoder.Image(image_key=TFExampleFields.img_encoded,
                                    format_key=TFExampleFields.img_format,
                                    shape=None)
        })

    # Decode example.
    example_decoder = tfexample_decoder.TFExampleDecoder(
        keys_to_features, items_to_handlers)

    output_keys = example_decoder.list_items()
    output_tensors = example_decoder.decode(example)
    # Downcast any int64 outputs to int32.
    output_tensors = [
        x if x.dtype != tf.int64 else tf.cast(x, tf.int32)
        for x in output_tensors
    ]
    decoded_example = dict(zip(output_keys, output_tensors))
    return _update_decoded_example(decoded_example, options)
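A hedged sketch of wiring _parse_single_example into an input pipeline. reader_pb2.Reader is assumed here to expose an input_pattern field (an assumption; adapt to the actual proto), and the padding strategy is illustrative, since the VarLen features decode to variable-length tensors that must be padded before batching.

import functools
import tensorflow as tf

def create_dataset(options, batch_size=32):
    """Builds a tf.data pipeline around _parse_single_example (sketch only)."""
    parse_fn = functools.partial(_parse_single_example, options=options)
    files = tf.data.Dataset.list_files(options.input_pattern)  # assumed field
    dataset = tf.data.TFRecordDataset(files)
    dataset = dataset.map(parse_fn, num_parallel_calls=tf.data.AUTOTUNE)
    # Pad variable-length fields (question tokens, choices) to batch them.
    return dataset.padded_batch(batch_size)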
Code example #3
def _parse_single_example(example, options):
    """Parses a single tf.Example proto.

  Args:
    example: An Example proto.
    options: An instance of reader_pb2.Reader.

  Returns:
    A dictionary indexed by tensor name.
  """
    ###################################
    # Initialize `keys_to_features`.
    ###################################
    keys_to_features = {
        TFExampleFields.annot_id: tf.io.FixedLenFeature([], tf.string),
        TFExampleFields.img_id: tf.io.FixedLenFeature([], tf.string),
        TFExampleFields.img_encoded: tf.io.FixedLenFeature([], tf.string),
        TFExampleFields.img_format: tf.io.FixedLenFeature([], tf.string),
        TFExampleFields.answer_label: tf.io.FixedLenFeature([], tf.int64),
        TFExampleFields.rationale_label: tf.io.FixedLenFeature([], tf.int64),
        TFExampleFields.detection_classes: tf.io.VarLenFeature(tf.string),
        TFExampleFields.detection_scores: tf.io.VarLenFeature(tf.float32),
        TFExampleFields.detection_boxes_ymin: tf.io.VarLenFeature(tf.float32),
        TFExampleFields.detection_boxes_ymax: tf.io.VarLenFeature(tf.float32),
        TFExampleFields.detection_boxes_xmin: tf.io.VarLenFeature(tf.float32),
        TFExampleFields.detection_boxes_xmax: tf.io.VarLenFeature(tf.float32),
        TFExampleFields.question: tf.io.VarLenFeature(tf.string),
        TFExampleFields.question_tag: tf.io.VarLenFeature(tf.int64),
    }

    # Answer and rationale choices.
    for i in range(NUM_CHOICES):
        keys_to_features.update({
            TFExampleFields.answer_choice + '_%i' % i:
            tf.io.VarLenFeature(tf.string),
            TFExampleFields.answer_choice_tag + '_%i' % i:
            tf.io.VarLenFeature(tf.int64),
            TFExampleFields.rationale_choice + '_%i' % i:
            tf.io.VarLenFeature(tf.string),
            TFExampleFields.rationale_choice_tag + '_%i' % i:
            tf.io.VarLenFeature(tf.int64),
        })

    ###################################
    # Initialize `items_to_handlers`.
    ###################################
    items_to_handlers = {
        'annot_id':
        tfexample_decoder.Tensor(tensor_key=TFExampleFields.annot_id,
                                 default_value=''),
        'img_id':
        tfexample_decoder.Tensor(tensor_key=TFExampleFields.img_id,
                                 default_value=''),
        'img_data':
        tfexample_decoder.Image(image_key=TFExampleFields.img_encoded,
                                format_key=TFExampleFields.img_format,
                                shape=None),
        'answer_label':
        tfexample_decoder.Tensor(tensor_key=TFExampleFields.answer_label,
                                 default_value=-1),
        'rationale_label':
        tfexample_decoder.Tensor(tensor_key=TFExampleFields.rationale_label,
                                 default_value=-1),
        'detection_boxes':
        tfexample_decoder.BoundingBox(
            keys=TFExampleFields.detection_boxes_keys,
            prefix=TFExampleFields.detection_boxes_scope),
        'detection_classes':
        tfexample_decoder.Tensor(tensor_key=TFExampleFields.detection_classes,
                                 default_value=PAD),
        'detection_scores':
        tfexample_decoder.Tensor(tensor_key=TFExampleFields.detection_scores,
                                 default_value=0),
        'question':
        tfexample_decoder.Tensor(tensor_key=TFExampleFields.question,
                                 default_value=PAD),
        'question_tag':
        tfexample_decoder.Tensor(tensor_key=TFExampleFields.question_tag,
                                 default_value=-1),
    }

    # Answer and rationale choices. tensor_key reuses the TFExampleFields
    # constants registered in `keys_to_features`, so each handler matches a
    # declared feature.
    for i in range(NUM_CHOICES):
        items_to_handlers.update({
            'answer_choice_%i' % i:
            tfexample_decoder.Tensor(
                tensor_key=TFExampleFields.answer_choice + '_%i' % i,
                default_value=PAD),
            'answer_choice_tag_%i' % i:
            tfexample_decoder.Tensor(
                tensor_key=TFExampleFields.answer_choice_tag + '_%i' % i,
                default_value=-1),
            'rationale_choice_%i' % i:
            tfexample_decoder.Tensor(
                tensor_key=TFExampleFields.rationale_choice + '_%i' % i,
                default_value=PAD),
            'rationale_choice_tag_%i' % i:
            tfexample_decoder.Tensor(
                tensor_key=TFExampleFields.rationale_choice_tag + '_%i' % i,
                default_value=-1),
        })

    # Decode example.
    example_decoder = tfexample_decoder.TFExampleDecoder(
        keys_to_features, items_to_handlers)

    output_keys = example_decoder.list_items()
    output_tensors = example_decoder.decode(example)
    output_tensors = [
        x if x.dtype != tf.int64 else tf.cast(x, tf.int32)
        for x in output_tensors
    ]
    decoded_example = dict(zip(output_keys, output_tensors))
    return _update_decoded_example(decoded_example, options)
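The BoundingBox handler used above merges the four per-coordinate VarLen features into a single [num_boxes, 4] tensor, stacked in the order of the keys it is given (tf.slim's default order is ymin, xmin, ymax, xmax). Below is a standalone sketch of the same merge with hypothetical key names, for readers without tf.slim at hand.

import tensorflow as tf

def merge_box_coordinates(parsed_features):
    # `bbox/ymin` etc. are hypothetical key names; each is a SparseTensor
    # decoded from a VarLenFeature, holding one value per box.
    coords = [tf.sparse.to_dense(parsed_features['bbox/%s' % k])
              for k in ('ymin', 'xmin', 'ymax', 'xmax')]
    return tf.stack(coords, axis=1)  # shape [num_boxes, 4]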