Example #1
0
 def __init__(self, example_specs):
     """Stores the example specs and derives the feature specs from them.

     Args:
       example_specs: Nested structure of specs. It is flattened with
         `utils.flatten_nest_dict` and the flat result is handed to
         `_build_feature_specs`.
     """
     self._example_specs = example_specs
     flat_specs = utils.flatten_nest_dict(example_specs)
     self._flat_example_specs = flat_specs
     # Keep the feature specs both as built and in flattened form.
     built_specs = _build_feature_specs(flat_specs)
     self._nested_feature_specs = built_specs
     self.flat_feature_specs = utils.flatten_nest_dict(built_specs)
Example #2
0
    def __init__(self, example_specs):
        """Constructor.

        Args:
          example_specs: Nested `dict` of `tfds.features.TensorInfo`,
            corresponding to the structure of data to write/read.
        """
        self._example_specs = example_specs
        # Cache the flattened view of the specs next to the nested original.
        flattened_specs = utils.flatten_nest_dict(example_specs)
        self._flat_example_specs = flattened_specs
    def get_tf_example(self, example) -> tf.train.Example:
        """Builds the `tf.train.Example` proto for a single example.

        Args:
          example: Nested `dict` containing the input to serialize. The input
            structure and values dtype/shape must match the `example_specs`
            provided at construction.

        Returns:
          The `tf.train.Example` proto
        """
        # Flatten the input so its keys line up with the flattened specs.
        flat_example = utils.flatten_nest_dict(example)
        flat_specs = self._flat_example_specs
        return _dict_to_tf_example(flat_example, flat_specs)
Example #4
0
    def serialize_example(self, example):
        """Serialize the given example.

        Args:
          example: Nested `dict` containing the input to serialize. The input
            structure and values dtype/shape must match the `example_specs`
            provided at construction.

        Returns:
          The result of `SerializeToString()` on the `tf.train.Example` proto
          built from `example`.
        """
        flat_example = utils.flatten_nest_dict(example)
        example_proto = _dict_to_tf_example(
            flat_example,
            self._flat_example_specs,
        )
        return example_proto.SerializeToString()
def _dict_to_tf_example(example_dict, tensor_info_dict):
    """Builds a `tf.train.Example` from a flat dict of values.

    Args:
      example_dict: `dict`, dict of values, tensor,...
      tensor_info_dict: `dict` of `tfds.features.TensorInfo`. When it is
        empty/falsy, every value is converted without any tensor info.

    Returns:
      example_proto: `tf.train.Example`, the encoded example proto.
    """

    def convert_with_context(fn, k, example_data, tensor_info):
        # Run one conversion step; on failure, re-raise with the name of the
        # feature being processed prepended to the error.
        try:
            return fn(example_data, tensor_info)
        except Exception as e:  # pylint: disable=broad-except
            utils.reraise(
                e,
                f"Error while serializing feature `{k}`: `{tensor_info}`: ",
            )

    if not tensor_info_dict:
        # TODO(epot): The following code is only executed in tests and could
        # be cleaned up, as TensorInfo is always passed to
        # _item_to_tf_feature.
        features = {}
        for k, example_data in example_dict.items():
            features[k] = convert_with_context(
                _item_to_tf_feature, k, example_data, None)
        return tf.train.Example(
            features=tf.train.Features(feature=features))

    # Add the RaggedTensor fields for the nested sequences.
    # Nested sequences are encoded as {'flat_values':, 'row_lengths':}, so the
    # resulting dict has to be flattened once more.
    # Ex:
    # Input: {'objects/tokens': [[0, 1, 2], [], [3, 4]]}
    # Output: {
    #     'objects/tokens/flat_values': [0, 1, 2, 3, 4],
    #     'objects/tokens/row_lengths_0': [3, 0, 2],
    # }
    with_ragged_fields = {}
    paired = utils.zip_dict(example_dict, tensor_info_dict)
    for k, (example_data, tensor_info) in paired:
        with_ragged_fields[k] = convert_with_context(
            _add_ragged_fields, k, example_data, tensor_info)
    flat_items = utils.flatten_nest_dict(with_ragged_fields)

    # Each flattened entry is an (item, tensor_info) pair to convert.
    features = {}
    for k, (item, tensor_info) in flat_items.items():
        features[k] = convert_with_context(
            _item_to_tf_feature, k, item, tensor_info)

    return tf.train.Example(features=tf.train.Features(feature=features))
Example #6
0
  def parse_example(self, serialized_example):
    """Deserialize a single `tf.train.Example` proto.

    Usage:
    ```
    ds = tf.data.TFRecordDataset(filepath)
    ds = ds.map(file_adapter.parse_example)
    ```

    Args:
      serialized_example: `tf.Tensor`, the `tf.string` tensor containing the
        serialized proto to decode.

    Returns:
      example: A nested `dict` of `tf.Tensor` values. The structure and tensors
        shape/dtype match the `example_specs` provided at construction.
    """
    nested_specs = self._build_feature_specs()

    # Because of RaggedTensor specs, the feature specs can be a 2-level nested
    # dict, so `tf.io.parse_single_example` is wrapped between
    # `flatten_nest_dict` and `pack_as_nest_dict`.
    # {
    #     'video/image': tf.io.FixedLenSequenceFeature(...),
    #     'video/object/bbox': {
    #         'ragged_flat_values': tf.io.FixedLenSequenceFeature(...),
    #         'ragged_row_lengths_0': tf.io.FixedLenSequenceFeature(...),
    #     },
    # }
    parsed_flat = tf.io.parse_single_example(
        serialized=serialized_example,
        features=utils.flatten_nest_dict(nested_specs),
    )
    parsed = utils.pack_as_nest_dict(parsed_flat, nested_specs)

    # Decode each field together with its matching flat example spec.
    decoded = {}
    for key, (field_data, tensor_info) in utils.zip_dict(
        parsed, self._flat_example_specs):
      decoded[key] = _deserialize_single_field(field_data, tensor_info)

    # Reconstruct all nesting
    return utils.pack_as_nest_dict(decoded, self._example_specs)
def _dict_to_tf_example(
    example_dict: Mapping[str, Any],
    tensor_info_dict: Mapping[str, feature_lib.TensorInfo],
) -> tf.train.Example:
    """Builds a `tf.train.Example` from a flat dict of values.

    Args:
      example_dict: `dict`, dict of values, tensor,... It is indexed with
        every key of `tensor_info_dict`.
      tensor_info_dict: `dict` of `tfds.features.TensorInfo`

    Returns:
      example_proto: `tf.train.Example`, the encoded example proto.
    """

    def convert_with_context(fn, k, example_data, tensor_info):
        # Run one conversion step; on failure, re-raise with the name of the
        # feature being processed prepended to the error.
        try:
            return fn(example_data, tensor_info)
        except Exception as e:  # pylint: disable=broad-except
            utils.reraise(
                e,
                f"Error while serializing feature `{k}`: `{tensor_info}`: ",
            )

    # Add the RaggedTensor fields for the nested sequences.
    # Nested sequences are encoded as {'flat_values':, 'row_lengths':}, so the
    # resulting dict has to be flattened once more.
    # Ex:
    # Input: {'objects/tokens': [[0, 1, 2], [], [3, 4]]}
    # Output: {
    #     'objects/tokens/flat_values': [0, 1, 2, 3, 4],
    #     'objects/tokens/row_lengths_0': [3, 0, 2],
    # }
    with_ragged_fields = {}
    for k, tensor_info in tensor_info_dict.items():
        with_ragged_fields[k] = convert_with_context(
            _add_ragged_fields, k, example_dict[k], tensor_info)
    flat_items = utils.flatten_nest_dict(with_ragged_fields)

    # Each flattened entry is an (item, tensor_info) pair to convert.
    encoded = {}
    for k, (item, tensor_info) in flat_items.items():
        encoded[k] = convert_with_context(
            _item_to_tf_feature, k, item, tensor_info)

    return tf.train.Example(features=tf.train.Features(feature=encoded))
 def __init__(self, example_specs):
     """Keeps the example specs in both nested and flattened form.

     Args:
       example_specs: Nested structure of specs; its flattened view is
         computed with `utils.flatten_nest_dict`.
     """
     self._example_specs = example_specs
     flattened_specs = utils.flatten_nest_dict(example_specs)
     self._flat_example_specs = flattened_specs