def __init__(self, example_specs):
    """Stores the example specs and precomputes the derived feature specs.

    Args:
      example_specs: Nested `dict` describing the examples to write/read. It
        is flattened with `utils.flatten_nest_dict` and then converted to
        feature specs by `_build_feature_specs`.
    """
    self._example_specs = example_specs

    # Flat view of the example specs (nesting removed).
    flat_example_specs = utils.flatten_nest_dict(example_specs)
    self._flat_example_specs = flat_example_specs

    # Feature specs are derived from the flat example specs; both the nested
    # result and its flattened form are kept on the instance.
    nested_feature_specs = _build_feature_specs(flat_example_specs)
    self._nested_feature_specs = nested_feature_specs
    self.flat_feature_specs = utils.flatten_nest_dict(nested_feature_specs)
def __init__(self, example_specs):
    """Initializes the object from the example specs.

    Args:
      example_specs: Nested `dict` of `tfds.features.TensorInfo`, mirroring
        the structure of the data to write/read.
    """
    self._example_specs = example_specs
    # Keep a flattened copy of the specs alongside the nested original.
    flat_example_specs = utils.flatten_nest_dict(example_specs)
    self._flat_example_specs = flat_example_specs
def get_tf_example(self, example) -> tf.train.Example:
    """Builds the `tf.train.Example` proto for a single example.

    Args:
      example: Nested `dict` holding the values to serialize. Its structure
        and the dtype/shape of its values must match the `example_specs`
        given at construction.

    Returns:
      The `tf.train.Example` proto.
    """
    # The encoder works on flat dicts, so remove the nesting first.
    flat_example = utils.flatten_nest_dict(example)
    return _dict_to_tf_example(flat_example, self._flat_example_specs)
def serialize_example(self, example):
    """Serializes a single example.

    Args:
      example: Nested `dict` holding the values to serialize. Its structure
        and the dtype/shape of its values must match the `example_specs`
        given at construction.

    Returns:
      serialize_proto: The serialized `tf.train.Example` proto, as returned
        by the proto's `SerializeToString()`.
    """
    # Flatten the nesting, encode as a proto, then serialize the proto.
    flat_example = utils.flatten_nest_dict(example)
    example_proto = _dict_to_tf_example(flat_example, self._flat_example_specs)
    return example_proto.SerializeToString()
def _dict_to_tf_example(example_dict, tensor_info_dict):
    """Builds tf.train.Example from (string -> int/float/str list) dictionary.

    Args:
      example_dict: `dict`, dict of values, tensor,...
      tensor_info_dict: `dict` of `tfds.features.TensorInfo`. When falsy, every
        value of `example_dict` is encoded without any tensor info.

    Returns:
      example_proto: `tf.train.Example`, the encoded example proto.
    """

    def run_with_reraise(encode_fn, k, value, tensor_info):
        # Any failure is handed to `utils.reraise` together with a message
        # prefix naming the feature being serialized.
        try:
            return encode_fn(value, tensor_info)
        except Exception as err:  # pylint: disable=broad-except
            utils.reraise(
                err,
                f"Error while serializing feature `{k}`: `{tensor_info}`: ",
            )

    if not tensor_info_dict:
        # TODO(epot): The following code is only executed in tests and could be
        # cleaned-up, as TensorInfo is always passed to _item_to_tf_feature.
        features = {}
        for name, value in example_dict.items():
            features[name] = run_with_reraise(
                _item_to_tf_feature, name, value, None)
        return tf.train.Example(features=tf.train.Features(feature=features))

    # Add the RaggedTensor fields for the nested sequences
    # Nested sequences are encoded as {'flat_values':, 'row_lengths':}, so need
    # to flatten the example nested dict again.
    # Ex:
    # Input: {'objects/tokens': [[0, 1, 2], [], [3, 4]]}
    # Output: {
    #     'objects/tokens/flat_values': [0, 1, 2, 3, 4],
    #     'objects/tokens/row_lengths_0': [3, 0, 2],
    # }
    with_ragged_fields = {}
    for name, (value, info) in utils.zip_dict(example_dict, tensor_info_dict):
        with_ragged_fields[name] = run_with_reraise(
            _add_ragged_fields, name, value, info)
    flat_items = utils.flatten_nest_dict(with_ragged_fields)

    # After flattening, each entry is an `(item, tensor_info)` pair.
    features = {}
    for name, (item, info) in flat_items.items():
        features[name] = run_with_reraise(
            _item_to_tf_feature, name, item, info)
    return tf.train.Example(features=tf.train.Features(feature=features))
def parse_example(self, serialized_example):
    """Deserializes a single `tf.train.Example` proto.

    Usage:

    ```
    ds = tf.data.TFRecordDataset(filepath)
    ds = ds.map(file_adapter.parse_example)
    ```

    Args:
      serialized_example: `tf.Tensor`, the `tf.string` tensor containing the
        serialized proto to decode.

    Returns:
      example: A nested `dict` of `tf.Tensor` values. The structure and
        tensors shape/dtype match the `example_specs` provided at
        construction.
    """
    nested_specs = self._build_feature_specs()
    flat_example_specs = self._flat_example_specs

    # Because of RaggedTensor specs, feature_specs can be a 2-level nested dict,
    # so have to wrap `tf.io.parse_single_example` between
    # `flatten_nest_dict`/`pack_as_nest_dict`.
    # {
    #     'video/image': tf.io.FixedLenSequenceFeature(...),
    #     'video/object/bbox': {
    #         'ragged_flat_values': tf.io.FixedLenSequenceFeature(...),
    #         'ragged_row_lengths_0': tf.io.FixedLenSequenceFeature(...),
    #     },
    # }
    flat_specs = utils.flatten_nest_dict(nested_specs)
    parsed = tf.io.parse_single_example(
        serialized=serialized_example,
        features=flat_specs,
    )
    regrouped = utils.pack_as_nest_dict(parsed, nested_specs)

    # Decode each field using the matching entry of the flat example specs.
    decoded = {  # pylint:disable=g-complex-comprehension
        key: _deserialize_single_field(field, tensor_info)
        for key, (field, tensor_info)
        in utils.zip_dict(regrouped, flat_example_specs)
    }

    # Reconstruct all nesting
    return utils.pack_as_nest_dict(decoded, self._example_specs)
def _dict_to_tf_example(
    example_dict: Mapping[str, Any],
    tensor_info_dict: Mapping[str, feature_lib.TensorInfo],
) -> tf.train.Example:
    """Builds tf.train.Example from (string -> int/float/str list) dictionary.

    Only the keys of `tensor_info_dict` are encoded; each one is looked up in
    `example_dict`.

    Args:
      example_dict: `dict`, dict of values, tensor,...
      tensor_info_dict: `dict` of `tfds.features.TensorInfo`

    Returns:
      example_proto: `tf.train.Example`, the encoded example proto.
    """

    def run_with_reraise(encode_fn, k, value, tensor_info):
        # Any failure is handed to `utils.reraise` together with a message
        # prefix naming the feature being serialized.
        try:
            return encode_fn(value, tensor_info)
        except Exception as err:  # pylint: disable=broad-except
            utils.reraise(
                err,
                f"Error while serializing feature `{k}`: `{tensor_info}`: ",
            )

    # Add the RaggedTensor fields for the nested sequences
    # Nested sequences are encoded as {'flat_values':, 'row_lengths':}, so need
    # to flatten the example nested dict again.
    # Ex:
    # Input: {'objects/tokens': [[0, 1, 2], [], [3, 4]]}
    # Output: {
    #     'objects/tokens/flat_values': [0, 1, 2, 3, 4],
    #     'objects/tokens/row_lengths_0': [3, 0, 2],
    # }
    with_ragged_fields = {}
    for k, tensor_info in tensor_info_dict.items():
        # Note: the `example_dict[k]` lookup runs before `run_with_reraise`
        # is entered, so a failed lookup is not routed through
        # `utils.reraise`.
        with_ragged_fields[k] = run_with_reraise(
            _add_ragged_fields, k, example_dict[k], tensor_info)
    flat_items = utils.flatten_nest_dict(with_ragged_fields)

    # After flattening, each entry is an `(item, tensor_info)` pair.
    features = {
        name: run_with_reraise(_item_to_tf_feature, name, item, info)
        for name, (item, info) in flat_items.items()
    }
    return tf.train.Example(features=tf.train.Features(feature=features))
def __init__(self, example_specs):
    """Stores the example specs together with their flattened form.

    Args:
      example_specs: Nested structure of specs describing the examples; it is
        flattened with `utils.flatten_nest_dict`.
    """
    self._example_specs = example_specs
    flat_example_specs = utils.flatten_nest_dict(example_specs)
    self._flat_example_specs = flat_example_specs