def features_encode_decode(features_dict, example, decoders):
  """Runs the full pipeline: encode > write > tmp files > read > decode."""
  # Encode example
  encoded_example = features_dict.encode_example(example)

  # Serialize/deserialize the example
  specs = features_dict.get_serialized_info()
  serializer = example_serializer.ExampleSerializer(specs)
  parser = example_parser.ExampleParser(specs)

  serialized_example = serializer.serialize_example(encoded_example)
  ds = tf.data.Dataset.from_tensors(serialized_example)
  ds = ds.map(parser.parse_example)

  # Decode the example
  decode_fn = functools.partial(
      features_dict.decode_example,
      decoders=decoders,
  )
  ds = ds.map(decode_fn)

  if tf.executing_eagerly():
    out_tensor = next(iter(ds))
  else:
    out_tensor = tf.compat.v1.data.make_one_shot_iterator(ds).get_next()
  out_numpy = dataset_utils.as_numpy(out_tensor)
  return out_tensor, out_numpy
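A minimal usage sketch of the helper above, assuming tensorflow_datasets is importable; the FeaturesDict layout is illustrative, not taken from the snippet:

import tensorflow as tf
import tensorflow_datasets as tfds

# Illustrative spec; any FeaturesDict with matching example data works.
features = tfds.features.FeaturesDict({
    "value": tfds.features.Tensor(shape=(), dtype=tf.int64),
})
# Round-trips the example: encode -> serialize -> parse -> decode.
out_tensor, out_numpy = features_encode_decode(
    features, {"value": 42}, decoders=None)
print(out_numpy["value"])  # -> 42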
def __init__(self, path, example_specs):
  """Initializes Reader.

  Args:
    path (str): path where tfrecords are stored.
    example_specs: spec to build ExampleParser.
  """
  self._path = path
  self._parser = example_parser.ExampleParser(example_specs)
def _raise_error_for_duplicated_keys(example1, example2, example_specs):
  """Log information about the examples and raise an AssertionError."""
  msg = "Two examples share the same hashed key!"
  logging.error(msg)
  parser = example_parser.ExampleParser(example_specs)
  ex1 = parser.parse_example(example1)
  ex2 = parser.parse_example(example2)
  logging.error("1st example: %s", ex1)
  logging.error("2nd example: %s", ex2)
  raise AssertionError(msg + " See logs above to view the examples.")
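For context, a hypothetical call site: a writer that detects two serialized examples hashing to the same key might invoke the helper like this (seen, key, and serialized_example are illustrative names, not part of the snippet above):

# Sketch of a duplicate-key check during shard writing (names hypothetical).
if key in seen:
  _raise_error_for_duplicated_keys(seen[key], serialized_example, example_specs)
seen[key] = serialized_example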
def __init__(self,
             path,
             example_specs,
             file_format=file_adapters.DEFAULT_FILE_FORMAT):
  """Initializes Reader.

  Args:
    path (str): path where tfrecords are stored.
    example_specs: spec to build ExampleParser.
    file_format: file_adapters.FileFormat, format of the record files
      from which the dataset is read / to which it is written.
  """
  self._path = path
  self._parser = example_parser.ExampleParser(example_specs)
  self._file_format = file_format
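A hedged sketch of constructing this Reader, reusing the features spec from the first snippet; the path is a placeholder:

from tensorflow_datasets.core import file_adapters

specs = features.get_serialized_info()
reader = Reader(
    "/path/to/dataset_dir",  # placeholder: directory holding the tfrecords
    specs,
    file_format=file_adapters.FileFormat.TFRECORD,
)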
def _example_parser(self):
  """Returns an ExampleParser built from this feature's serialized specs."""
  from tensorflow_datasets.core import example_parser  # pytype: disable=import-error  # pylint: disable=g-import-not-at-top
  example_specs = self.get_serialized_info()
  return example_parser.ExampleParser(example_specs)
def __init__(self, example_specs):
  super(TFRecordExampleAdapter, self).__init__(example_specs)
  self._serializer = example_serializer.ExampleSerializer(example_specs)
  self._parser = example_parser.ExampleParser(example_specs)
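The serializer/parser pair built above can also be exercised directly; a minimal sketch, assuming the same tfds internals these snippets import and the illustrative features spec from the first example:

from tensorflow_datasets.core import example_parser, example_serializer

specs = features.get_serialized_info()
serializer = example_serializer.ExampleSerializer(specs)
parser = example_parser.ExampleParser(specs)

# serialize_example yields tf.train.Example bytes; parse_example inverts it.
serialized = serializer.serialize_example(features.encode_example({"value": 7}))
parsed = parser.parse_example(serialized)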