def test_default_value(self):
    """Old serialized examples parse with defaults for newly added fields."""
    # Serialize with an adapter that only knows about the image field
    # (simulates data written by a previous dataset version).
    old_adapter = file_format_adapter.TFRecordExampleAdapter({
        "image/encoded": features.TensorInfo(shape=(), dtype=tf.string),
    })
    serialized = old_adapter._serializer.serialize_example({
        "image/encoded": "hello world",
    })
    # Parse with an adapter whose spec gained two fields, each with a default.
    new_adapter = file_format_adapter.TFRecordExampleAdapter({
        "image/encoded": features.TensorInfo(
            shape=(), dtype=tf.string, default_value=b"some string"),
        "image/height": features.TensorInfo(
            shape=(), dtype=tf.int64, default_value=-1),
        "image/width": features.TensorInfo(
            shape=(), dtype=tf.int64, default_value=-1),
    })
    ds = tf.data.Dataset.from_tensors(serialized)
    ds = ds.map(new_adapter._parser.parse_example)
    decoded = next(iter(dataset_utils.as_numpy(ds)))
    # Fields absent from the old record come back as their declared defaults.
    self.assertEqual(
        decoded,
        {
            "image/encoded": b"hello world",
            "image/height": -1,
            "image/width": -1,
        })
def features_encode_decode(features_dict, sample, as_tensor=False):
    """Runs the full pipeline: encode > write > tmp files > read > decode.

    Args:
      features_dict: feature connector used to encode/decode the sample.
      sample: the value to round-trip through a tfrecord file.
      as_tensor: if True, return the raw tensor(s); otherwise evaluate the
        first element to numpy.
    """
    # Serialize the sample with the feature connector.
    encoded = features_dict.encode_sample(sample)

    with tmp_dir() as tmp_dir_:
        record_path = os.path.join(tmp_dir_, "tmp.tfrecord")

        # Round-trip through a temporary tfrecord file.
        adapter = file_format_adapter.TFRecordExampleAdapter(
            features_dict.get_serialized_features())
        adapter.write_from_generator(
            generator_fn=lambda: [encoded],
            output_files=[record_path],
        )
        ds = adapter.dataset_from_filename(record_path)

        # Map the decoding back over the read dataset.
        ds = ds.map(features_dict.decode_sample)

        if as_tensor:
            if tf.executing_eagerly():
                return next(iter(ds))
            return ds.make_one_shot_iterator().get_next()
        # Evaluate and return the first element as numpy.
        for element in dataset_utils.iterate_over_dataset(ds):
            return element
def features_encode_decode(features_dict, example, decoders):
    """Runs the full pipeline: encode > write > tmp files > read > decode.

    Args:
      features_dict: feature connector used to encode/decode the example.
      example: the value to round-trip through a tfrecord file.
      decoders: decoder overrides forwarded to `decode_example`.

    Returns:
      A `(tensor, numpy)` pair for the first decoded example.
    """
    # Serialize the example with the feature connector.
    encoded = features_dict.encode_example(example)

    with tmp_dir() as tmp_dir_:
        record_path = os.path.join(tmp_dir_, "tmp.tfrecord")

        # Round-trip through a temporary tfrecord file.
        adapter = file_format_adapter.TFRecordExampleAdapter(
            features_dict.get_serialized_info())
        adapter.write_from_generator(
            generator=[encoded],
            output_files=[record_path],
        )
        ds = adapter.dataset_from_filename(record_path)

        # Decode with the requested (possibly partial) decoders.
        ds = ds.map(
            functools.partial(features_dict.decode_example, decoders=decoders))

        if tf.executing_eagerly():
            tensor_out = next(iter(ds))
        else:
            tensor_out = tf.compat.v1.data.make_one_shot_iterator(
                ds).get_next()
        # Return both the symbolic tensor and its evaluated numpy counterpart.
        return tensor_out, dataset_utils.as_numpy(tensor_out)
def assertWrongSpecs(self, specs, raise_cls, raise_msg):
    """Asserts that building feature specs from `specs` raises `raise_cls`."""
    adapter = file_format_adapter.TFRecordExampleAdapter({"wrong_field": specs})
    # The parser must reject the unsupported spec when building its
    # low-level feature specs.
    with self.assertRaisesWithPredicateMatch(raise_cls, raise_msg):
        adapter._parser._build_feature_specs()
def features_encode_decode(features_dict, example, as_tensor=False):
    """Runs the full pipeline: encode > write > tmp files > read > decode.

    Args:
      features_dict: feature connector used to encode/decode the example.
      example: the value to round-trip through a tfrecord file.
      as_tensor: if True, return the raw tensor(s); otherwise evaluate the
        first element to numpy.
    """
    # Serialize the example with the feature connector.
    encoded = features_dict.encode_example(example)

    with tmp_dir() as tmp_dir_:
        record_path = os.path.join(tmp_dir_, "tmp.tfrecord")

        # Round-trip through a temporary tfrecord file.
        adapter = file_format_adapter.TFRecordExampleAdapter(
            features_dict.get_serialized_info())
        adapter.write_from_generator(
            generator=[encoded],
            output_files=[record_path],
        )
        ds = adapter.dataset_from_filename(record_path)

        # Map the decoding back over the read dataset.
        ds = ds.map(features_dict.decode_example)

        if as_tensor:
            if tf.executing_eagerly():
                return next(iter(ds))
            return tf.compat.v1.data.make_one_shot_iterator(ds).get_next()
        # Evaluate and return the first element as numpy.
        for element in dataset_utils.as_numpy(ds):
            return element
def _encode_decode(specs, sample):
    """Runs the full pipeline: encode > write > tmp files > read > decode."""
    # Serialize the sample with the feature connector.
    encoded = specs.encode_sample(sample)

    # Bump the per-process counter (kept for compatibility; the unique
    # directory from mkdtemp() below is what actually isolates the files).
    global _encode_count
    _encode_count += 1
    record_path = os.path.join(tempfile.mkdtemp(), 'tmp.tfrecord')

    # Round-trip through the tfrecord file.
    adapter = file_format_adapter.TFRecordExampleAdapter(specs.get_specs())
    adapter.write_from_generator(
        generator_fn=lambda: [encoded],
        output_files=[record_path],
    )
    ds = adapter.dataset_from_filename(record_path)
    ds = ds.map(specs.decode_sample)

    # Evaluate and return the first decoded sample.
    if tf.executing_eagerly():
        return next(iter(ds))
    with tf.Graph().as_default():
        item = ds.make_one_shot_iterator().get_next()
        # CPU-only session keeps the test independent of GPU availability.
        with tf.Session(
                config=tf.ConfigProto(device_count={'GPU': 0})) as sess:
            return sess.run(item)
def _file_format_adapter(self):
    """Returns the tfrecord adapter for the encoded-image example spec."""
    # Scalar features: the encoded image bytes plus both label granularities.
    spec = {
        "input/encoded": tf.FixedLenFeature((), tf.string),
        "fine_label": tf.FixedLenFeature((), tf.int64),
        "coarse_label": tf.FixedLenFeature((), tf.int64),
    }
    return file_format_adapter.TFRecordExampleAdapter(spec)
def _file_format_adapter(self):
    """Returns the tfrecord adapter for the x/y/z example spec."""
    # Scalar features: two int64 values and one string value.
    spec = {
        "x": tf.FixedLenFeature((), tf.int64),
        "y": tf.FixedLenFeature((), tf.int64),
        "z": tf.FixedLenFeature((), tf.string),
    }
    return file_format_adapter.TFRecordExampleAdapter(spec)
def assertFeature(self, specs, serialized_info, tests):
    """Test the TFRecordExampleAdapter encoding.

    Checks that `specs` build the expected low-level feature specs, then
    runs every item of `tests` through serialize/parse, asserting either
    the expected error or the expected proto, dtypes, shapes and values.
    """
    adapter = file_format_adapter.TFRecordExampleAdapter(specs)
    # The parser's feature specs must match the expected serialized form.
    with self._subTest("serialized_info"):
        self.assertEqual(serialized_info,
                         adapter._parser._build_feature_specs())
    for i, test in enumerate(tests):
        with self._subTest(str(i)):
            if test.raise_cls is not None:
                # Failure case: serialization itself must raise; skip the
                # round-trip checks for this item.
                with self.assertRaisesWithPredicateMatch(
                        test.raise_cls, test.raise_msg):
                    adapter._serializer.serialize_example(test.value)
                continue
            serialized = adapter._serializer.serialize_example(test.value)
            if test.expected_serialized is not None:
                # Compare the wire bytes against the expected
                # tf.train.Example proto.
                example_proto = tf.train.Example()
                example_proto.ParseFromString(serialized)
                expected_proto = tf.train.Example(
                    features=tf.train.Features(
                        feature=test.expected_serialized))
                self.assertEqual(expected_proto, example_proto)
            # Parse the serialized bytes back into tensors.
            example = _parse_example(serialized, adapter._parser.parse_example)
            with self._subTest("dtype"):
                out_dtypes = utils.map_nested(lambda s: s.dtype, example)
                expected_dtypes = utils.map_nested(lambda s: s.dtype, specs)
                self.assertEqual(out_dtypes, expected_dtypes)
            with self._subTest("shape"):
                # For shape, because (None, 3) match with (5, 3), we use
                # tf.TensorShape.assert_is_compatible_with on each of the
                # elements
                utils.map_nested(
                    lambda x: x[0].shape.assert_is_compatible_with(x[1].
                                                                   shape),
                    utils.zip_nested(example, specs))
            # Finally compare the evaluated numpy values.
            np_example = dataset_utils.as_numpy(example)
            self.assertAllEqualNested(np_example, test.expected)
def features_encode_decode(specs_dict, sample):
    """Runs the full pipeline: encode > write > tmp files > read > decode."""
    # Serialize the sample with the feature connector.
    encoded = specs_dict.encode_sample(sample)

    with tmp_dir() as tmp_dir_:
        record_path = os.path.join(tmp_dir_, 'tmp.tfrecord')

        # Round-trip through a temporary tfrecord file.
        adapter = file_format_adapter.TFRecordExampleAdapter(
            specs_dict.get_specs())
        adapter.write_from_generator(
            generator_fn=lambda: [encoded],
            output_files=[record_path],
        )
        ds = adapter.dataset_from_filename(record_path)

        # Map the decoding back over the read dataset.
        ds = ds.map(specs_dict.decode_sample)

        # Evaluate and return the first decoded sample.
        for element in dataset_utils.iterate_over_dataset(ds):
            return element