def __init__(self,
             output_path,
             tensor_data_spec,
             py_mode=False,
             compress_image=False,
             image_quality=95):
  """Creates observer object.

  Args:
    output_path: The path to the TFRecords file.
    tensor_data_spec: Nested list/tuple or dict of TensorSpecs, describing
      the shape of the non-batched Tensors.
    py_mode: Whether the observer is being used in a py_driver.
    compress_image: Whether to compress image. It is assumed that any uint8
      tensor of rank 3 with shape (w,h,c) is an image. Defaults to False,
      which preserves the previous (uncompressed) behavior.
    image_quality: An optional int. Defaults to 95. Quality of the
      compression from 0 to 100 (higher is better and slower). Only
      relevant when `compress_image` is True.

  Raises:
    ValueError: if the tensors and specs have incompatible dimensions or
      shapes.
  """
  self._py_mode = py_mode
  # Work with array specs so samples can be serialized eagerly.
  self._array_data_spec = tensor_spec.to_nest_array_spec(tensor_data_spec)
  self._encoder = example_encoding.get_example_serializer(
      self._array_data_spec,
      compress_image=compress_image,
      image_quality=image_quality)
  # Two output files: a tfrecord file and a file with the serialized spec.
  self.output_path = output_path
  tf.io.gfile.makedirs(os.path.dirname(self.output_path))
  self._writer = tf.io.TFRecordWriter(self.output_path)
  logging.info('Writing dataset to TFRecord at %s', self.output_path)
  # Save the tensor spec used to write the dataset to file.
  spec_output_path = self.output_path + _SPEC_FILE_EXTENSION
  encode_spec_to_file(spec_output_path, tensor_data_spec)
def test_endian_encodings(self):
  """Values round-trip through serialize/decode for either byte order."""
  spec = {
      "a": array_spec.ArraySpec((2,), np.int16),
      "b": array_spec.ArraySpec((2,), np.int32),
      "c": array_spec.ArraySpec((2,), np.float32),
  }
  serializer = example_encoding.get_example_serializer(spec)
  decoder = example_encoding.get_example_decoder(spec)

  # Exercise little-endian ("<") first, then big-endian (">") inputs; the
  # recovered values must match the originals in both cases.
  for byte_order in ("<", ">"):
    sample = {
        "a": np.array([100, 25000]).astype(byte_order + "i2"),
        "b": np.array([-5, 80000000]).astype(byte_order + "i4"),
        "c": np.array([12.5, np.pi]).astype(byte_order + "f4"),
    }
    recovered = self.evaluate(decoder(serializer(sample)))
    tf.nest.map_structure(np.testing.assert_almost_equal, sample, recovered)
def test_serialize_deserialize(self, dtype):
  """A random sample of the nested spec survives a serialize/decode trip."""
  spec = example_nested_spec(dtype)
  serializer = example_encoding.get_example_serializer(spec)
  decoder = example_encoding.get_example_decoder(spec)

  rng = np.random.RandomState(0)
  sample = array_spec.sample_spec_nest(spec, rng)
  recovered = self.evaluate(decoder(serializer(sample)))
  tf.nest.map_structure(np.testing.assert_almost_equal, sample, recovered)
def test_compress_image(self):
  """A uint8 image tensor round-trips with image compression enabled."""
  if not common.has_eager_been_enabled():
    self.skipTest("Image compression only supported in TF2.x")

  # Turn on compression in both the encoder and the parser via gin.
  gin.parse_config_files_and_bindings([], """
  _get_feature_encoder.compress_image=True
  _get_feature_parser.compress_image=True
  """)

  spec = {"image": array_spec.ArraySpec((128, 128, 3), np.uint8)}
  serializer = example_encoding.get_example_serializer(spec)
  decoder = example_encoding.get_example_decoder(spec)

  # A constant-valued image; compression should be lossless enough that
  # the recovered tensor matches the input.
  sample = {"image": np.full([128, 128, 3], 128, dtype=np.uint8)}
  recovered = self.evaluate(decoder(serializer(sample)))
  tf.nest.map_structure(np.testing.assert_almost_equal, sample, recovered)
def __init__(self,
             output_path,
             tensor_data_spec,
             py_mode=False,
             compress_image=False,
             image_quality=95):
  """Creates observer object.

  Args:
    output_path: The path to the TFRecords file.
    tensor_data_spec: Nested list/tuple or dict of TensorSpecs, describing
      the shape of the non-batched Tensors.
    py_mode: Whether the observer is being used in a py_driver.
    compress_image: Whether to compress image. It is assumed that any uint8
      tensor of rank 3 with shape (w,h,c) is an image.
    image_quality: An optional int. Defaults to 95. Quality of the
      compression from 0 to 100 (higher is better and slower).

  Raises:
    ValueError: if the tensors and specs have incompatible dimensions or
      shapes.
  """
  self._py_mode = py_mode
  self._array_data_spec = tensor_spec.to_nest_array_spec(tensor_data_spec)
  self._encoder = example_encoding.get_example_serializer(
      self._array_data_spec,
      compress_image=compress_image,
      image_quality=image_quality)
  # The observer produces two files: the TFRecord itself plus a sidecar
  # file holding the serialized spec.
  self.output_path = output_path
  tf.io.gfile.makedirs(os.path.dirname(self.output_path))
  self._writer = tf.io.TFRecordWriter(self.output_path)
  logging.info('Writing dataset to TFRecord at %s', self.output_path)
  # Persist the spec next to the data so readers can reconstruct it later.
  spec_output_path = self.output_path + _SPEC_FILE_EXTENSION
  encode_spec_to_file(spec_output_path, tensor_data_spec)