def __init__(self, output_path, tensor_data_spec, py_mode=False):
        """Builds the observer and opens its TFRecord writer.

    Args:
      output_path: Path of the TFRecords file to create.
      tensor_data_spec: Nested list/tuple or dict of TensorSpecs, describing the
        shape of the non-batched Tensors.
      py_mode: Whether the observer is being used in a py_driver.

    Raises:
      ValueError: if the tensors and specs have incompatible dimensions or
      shapes.
    """
        self._py_mode = py_mode
        # Convert the tensor specs to array specs so the serializer can
        # operate on numpy values.
        array_specs = tensor_spec.to_nest_array_spec(tensor_data_spec)
        self._array_data_spec = array_specs
        self._encoder = example_encoding.get_example_serializer(array_specs)
        self.output_path = output_path
        # Ensure the destination directory exists before opening the writer.
        tf.io.gfile.makedirs(os.path.dirname(self.output_path))
        self._writer = tf.io.TFRecordWriter(self.output_path)
        logging.info('Writing dataset to TFRecord at %s', self.output_path)
        # Persist the spec next to the records so readers can decode them.
        encode_spec_to_file(self.output_path + _SPEC_FILE_EXTENSION,
                            tensor_data_spec)
# Example 2
    def test_endian_encodings(self):
        """Round-trips little- and big-endian numpy samples through a proto."""
        spec = {
            "a": array_spec.ArraySpec((2, ), np.int16),
            "b": array_spec.ArraySpec((2, ), np.int32),
            "c": array_spec.ArraySpec((2, ), np.float32),
        }

        serializer = example_encoding.get_example_serializer(spec)
        decoder = example_encoding.get_example_decoder(spec)

        # Exercise both byte orders with identical values; "<" is little
        # endian, ">" is big endian.
        for byte_order in ("<", ">"):
            sample = {
                "a": np.array([100, 25000]).astype(byte_order + "i2"),
                "b": np.array([-5, 80000000]).astype(byte_order + "i4"),
                "c": np.array([12.5, np.pi]).astype(byte_order + "f4")
            }

            recovered = self.evaluate(decoder(serializer(sample)))
            tf.nest.map_structure(np.testing.assert_almost_equal, sample,
                                  recovered)
# Example 3
    def test_serialize_deserialize(self, dtype):
        """Checks that a random sample survives an encode/decode round trip."""
        spec = example_nested_spec(dtype)
        serializer = example_encoding.get_example_serializer(spec)
        decoder = example_encoding.get_example_decoder(spec)

        # Seed the RNG so failures are reproducible.
        original = array_spec.sample_spec_nest(spec, np.random.RandomState(0))
        round_tripped = self.evaluate(decoder(serializer(original)))
        tf.nest.map_structure(np.testing.assert_almost_equal, original,
                              round_tripped)
    def test_compress_image(self):
        """Round-trips a uint8 image through the encoder with compression on."""
        if not common.has_eager_been_enabled():
            self.skipTest("Image compression only supported in TF2.x")

        # Enable image compression on both the encoder and the parser sides.
        gin.parse_config_files_and_bindings([], """
    _get_feature_encoder.compress_image=True
    _get_feature_parser.compress_image=True
    """)
        image_spec = {"image": array_spec.ArraySpec((128, 128, 3), np.uint8)}
        serializer = example_encoding.get_example_serializer(image_spec)
        decoder = example_encoding.get_example_decoder(image_spec)

        # A constant-valued image; recovered values must match the input.
        constant_image = {"image": 128 * np.ones([128, 128, 3], dtype=np.uint8)}
        recovered = self.evaluate(decoder(serializer(constant_image)))
        tf.nest.map_structure(np.testing.assert_almost_equal, constant_image,
                              recovered)
    def __init__(self,
                 output_path,
                 tensor_data_spec,
                 py_mode=False,
                 compress_image=False,
                 image_quality=95):
        """Builds the observer and opens its TFRecord writer.

    Args:
      output_path: Path of the TFRecords file to create.
      tensor_data_spec: Nested list/tuple or dict of TensorSpecs, describing the
        shape of the non-batched Tensors.
      py_mode: Whether the observer is being used in a py_driver.
      compress_image: Whether to compress image. It is assumed that any uint8
        tensor of rank 3 with shape (w,h,c) is an image.
      image_quality: An optional int. Defaults to 95. Quality of the compression
        from 0 to 100 (higher is better and slower).

    Raises:
      ValueError: if the tensors and specs have incompatible dimensions or
      shapes.
    """
        self._py_mode = py_mode
        # Work with array specs so the serializer can handle numpy values.
        array_specs = tensor_spec.to_nest_array_spec(tensor_data_spec)
        self._array_data_spec = array_specs
        self._encoder = example_encoding.get_example_serializer(
            array_specs,
            compress_image=compress_image,
            image_quality=image_quality)
        self.output_path = output_path
        # Create the destination directory before opening the record writer.
        tf.io.gfile.makedirs(os.path.dirname(self.output_path))
        self._writer = tf.io.TFRecordWriter(self.output_path)
        logging.info('Writing dataset to TFRecord at %s', self.output_path)
        # Save the spec next to the records so the dataset can be decoded later.
        encode_spec_to_file(self.output_path + _SPEC_FILE_EXTENSION,
                            tensor_data_spec)