Exemple #1
0
    def encode_sample(self, image_or_path):
        """Validate an image and pack its encoded form into a dict.

        Args:
            image_or_path: The image as a `np.ndarray`. String inputs are
                recognised but loading them is not implemented yet.

        Returns:
            A dict with three keys: 'encoded' (the value returned by the
            runner for the PNG encode function), 'format' (always 'png'
            here) and 'shape' (the shape of the input array).

        Raises:
            NotImplementedError: If a string is given.
            ValueError: If the input is neither a string nor a
                `np.ndarray`, or if the array dtype is not uint8.
        """
        # TODO(epot): np_image = load_image_from_disk(image_or_path)
        if isinstance(image_or_path, six.string_types):
            raise NotImplementedError
        # Could also add PIL support
        if not isinstance(image_or_path, np.ndarray):
            raise ValueError(
                'Could not convert {} to image'.format(image_or_path))
        np_image = image_or_path

        # Only uint8 images matching the declared shape are accepted.
        detected_dtype = np_image.dtype
        if detected_dtype != np.uint8:
            raise ValueError(
                'Image should be uint8. Detected: {}'.format(detected_dtype))
        utils.assert_shape_match(np_image.shape, self._shape)

        # TODO(epot): Should support additional format
        image_format = 'png'
        # TODO(epot): Should clear the runner once every image has been encoded.
        # TODO(epot): Better support for multi-shape image (instead of re-building
        # a new graph every time)
        run = self._runner.run
        encode_fn = ENCODE_FN[image_format]
        sample = {'encoded': run(encode_fn, np_image)}
        sample['format'] = image_format
        sample['shape'] = np_image.shape
        return sample
def _item_to_tf_feature(item, tensor_info):
    """Convert a single item into a `tf.train.Feature`.

    Args:
        item: The value to serialize. It is first converted to a numpy array
            through `_item_to_np_array`.
        tensor_info: Object exposing the expected `shape` and `dtype` of the
            item.

    Returns:
        A `tf.train.Feature` wrapping an int64 list (integer and boolean
        arrays), a float list (floating arrays) or a bytes list (when
        `tensor_info.dtype` is `tf.string`).

    Raises:
        ValueError: If a string feature receives a non-string value, or if
            the array dtype cannot be stored in a `tf.train.Feature`.
    """
    v = _item_to_np_array(item,
                          shape=tensor_info.shape,
                          dtype=tensor_info.dtype)

    # Check that the shape is expected
    utils.assert_shape_match(v.shape, tensor_info.shape)
    if tensor_info.dtype == tf.string and not _is_string(v):
        raise ValueError(
            "Unsupported value: {}\nCould not convert to bytes list.".format(
                item))

    # Convert boolean to integer (tf.train.Example does not support bool)
    if v.dtype == np.bool_:
        v = v.astype(int)

    v = v.flatten()  # Convert v into a 1-d array
    if np.issubdtype(v.dtype, np.integer):
        return tf.train.Feature(int64_list=tf.train.Int64List(value=v))
    elif np.issubdtype(v.dtype, np.floating):
        return tf.train.Feature(float_list=tf.train.FloatList(value=v))
    elif tensor_info.dtype == tf.string:
        v = [tf.compat.as_bytes(x) for x in v]
        return tf.train.Feature(bytes_list=tf.train.BytesList(value=v))
    else:
        # `v` is always a numpy array at this point, so `type(v)` carries no
        # information; the dtype is what tells the user which value type was
        # rejected.
        raise ValueError(
            "Unsupported value: {}.\n"
            "tf.train.Feature does not support type {}. "
            "This may indicate that one of the FeatureConnectors received an "
            "unsupported value as input.".format(repr(v), repr(v.dtype)))
 def _encode_image(self, np_image):
     """Check a uint8 image against the expected shape, then encode it.

     The encode function is looked up in `ENCODE_FN` under
     `self._encoding_format` and executed through `self._runner`.

     Raises:
       ValueError: If the image dtype is not uint8.
     """
     detected_dtype = np_image.dtype
     if detected_dtype != np.uint8:
         raise ValueError('Image should be uint8. Detected: %s.' %
                          detected_dtype)
     utils.assert_shape_match(np_image.shape, self._shape)
     run = self._runner.run
     encode_fn = ENCODE_FN[self._encoding_format]
     return run(encode_fn, np_image)
def _item_to_np_array(item, dtype, shape):
  """Build a numpy array from `item` and check it against `shape`/`dtype`.

  Args:
    item: The value to convert.
    dtype: Expected dtype; its `as_numpy_dtype` is used for the conversion.
    shape: Expected shape, checked with `utils.assert_shape_match`.

  Returns:
    The converted numpy array.

  Raises:
    ValueError: If `dtype` is `tf.string` but `item` is not a string.
  """
  np_item = np.array(item, dtype=dtype.as_numpy_dtype)
  utils.assert_shape_match(np_item.shape, shape)
  # The string check looks at the raw input, not at the converted array.
  expects_string = dtype == tf.string
  if expects_string and not _is_string(item):
    raise ValueError(
        "Unsupported value: {}\nCould not convert to bytes list.".format(
            np_item))
  return np_item
Exemple #5
0
 def _encode_image(self, np_image):
     """Validate dtype and shape of `np_image`, then encode it.

     A `utils.TFGraphRunner` is created on first use when `self._runner`
     is not set. The encode function comes from `ENCODE_FN`, keyed by
     `self._encoding_format`.

     Raises:
       ValueError: If the image dtype differs from the feature dtype.
     """
     if not self._runner:
         self._runner = utils.TFGraphRunner()
     detected_dtype = np_image.dtype
     expected_dtype = self._dtype.as_numpy_dtype
     if detected_dtype != expected_dtype:
         raise ValueError('Image dtype should be %s. Detected: %s.' %
                          (expected_dtype, detected_dtype))
     utils.assert_shape_match(np_image.shape, self._shape)
     run = self._runner.run
     encode_fn = ENCODE_FN[self._encoding_format]
     return run(encode_fn, np_image)
def _item_to_np_array(item, dtype: tf.dtypes.DType, numpy_dtype: np.dtype,
                      shape: Shape) -> np.ndarray:
    """View `item` as a numpy array and check it against the expected spec.

    Args:
        item: The value to convert.
        dtype: Expected TensorFlow dtype, only used for the string check.
        numpy_dtype: Numpy dtype passed to `np.asanyarray`.
        shape: Expected shape, checked with `utils.assert_shape_match`.

    Returns:
        The resulting numpy array.

    Raises:
        ValueError: If a string dtype is expected but `item` is not a string.
    """
    result = np.asanyarray(item, dtype=numpy_dtype)
    utils.assert_shape_match(result.shape, shape)
    # The string check looks at the raw input, not at the converted array.
    if utils.is_same_tf_dtype(dtype, tf.string):
        if not _is_string(item):
            raise ValueError(
                f"Unsupported value: {result}\nCould not convert to bytes list.")
    return result
Exemple #7
0
def _validate_np_array(
    np_array: np.ndarray,
    shape: utils.Shape,
    dtype: tf.dtypes.DType,
) -> None:
  """Check that `np_array` has the expected dtype and shape.

  Args:
    np_array: Array to validate.
    shape: Expected shape, checked with `utils.assert_shape_match`.
    dtype: Expected dtype; compared through its `as_numpy_dtype`.

  Raises:
    ValueError: If the array dtype differs from `dtype.as_numpy_dtype`.
  """
  # The dtype is verified before the shape.
  if np_array.dtype != dtype.as_numpy_dtype:
    message = (f'Image dtype should be {dtype.as_numpy_dtype}. '
               f'Detected: {np_array.dtype}.')
    raise ValueError(message)
  utils.assert_shape_match(np_array.shape, shape)
Exemple #8
0
 def encode_example(self, example_data):
   """Return `example_data` as a numpy array of the feature dtype and shape.

   Inputs that are not already `np.ndarray` are converted with the feature's
   numpy dtype; existing arrays are returned unchanged once validated.

   Raises:
     ValueError: If the array dtype differs from the feature dtype.
   """
   expected_dtype = np.dtype(self.dtype.as_numpy_dtype)
   if isinstance(example_data, np.ndarray):
     array = example_data
   else:
     array = np.array(example_data, dtype=expected_dtype)
   # Arrays supplied by the caller may carry the wrong dtype.
   if array.dtype != expected_dtype:
     raise ValueError(
         'Dtype {} do not match {}'.format(array.dtype, expected_dtype))
   utils.assert_shape_match(array.shape, self._shape)
   return array
Exemple #9
0
 def encode_sample(self, sample_data):
   """Return `sample_data` as a numpy array of the feature dtype and shape.

   Raises:
     ValueError: If the array dtype differs from the feature dtype.
   """
   expected_dtype = np.dtype(self._dtype.as_numpy_dtype)
   # Anything that is not yet an ndarray is converted with the target dtype.
   sample_array = (
       sample_data if isinstance(sample_data, np.ndarray)
       else np.array(sample_data, dtype=expected_dtype))
   # Arrays supplied by the caller may carry the wrong dtype.
   if sample_array.dtype != expected_dtype:
     raise ValueError('Dtype {} do not match {}'.format(
         sample_array.dtype, expected_dtype))
   utils.assert_shape_match(sample_array.shape, self._shape)
   return sample_array
def _item_to_tf_feature(item, tensor_info=None):
    """Convert a single item into a `tf.train.Feature`.

    Args:
        item: The value to serialize (scalar, string, list/tuple or numpy
            array).
        tensor_info: Optional object exposing the expected `shape` and
            `dtype`. When falsy, the numpy dtype is inferred from `item`.

    Returns:
        A `tf.train.Feature` wrapping an int64 list (integer and boolean
        values), a float list (floating values) or a bytes list (strings).

    Raises:
        ValueError: If `item` is an empty list/tuple and no `tensor_info` is
            given, if a string feature receives a non-string value, or if
            the resulting dtype cannot be stored in a `tf.train.Feature`.
    """
    v = item
    # TODO(epot): tensor_info is only None for file_format_adapter tests.
    # tensor_info could be made required to cleanup some of the following code,
    # for instance by re-using _item_to_np_array.
    if not tensor_info and isinstance(v, (list, tuple)) and not v:
        raise ValueError(
            "Received an empty list value, so is unable to infer the "
            "feature type to record. To support empty value, the corresponding "
            "FeatureConnector should return a numpy array with the correct dtype "
            "instead of a Python list.")

    # Handle strings/bytes first
    is_string = _is_string(v)

    if tensor_info:
        np_dtype = np.dtype(tensor_info.dtype.as_numpy_dtype)
    elif is_string:
        np_dtype = object  # Avoid truncating trailing '\x00' when converting to np
    else:
        np_dtype = None

    v = np.array(v, dtype=np_dtype)

    # Check that the shape is expected
    if tensor_info:
        utils.assert_shape_match(v.shape, tensor_info.shape)
        if tensor_info.dtype == tf.string and not is_string:
            raise ValueError(
                "Unsupported value: {}\nCould not convert to bytes list.".
                format(item))

    # Convert boolean to integer (tf.train.Example does not support bool)
    if v.dtype == np.bool_:
        v = v.astype(int)

    v = v.flatten()  # Convert v into a 1-d array
    if np.issubdtype(v.dtype, np.integer):
        return tf.train.Feature(int64_list=tf.train.Int64List(value=v))
    elif np.issubdtype(v.dtype, np.floating):
        return tf.train.Feature(float_list=tf.train.FloatList(value=v))
    elif is_string:
        v = [tf.compat.as_bytes(x) for x in v]
        return tf.train.Feature(bytes_list=tf.train.BytesList(value=v))
    else:
        # `v` is always a numpy array at this point, so `type(v)` carries no
        # information; the dtype is what tells the user which value type was
        # rejected.
        raise ValueError(
            "Unsupported value: {}.\n"
            "tf.train.Feature does not support type {}. "
            "This may indicate that one of the FeatureConnectors received an "
            "unsupported value as input.".format(repr(v), repr(v.dtype)))
Exemple #11
0
 def encode_example(self, example_data):
     """Return `example_data` as a numpy array of the feature dtype and shape.

     Raises:
       TypeError: If a `tf.Tensor` is given instead of an array-compatible
         value.
       ValueError: If the array dtype differs from the feature dtype.
     """
     expected_dtype = np.dtype(self.dtype.as_numpy_dtype)
     # Tensors are rejected explicitly rather than being converted.
     if isinstance(example_data, tf.Tensor):
         raise TypeError(
             f'Error encoding: {example_data}. `_generate_examples` should '
             'yield `np.array`-compatible values, not `tf.Tensor`')
     if isinstance(example_data, np.ndarray):
         array = example_data
     else:
         array = np.array(example_data, dtype=expected_dtype)
     # Arrays supplied by the caller may carry the wrong dtype.
     if array.dtype != expected_dtype:
         raise ValueError(
             'Dtype {} do not match {}'.format(array.dtype, expected_dtype))
     utils.assert_shape_match(array.shape, self._shape)
     return array
Exemple #12
0
 def encode_example(self, example_data):
     """Return `example_data` as a validated numpy array.

     The value is converted to the feature dtype when it is not already an
     `np.ndarray`, then checked against the expected dtype and shape.
     Boolean arrays are returned cast to `int`.

     Raises:
       ValueError: If the array dtype differs from the feature dtype.
     """
     expected_dtype = np.dtype(self._dtype.as_numpy_dtype)
     array = (
         example_data if isinstance(example_data, np.ndarray)
         else np.array(example_data, dtype=expected_dtype))
     # Arrays supplied by the caller may carry the wrong dtype.
     if array.dtype != expected_dtype:
         raise ValueError(
             'Dtype {} do not match {}'.format(array.dtype, expected_dtype))
     utils.assert_shape_match(array.shape, self._shape)
     # tf.train.Example does not support bool, so booleans become integers.
     if array.dtype == np.bool_:
         return array.astype(int)
     return array
Exemple #13
0
 def _encode_image(self, np_image):
   """Validate dtype and shape of `np_image`, then encode it.

   A `utils.TFGraphRunner` is created on first use when `self._runner` is
   not set. The encode function comes from `ENCODE_FN`, keyed by
   `self._encoding_format`, with 'png' used when no format is set.

   Raises:
     ValueError: If the image dtype differs from the feature dtype.
   """
   if not self._runner:
     self._runner = utils.TFGraphRunner()
   detected_dtype = np_image.dtype
   expected_dtype = self._dtype.as_numpy_dtype
   if detected_dtype != expected_dtype:
     raise ValueError('Image dtype should be %s. Detected: %s.' % (
         expected_dtype, detected_dtype))
   utils.assert_shape_match(np_image.shape, self._shape)
   run = self._runner.run
   # PNG is the fallback when no encoding is defined. Open question: should
   # an explicit encoding be required (error / warning) instead? The silent
   # default caused subtle issues for imagenet_corrupted, where images read
   # as JPEG for processing ended up saved as PNG rather than JPEG.
   encoding = self._encoding_format or 'png'
   return run(ENCODE_FN[encoding], np_image)
def compare_shapes_and_types(tensor_info, output_types, output_shapes):
    """Check dataset types and shapes against the expected tensor info.

    Args:
        tensor_info: Dict mapping feature names to objects exposing `dtype`
            and `shape`, or to nested dicts of the same structure.
        output_types: Dict with the same keys holding the dataset types.
        output_shapes: Dict with the same keys holding the dataset shapes.

    Raises:
        TypeError: If a feature's dataset type differs from its expected
            dtype.
    """
    for name, info in tensor_info.items():
        if isinstance(info, dict):
            # Nested features are checked recursively, level by level.
            compare_shapes_and_types(
                info, output_types[name], output_shapes[name])
            continue

        wanted_type = info.dtype
        actual_type = output_types[name]
        if wanted_type != actual_type:
            raise TypeError("Feature %s has type %s but expected %s" %
                            (name, actual_type, wanted_type))

        wanted_shape = info.shape
        actual_shape = output_shapes[name]
        utils.assert_shape_match(wanted_shape, actual_shape)
def compare_shapes_and_types(tensor_info, element_spec):
    """Check a dataset element spec against the expected tensor info.

    Args:
        tensor_info: Dict mapping feature names to objects exposing `dtype`
            and `shape`, or to nested dicts of the same structure.
        element_spec: Dict with matching keys holding the dataset specs.
            The two dicts are paired through `utils.zip_dict`.

    Raises:
        TypeError: If a feature's spec dtype differs from its expected dtype.
    """
    for feature_name, (feature_info,
                       spec) in utils.zip_dict(tensor_info, element_spec):
        if isinstance(spec, tf.data.DatasetSpec):
            # We use _element_spec because element_spec was added in TF2.5+.
            compare_shapes_and_types(feature_info, spec._element_spec)  # pylint: disable=protected-access
        elif isinstance(feature_info, dict):
            compare_shapes_and_types(feature_info, spec)
        else:
            # Some earlier versions of TF don't expose dtype and shape for the
            # RaggedTensorSpec, so we use the protected versions.
            actual_dtype = spec._dtype  # pylint: disable=protected-access
            expected_dtype = feature_info.dtype
            if expected_dtype != actual_dtype:
                # Report the dataset dtype as the one the feature "has" and
                # the tensor-info dtype as the expected one.
                raise TypeError(
                    f"Feature {feature_name} has type {actual_dtype} but "
                    f"expected {expected_dtype}")
            utils.assert_shape_match(feature_info.shape, spec._shape)  # pylint: disable=protected-access
Exemple #16
0
  def encode_example(self, example_data):
    """Validate `example_data` and optionally serialize it to bytes.

    The value is converted to a numpy array of the feature dtype when needed,
    then checked against the expected dtype and shape. When the feature is
    encoded to bytes, the array is serialized with `tobytes()` and, for the
    ZLIB encoding, compressed with `zlib.compress`.

    Returns:
      The encoded value, or for dynamically shaped tensors a dict with the
      keys 'value' (the encoded value) and 'shape' (the array shape).

    Raises:
      ValueError: If the tensor has a dynamic shape without a bytes encoding,
        or if the array dtype differs from the feature dtype.
      TypeError: If a `tf.Tensor` is given.
    """
    # TODO(epot): Is there a better workaround ?
    # Some users defined `tfds.features.Tensor` with shape=(None, None) even
    # though it wasn't supported. For backward compatibility this check lives
    # in encode_example.
    if self._dynamic_shape and not self._encoded_to_bytes:
      raise ValueError('Multiple unknown dimensions Tensor require to set '
                       "`Tensor(..., encoding='zlib')` (or 'bytes'). "
                       f'For {self}')

    expected_dtype = np.dtype(self.numpy_dtype)
    if isinstance(example_data, tf.Tensor):
      raise TypeError(
          f'Error encoding: {example_data!r}. `_generate_examples` should '
          'yield `np.array` compatible values, not `tf.Tensor`')
    if not isinstance(example_data, np.ndarray):
      example_data = np.array(example_data, dtype=expected_dtype)
    # Arrays supplied by the caller may carry the wrong dtype.
    if example_data.dtype != expected_dtype:
      raise ValueError('Dtype {} do not match {}'.format(
          example_data.dtype, expected_dtype))

    shape = example_data.shape
    if isinstance(shape, tf.TensorShape):
      shape = tuple(shape.as_list())
    utils.assert_shape_match(shape, self._shape)

    # Serialize (and possibly compress) only when a bytes encoding is set.
    encoded = example_data
    if self._encoded_to_bytes:
      encoded = encoded.tobytes()
      if self._encoding == Encoding.ZLIB:
        encoded = zlib.compress(encoded)

    if not self._dynamic_shape:
      return encoded
    # The proto flattens all values, so dynamically shaped tensors also
    # store their shape to allow recovering it later.
    return {
        'value': encoded,
        'shape': shape,
    }