def encode_sample(self, image_or_path):
    """Encode an image into the dict of fields stored in the tf example.

    Args:
      image_or_path: The image to encode. Only `np.ndarray` values are
        handled at the moment.

    Returns:
      A dict with the keys `'encoded'` (result of running the PNG entry of
      `ENCODE_FN` through `self._runner`), `'format'` (always `'png'`) and
      `'shape'` (the shape of the input array).

    Raises:
      NotImplementedError: If a string (path) is given.
      ValueError: If the input is neither a string nor a `np.ndarray`, or
        if the array dtype is not uint8.
    """
    if isinstance(image_or_path, six.string_types):
        # TODO(epot): np_image = load_image_from_disk(image_or_path)
        raise NotImplementedError
    if not isinstance(image_or_path, np.ndarray):
        # Could also add PIL support
        raise ValueError(
            'Could not convert {} to image'.format(image_or_path))
    image = image_or_path

    # Check that the image is valid
    if image.dtype != np.uint8:
        raise ValueError(
            'Image should be uint8. Detected: {}'.format(image.dtype))
    utils.assert_shape_match(image.shape, self._shape)

    # TODO(epot): Should support additional format
    image_format = 'png'

    # TODO(epot): Should clear the runner once every image has been encoded.
    # TODO(epot): Better support for multi-shape image (instead of re-building
    # a new graph every time)
    encoded_bytes = self._runner.run(ENCODE_FN[image_format], image)
    return {
        'encoded': encoded_bytes,
        'format': image_format,
        'shape': image.shape,
    }
def _item_to_tf_feature(item, tensor_info):
    """Build the `tf.train.Feature` holding a single item.

    Args:
      item: The value to store.
      tensor_info: Object exposing the expected `.shape` and `.dtype`.

    Returns:
      A `tf.train.Feature` wrapping an int64, float or bytes list, chosen
      from the dtype of the converted array.

    Raises:
      ValueError: If a string feature is expected but the value is not a
        string, or if the array dtype is not integer, floating or string.
    """
    array = _item_to_np_array(
        item, shape=tensor_info.shape, dtype=tensor_info.dtype)

    # Check that the shape is expected
    utils.assert_shape_match(array.shape, tensor_info.shape)
    if tensor_info.dtype == tf.string and not _is_string(array):
        raise ValueError(
            "Unsuported value: {}\nCould not convert to bytes list.".format(
                item))

    # Convert boolean to integer (tf.train.Example does not support bool)
    if array.dtype == np.bool_:
        array = array.astype(int)

    values = array.flatten()  # Convert the array into a 1-d array

    if np.issubdtype(values.dtype, np.integer):
        return tf.train.Feature(int64_list=tf.train.Int64List(value=values))
    if np.issubdtype(values.dtype, np.floating):
        return tf.train.Feature(float_list=tf.train.FloatList(value=values))
    if tensor_info.dtype == tf.string:
        as_bytes = [tf.compat.as_bytes(x) for x in values]
        return tf.train.Feature(bytes_list=tf.train.BytesList(value=as_bytes))
    raise ValueError(
        "Unsuported value: {}.\n"
        "tf.train.Feature does not support type {}. "
        "This may indicate that one of the FeatureConnectors received an "
        "unsupported value as input.".format(repr(values), repr(type(values))))
def _encode_image(self, np_image):
    """Validate `np_image` and return it encoded in `self._encoding_format`.

    Raises:
      ValueError: If the image dtype is not uint8.
    """
    detected_dtype = np_image.dtype
    if detected_dtype != np.uint8:
        raise ValueError(
            'Image should be uint8. Detected: %s.' % detected_dtype)

    utils.assert_shape_match(np_image.shape, self._shape)
    return self._runner.run(ENCODE_FN[self._encoding_format], np_image)
def _item_to_np_array(item, dtype, shape):
    """Convert a single item into a `np.array` of the requested dtype.

    Args:
      item: The value to convert.
      dtype: Expected dtype; its `as_numpy_dtype` is used for the conversion.
      shape: Expected shape, checked with `utils.assert_shape_match`.

    Returns:
      The converted array.

    Raises:
      ValueError: If `dtype` is `tf.string` but the original item is not a
        string according to `_is_string`.
    """
    converted = np.array(item, dtype=dtype.as_numpy_dtype)
    utils.assert_shape_match(converted.shape, shape)

    # The string check is done on the raw input, not on the converted array.
    if dtype == tf.string and not _is_string(item):
        raise ValueError(
            "Unsupported value: {}\nCould not convert to bytes list.".format(
                converted))
    return converted
def _encode_image(self, np_image):
    """Validate `np_image` and return it encoded in `self._encoding_format`.

    The graph runner is created on first use.

    Raises:
      ValueError: If the image dtype differs from the feature dtype.
    """
    # Lazily build the runner the first time an image is encoded.
    if not self._runner:
        self._runner = utils.TFGraphRunner()

    if np_image.dtype != self._dtype.as_numpy_dtype:
        message = 'Image dtype should be %s. Detected: %s.' % (
            self._dtype.as_numpy_dtype, np_image.dtype)
        raise ValueError(message)

    utils.assert_shape_match(np_image.shape, self._shape)
    return self._runner.run(ENCODE_FN[self._encoding_format], np_image)
def _item_to_np_array(
    item,
    dtype: tf.dtypes.DType,
    numpy_dtype: np.dtype,
    shape: Shape,
) -> np.ndarray:
    """Convert a single item into a numpy array.

    Args:
      item: The value to convert.
      dtype: TensorFlow dtype, only used to detect string features.
      numpy_dtype: Numpy dtype passed to `np.asanyarray`.
      shape: Expected shape, checked with `utils.assert_shape_match`.

    Returns:
      The converted array.

    Raises:
      ValueError: If a string feature is expected but `item` is not a string
        according to `_is_string`.
    """
    array = np.asanyarray(item, dtype=numpy_dtype)
    utils.assert_shape_match(array.shape, shape)

    expects_string = utils.is_same_tf_dtype(dtype, tf.string)
    if expects_string and not _is_string(item):
        raise ValueError(
            f"Unsupported value: {array}\nCould not convert to bytes list.")
    return array
def _validate_np_array(
    np_array: np.ndarray,
    shape: utils.Shape,
    dtype: tf.dtypes.DType,
) -> None:
    """Check that `np_array` has the expected dtype and shape.

    Args:
      np_array: The array to validate.
      shape: Expected shape, checked with `utils.assert_shape_match`.
      dtype: Expected dtype; compared through its `as_numpy_dtype`.

    Raises:
      ValueError: If the array dtype differs from `dtype.as_numpy_dtype`.
    """
    detected = np_array.dtype
    expected = dtype.as_numpy_dtype
    if detected != expected:
        raise ValueError(
            f'Image dtype should be {expected}. Detected: {detected}.')
    utils.assert_shape_match(np_array.shape, shape)
def encode_example(self, example_data):
    """See base class for details.

    Converts the input to a numpy array when needed, then checks its dtype
    and shape against the feature definition.

    Raises:
      ValueError: If the array dtype differs from the feature dtype.
    """
    expected_dtype = np.dtype(self.dtype.as_numpy_dtype)

    array = example_data
    if not isinstance(array, np.ndarray):
        array = np.array(array, dtype=expected_dtype)

    # Ensure the shape and dtype match
    if array.dtype != expected_dtype:
        raise ValueError(
            'Dtype {} do not match {}'.format(array.dtype, expected_dtype))
    utils.assert_shape_match(array.shape, self._shape)
    return array
def encode_sample(self, sample_data):
    """See base class for details.

    Returns the sample as a numpy array after checking that its dtype and
    shape agree with the feature definition.

    Raises:
      ValueError: If the array dtype differs from the feature dtype.
    """
    target_dtype = np.dtype(self._dtype.as_numpy_dtype)

    # Convert to numpy if possible
    if isinstance(sample_data, np.ndarray):
        array = sample_data
    else:
        array = np.array(sample_data, dtype=target_dtype)

    # Ensure the shape and dtype match
    if array.dtype != target_dtype:
        raise ValueError(
            'Dtype {} do not match {}'.format(array.dtype, target_dtype))
    utils.assert_shape_match(array.shape, self._shape)
    return array
def _item_to_tf_feature(item, tensor_info=None):
    """Build the `tf.train.Feature` holding a single item.

    Args:
      item: The value to store.
      tensor_info: Optional object exposing the expected `.shape` and
        `.dtype`. When falsy, the numpy dtype is inferred from the value.

    Returns:
      A `tf.train.Feature` wrapping an int64, float or bytes list.

    Raises:
      ValueError: If an empty list/tuple is given without `tensor_info`, if
        a string feature is expected but the value is not a string, or if
        the resulting dtype is not integer, floating or string.
    """
    # TODO(epot): tensor_info is only None for file_format_adapter tests.
    # tensor_info could be made required to cleanup some of the following code,
    # for instance by re-using _item_to_np_array.
    is_empty_sequence = isinstance(item, (list, tuple)) and not item
    if not tensor_info and is_empty_sequence:
        raise ValueError(
            "Received an empty list value, so is unable to infer the "
            "feature type to record. To support empty value, the corresponding "
            "FeatureConnector should return a numpy array with the correct dtype "
            "instead of a Python list.")

    # Handle strings/bytes first
    is_string = _is_string(item)

    if tensor_info:
        np_dtype = np.dtype(tensor_info.dtype.as_numpy_dtype)
    elif is_string:
        # Avoid truncating trailing '\x00' when converting to np
        np_dtype = object
    else:
        np_dtype = None
    array = np.array(item, dtype=np_dtype)

    # Check that the shape is expected
    if tensor_info:
        utils.assert_shape_match(array.shape, tensor_info.shape)
        if tensor_info.dtype == tf.string and not is_string:
            raise ValueError(
                "Unsuported value: {}\nCould not convert to bytes list.".
                format(item))

    # Convert boolean to integer (tf.train.Example does not support bool)
    if array.dtype == np.bool_:
        array = array.astype(int)

    values = array.flatten()  # Convert the array into a 1-d array

    if np.issubdtype(values.dtype, np.integer):
        return tf.train.Feature(int64_list=tf.train.Int64List(value=values))
    if np.issubdtype(values.dtype, np.floating):
        return tf.train.Feature(float_list=tf.train.FloatList(value=values))
    if is_string:
        as_bytes = [tf.compat.as_bytes(x) for x in values]
        return tf.train.Feature(bytes_list=tf.train.BytesList(value=as_bytes))
    raise ValueError(
        "Unsuported value: {}.\n"
        "tf.train.Feature does not support type {}. "
        "This may indicate that one of the FeatureConnectors received an "
        "unsupported value as input.".format(repr(values), repr(type(values))))
def encode_example(self, example_data):
    """See base class for details.

    Rejects `tf.Tensor` inputs, converts anything else to a numpy array when
    needed, then checks dtype and shape against the feature definition.

    Raises:
      TypeError: If `example_data` is a `tf.Tensor`.
      ValueError: If the array dtype differs from the feature dtype.
    """
    expected_dtype = np.dtype(self.dtype.as_numpy_dtype)

    if isinstance(example_data, tf.Tensor):
        raise TypeError(
            f'Error encoding: {example_data}. `_generate_examples` should '
            'yield `np.array`-compatible values, not `tf.Tensor`')

    array = example_data
    if not isinstance(array, np.ndarray):
        array = np.array(array, dtype=expected_dtype)

    # Ensure the shape and dtype match
    if array.dtype != expected_dtype:
        raise ValueError(
            'Dtype {} do not match {}'.format(array.dtype, expected_dtype))
    utils.assert_shape_match(array.shape, self._shape)
    return array
def encode_example(self, example_data):
    """See base class for details.

    Returns the example as a numpy array after checking dtype and shape.
    Boolean arrays are returned converted to integers.

    Raises:
      ValueError: If the array dtype differs from the feature dtype.
    """
    target_dtype = np.dtype(self._dtype.as_numpy_dtype)

    # Convert to numpy if possible
    if isinstance(example_data, np.ndarray):
        array = example_data
    else:
        array = np.array(example_data, dtype=target_dtype)

    # Ensure the shape and dtype match
    if array.dtype != target_dtype:
        raise ValueError(
            'Dtype {} do not match {}'.format(array.dtype, target_dtype))
    utils.assert_shape_match(array.shape, self._shape)

    # For booleans, convert to integer (tf.train.Example does not support bool)
    if array.dtype == np.bool_:
        return array.astype(int)
    return array
def _encode_image(self, np_image):
    """Validate `np_image` and return it encoded as jpeg or png.

    The graph runner is created on first use. When `self._encoding_format`
    is falsy, the `'png'` entry of `ENCODE_FN` is used.

    Raises:
      ValueError: If the image dtype differs from the feature dtype.
    """
    # Lazily build the runner the first time an image is encoded.
    if not self._runner:
        self._runner = utils.TFGraphRunner()

    if np_image.dtype != self._dtype.as_numpy_dtype:
        message = 'Image dtype should be %s. Detected: %s.' % (
            self._dtype.as_numpy_dtype, np_image.dtype)
        raise ValueError(message)

    utils.assert_shape_match(np_image.shape, self._shape)

    # When encoding isn't defined, default to PNG.
    # Should we be more strict about explicitly define the encoding (raise
    # error / warning instead) ?
    # It has created subtle issues for imagenet_corrupted: images are read as
    # JPEG images to apply some processing, but final image saved as PNG
    # (default) rather than JPEG.
    return self._runner.run(
        ENCODE_FN[self._encoding_format or 'png'], np_image)
def compare_shapes_and_types(tensor_info, output_types, output_shapes):
    """Compare shapes and types between TensorInfo and Dataset types/shapes.

    Args:
      tensor_info: Dict mapping feature names to either a nested dict or an
        object exposing the expected `.dtype` and `.shape`.
      output_types: Structure with the same keys giving the dataset types.
      output_shapes: Structure with the same keys giving the dataset shapes.

    Raises:
      TypeError: If a feature dtype differs from the dataset type.
    """
    for name, info in tensor_info.items():
        if isinstance(info, dict):
            # Nested features: check the matching sub-structures.
            compare_shapes_and_types(
                info, output_types[name], output_shapes[name])
            continue

        wanted_type, actual_type = info.dtype, output_types[name]
        if wanted_type != actual_type:
            raise TypeError("Feature %s has type %s but expected %s" %
                            (name, actual_type, wanted_type))

        wanted_shape, actual_shape = info.shape, output_shapes[name]
        utils.assert_shape_match(wanted_shape, actual_shape)
def compare_shapes_and_types(tensor_info, element_spec):
    """Compare shapes and types between TensorInfo and Dataset types/shapes.

    Iterates over the `(name, (feature_info, spec))` entries produced by
    `utils.zip_dict(tensor_info, element_spec)`, recursing into nested
    datasets and nested dicts.

    Args:
      tensor_info: Dict mapping feature names to either a nested dict or an
        object exposing the expected `.dtype` and `.shape`.
      element_spec: Matching structure of dataset specs.

    Raises:
      TypeError: If the dtype of a spec differs from the expected feature
        dtype. The message reports the spec dtype as the actual type and
        the feature dtype as the expected one.
    """
    for feature_name, (feature_info, spec) in utils.zip_dict(
        tensor_info, element_spec):
        if isinstance(spec, tf.data.DatasetSpec):
            # We use _element_spec because element_spec was added in TF2.5+.
            compare_shapes_and_types(feature_info, spec._element_spec)  # pylint: disable=protected-access
        elif isinstance(feature_info, dict):
            compare_shapes_and_types(feature_info, spec)
        else:
            # Some earlier versions of TF don't expose dtype and shape for the
            # RaggedTensorSpec, so we use the protected versions.
            expected_dtype = feature_info.dtype
            actual_dtype = spec._dtype  # pylint: disable=protected-access
            if expected_dtype != actual_dtype:
                # Report the dtypes themselves, with the dataset spec as the
                # actual value and the feature info as the expectation.
                raise TypeError(
                    f"Feature {feature_name} has type {actual_dtype} but "
                    f"expected {expected_dtype}")
            utils.assert_shape_match(feature_info.shape, spec._shape)  # pylint: disable=protected-access
def encode_example(self, example_data):
    """See base class for details.

    Validates the example against the feature dtype and shape, then
    optionally serializes it to bytes (zlib-compressed when the encoding is
    `Encoding.ZLIB`).

    Returns:
      The validated array or its bytes encoding. For dynamically shaped
      tensors, a dict with the keys `'value'` and `'shape'` instead.

    Raises:
      ValueError: If the tensor has a dynamic shape without a bytes
        encoding, or if the array dtype differs from the feature dtype.
      TypeError: If `example_data` is a `tf.Tensor`.
    """
    # TODO(epot): Is there a better workaround ?
    # It seems some user have non-conventional use of tfds.features.Tensor where
    # they defined shape=(None, None) even if it wasn't supported.
    # For backward compatibility, the check is moved inside encode example.
    if self._dynamic_shape and not self._encoded_to_bytes:
        raise ValueError('Multiple unknown dimensions Tensor require to set '
                         "`Tensor(..., encoding='zlib')` (or 'bytes'). "
                         f'For {self}')

    expected_dtype = np.dtype(self.numpy_dtype)

    if isinstance(example_data, tf.Tensor):
        raise TypeError(
            f'Error encoding: {example_data!r}. `_generate_examples` should '
            'yield `np.array` compatible values, not `tf.Tensor`')

    array = example_data
    if not isinstance(array, np.ndarray):
        array = np.array(array, dtype=expected_dtype)

    # Ensure the shape and dtype match
    if array.dtype != expected_dtype:
        raise ValueError(
            'Dtype {} do not match {}'.format(array.dtype, expected_dtype))

    shape = array.shape
    if isinstance(shape, tf.TensorShape):
        shape = tuple(shape.as_list())
    utils.assert_shape_match(shape, self._shape)

    # Eventually encode the data
    payload = array
    if self._encoded_to_bytes:
        payload = payload.tobytes()
        if self._encoding == Encoding.ZLIB:
            payload = zlib.compress(payload)

    # For dynamically shaped tensors, also save the shape (the proto
    # flatten all values so we need a way to recover the shape).
    if not self._dynamic_shape:
        return payload
    return {
        'value': payload,
        'shape': shape,
    }