def test_from_4D_image_with_invalid_data():
    """Reducing a (2, 2, 4, 1) tensor to rank 2 must raise.

    The target rank is supplied once as a Python int and once as a tensor.
    """
    expected_errors = (ValueError, tf.errors.InvalidArgumentError)
    for target_rank in (2, tf.constant(2)):
        with np.testing.assert_raises(expected_errors):
            img_utils.from_4D_image(tf.ones(shape=(2, 2, 4, 1)), target_rank)
def test_from_4D_image_with_invalid_data(self):
    """Evaluating a rank-2 reduction of a (2, 2, 4, 1) tensor must raise.

    The target rank is supplied once as a Python int and once as a tensor.
    """
    expected_errors = (ValueError, tf.errors.InvalidArgumentError)
    for target_rank in (2, tf.constant(2)):
        with self.assertRaises(expected_errors):
            self.evaluate(
                img_utils.from_4D_image(
                    tf.ones(shape=(2, 2, 4, 1)), target_rank
                )
            )
def clahe(
    image: TensorLike,
    clip_limit: Number = 4.0,
    tile_grid_size: Union[List[int], Tuple[int]] = (8, 8),
    name: Optional[str] = None,
    gpu_optimized: bool = True,
) -> tf.Tensor:
    """Apply contrast-limited adaptive histogram equalization to image(s).

    Args:
      image: A tensor of shape
        `(num_images, num_rows, num_columns, num_channels)` or
        `(num_rows, num_columns, num_channels)`
      clip_limit: A floating point value or Tensor. 0 will result in no
        clipping (AHE only). Limits the noise amplification in
        near-constant regions. Default 4.0.
      tile_grid_size: A tensor of shape
        `(tiles_in_x_direction, tiles_in_y_direction)`.
        Specifies how many tiles to break the image into. Default (8x8).
      name: (Optional) The name of the op. Default `None`.
      gpu_optimized: Whether or not to use functions that perform better
        when XLA-compiled on the GPU, but worse on the CPU.

    Returns:
      Contrast-limited, adaptive-histogram-equalized image, restored to the
      rank of the input.
    """
    with tf.name_scope(name or "clahe"):
        image_dims = tf.rank(image)
        image = to_4D_image(image)

        # `tf.map_fn` hands over one image of the batch at a time; the
        # remaining arguments are captured from the enclosing scope.
        def _clahe_single(single_image):
            return _clahe(
                single_image, clip_limit, tile_grid_size, gpu_optimized
            )

        image = tf.map_fn(_clahe_single, image)
        return from_4D_image(image, image_dims)
def test_from_4D_image():
    """A (1, 2, 4, 1) tensor converts to rank 2, 3 and 4 with the right shape."""
    for target_shape in ((2, 4), (2, 4, 1), (1, 2, 4, 1)):
        expected = tf.ones(shape=target_shape)
        actual = img_utils.from_4D_image(
            tf.ones(shape=(1, 2, 4, 1)), len(target_shape)
        )
        # The statically known shape has to match, not only the values.
        assert expected.get_shape() == actual.get_shape()
        np.testing.assert_equal(expected.numpy(), actual.numpy())
def mean_filter2d(
    image: TensorLike,
    filter_shape: Union[List[int], Tuple[int], int] = (3, 3),
    padding: str = "REFLECT",
    constant_values: TensorLike = 0,
    name: Optional[str] = None,
) -> tf.Tensor:
    """Perform mean filtering on image(s).

    Args:
      image: Either a 2-D `Tensor` of shape `[height, width]`,
        a 3-D `Tensor` of shape `[height, width, channels]`,
        or a 4-D `Tensor` of shape `[batch_size, height, width, channels]`.
      filter_shape: An `integer` or `tuple`/`list` of 2 integers, specifying
        the height and width of the 2-D mean filter. Can be a single integer
        to specify the same value for all spatial dimensions.
      padding: A `string`, one of "REFLECT", "CONSTANT", or "SYMMETRIC".
        The type of padding algorithm to use, which is compatible with
        `mode` argument in `tf.pad`. For more details, please refer to
        https://www.tensorflow.org/api_docs/python/tf/pad.
      constant_values: A `scalar`, the pad value to use in "CONSTANT"
        padding mode.
      name: A name for this operation (optional).

    Returns:
      2-D, 3-D or 4-D `Tensor` of the same dtype as input.

    Raises:
      ValueError: If `image` is not 2, 3 or 4-dimensional,
        if `padding` is other than "REFLECT", "CONSTANT" or "SYMMETRIC",
        or if `filter_shape` is invalid.
    """
    with tf.name_scope(name or "mean_filter2d"):
        image = tf.convert_to_tensor(image, name="image")
        original_ndims = img_utils.get_ndims(image)
        image = img_utils.to_4D_image(image)

        filter_shape = keras_utils.normalize_tuple(filter_shape, 2, "filter_shape")

        # Keep the precision if it's float;
        # otherwise, convert to float32 for computing.
        orig_dtype = image.dtype
        if not image.dtype.is_floating:
            image = tf.dtypes.cast(image, tf.dtypes.float32)

        # Explicitly pad the image so the "VALID" convolution below yields
        # an output with the spatial size of the input.
        image = _pad(image, filter_shape, mode=padding, constant_values=constant_values)

        # Kernel of shape (filter_height, filter_width, in_channels, 1)
        # with the value of 1 for each element; dividing the convolution
        # result by the window area turns the windowed sum into a mean.
        area = tf.constant(filter_shape[0] * filter_shape[1], dtype=image.dtype)
        filter_shape += (tf.shape(image)[-1], 1)
        kernel = tf.ones(shape=filter_shape, dtype=image.dtype)

        output = tf.nn.depthwise_conv2d(
            image, kernel, strides=(1, 1, 1, 1), padding="VALID"
        )
        output /= area

        output = img_utils.from_4D_image(output, original_ndims)
        return tf.dtypes.cast(output, orig_dtype)
def equalize(
    image: TensorLike,
    data_format: str = "channels_last",
    name: Optional[str] = None,
) -> tf.Tensor:
    """Equalize image(s).

    Args:
      image: A tensor of shape
        (num_images, num_rows, num_columns, num_channels) (NHWC), or
        (num_images, num_channels, num_rows, num_columns) (NCHW), or
        (num_rows, num_columns, num_channels) (HWC), or
        (num_channels, num_rows, num_columns) (CHW), or
        (num_rows, num_columns) (HW). The rank must be statically known (the
        shape is not `TensorShape(None)`).
      data_format: Either 'channels_first' or 'channels_last'. Deprecated;
        a `DeprecationWarning` is emitted whenever it is not `None`.
      name: The name of the op.

    Returns:
      Image(s) with the same type and shape as `image`, equalized.
    """
    # NOTE(review): the default is "channels_last", so this warning fires on
    # every call that does not pass `data_format=None` -- confirm intended.
    if data_format is not None:
        # The trailing space keeps the two sentences apart once the adjacent
        # literals are concatenated.
        warnings.warn(
            "Addons will support only channel-last image operations in the future. "
            "The argument `data_format` will be removed in Addons `0.12`",
            DeprecationWarning,
        )
    with tf.name_scope(name or "equalize"):
        image_dims = tf.rank(image)
        image = to_4D_image(image)
        # Equalize each image of the batch independently.
        fn = partial(equalize_image, data_format=data_format)
        image = tf.map_fn(fn, image)
        return from_4D_image(image, image_dims)
def rotate(
    images: TensorLike,
    angles: TensorLike,
    interpolation: str = "nearest",
    fill_mode: str = "constant",
    name: Optional[str] = None,
    fill_value: TensorLike = 0.0,
) -> tf.Tensor:
    """Rotate image(s) counterclockwise by the passed angle(s) in radians.

    Args:
      images: A tensor of shape
        `(num_images, num_rows, num_columns, num_channels)`
        (NHWC), `(num_rows, num_columns, num_channels)` (HWC), or
        `(num_rows, num_columns)` (HW).
      angles: A scalar angle to rotate all images by, or (if `images` has
        rank 4) a vector of length num_images, with an angle for each image
        in the batch.
      interpolation: Interpolation mode.
        Supported values: "nearest", "bilinear".
      fill_mode: Points outside the boundaries of the input are filled
        according to the given mode
        (one of `{'constant', 'reflect', 'wrap', 'nearest'}`).
        - *reflect*: `(d c b a | a b c d | d c b a)`
          The input is extended by reflecting about the edge of the last
          pixel.
        - *constant*: `(k k k k | a b c d | k k k k)`
          The input is extended by filling all values beyond the edge with
          the same constant value k, given by `fill_value`.
        - *wrap*: `(a b c d | a b c d | a b c d)`
          The input is extended by wrapping around to the opposite edge.
        - *nearest*: `(a a a a | a b c d | d d d d)`
          The input is extended by the nearest pixel.
      name: The name of the op.
      fill_value: a float represents the value to be filled outside the
        boundaries when `fill_mode` is "constant".

    Returns:
      Image(s) with the same type and shape as `images`, rotated by the given
      angle(s). Empty space due to the rotation is filled according to
      `fill_mode` and `fill_value`.

    Raises:
      TypeError: If `images` is an invalid type.
    """
    with tf.name_scope(name or "rotate"):
        image_or_images = tf.convert_to_tensor(images)
        if image_or_images.dtype.base_dtype not in _IMAGE_DTYPES:
            raise TypeError("Invalid dtype %s." % image_or_images.dtype)
        images = img_utils.to_4D_image(image_or_images)
        original_ndims = img_utils.get_ndims(image_or_images)

        # After batching, axes 1 and 2 are read as height and width; `[None]`
        # turns each scalar into a length-1 vector.
        image_height = tf.cast(tf.shape(images)[1], tf.dtypes.float32)[None]
        image_width = tf.cast(tf.shape(images)[2], tf.dtypes.float32)[None]
        output = transform(
            images,
            angles_to_projective_transforms(angles, image_height, image_width),
            interpolation=interpolation,
            fill_mode=fill_mode,
            fill_value=fill_value,
        )
        return img_utils.from_4D_image(output, original_ndims)
def test_from_4D_image(self):
    """A (1, 2, 4, 1) tensor converts to rank 2, 3 and 4 with the right shape."""
    for target_shape in ((2, 4), (2, 4, 1), (1, 2, 4, 1)):
        expected = tf.ones(shape=target_shape)
        actual = img_utils.from_4D_image(
            tf.ones(shape=(1, 2, 4, 1)), len(target_shape)
        )
        # The statically known shape has to match, not only the values.
        self.assertAllEqual(expected.get_shape(), actual.get_shape())
        self.assertAllEqual(self.evaluate(expected), self.evaluate(actual))
def test_from_4d_image_with_invalid_shape(rank):
    """`from_4D_image` must reject inputs of rank 2, 3 and 5."""
    errors = (ValueError, tf.errors.InvalidArgumentError)
    for bad_shape in ((2, 4), (2, 4, 1), (1, 2, 4, 1, 1)):
        with pytest.raises(errors, match="`image` must be 4D tensor"):
            img_utils.from_4D_image(tf.ones(shape=bad_shape), rank)
def sharpness(image: TensorLike, factor: Number) -> tf.Tensor:
    """Change sharpness of image(s).

    Args:
      image: A tensor of shape
        `(num_images, num_rows, num_columns, num_channels)` (NHWC), or
        `(num_rows, num_columns, num_channels)` (HWC)
      factor: A floating point value or Tensor above 0.0.

    Returns:
      Image(s) with the same type and shape as `image`, sharper.
    """
    image = tf.convert_to_tensor(image)
    # Remember the incoming rank so it can be restored after batching.
    original_rank = tf.rank(image)
    batched = to_4D_image(image)
    sharpened = sharpness_image(batched, factor=factor)
    return from_4D_image(sharpened, original_rank)
def sharpness(image: TensorLike, factor: Number) -> tf.Tensor:
    """Change sharpness of image(s).

    Args:
      image: A tensor of shape
        (num_images, num_rows, num_columns, num_channels) (NHWC), or
        (num_rows, num_columns, num_channels) (HWC)
      factor: A floating point value or Tensor above 0.0.

    Returns:
      Image(s) with the same type and shape as `image`, sharper.
    """
    # Remember the incoming rank so it can be restored after batching.
    original_rank = tf.rank(image)
    batched = to_4D_image(image)
    # Sharpen every image of the batch on its own.
    sharpened = tf.map_fn(
        lambda single: sharpness_image(single, factor=factor), batched
    )
    return from_4D_image(sharpened, original_rank)
def euclidean_dist_transform(
    images: TensorLike,
    dtype: Type[tf.dtypes.DType] = tf.float32,
    name: Optional[str] = None,
) -> tf.Tensor:
    """Applies euclidean distance transform(s) to the image(s).

    Args:
      images: A tensor of shape `(num_images, num_rows, num_columns, 1)`
        (NHWC), or `(num_rows, num_columns, 1)` (HWC) or
        `(num_rows, num_columns)` (HW).
      dtype: `tf.dtypes.DType` of the output tensor.
      name: The name of the op.

    Returns:
      Image(s) with the type `dtype` and same shape as `images`, with the
      transform applied. If a tensor of all ones is given as input, the
      output tensor will be filled with the max value of the `dtype`.

    Raises:
      TypeError: If `image` is not tf.uint8, or `dtype` is not floating point.
      ValueError: If `image` more than one channel, or `image` is not of
        rank between 2 and 4.
    """
    with tf.name_scope(name or "euclidean_distance_transform"):
        raw_input = tf.convert_to_tensor(images, name="images")

        if raw_input.dtype.base_dtype != tf.uint8:
            raise TypeError(
                "Invalid dtype %s. Expected uint8." % raw_input.dtype
            )

        images = img_utils.to_4D_image(raw_input)
        original_ndims = img_utils.get_ndims(raw_input)

        # A statically unknown channel count is let through; only a known
        # count other than one is rejected.
        channels = images.get_shape()[3]
        if channels is not None and channels != 1:
            raise ValueError("`images` must have only one channel")

        if dtype not in (tf.float16, tf.float32, tf.float64):
            raise TypeError("`dtype` must be float16, float32 or float64")

        as_float = tf.cast(images, dtype)
        transformed = _image_so.ops.addons_euclidean_distance_transform(as_float)

        return img_utils.from_4D_image(transformed, original_ndims)
def sharpness(
    image: TensorLike, factor: Number, name: Optional[str] = None
) -> tf.Tensor:
    """Change sharpness of image(s).

    Args:
      image: A tensor of shape
        `(num_images, num_rows, num_columns, num_channels)` (NHWC), or
        `(num_rows, num_columns, num_channels)` (HWC)
      factor: A floating point value or Tensor above 0.0.
      name: The name of the op.

    Returns:
      Image(s) with the same type and shape as `image`, sharper.
    """
    with tf.name_scope(name or "sharpness"):
        # Remember the incoming rank so it can be restored after batching.
        original_rank = tf.rank(image)
        batched = to_4D_image(image)
        sharpened = _sharpness_image(batched, factor=factor)
        return from_4D_image(sharpened, original_rank)
def euclidean_dist_transform(
    images: TensorLike,
    dtype: Type[tf.dtypes.DType] = tf.float32,
    name: Optional[str] = None,
) -> tf.Tensor:
    """Applies euclidean distance transform(s) to the image(s).

    Based on [Distance Transforms of Sampled Functions]
    (http://www.theoryofcomputing.org/articles/v008a019/v008a019.pdf).

    Args:
      images: A tensor of shape
        `(num_images, num_rows, num_columns, num_channels)` (NHWC),
        or `(num_rows, num_columns, num_channels)` (HWC) or
        `(num_rows, num_columns)` (HW).
      dtype: `tf.dtypes.DType` of the output tensor.
      name: The name of the op.

    Returns:
      Image(s) with the type `dtype` and same shape as `images`, with the
      transform applied. If a tensor of all ones is given as input, the
      output tensor will be filled with the max value of the `dtype`.

    Raises:
      TypeError: If `image` is not tf.uint8, or `dtype` is not floating point.
    """
    with tf.name_scope(name or "euclidean_distance_transform"):
        raw_input = tf.convert_to_tensor(images, name="images")

        if raw_input.dtype.base_dtype != tf.uint8:
            raise TypeError(
                "Invalid dtype %s. Expected uint8." % raw_input.dtype
            )

        images = img_utils.to_4D_image(raw_input)
        original_ndims = img_utils.get_ndims(raw_input)

        if dtype not in (tf.float16, tf.float32, tf.float64):
            raise TypeError("`dtype` must be float16, float32 or float64")

        # The requested output dtype is handed to the op directly.
        transformed = _image_so.ops.addons_euclidean_distance_transform(
            images, dtype
        )

        return img_utils.from_4D_image(transformed, original_ndims)
def equalize(image: TensorLike, name: Optional[str] = None) -> tf.Tensor:
    """Equalize image(s).

    Args:
      image: A tensor of shape
        `(num_images, num_rows, num_columns, num_channels)` (NHWC), or
        `(num_rows, num_columns, num_channels)` (HWC), or
        `(num_rows, num_columns)` (HW).
        The rank must be statically known (the shape is not
        `TensorShape(None)`).
      name: The name of the op.

    Returns:
      Image(s) with the same type and shape as `image`, equalized.
    """
    with tf.name_scope(name or "equalize"):
        image_dims = tf.rank(image)
        image = to_4D_image(image)
        # Equalize each image of the batch independently; the helper needs
        # no bound arguments, so it is passed to `tf.map_fn` as is.
        image = tf.map_fn(_equalize_image, image)
        return from_4D_image(image, image_dims)
def rotate(
    images: TensorLike,
    angles: TensorLike,
    interpolation: str = "NEAREST",
    name: Optional[str] = None,
) -> tf.Tensor:
    """Rotate image(s) counterclockwise by the passed angle(s) in radians.

    Args:
      images: A tensor of shape
        `(num_images, num_rows, num_columns, num_channels)`
        (NHWC), `(num_rows, num_columns, num_channels)` (HWC), or
        `(num_rows, num_columns)` (HW).
      angles: A scalar angle to rotate all images by, or (if `images` has
        rank 4) a vector of length num_images, with an angle for each image
        in the batch.
      interpolation: Interpolation mode. Supported values: "NEAREST",
        "BILINEAR".
      name: The name of the op.

    Returns:
      Image(s) with the same type and shape as `images`, rotated by the given
      angle(s). Empty space due to the rotation will be filled with zeros.

    Raises:
      TypeError: If `images` is an invalid type.
    """
    with tf.name_scope(name or "rotate"):
        raw_input = tf.convert_to_tensor(images)
        if raw_input.dtype.base_dtype not in _IMAGE_DTYPES:
            raise TypeError("Invalid dtype %s." % raw_input.dtype)

        images = img_utils.to_4D_image(raw_input)
        original_ndims = img_utils.get_ndims(raw_input)

        # After batching, axes 1 and 2 are read as height and width; `[None]`
        # turns each scalar into a length-1 vector.
        batched_shape = tf.shape(images)
        image_height = tf.cast(batched_shape[1], tf.dtypes.float32)[None]
        image_width = tf.cast(batched_shape[2], tf.dtypes.float32)[None]

        projective = angles_to_projective_transforms(
            angles, image_height, image_width
        )
        rotated = transform(images, projective, interpolation=interpolation)
        return img_utils.from_4D_image(rotated, original_ndims)
def _rotate(image, angle):
    """Generates a rotated image with the use of tfa.image.transform

    Args:
        image(tensorflow.python.framework.ops.Tensor): The image. It is
            expanded to 4-D with `img_utils.to_4D_image` before the
            transform is applied.
        angle(tensorflow.python.framework.ops.EagerTensor): The rotation
            angle, forwarded to `_angles_to_projective_transforms`.

    Returns:
        The rotated image, restored to the rank of the input.
    """
    with tf.name_scope("rotate"):
        image = tf.convert_to_tensor(image)
        img = img_utils.to_4D_image(image)
        ndim = image.get_shape().ndims
        # `img` is batched here, so height and width are on axes 1 and 2;
        # axes 0 and 1 would be the batch size and the height instead
        # (presumably NHWC layout -- confirm against `to_4D_image`).
        batched_shape = tf.shape(img)
        image_h = tf.cast(batched_shape[1], tf.dtypes.float32)
        image_w = tf.cast(batched_shape[2], tf.dtypes.float32)
        # NOTE(review): the helper is called as (angle, width, height);
        # confirm this order against its signature.
        rotation_key = _angles_to_projective_transforms(angle, image_w, image_h)
        output = tfa.image.transform(img, rotation_key, interpolation="NEAREST")
        return img_utils.from_4D_image(output, ndim)
def test_from_4D_image_with_invalid_shape(self):
    """`from_4D_image` must reject inputs of rank 2, 3 and 5.

    Each shape is checked with the target rank given both as a Python int
    and as a tensor.
    """
    errors = (ValueError, tf.errors.InvalidArgumentError)
    invalid_shapes = ((2, 4), (2, 4, 1), (1, 2, 4, 1, 1))
    for rank in 2, tf.constant(2):
        with self.subTest(rank=rank):
            for shape in invalid_shapes:
                # `assertRaisesRegexp` is a deprecated alias that Python 3.12
                # removed; `assertRaisesRegex` is the supported spelling.
                with self.assertRaisesRegex(errors, "`image` must be 4D tensor"):
                    img_utils.from_4D_image(tf.ones(shape=shape), rank)
def equalize(
    image: TensorLike, data_format: str = "channels_last", name: Optional[str] = None
) -> tf.Tensor:
    """Equalize image(s).

    Args:
      image: A tensor of shape
        (num_images, num_rows, num_columns, num_channels) (NHWC), or
        (num_images, num_channels, num_rows, num_columns) (NCHW), or
        (num_rows, num_columns, num_channels) (HWC), or
        (num_channels, num_rows, num_columns) (CHW), or
        (num_rows, num_columns) (HW). The rank must be statically known (the
        shape is not `TensorShape(None)`).
      data_format: Either 'channels_first' or 'channels_last'
      name: The name of the op.

    Returns:
      Image(s) with the same type and shape as `image`, equalized.
    """
    with tf.name_scope(name or "equalize"):
        # Remember the incoming rank so it can be restored after batching.
        original_rank = tf.rank(image)
        batched = to_4D_image(image)
        # Equalize each image of the batch independently.
        equalized = tf.map_fn(
            lambda single: equalize_image(single, data_format=data_format),
            batched,
        )
        return from_4D_image(equalized, original_rank)
def transform(
    images: TensorLike,
    transforms: TensorLike,
    interpolation: str = "NEAREST",
    output_shape: Optional[list] = None,
    name: Optional[str] = None,
) -> tf.Tensor:
    """Applies the given transform(s) to the image(s).

    Args:
      images: A tensor of shape (num_images, num_rows, num_columns,
        num_channels) (NHWC), (num_rows, num_columns, num_channels) (HWC), or
        (num_rows, num_columns) (HW).
      transforms: Projective transform matrix/matrices. A vector of length 8 or
        tensor of size N x 8. If one row of transforms is
        [a0, a1, a2, b0, b1, b2, c0, c1], then it maps the *output* point
        `(x, y)` to a transformed *input* point
        `(x', y') = ((a0 x + a1 y + a2) / k, (b0 x + b1 y + b2) / k)`,
        where `k = c0 x + c1 y + 1`. The transforms are *inverted* compared to
        the transform mapping input points to output points. Note that
        gradients are not backpropagated into transformation parameters.
      interpolation: Interpolation mode.
        Supported values: "NEAREST", "BILINEAR".
      output_shape: Output dimension after the transform, [height, width].
        If None, output is the same size as input image.
      name: The name of the op.

    Returns:
      Image(s) with the same type and shape as `images`, with the given
      transform(s) applied. Transformed coordinates outside of the input image
      will be filled with zeros.

    Raises:
      TypeError: If `image` is an invalid type.
      ValueError: If output shape is not 1-D int32 Tensor, or if `transforms`
        has an unknown rank or a rank other than 1 or 2.
    """
    with tf.name_scope(name or "transform"):
        image_or_images = tf.convert_to_tensor(images, name="images")
        transform_or_transforms = tf.convert_to_tensor(
            transforms, name="transforms", dtype=tf.dtypes.float32
        )
        if image_or_images.dtype.base_dtype not in _IMAGE_DTYPES:
            raise TypeError("Invalid dtype %s." % image_or_images.dtype)
        images = img_utils.to_4D_image(image_or_images)
        original_ndims = img_utils.get_ndims(image_or_images)

        if output_shape is None:
            # Default to the spatial size of the (batched) input.
            output_shape = tf.shape(images)[1:3]

        output_shape = tf.convert_to_tensor(
            output_shape, tf.dtypes.int32, name="output_shape"
        )

        if not output_shape.get_shape().is_compatible_with([2]):
            raise ValueError(
                "output_shape must be a 1-D Tensor of 2 elements: "
                "new_height, new_width"
            )

        # Test for an unknown rank first: taking `len()` of a shape with
        # unknown rank raises on its own, which would keep the dedicated
        # error below from ever being reached.
        transforms_rank = transform_or_transforms.get_shape().ndims
        if transforms_rank is None:
            raise ValueError("transforms rank must be statically known")
        if transforms_rank == 1:
            # A single transform: add the leading batch axis.
            transforms = transform_or_transforms[None]
        elif transforms_rank == 2:
            transforms = transform_or_transforms
        else:
            raise ValueError(
                "transforms should have rank 1 or 2, but got rank %d"
                % transforms_rank
            )

        output = tf.raw_ops.ImageProjectiveTransformV2(
            images=images,
            transforms=transforms,
            output_shape=output_shape,
            interpolation=interpolation.upper(),
        )
        return img_utils.from_4D_image(output, original_ndims)
def from_4D_float32_tensor(cls, image_4D_f32: Tensor):
    """Build an instance of `cls` from a 4-D image tensor.

    The tensor is converted to `uint8` with `convert_image_dtype`, reduced
    to rank 3 with `from_4D_image`, and the result is passed to `cls`.
    """
    as_uint8 = convert_image_dtype(image_4D_f32, uint8)
    return cls(from_4D_image(as_uint8, 3))
def gaussian_filter2d(
    image: FloatTensorLike,
    filter_shape: Union[List[int], Tuple[int]] = (3, 3),
    sigma: FloatTensorLike = 1,
    padding: str = "REFLECT",
    constant_values: TensorLike = 0,
    name: Optional[str] = None,
) -> FloatTensorLike:
    """Perform Gaussian blur on image(s).

    Args:
      image: Either a 2-D `Tensor` of shape `[height, width]`,
        a 3-D `Tensor` of shape `[height, width, channels]`,
        or a 4-D `Tensor` of shape `[batch_size, height, width, channels]`.
      filter_shape: An `integer` or `tuple`/`list` of 2 integers, specifying
        the height and width of the 2-D gaussian filter. Can be a single
        integer to specify the same value for all spatial dimensions.
      sigma: Standard deviation of Gaussian.
      padding: A `string`, one of "REFLECT", "CONSTANT", or "SYMMETRIC".
        The type of padding algorithm to use, which is compatible with
        `mode` argument in `tf.pad`. For more details, please refer to
        https://www.tensorflow.org/api_docs/python/tf/pad.
      constant_values: A `scalar`, the pad value to use in "CONSTANT"
        padding mode.
      name: A name for this operation (optional).

    Returns:
      2-D, 3-D or 4-D `Tensor` with the rank of the input. The input is cast
      to `tf.float32` before filtering and is not cast back.

    Raises:
      ValueError: If `image` is not 2, 3 or 4-dimensional,
        if `padding` is other than "REFLECT", "CONSTANT" or "SYMMETRIC",
        if `filter_shape` is invalid,
        or if `sigma` is less than or equal to 0.
    """
    with tf.name_scope(name or "gaussian_filter2d"):
        if sigma <= 0:
            raise ValueError("Sigma should not be zero")
        if padding not in ["REFLECT", "CONSTANT", "SYMMETRIC"]:
            raise ValueError("Padding should be REFLECT, CONSTANT, OR SYMMETRIC")

        image = tf.cast(image, tf.float32)
        original_ndims = img_utils.get_ndims(image)
        image = img_utils.to_4D_image(image)
        channels = tf.shape(image)[3]
        filter_shape = keras_utils.normalize_tuple(filter_shape, 2, "filter_shape")

        # Row kernel of shape [1, filter_width].
        gaussian_filter_x = _get_gaussian_kernel(sigma, filter_shape[1])
        gaussian_filter_x = tf.cast(gaussian_filter_x, tf.float32)
        gaussian_filter_x = tf.reshape(gaussian_filter_x, [1, filter_shape[1]])

        # Column kernel of shape [filter_height, 1].
        gaussian_filter_y = _get_gaussian_kernel(sigma, filter_shape[0])
        gaussian_filter_y = tf.reshape(gaussian_filter_y, [filter_shape[0], 1])
        gaussian_filter_y = tf.cast(gaussian_filter_y, tf.float32)

        gaussian_filter_2d = _get_gaussian_kernel_2d(
            gaussian_filter_y, gaussian_filter_x
        )
        # Repeat every kernel entry once per channel, then shape the result
        # into the [height, width, in_channels, 1] depthwise filter layout.
        gaussian_filter_2d = tf.repeat(gaussian_filter_2d, channels)
        gaussian_filter_2d = tf.reshape(
            gaussian_filter_2d, [filter_shape[0], filter_shape[1], channels, 1]
        )

        # Explicitly pad so the "VALID" convolution keeps the spatial size.
        image = _pad(
            image,
            filter_shape,
            mode=padding,
            constant_values=constant_values,
        )

        output = tf.nn.depthwise_conv2d(
            input=image,
            filter=gaussian_filter_2d,
            strides=(1, 1, 1, 1),
            padding="VALID",
        )
        output = img_utils.from_4D_image(output, original_ndims)
        return output
def sparse_image_warp(
    image: TensorLike,
    source_control_point_locations: TensorLike,
    dest_control_point_locations: TensorLike,
    interpolation_order: int = 2,
    regularization_weight: FloatTensorLike = 0.0,
    num_boundary_points: int = 0,
    name: str = "sparse_image_warp",
) -> tf.Tensor:
    """Image warping using correspondences between sparse control points.

    Apply a non-linear warp to the image, where the warp is specified by
    the source and destination locations of a (potentially small) number of
    control points. First, we use a polyharmonic spline
    (`tfa.image.interpolate_spline`) to interpolate the displacements
    between the corresponding control points to a dense flow field.
    Then, we warp the image using this dense flow field
    (`tfa.image.dense_image_warp`).

    Let t index our control points. For `regularization_weight = 0`, we have:
    warped_image[b, dest_control_point_locations[b, t, 0],
                    dest_control_point_locations[b, t, 1], :] =
    image[b, source_control_point_locations[b, t, 0],
             source_control_point_locations[b, t, 1], :].

    For `regularization_weight > 0`, this condition is met approximately,
    since regularized interpolation trades off smoothness of the interpolant
    vs. reconstruction of the interpolant at the control points.
    See `tfa.image.interpolate_spline` for further documentation of the
    `interpolation_order` and `regularization_weight` arguments.

    Args:
      image: Either a 2-D float `Tensor` of shape `[height, width]`,
        a 3-D `Tensor` of shape `[height, width, channels]`,
        or a 4-D `Tensor` of shape `[batch_size, height, width, channels]`.
        `batch_size` is assumed as one when `image` is a 2-D or 3-D
        `Tensor`.
      source_control_point_locations: `[batch_size, num_control_points, 2]`
        float `Tensor`.
      dest_control_point_locations: `[batch_size, num_control_points, 2]`
        float `Tensor`.
      interpolation_order: polynomial order used by the spline interpolation
      regularization_weight: weight on smoothness regularizer in
        interpolation
      num_boundary_points: How many zero-flow boundary points to include at
        each image edge. Usage:
        - `num_boundary_points=0`: don't add zero-flow points
        - `num_boundary_points=1`: 4 corners of the image
        - `num_boundary_points=2`: 4 corners and one in the middle of each
          edge (8 points total)
        - `num_boundary_points=n`: 4 corners and n-1 along each edge
      name: A name for the operation (optional).

    Note that `image` and `offsets` can be of type `tf.half`, `tf.float32`,
    or `tf.float64`, and do not necessarily have to be the same type.

    Returns:
      warped_image: a float `Tensor` with the same shape and dtype as
        `image`.
      flow_field: `[batch_size, height, width, 2]` float `Tensor` containing
        the dense flow field produced by the interpolation.
    """
    image = tf.convert_to_tensor(image)
    original_ndims = img_utils.get_ndims(image)
    image = img_utils.to_4D_image(image)

    source_control_point_locations = tf.convert_to_tensor(
        source_control_point_locations
    )
    dest_control_point_locations = tf.convert_to_tensor(
        dest_control_point_locations
    )

    # Displacement of every control point from its source to its destination.
    control_point_flows = (
        dest_control_point_locations - source_control_point_locations
    )

    with tf.name_scope(name or "sparse_image_warp"):
        shape = tf.shape(image)
        batch_size = shape[0]
        image_height = shape[1]
        image_width = shape[2]

        # Dense locations at which the interpolant is evaluated: one
        # (row, column) pair per pixel, replicated across the batch.
        grid = _get_grid_locations(image_height, image_width)
        flat_grid = tf.reshape(grid, [image_height * image_width, 2])
        flat_grid = tf.cast(
            _expand_to_minibatch(flat_grid, batch_size), image.dtype
        )

        if num_boundary_points > 0:
            # Pin the image border by adding control points with zero flow.
            (
                dest_control_point_locations,
                control_point_flows,
            ) = _add_zero_flow_controls_at_boundary(
                dest_control_point_locations,
                control_point_flows,
                image_height,
                image_width,
                num_boundary_points - 1,
            )

        flat_flows = interpolate_spline(
            dest_control_point_locations,
            control_point_flows,
            flat_grid,
            interpolation_order,
            regularization_weight,
        )

        dense_flows = tf.reshape(
            flat_flows, [batch_size, image_height, image_width, 2]
        )

        warped = dense_image_warp(image, dense_flows)

        return img_utils.from_4D_image(warped, original_ndims), dense_flows
def gaussian_filter2d(
    image: TensorLike,
    filter_shape: Union[List[int], Tuple[int], int] = (3, 3),
    sigma: Union[List[float], Tuple[float], float] = 1.0,
    padding: str = "REFLECT",
    constant_values: TensorLike = 0,
    name: Optional[str] = None,
) -> TensorLike:
    """Perform Gaussian blur on image(s).

    Args:
      image: Either a 2-D `Tensor` of shape `[height, width]`,
        a 3-D `Tensor` of shape `[height, width, channels]`,
        or a 4-D `Tensor` of shape `[batch_size, height, width, channels]`.
      filter_shape: An `integer` or `tuple`/`list` of 2 integers, specifying
        the height and width of the 2-D gaussian filter. Can be a single
        integer to specify the same value for all spatial dimensions.
      sigma: A `float` or `tuple`/`list` of 2 floats, specifying
        the standard deviation in x and y direction the 2-D gaussian filter.
        Can be a single float to specify the same value for all spatial
        dimensions.
      padding: A `string`, one of "REFLECT", "CONSTANT", or "SYMMETRIC".
        The type of padding algorithm to use, which is compatible with
        `mode` argument in `tf.pad`. For more details, please refer to
        https://www.tensorflow.org/api_docs/python/tf/pad.
      constant_values: A `scalar`, the pad value to use in "CONSTANT"
        padding mode.
      name: A name for this operation (optional).

    Returns:
      2-D, 3-D or 4-D `Tensor` of the same dtype as input.

    Raises:
      ValueError: If `image` is not 2, 3 or 4-dimensional,
        if `padding` is other than "REFLECT", "CONSTANT" or "SYMMETRIC",
        if `filter_shape` is invalid,
        or if `sigma` is invalid.
    """
    with tf.name_scope(name or "gaussian_filter2d"):
        if isinstance(sigma, (list, tuple)):
            if len(sigma) != 2:
                raise ValueError("sigma should be a float or a tuple/list of 2 floats")
        else:
            # A single value applies to both spatial dimensions.
            sigma = (sigma,) * 2

        if any(s < 0 for s in sigma):
            raise ValueError("sigma should be greater than or equal to 0.")

        image = tf.convert_to_tensor(image, name="image")
        sigma = tf.convert_to_tensor(sigma, name="sigma")

        original_ndims = img_utils.get_ndims(image)
        image = img_utils.to_4D_image(image)

        # Keep the precision if it's float;
        # otherwise, convert to float32 for computing.
        orig_dtype = image.dtype
        if not image.dtype.is_floating:
            image = tf.cast(image, tf.float32)

        channels = tf.shape(image)[3]
        filter_shape = keras_utils.normalize_tuple(filter_shape, 2, "filter_shape")

        sigma = tf.cast(sigma, image.dtype)
        # Row kernel (1 x width) and column kernel (height x 1), combined
        # into the 2-D kernel by `_get_gaussian_kernel_2d`.
        gaussian_kernel_x = _get_gaussian_kernel(sigma[1], filter_shape[1])
        gaussian_kernel_x = gaussian_kernel_x[tf.newaxis, :]

        gaussian_kernel_y = _get_gaussian_kernel(sigma[0], filter_shape[0])
        gaussian_kernel_y = gaussian_kernel_y[:, tf.newaxis]

        gaussian_kernel_2d = _get_gaussian_kernel_2d(
            gaussian_kernel_y, gaussian_kernel_x
        )

        # Replicate the kernel once per channel for the depthwise convolution.
        gaussian_kernel_2d = gaussian_kernel_2d[:, :, tf.newaxis, tf.newaxis]
        gaussian_kernel_2d = tf.tile(gaussian_kernel_2d, [1, 1, channels, 1])

        # Explicitly pad so the "VALID" convolution keeps the spatial size.
        image = _pad(image, filter_shape, mode=padding, constant_values=constant_values)

        output = tf.nn.depthwise_conv2d(
            input=image,
            filter=gaussian_kernel_2d,
            strides=(1, 1, 1, 1),
            padding="VALID",
        )
        output = img_utils.from_4D_image(output, original_ndims)
        return tf.cast(output, orig_dtype)
def median_filter2d(image,
                    filter_shape=(3, 3),
                    padding="REFLECT",
                    constant_values=0,
                    name=None):
    """Perform median filtering on image(s).

    Args:
      image: Either a 2-D `Tensor` of shape `[height, width]`,
        a 3-D `Tensor` of shape `[height, width, channels]`,
        or a 4-D `Tensor` of shape `[batch_size, height, width, channels]`.
      filter_shape: An `integer` or `tuple`/`list` of 2 integers, specifying
        the height and width of the 2-D median filter. Can be a single
        integer to specify the same value for all spatial dimensions.
      padding: A `string`, one of "REFLECT", "CONSTANT", or "SYMMETRIC".
        The type of padding algorithm to use, which is compatible with
        `mode` argument in `tf.pad`. For more details, please refer to
        https://www.tensorflow.org/api_docs/python/tf/pad.
      constant_values: A `scalar`, the pad value to use in "CONSTANT"
        padding mode.
      name: A name for this operation (optional).

    Returns:
      2-D, 3-D or 4-D `Tensor` of the same dtype as input.

    Raises:
      ValueError: If `image` is not 2, 3 or 4-dimensional,
        if `padding` is other than "REFLECT", "CONSTANT" or "SYMMETRIC",
        or if `filter_shape` is invalid.
    """
    with tf.name_scope(name or "median_filter2d"):
        image = tf.convert_to_tensor(image, name="image")
        original_ndims = img_utils.get_ndims(image)
        image = img_utils.to_4D_image(image)

        if padding not in ["REFLECT", "CONSTANT", "SYMMETRIC"]:
            raise ValueError(
                "padding should be one of \"REFLECT\", \"CONSTANT\", or "
                "\"SYMMETRIC\".")
        filter_shape = keras_utils.normalize_tuple(filter_shape, 2,
                                                   "filter_shape")

        # Capture the unpadded size: the output has these dimensions.
        image_shape = tf.shape(image)
        batch_size = image_shape[0]
        height = image_shape[1]
        width = image_shape[2]
        channels = image_shape[3]

        # Explicitly pad the image
        image = _pad(image, filter_shape, mode=padding,
                     constant_values=constant_values)

        area = filter_shape[0] * filter_shape[1]

        # 1-based ranks (from the top) of the middle element(s) of a window;
        # they coincide when `area` is odd.
        floor = (area + 1) // 2
        ceil = area // 2 + 1

        patches = tf.image.extract_patches(
            image,
            sizes=[1, filter_shape[0], filter_shape[1], 1],
            strides=[1, 1, 1, 1],
            rates=[1, 1, 1, 1],
            padding="VALID")

        patches = tf.reshape(
            patches, shape=[batch_size, height, width, area, channels])

        # Move the window axis last so `top_k` ranks within each window.
        patches = tf.transpose(patches, [0, 1, 2, 4, 3])

        # Note the returned median is casted back to the original type
        # Take [5, 6, 7, 8] for example, the median is (6 + 7) / 2 = 6.5
        # It turns out to be int(6.5) = 6 if the original type is int
        top = tf.nn.top_k(patches, k=ceil).values
        if area % 2 == 1:
            median = top[:, :, :, :, floor - 1]
        else:
            # Halve each middle value before adding: summing first happens
            # in the input dtype and wraps around for narrow integer types
            # (e.g. uint8 200 + 100).
            median = (top[:, :, :, :, floor - 1] / 2 +
                      top[:, :, :, :, ceil - 1] / 2)

        output = tf.cast(median, image.dtype)
        output = img_utils.from_4D_image(output, original_ndims)
        return output
def transform(
    images: TensorLike,
    transforms: TensorLike,
    interpolation: str = "nearest",
    fill_mode: str = "constant",
    output_shape: Optional[list] = None,
    name: Optional[str] = None,
    fill_value: TensorLike = 0.0,
) -> tf.Tensor:
    """Applies the given transform(s) to the image(s).

    Args:
      images: A tensor of shape (num_images, num_rows, num_columns,
        num_channels) (NHWC), (num_rows, num_columns, num_channels) (HWC), or
        (num_rows, num_columns) (HW).
      transforms: Projective transform matrix/matrices. A vector of length 8 or
        tensor of size N x 8. If one row of transforms is
        [a0, a1, a2, b0, b1, b2, c0, c1], then it maps the *output* point
        `(x, y)` to a transformed *input* point
        `(x', y') = ((a0 x + a1 y + a2) / k, (b0 x + b1 y + b2) / k)`,
        where `k = c0 x + c1 y + 1`. The transforms are *inverted* compared to
        the transform mapping input points to output points. Note that
        gradients are not backpropagated into transformation parameters.
      interpolation: Interpolation mode.
        Supported values: "nearest", "bilinear".
      fill_mode: Points outside the boundaries of the input are filled
        according to the given mode
        (one of `{'constant', 'reflect', 'wrap', 'nearest'}`).
        - *reflect*: `(d c b a | a b c d | d c b a)`
          The input is extended by reflecting about the edge of the last
          pixel.
        - *constant*: `(k k k k | a b c d | k k k k)`
          The input is extended by filling all values beyond the edge with
          the same constant value k given by `fill_value`.
        - *wrap*: `(a b c d | a b c d | a b c d)`
          The input is extended by wrapping around to the opposite edge.
        - *nearest*: `(a a a a | a b c d | d d d d)`
          The input is extended by the nearest pixel. Not supported for
          TensorFlow < 2.4.0.
      output_shape: Output dimension after the transform, [height, width].
        If None, output is the same size as input image.
      name: The name of the op.
      fill_value: a float represents the value to be filled outside the
        boundaries when `fill_mode` is "constant". It is not supported and
        is always 0 for TensorFlow < 2.4.0 (a warning is emitted).

    Returns:
      Image(s) with the same type and rank as `images`, with the given
      transform(s) applied. The spatial size is `output_shape` when it is
      given, otherwise that of the input. Transformed coordinates outside
      of the input image are filled according to `fill_mode`.

    Raises:
      TypeError: If `images` has an invalid dtype.
      ValueError: If `output_shape` is not a 1-D tensor of 2 elements,
        if the rank of `transforms` is not statically known or is not
        1 or 2, or if `fill_mode` is "nearest" for TensorFlow < 2.4.0.
    """
    with tf.name_scope(name or "transform"):
        image_or_images = tf.convert_to_tensor(images, name="images")
        transform_or_transforms = tf.convert_to_tensor(
            transforms, name="transforms", dtype=tf.dtypes.float32)
        if image_or_images.dtype.base_dtype not in _IMAGE_DTYPES:
            raise TypeError("Invalid dtype %s." % image_or_images.dtype)

        # The op works on NHWC batches; remember the caller's rank so the
        # result can be converted back at the end.
        images = img_utils.to_4D_image(image_or_images)
        original_ndims = img_utils.get_ndims(image_or_images)

        if output_shape is None:
            output_shape = tf.shape(images)[1:3]

        output_shape = tf.convert_to_tensor(output_shape,
                                            tf.dtypes.int32,
                                            name="output_shape")

        if not output_shape.get_shape().is_compatible_with([2]):
            raise ValueError(
                "output_shape must be a 1-D Tensor of 2 elements: "
                "new_height, new_width")

        # Read the static rank once and reject an unknown rank *first*:
        # taking len() of a shape with unknown rank raises on its own, which
        # previously made the dedicated error below unreachable.
        transforms_rank = transform_or_transforms.get_shape().ndims
        if transforms_rank is None:
            raise ValueError("transforms rank must be statically known")
        if transforms_rank == 1:
            # A single transform: add a leading batch axis.
            transforms = transform_or_transforms[None]
        elif transforms_rank == 2:
            transforms = transform_or_transforms
        else:
            raise ValueError(
                "transforms should have rank 1 or 2, but got rank %d" %
                transforms_rank)

        if LooseVersion(tf.__version__) >= LooseVersion("2.4.0"):
            fill_value = tf.convert_to_tensor(fill_value,
                                              dtype=tf.float32,
                                              name="fill_value")
            output = tf.raw_ops.ImageProjectiveTransformV3(
                images=images,
                transforms=transforms,
                output_shape=output_shape,
                interpolation=interpolation.upper(),
                fill_mode=fill_mode.upper(),
                fill_value=fill_value,
            )
        else:
            fill_mode = fill_mode.upper()
            # TODO(WindQAQ): Get rid of the check once we drop TensorFlow < 2.4 support.
            if fill_mode == "CONSTANT":
                warnings.warn(
                    "fill_value is not supported and is always 0 for TensorFlow < 2.4.0."
                )
            if fill_mode == "NEAREST":
                raise ValueError(
                    "NEAREST fill_mode is not supported for TensorFlow < 2.4.0."
                )
            output = tf.raw_ops.ImageProjectiveTransformV2(
                images=images,
                transforms=transforms,
                output_shape=output_shape,
                interpolation=interpolation.upper(),
                fill_mode=fill_mode,
            )
        return img_utils.from_4D_image(output, original_ndims)