def add_fire(x, seg_preds, fire_opts):
    """
    Render a wildfire-looking version of the input tensor.

    The image is rescaled with ``normalize(x, 0, 255)``, colour-shifted,
    contrast-boosted and darkened. The sky mask derived from ``seg_preds``
    is then enlarged, smoothed with a Gaussian kernel and used to paste a
    flat colour layer over the image.

    Args:
        x (torch.Tensor): Input tensor
        seg_preds (torch.Tensor): Semantic segmentation predictions for input tensor
        fire_opts: Options object read through ``.get``:
            ``crop_bottom_sky_mask`` (if truthy, the bottom third of the
            sky mask is zeroed), ``kernel_size`` (default 301) and
            ``kernel_sigma`` (default 150.5) of the Gaussian kernel that
            smooths the sky mask.

    Returns:
        torch.Tensor: Wildfire version of input tensor (float dtype)
    """
    img = normalize(x, 0, 255)

    # Warm the image (presumably channels are ordered R, G, B -- confirm)
    img[:, 0, :, :] += 40
    img[:, 1, :, :] -= 10
    img[:, 2, :, :] -= 20
    img.clamp_(0, 255)
    img = img.to(torch.uint8)

    # Darken the picture and increase contrast
    img = adjust_contrast(img, contrast_factor=1.5)
    img = adjust_brightness(img, brightness_factor=0.73)

    mask = retrieve_sky_mask(seg_preds).unsqueeze(1)
    if fire_opts.get("crop_bottom_sky_mask"):
        # Zero every mask row from two thirds of the height downwards
        cut_row = 2 * mask.shape[-2] // 3
        mask[..., cut_row:, :] = 0

    # Bring the mask to the spatial size of the image
    target_hw = (img.shape[-2], img.shape[-1])
    mask = F.interpolate(mask.to(torch.float), target_hw)
    mask = increase_sky_mask(mask, 0.18, 0.18)

    # Smooth the mask with a square Gaussian kernel
    side = fire_opts.get("kernel_size", 301)
    std = fire_opts.get("kernel_sigma", 150.5)
    gaussian = kornia.filters.kernels.get_gaussian_kernel2d((side, side), (std, std))
    gaussian = torch.unsqueeze(gaussian, dim=0).to(x.device)
    mask = filter2d(mask, gaussian, "reflect")

    # Flat colour layer; the middle channel is drawn at random in [100, 150]
    overlay = torch.ones(img.shape, device=x.device)
    overlay[:, 0, :, :] = 255
    overlay[:, 1, :, :] = random.randint(100, 150)
    overlay[:, 2, :, :] = 0

    img = paste_tensor(img, overlay, mask, 200)
    img = adjust_brightness(img.to(torch.uint8), 0.8)
    img = img.to(torch.float)

    # dummy pixels to fool scaling and preserve range
    img[:, :, 0, 0] = 255.0
    img[:, :, -1, -1] = 0.0

    return img
def forward(self, inputs):
    """Blur ``inputs`` with a Gaussian kernel whose sigma is drawn at random.

    The kernel side is an odd number derived from a tenth of the input
    height, and sigma is sampled uniformly from ``self.sigma_range``.

    Args:
        inputs: 4-D tensor; its size is unpacked as
            (batch, channels, height, width).

    Returns:
        The result of ``filter2d`` applied to ``inputs`` with "reflect"
        border handling.
    """
    # Unpacking four values also rejects inputs that are not 4-D.
    _, _, height, _ = inputs.size()

    # Force an odd kernel side so the kernel has a centre pixel.
    radius = (height // 10) // 2
    kernel_size = 2 * radius + 1

    sigma = np.random.uniform(*self.sigma_range)
    kernel = get_gaussian_kernel2d((kernel_size, kernel_size), (sigma, sigma))
    # Keep the kernel on the same device as the data it will filter.
    kernel = torch.unsqueeze(kernel, dim=0).to(inputs.device)

    return filter2d(inputs, kernel, "reflect")
def pyrdown(input: torch.Tensor, border_type: str = 'reflect', align_corners: bool = False, factor: float = 2.0) -> torch.Tensor:
    r"""Blur a tensor and downsamples it.

    .. image:: _static/img/pyrdown.png

    Args:
        input: the tensor to be downsampled, with shape BxCxHxW.
        border_type: the padding mode to be applied before convolving.
          The expected modes are: ``'constant'``, ``'reflect'``,
          ``'replicate'`` or ``'circular'``.
        align_corners: interpolation flag.
        factor: the downsampling factor. The output size is
          ``(int(H / factor), int(W / factor))``.

    Return:
        the downsampled tensor.

    Raises:
        ValueError: if ``input`` is not a 4-D tensor.

    Examples:
        >>> input = torch.arange(16, dtype=torch.float32).reshape(1, 1, 4, 4)
        >>> pyrdown(input, align_corners=True)
        tensor([[[[ 3.7500,  5.2500],
                  [ 9.7500, 11.2500]]]])
    """
    if not len(input.shape) == 4:
        raise ValueError(
            f"Invalid input shape, we expect BxCxHxW. Got: {input.shape}")

    kernel: torch.Tensor = _get_pyramid_gaussian_kernel()
    _, _, height, width = input.shape

    # blur image
    x_blur: torch.Tensor = filter2d(input, kernel, border_type)

    # Both dimensions use the same true-division-then-truncate rule. Float
    # floor division can differ from it for non-integer factors
    # (e.g. 11.0 // 1.1 == 9.0 while 11.0 / 1.1 == 10.0), which previously
    # made height and width scale inconsistently.
    out_size = (int(float(height) / factor), int(float(width) / factor))

    # TODO: use kornia.geometry.resize/rescale
    # downsample.
    out: torch.Tensor = F.interpolate(
        x_blur, size=out_size, mode='bilinear', align_corners=align_corners)
    return out
def pyrup(input: torch.Tensor, border_type: str = 'reflect', align_corners: bool = False) -> torch.Tensor:
    r"""Double the spatial size of a tensor and then blur it.

    .. image:: _static/img/pyrup.png

    Args:
        input: the tensor to be upsampled, with shape BxCxHxW.
        border_type: the padding mode to be applied before convolving.
          The expected modes are: ``'constant'``, ``'reflect'``,
          ``'replicate'`` or ``'circular'``.
        align_corners: interpolation flag.

    Return:
        the upsampled and blurred tensor.

    Examples:
        >>> input = torch.arange(4, dtype=torch.float32).reshape(1, 1, 2, 2)
        >>> pyrup(input, align_corners=True)
        tensor([[[[0.7500, 0.8750, 1.1250, 1.2500],
                  [1.0000, 1.1250, 1.3750, 1.5000],
                  [1.5000, 1.6250, 1.8750, 2.0000],
                  [1.7500, 1.8750, 2.1250, 2.2500]]]])
    """
    if len(input.shape) != 4:
        raise ValueError(
            f"Invalid input shape, we expect BxCxHxW. Got: {input.shape}")

    blur_kernel: torch.Tensor = _get_pyramid_gaussian_kernel()

    # Bilinear upsampling to twice the height and width.
    rows, cols = input.shape[2], input.shape[3]
    doubled = (rows * 2, cols * 2)
    # TODO: use kornia.geometry.resize/rescale
    upsampled: torch.Tensor = F.interpolate(
        input, size=doubled, mode='bilinear', align_corners=align_corners)

    # Smooth the enlarged tensor.
    return filter2d(upsampled, blur_kernel, border_type)
def pyrup(input: torch.Tensor, border_type: str = 'reflect', align_corners: bool = False) -> torch.Tensor:
    r"""Upsample a tensor by a factor of two, then blur the result.

    Args:
        input (tensor): the tensor to be upsampled, with shape BxCxHxW.
        border_type (str): the padding mode to be applied before convolving.
          The expected modes are: ``'constant'``, ``'reflect'``,
          ``'replicate'`` or ``'circular'``. Default: ``'reflect'``.
        align_corners(bool): interpolation flag. Default: False. See
          https://pytorch.org/docs/stable/nn.functional.html#torch.nn.functional.interpolate for detail.

    Return:
        torch.Tensor: the upsampled and blurred tensor.

    Examples:
        >>> input = torch.arange(4, dtype=torch.float32).reshape(1, 1, 2, 2)
        >>> pyrup(input, align_corners=True)
        tensor([[[[0.7500, 0.8750, 1.1250, 1.2500],
                  [1.0000, 1.1250, 1.3750, 1.5000],
                  [1.5000, 1.6250, 1.8750, 2.0000],
                  [1.7500, 1.8750, 2.1250, 2.2500]]]])
    """
    rank_ok = len(input.shape) == 4
    if not rank_ok:
        raise ValueError(
            f"Invalid input shape, we expect BxCxHxW. Got: {input.shape}")

    smoothing: torch.Tensor = _get_pyramid_gaussian_kernel()

    # Step 1: enlarge to (2H, 2W) with bilinear interpolation.
    _, _, h, w = input.shape
    enlarged: torch.Tensor = F.interpolate(
        input,
        size=(h * 2, w * 2),
        mode='bilinear',
        align_corners=align_corners,
    )

    # Step 2: blur the enlarged tensor.
    result: torch.Tensor = filter2d(enlarged, smoothing, border_type)
    return result
def pyrdown(input: torch.Tensor, border_type: str = 'reflect', align_corners: bool = False) -> torch.Tensor:
    r"""Blur a tensor, then halve its spatial size.

    Args:
        input (tensor): the tensor to be downsampled, with shape BxCxHxW.
        border_type (str): the padding mode to be applied before convolving.
          The expected modes are: ``'constant'``, ``'reflect'``,
          ``'replicate'`` or ``'circular'``. Default: ``'reflect'``.
        align_corners(bool): interpolation flag. Default: False. See
          https://pytorch.org/docs/stable/nn.functional.html#torch.nn.functional.interpolate for detail.

    Return:
        torch.Tensor: the downsampled tensor, of spatial size
        ``(H // 2, W // 2)``.

    Examples:
        >>> input = torch.arange(16, dtype=torch.float32).reshape(1, 1, 4, 4)
        >>> pyrdown(input, align_corners=True)
        tensor([[[[ 3.7500,  5.2500],
                  [ 9.7500, 11.2500]]]])
    """
    if len(input.shape) != 4:
        raise ValueError(
            f"Invalid input shape, we expect BxCxHxW. Got: {input.shape}")

    smoothing: torch.Tensor = _get_pyramid_gaussian_kernel()
    rows, cols = input.shape[2], input.shape[3]

    # Smooth first so the subsequent size reduction aliases less.
    blurred: torch.Tensor = filter2d(input, smoothing, border_type)

    # Bilinear resize to half the height and width (integer division).
    halved = (rows // 2, cols // 2)
    return F.interpolate(
        blurred, size=halved, mode='bilinear', align_corners=align_corners)
def ssim(img1: torch.Tensor, img2: torch.Tensor, window_size: int, max_val: float = 1.0, eps: float = 1e-12) -> torch.Tensor:
    r"""Compute the Structural Similarity (SSIM) index map between two images.

    For every element of the two inputs, written `x` and `y` below,
    the index is:

    .. math::

      \text{SSIM}(x, y) = \frac{(2\mu_x\mu_y+c_1)(2\sigma_{xy}+c_2)}
      {(\mu_x^2+\mu_y^2+c_1)(\sigma_x^2+\sigma_y^2+c_2)}

    where:
      - :math:`c_1=(k_1 L)^2` and :math:`c_2=(k_2 L)^2` are two variables to
        stabilize the division with weak denominator
        (here :math:`k_1=0.01` and :math:`k_2=0.03`).
      - :math:`L` is the dynamic range of the pixel-values (typically this is
        :math:`2^{\#\text{bits per pixel}}-1`).

    Local statistics are obtained by filtering with a Gaussian kernel of
    size ``window_size`` and sigma 1.5.

    Args:
        img1 (torch.Tensor): the first input image with shape :math:`(B, C, H, W)`.
        img2 (torch.Tensor): the second input image with shape :math:`(B, C, H, W)`.
        window_size (int): the size of the gaussian kernel to smooth the images.
        max_val (float): the dynamic range of the images. Must be a ``float``
          instance. Default: 1.
        eps (float): Small value for numerically stability when dividing. Default: 1e-12.

    Returns:
        torch.Tensor: The ssim index map with shape :math:`(B, C, H, W)`.

    Raises:
        TypeError: if an image is not a tensor or ``max_val`` is not a float.
        ValueError: if an image is not 4-D or the two shapes differ.

    Examples:
        >>> input1 = torch.rand(1, 4, 5, 5)
        >>> input2 = torch.rand(1, 4, 5, 5)
        >>> ssim_map = ssim(input1, input2, 5)  # 1x4x5x5
    """
    named_images = (("img1", img1), ("img2", img2))

    # Type validation (images first, then max_val).
    for label, tensor in named_images:
        if not isinstance(tensor, torch.Tensor):
            raise TypeError(
                f"Input {label} type is not a torch.Tensor. Got {type(tensor)}")

    if not isinstance(max_val, float):
        raise TypeError(
            f"Input max_val type is not a float. Got {type(max_val)}")

    # Shape validation.
    for label, tensor in named_images:
        if len(tensor.shape) != 4:
            raise ValueError(
                f"Invalid {label} shape, we expect BxCxHxW. Got: {tensor.shape}")

    if img1.shape != img2.shape:
        raise ValueError(
            f"img1 and img2 shapes must be the same. Got: {img1.shape} and {img2.shape}")

    # Gaussian window used for every local average below.
    window: torch.Tensor = get_gaussian_kernel2d(
        (window_size, window_size), (1.5, 1.5)).unsqueeze(0)

    # Stabilising constants.
    c1: float = (0.01 * max_val) ** 2
    c2: float = (0.03 * max_val) ** 2

    # Local means.
    mean1: torch.Tensor = filter2d(img1, window)
    mean2: torch.Tensor = filter2d(img2, window)

    mean1_sq = mean1 ** 2
    mean2_sq = mean2 ** 2
    mean_prod = mean1 * mean2

    # Local variances and covariance: E[x^2] - E[x]^2 and E[xy] - E[x]E[y].
    var1 = filter2d(img1 ** 2, window) - mean1_sq
    var2 = filter2d(img2 ** 2, window) - mean2_sq
    cov12 = filter2d(img1 * img2, window) - mean_prod

    # Similarity index map.
    numerator: torch.Tensor = (2.0 * mean_prod + c1) * (2.0 * cov12 + c2)
    denominator: torch.Tensor = (mean1_sq + mean2_sq + c1) * (var1 + var2 + c2)

    return numerator / (denominator + eps)
def forward(self, x):
    """Filter ``x`` with the 2-D kernel built from the 1-D taps in ``self.f``.

    The 2-D kernel is the product of ``self.f`` laid out along the last
    axis and along the second-to-last axis, with a leading axis of size 1.
    It is passed to ``filter2d`` with ``normalized=True``.
    """
    taps = self.f
    along_cols = taps[None, None, :]
    along_rows = taps[None, :, None]
    kernel_2d = along_cols * along_rows
    return filter2d(x, kernel_2d, normalized=True)
def ssim(
    img1: torch.Tensor,
    img2: torch.Tensor,
    window_size: int,
    max_val: float = 1.0,
    eps: float = 1e-12,
    padding: str = 'same',
) -> torch.Tensor:
    r"""Compute the Structural Similarity (SSIM) index map between two images.

    For every element of the two inputs, written `x` and `y` below,
    the index is:

    .. math::

      \text{SSIM}(x, y) = \frac{(2\mu_x\mu_y+c_1)(2\sigma_{xy}+c_2)}
      {(\mu_x^2+\mu_y^2+c_1)(\sigma_x^2+\sigma_y^2+c_2)}

    where:
      - :math:`c_1=(k_1 L)^2` and :math:`c_2=(k_2 L)^2` are two variables to
        stabilize the division with weak denominator
        (here :math:`k_1=0.01` and :math:`k_2=0.03`).
      - :math:`L` is the dynamic range of the pixel-values (typically this is
        :math:`2^{\#\text{bits per pixel}}-1`).

    Args:
        img1: the first input image with shape :math:`(B, C, H, W)`.
        img2: the second input image with shape :math:`(B, C, H, W)`.
        window_size: the size of the gaussian kernel to smooth the images.
        max_val: the dynamic range of the images. Must be a ``float`` instance.
        eps: Small value for numerically stability when dividing.
        padding: ``'same'`` | ``'valid'``. Whether to only use the "valid" convolution
         area to compute SSIM to match the MATLAB implementation of original SSIM paper.
         Only ``'valid'`` triggers cropping of the filtered maps; any other
         value leaves them untouched.

    Returns:
       The ssim index map with shape :math:`(B, C, H, W)` (the filtered maps
       are cropped first when ``padding='valid'``).

    Raises:
        TypeError: if an image is not a tensor or ``max_val`` is not a float.
        ValueError: if an image is not 4-D or the two shapes differ.

    Examples:
        >>> input1 = torch.rand(1, 4, 5, 5)
        >>> input2 = torch.rand(1, 4, 5, 5)
        >>> ssim_map = ssim(input1, input2, 5)  # 1x4x5x5
    """
    named_images = (("img1", img1), ("img2", img2))

    # Type validation (images first, then max_val).
    for label, tensor in named_images:
        if not isinstance(tensor, torch.Tensor):
            raise TypeError(
                f"Input {label} type is not a torch.Tensor. Got {type(tensor)}")

    if not isinstance(max_val, float):
        raise TypeError(
            f"Input max_val type is not a float. Got {type(max_val)}")

    # Shape validation.
    for label, tensor in named_images:
        if len(tensor.shape) != 4:
            raise ValueError(
                f"Invalid {label} shape, we expect BxCxHxW. Got: {tensor.shape}")

    if img1.shape != img2.shape:
        raise ValueError(
            f"img1 and img2 shapes must be the same. Got: {img1.shape} and {img2.shape}"
        )

    # Gaussian window used for every local average below.
    window: torch.Tensor = get_gaussian_kernel2d(
        (window_size, window_size), (1.5, 1.5)).unsqueeze(0)

    # Stabilising constants.
    c1: float = (0.01 * max_val) ** 2
    c2: float = (0.03 * max_val) ** 2

    # In 'valid' mode each filtered map is cropped by the padding amounts
    # computed from the window's height and width.
    crop_border = padding == 'valid'
    crop_amounts = []
    if crop_border:
        win_h, win_w = window.shape[-2:]
        crop_amounts = _compute_padding([win_h, win_w])

    def _local_mean(signal: torch.Tensor) -> torch.Tensor:
        """Gaussian-weighted local average, cropped in 'valid' mode."""
        averaged = filter2d(signal, window)
        if crop_border:
            averaged = _crop(averaged, crop_amounts)
        return averaged

    # First-order statistics.
    mean1 = _local_mean(img1)
    mean2 = _local_mean(img2)

    mean1_sq = mean1 ** 2
    mean2_sq = mean2 ** 2
    mean_prod = mean1 * mean2

    # Second-order raw moments.
    moment11 = _local_mean(img1 ** 2)
    moment22 = _local_mean(img2 ** 2)
    moment12 = _local_mean(img1 * img2)

    # Local variances and covariance.
    var1 = moment11 - mean1_sq
    var2 = moment22 - mean2_sq
    cov12 = moment12 - mean_prod

    # Similarity index map.
    numerator: torch.Tensor = (2.0 * mean_prod + c1) * (2.0 * cov12 + c2)
    denominator: torch.Tensor = (mean1_sq + mean2_sq + c1) * (var1 + var2 + c2)

    return numerator / (denominator + eps)
def distance_transform(image: torch.Tensor, kernel_size: int = 3, h: float = 0.35) -> torch.Tensor:
    r"""Approximates the Manhattan distance transform of images using cascaded convolution operations.

    The value at each pixel in the output represents the distance to the nearest non-zero pixel in the input image.
    It uses the method described in :cite:`pham2021dtlayer`.
    The transformation is applied independently across the channel dimension of the images.

    Args:
        image: Image with shape :math:`(B,C,H,W)`.
        kernel_size: size of the convolution kernel. Must be odd and at least 3.
        h: value that influence the approximation of the min function.

    Returns:
        tensor with shape :math:`(B,C,H,W)`.

    Raises:
        TypeError: if ``image`` is not a tensor.
        ValueError: if ``image`` is not 4-D, or ``kernel_size`` is even or
            smaller than 3.

    Example:
        >>> tensor = torch.zeros(1, 1, 5, 5)
        >>> tensor[:,:, 1, 2] = 1
        >>> dt = kornia.contrib.distance_transform(tensor)
    """
    if not isinstance(image, torch.Tensor):
        raise TypeError(f"image type is not a torch.Tensor. Got {type(image)}")

    if not len(image.shape) == 4:
        raise ValueError(
            f"Invalid image shape, we expect BxCxHxW. Got: {image.shape}")

    if kernel_size % 2 == 0:
        raise ValueError("Kernel size must be an odd number.")

    # A kernel smaller than 3 has a zero half-width: nothing could propagate
    # and the iteration count below would divide by zero.
    if kernel_size < 3:
        raise ValueError(
            f"Kernel size must be at least 3. Got: {kernel_size}")

    # Number of pixels the distance front advances per iteration.
    half_size: int = math.floor(kernel_size / 2)

    # n_iters is set such that the DT will be able to propagate from any corner of the image to its far,
    # diagonally opposite corner
    n_iters: int = math.ceil(max(image.shape[2], image.shape[3]) / half_size)

    # Kernel entries are exp(-d / h), d being the distance of each tap to the
    # kernel centre.
    grid = create_meshgrid(kernel_size,
                           kernel_size,
                           normalized_coordinates=False,
                           device=image.device,
                           dtype=image.dtype)
    grid -= half_size
    kernel = torch.hypot(grid[0, :, :, 0], grid[0, :, :, 1])
    kernel = torch.exp(kernel / -h).unsqueeze(0)

    out = torch.zeros_like(image)

    # It is possible to avoid cloning the image if boundary = image, but this would require modifying the image tensor.
    boundary = image.clone()
    signal_ones = torch.ones_like(boundary)

    for i in range(n_iters):
        cdt = filter2d(boundary, kernel, border_type='replicate')
        cdt = -h * torch.log(cdt)

        # We are calculating log(0) above.
        cdt = torch.nan_to_num(cdt, posinf=0.0)

        # Pixels reached for the first time in this iteration.
        mask = torch.where(cdt > 0, 1.0, 0.0)
        if mask.sum() == 0:
            break

        # Distance already covered by the previous i iterations. The
        # parentheses matter: ``i * kernel_size // 2`` would evaluate as
        # ``(i * kernel_size) // 2`` and over-count the offset.
        offset: int = i * half_size
        out += (offset + cdt) * mask
        boundary = torch.where(mask == 1, signal_ones, boundary)

    return out