def __init__(self, patch_size: int = 19, eps: float = 1e-10):
    """Create the gradient operator and the Gaussian weighting window.

    Args:
        patch_size: side length of the square weighting kernel.
        eps: small constant stored on the instance as ``self.eps``.
    """
    super(PatchAffineShapeEstimator, self).__init__()
    self.patch_size: int = patch_size
    self.eps: float = eps
    self.gradient: nn.Module = SpatialGradient('sobel', 1)

    # The window's sigma is tied to the patch size: patch_size / sqrt(2).
    side = self.patch_size
    gauss_sigma: float = float(side) / math.sqrt(2.0)
    # NOTE(review): the trailing ``True`` is passed positionally; presumably
    # the helper's "force even" flag - confirm against its signature.
    self.weighting: torch.Tensor = get_gaussian_kernel2d(
        (side, side), (gauss_sigma, gauss_sigma), True
    )
def __init__(
    self,
    patch_size: int = 41,
    num_ang_bins: int = 8,
    num_spatial_bins: int = 4,
    rootsift: bool = True,
    clipval: float = 0.2,
) -> None:
    """Store the descriptor settings and build the fixed kernels.

    Args:
        patch_size: side length used for the square Gaussian kernel and
            forwarded to ``get_sift_bin_ksize_stride_pad``.
        num_ang_bins: stored as ``self.num_ang_bins``.
        num_spatial_bins: stored as ``self.num_spatial_bins`` and forwarded
            to ``get_sift_bin_ksize_stride_pad``.
        rootsift: stored as ``self.rootsift``.
        clipval: stored as ``self.clipval``.
    """
    super(SIFTDescriptor, self).__init__()
    self.eps = 1e-10
    self.patch_size = patch_size
    self.num_ang_bins = num_ang_bins
    self.num_spatial_bins = num_spatial_bins
    self.rootsift = rootsift
    self.clipval = clipval

    # Gaussian kernel over the whole patch, sigma = patch_size / sqrt(2).
    side: int = self.patch_size
    gauss_sigma: float = float(side) / math.sqrt(2.0)
    # NOTE(review): the trailing ``True`` is passed positionally; presumably
    # the helper's "force even" flag - confirm against its signature.
    self.gk = get_gaussian_kernel2d((side, side), (gauss_sigma, gauss_sigma), True)

    # Pooling geometry is derived from the patch size and spatial bin count.
    self.bin_ksize, self.bin_stride, self.pad = get_sift_bin_ksize_stride_pad(
        patch_size, num_spatial_bins
    )

    # Single-channel convolution whose weights are overwritten with the
    # pooling kernel returned by ``get_sift_pooling_kernel``.
    pool_weights = get_sift_pooling_kernel(ksize=self.bin_ksize).float()
    k_rows, k_cols = pool_weights.size(0), pool_weights.size(1)
    self.pk = nn.Conv2d(
        1,
        1,
        kernel_size=(k_rows, k_cols),
        stride=(self.bin_stride, self.bin_stride),
        padding=(self.pad, self.pad),
        bias=False,
    )
    self.pk.weight.data.copy_(pool_weights.reshape(1, 1, k_rows, k_cols))  # type: ignore # noqa
def __init__(
    self,
    kernel_size: Tuple[int, int],
    sigma: Tuple[float, float],
    padding_mode: str = "reflect",
    fill_value: float = 0,
):
    """Store the blur settings and precompute the Gaussian kernel.

    Args:
        kernel_size: kernel size pair forwarded to ``get_gaussian_kernel2d``.
        sigma: pair of Gaussian standard deviations forwarded to
            ``get_gaussian_kernel2d``. Annotated as floats because sigmas
            are generally fractional; integer values remain acceptable.
        padding_mode: stored as ``self.padding_mode``.
        fill_value: stored as ``self.fill_value``.
    """
    super().__init__()
    self.kernel_size = kernel_size
    self.sigma = sigma
    self.padding_mode = padding_mode
    self.fill_value = fill_value
    # A leading dimension of size 1 is added in place to the freshly created
    # kernel, so the stored tensor has one more dimension than the helper's
    # output.
    self._kernel = get_gaussian_kernel2d(kernel_size, sigma).unsqueeze_(0)
def __init__(self, patch_size: int = 32, num_angular_bins: int = 36, eps: float = 1e-8):
    """Create the gradient operator, the angular smoother and the weighting window.

    Args:
        patch_size: side length of the square weighting kernel.
        num_angular_bins: stored as ``self.num_ang_bins``.
        eps: small constant stored as ``self.eps``.
    """
    super().__init__()
    self.patch_size = patch_size
    self.num_ang_bins = num_angular_bins
    self.eps = eps
    self.gradient = SpatialGradient('sobel', 1)

    # 3-tap circular 1-D convolution with fixed weights (0.33, 0.34, 0.33);
    # the weights are written under no_grad so the assignment is not tracked
    # by autograd.
    smoother = nn.Conv1d(1, 1, kernel_size=3, padding=1, bias=False, padding_mode="circular")
    with torch.no_grad():
        smoother.weight[:] = torch.tensor([[[0.33, 0.34, 0.33]]])
    self.angular_smooth = smoother

    # Gaussian window over the patch, sigma = patch_size / sqrt(2).
    side = self.patch_size
    gauss_sigma: float = float(side) / math.sqrt(2.0)
    # NOTE(review): the trailing ``True`` is passed positionally; presumably
    # the helper's "force even" flag - confirm against its signature.
    self.weighting = get_gaussian_kernel2d((side, side), (gauss_sigma, gauss_sigma), True)
def __init__(self, window_size: int, reduction: str = 'none', max_val: float = 1.0) -> None:
    """Store the SSIM settings and precompute the window and constants.

    Args:
        window_size: side length of the square Gaussian window.
        reduction: stored as ``self.reduction``.
        max_val: value the stabilising constants ``C1`` and ``C2`` are
            scaled by.
    """
    super(SSIM, self).__init__()
    self.window_size: int = window_size
    self.max_val: float = max_val
    self.reduction: str = reduction

    # Square Gaussian window with a fixed sigma of 1.5 on both axes.
    window_shape = (window_size, window_size)
    self.window: torch.Tensor = get_gaussian_kernel2d(window_shape, (1.5, 1.5))
    self.padding: int = self.compute_zero_padding(window_size)

    # Stabilising constants: (0.01 * max_val)^2 and (0.03 * max_val)^2.
    self.C1: float = (0.01 * self.max_val)**2
    self.C2: float = (0.03 * self.max_val)**2
def forward(self, inputs):
    """Blur ``inputs`` with a Gaussian kernel whose sigma is drawn at random.

    The kernel side is about one tenth of the input height, rounded to an
    odd number; sigma is sampled uniformly from ``self.sigma_range`` on
    every call.

    Args:
        inputs: 4-D tensor; the third dimension is treated as the height.

    Returns:
        The result of ``filter2d(inputs, kernel, "reflect")``.

    Raises:
        ValueError: if ``inputs`` does not have exactly four dimensions
            (raised by the size unpacking).
    """
    # The unpacking enforces a 4-D input; only the height drives the kernel size.
    _, _, height, _ = inputs.size()
    radius = (height // 10) // 2
    kernel_size = 2 * radius + 1  # always odd, at least 1

    sigma = np.random.uniform(*self.sigma_range)
    kernel = get_gaussian_kernel2d((kernel_size, kernel_size), (sigma, sigma)).unsqueeze(0)
    # The input device used to be read into an unused local; use it so the
    # kernel lives on the same device as the data being filtered.
    kernel = kernel.to(inputs.device)
    return filter2d(inputs, kernel, "reflect")
def __init__(self, window_size: int, reduction: str = "none", max_val: float = 1.0) -> None:
    """Store the SSIM settings and precompute the window and constants.

    Args:
        window_size: side length of the square Gaussian window.
        reduction: stored as ``self.reduction``.
        max_val: value the stabilising constants ``C1`` and ``C2`` are
            scaled by.
    """
    super(SSIM, self).__init__()
    self.window_size: int = window_size
    self.max_val: float = max_val
    self.reduction: str = reduction

    # Square Gaussian window (sigma 1.5 on both axes) with gradient
    # tracking switched off.
    window_shape = (window_size, window_size)
    self.window: torch.Tensor = get_gaussian_kernel2d(
        window_shape, (1.5, 1.5)
    ).requires_grad_(False)
    self.padding: int = _compute_zero_padding(window_size)

    # Stabilising constants: (0.01 * max_val)^2 and (0.03 * max_val)^2.
    self.C1: float = (0.01 * self.max_val)**2
    self.C2: float = (0.03 * self.max_val)**2
def test_get_gaussian_kernel2d(ksize_x, ksize_y, sigma):
    """Check that the 2-D Gaussian kernel has the requested shape and sums to one."""
    requested_shape = (ksize_x, ksize_y)
    gauss = filters.get_gaussian_kernel2d(requested_shape, (sigma, sigma))

    assert gauss.shape == requested_shape
    # The kernel is expected to be normalised.
    total = gauss.sum().item()
    assert total == pytest.approx(1.0)
def ssim(img1: torch.Tensor, img2: torch.Tensor, window_size: int,
         max_val: float = 1.0, eps: float = 1e-12) -> torch.Tensor:
    r"""Compute the Structural Similarity (SSIM) index map between two images.

    Measures the (SSIM) index between each element in the input `x` and
    target `y`.

    The index can be described as:

    .. math::

      \text{SSIM}(x, y) = \frac{(2\mu_x\mu_y+c_1)(2\sigma_{xy}+c_2)}
      {(\mu_x^2+\mu_y^2+c_1)(\sigma_x^2+\sigma_y^2+c_2)}

    where:
      - :math:`c_1=(k_1 L)^2` and :math:`c_2=(k_2 L)^2` are two variables to
        stabilize the division with weak denominator.
      - :math:`L` is the dynamic range of the pixel-values (typically this is
        :math:`2^{\#\text{bits per pixel}}-1`).

    Args:
        img1 (torch.Tensor): the first input image with shape :math:`(B, C, H, W)`.
        img2 (torch.Tensor): the second input image with shape :math:`(B, C, H, W)`.
        window_size (int): the size of the gaussian kernel to smooth the images.
        max_val (float): the dynamic range of the images. Must be a ``float``
            instance (an ``int`` is rejected). Default: 1.
        eps (float): Small value for numerically stability when dividing.
            Default: 1e-12.

    Returns:
        torch.Tensor: The ssim index map with shape :math:`(B, C, H, W)`.

    Raises:
        TypeError: if ``img1`` or ``img2`` is not a tensor, or ``max_val`` is
            not a float.
        ValueError: if either image is not 4-D or the two shapes differ.

    Examples:
        >>> input1 = torch.rand(1, 4, 5, 5)
        >>> input2 = torch.rand(1, 4, 5, 5)
        >>> ssim_map = ssim(input1, input2, 5)  # 1x4x5x5
    """
    if not isinstance(img1, torch.Tensor):
        raise TypeError(f"Input img1 type is not a torch.Tensor. Got {type(img1)}")

    if not isinstance(img2, torch.Tensor):
        raise TypeError(f"Input img2 type is not a torch.Tensor. Got {type(img2)}")

    if not isinstance(max_val, float):
        raise TypeError(f"Input max_val type is not a float. Got {type(max_val)}")

    if len(img1.shape) != 4:
        raise ValueError(f"Invalid img1 shape, we expect BxCxHxW. Got: {img1.shape}")

    if len(img2.shape) != 4:
        raise ValueError(f"Invalid img2 shape, we expect BxCxHxW. Got: {img2.shape}")

    if img1.shape != img2.shape:
        # The message is kept on one physical line: a plain string literal
        # cannot contain a raw line break.
        raise ValueError(
            f"img1 and img2 shapes must be the same. Got: {img1.shape} and {img2.shape}")

    # prepare kernel: square gaussian window with a fixed sigma of 1.5 and a
    # leading dimension of size 1
    kernel: torch.Tensor = get_gaussian_kernel2d(
        (window_size, window_size), (1.5, 1.5)).unsqueeze(0)

    # compute coefficients
    C1: float = (0.01 * max_val)**2
    C2: float = (0.03 * max_val)**2

    # compute local mean per channel
    mu1: torch.Tensor = filter2D(img1, kernel)
    mu2: torch.Tensor = filter2D(img2, kernel)

    mu1_sq = mu1**2
    mu2_sq = mu2**2
    mu1_mu2 = mu1 * mu2

    # compute local sigma per channel: E[x^2] - E[x]^2 and E[xy] - E[x]E[y]
    sigma1_sq = filter2D(img1**2, kernel) - mu1_sq
    sigma2_sq = filter2D(img2**2, kernel) - mu2_sq
    sigma12 = filter2D(img1 * img2, kernel) - mu1_mu2

    # compute the similarity index map
    num: torch.Tensor = (2. * mu1_mu2 + C1) * (2. * sigma12 + C2)
    den: torch.Tensor = (mu1_sq + mu2_sq + C1) * (sigma1_sq + sigma2_sq + C2)

    # eps keeps the division finite when the denominator is tiny
    return num / (den + eps)
def ssim(
    img1: torch.Tensor,
    img2: torch.Tensor,
    window_size: int,
    max_val: float = 1.0,
    eps: float = 1e-12,
    padding: str = 'same',
) -> torch.Tensor:
    r"""Compute the Structural Similarity (SSIM) index map between two images.

    Measures the (SSIM) index between each element in the input `x` and
    target `y`.

    The index can be described as:

    .. math::

      \text{SSIM}(x, y) = \frac{(2\mu_x\mu_y+c_1)(2\sigma_{xy}+c_2)}
      {(\mu_x^2+\mu_y^2+c_1)(\sigma_x^2+\sigma_y^2+c_2)}

    where:
      - :math:`c_1=(k_1 L)^2` and :math:`c_2=(k_2 L)^2` are two variables to
        stabilize the division with weak denominator.
      - :math:`L` is the dynamic range of the pixel-values (typically this is
        :math:`2^{\#\text{bits per pixel}}-1`).

    Args:
        img1: the first input image with shape :math:`(B, C, H, W)`.
        img2: the second input image with shape :math:`(B, C, H, W)`.
        window_size: the size of the gaussian kernel to smooth the images.
        max_val: the dynamic range of the images. Must be a ``float``
            instance (an ``int`` is rejected).
        eps: Small value for numerically stability when dividing.
        padding: ``'same'`` | ``'valid'``. Whether to only use the "valid"
            convolution area to compute SSIM to match the MATLAB
            implementation of original SSIM paper. Any value other than
            ``'valid'`` leaves the filtered maps uncropped, exactly like
            ``'same'``.

    Returns:
        The ssim index map with shape :math:`(B, C, H, W)`. With
        ``padding='valid'`` the filtered maps are passed through ``_crop``
        first, so the result is presumably smaller - confirm against
        ``_crop``.

    Raises:
        TypeError: if ``img1`` or ``img2`` is not a tensor, or ``max_val`` is
            not a float.
        ValueError: if either image is not 4-D or the two shapes differ.

    Examples:
        >>> input1 = torch.rand(1, 4, 5, 5)
        >>> input2 = torch.rand(1, 4, 5, 5)
        >>> ssim_map = ssim(input1, input2, 5)  # 1x4x5x5
    """
    if not isinstance(img1, torch.Tensor):
        raise TypeError(f"Input img1 type is not a torch.Tensor. Got {type(img1)}")

    if not isinstance(img2, torch.Tensor):
        raise TypeError(f"Input img2 type is not a torch.Tensor. Got {type(img2)}")

    if not isinstance(max_val, float):
        raise TypeError(f"Input max_val type is not a float. Got {type(max_val)}")

    if len(img1.shape) != 4:
        raise ValueError(f"Invalid img1 shape, we expect BxCxHxW. Got: {img1.shape}")

    if len(img2.shape) != 4:
        # The message is kept on one physical line: a plain string literal
        # cannot contain a raw line break.
        raise ValueError(f"Invalid img2 shape, we expect BxCxHxW. Got: {img2.shape}")

    if img1.shape != img2.shape:
        raise ValueError(
            f"img1 and img2 shapes must be the same. Got: {img1.shape} and {img2.shape}"
        )

    # prepare kernel: square gaussian window with a fixed sigma of 1.5 and a
    # leading dimension of size 1
    kernel: torch.Tensor = get_gaussian_kernel2d(
        (window_size, window_size), (1.5, 1.5)
    ).unsqueeze(0)

    # compute coefficients
    C1: float = (0.01 * max_val)**2
    C2: float = (0.03 * max_val)**2

    # compute the local first- and second-order moments per channel
    mu1: torch.Tensor = filter2d(img1, kernel)
    mu2: torch.Tensor = filter2d(img2, kernel)
    mu_img1_sq = filter2d(img1**2, kernel)
    mu_img2_sq = filter2d(img2**2, kernel)
    mu_img1_img2 = filter2d(img1 * img2, kernel)

    # 'valid' crops every filtered map by the amount derived from the kernel
    # size; all other values keep the maps as they are.
    if padding == 'valid':
        height, width = kernel.shape[-2:]
        cropping_shape: List[int] = _compute_padding([height, width])
        mu1 = _crop(mu1, cropping_shape)
        mu2 = _crop(mu2, cropping_shape)
        mu_img1_sq = _crop(mu_img1_sq, cropping_shape)
        mu_img2_sq = _crop(mu_img2_sq, cropping_shape)
        mu_img1_img2 = _crop(mu_img1_img2, cropping_shape)

    mu1_sq = mu1**2
    mu2_sq = mu2**2
    mu1_mu2 = mu1 * mu2

    # compute local sigma per channel: E[x^2] - E[x]^2 and E[xy] - E[x]E[y]
    sigma1_sq = mu_img1_sq - mu1_sq
    sigma2_sq = mu_img2_sq - mu2_sq
    sigma12 = mu_img1_img2 - mu1_mu2

    # compute the similarity index map
    num: torch.Tensor = (2.0 * mu1_mu2 + C1) * (2.0 * sigma12 + C2)
    den: torch.Tensor = (mu1_sq + mu2_sq + C1) * (sigma1_sq + sigma2_sq + C2)

    # eps keeps the division finite when the denominator is tiny
    return num / (den + eps)