def forward(self, input, target, mask=None):
    """Return ``1 - cos`` of the angle between two gradient-derived vectors.

    For each of ``input`` and ``target`` the Sobel spatial gradient is
    negated, flattened, and extended with a trailing ``1`` so that a single
    homogeneous vector represents the whole tensor. The loss is one minus
    the cosine similarity of those two vectors.

    Args:
        input: predicted tensor.
        target: reference tensor; must have the same shape as ``input``.
        mask: optional index applied to both tensors before the gradients
            are taken.

    Returns:
        A scalar tensor with the loss value.

    NOTE(review): indexing with a boolean ``mask`` flattens the tensors,
    which ``spatial_gradient`` may not accept -- confirm how callers build
    the mask.
    """
    assert input.shape == target.shape

    if mask is not None:
        input, target = input[mask], target[mask]

    def _homogeneous(values):
        # Negated, flattened Sobel response with a constant 1 appended.
        grad = KF.spatial_gradient(values, mode='sobel')
        one = torch.ones([1], dtype=torch.float32, device=grad.device)
        return torch.cat((-grad.view(-1), one))

    vec_pred = _homogeneous(input)
    vec_true = _homogeneous(target)

    # Cosine similarity = inner product divided by the product of the norms.
    inner = torch.dot(vec_pred, vec_true)
    norm_pred = torch.sqrt(torch.dot(vec_pred, vec_pred))
    norm_true = torch.sqrt(torch.dot(vec_true, vec_true))

    loss = 1 - inner / (norm_pred * norm_true)
    return torch.mean(loss)
def get_normal_anlges(image, eps=EPS):
    """Estimate the edge-normal angle per pixel from second-order gradients.

    Ref: https://github.com/nv-tlabs/STEAL/blob/master/utils/edges_nms.m

    The image is blurred with a 5x5 Gaussian (sigma 2), differentiated once,
    and each first-order component is differentiated again.

    Args:
        image: tensor handed to ``gaussian_blur2d``.
        eps: small offset added before taking the sign and before dividing.

    Returns:
        ``atan(g_yy * sign(-(g_xy + eps)) / (g_xx + eps))``. The second-order
        terms have every size-one dimension squeezed out, so the result
        loses singleton batch/channel axes.
    """
    smoothed = gaussian_blur2d(image, (5, 5), (2, 2))
    first_order = spatial_gradient(smoothed)

    # Differentiate the x- and y-components of the first-order gradient again.
    from_dx = spatial_gradient(first_order[:, :, 0].squeeze(2))
    from_dy = spatial_gradient(first_order[:, :, 1].squeeze(2))

    g_xx = from_dx[:, :, 0].squeeze()
    g_xy = from_dy[:, :, 0].squeeze()
    g_yy = from_dy[:, :, 1].squeeze()

    direction = torch.sign(-(g_xy + eps))
    return torch.atan(g_yy * direction / (g_xx + eps))
def forward(self, input):
    """Compute a dense, SIFT-like orientation-histogram descriptor map.

    Steps: finite-difference gradients -> magnitude / orientation ->
    linear soft-assignment of each pixel to its two neighbouring angular
    bins -> per-bin pooling with ``self.bin_pooling_kernel`` -> channel
    concatenation -> ``self.PoolingConv`` -> L2 normalise, clamp to
    ``[0, self.clipval]``, L2 normalise again (and optional RootSIFT).

    Args:
        input: tensor of shape ``(B, 1, H, W)`` (enforced by
            ``KORNIA_CHECK_SHAPE``).

    Returns:
        The normalised output of ``self.PoolingConv``.
    """
    KORNIA_CHECK_SHAPE(input, ["B", "1", "H", "W"])

    # Keep the pooling modules on the same dtype/device as the input.
    self.bin_pooling_kernel = self.bin_pooling_kernel.to(input.dtype).to(input.device)
    self.PoolingConv = self.PoolingConv.to(input.dtype).to(input.device)

    grads: torch.Tensor = spatial_gradient(input, 'diff')
    # unpack the edges
    gx: torch.Tensor = grads[:, :, 0]
    gy: torch.Tensor = grads[:, :, 1]

    mag: torch.Tensor = torch.sqrt(gx * gx + gy * gy + self.eps)
    # Shift by 2*pi so the orientation is positive before binning.
    ori: torch.Tensor = torch.atan2(gy, gx + self.eps) + 2.0 * pi

    # Fractional bin position; its floor and floor + 1 are the two bins that
    # share the pixel's magnitude.
    o_big: torch.Tensor = float(self.num_ang_bins) * ori / (2.0 * pi)
    bo0_big_: torch.Tensor = torch.floor(o_big)
    wo1_big_: torch.Tensor = o_big - bo0_big_
    bo0_big: torch.Tensor = bo0_big_ % self.num_ang_bins
    bo1_big: torch.Tensor = (bo0_big + 1) % self.num_ang_bins
    wo0_big: torch.Tensor = (1.0 - wo1_big_) * mag  # type: ignore
    wo1_big: torch.Tensor = wo1_big_ * mag

    per_bin = [
        self.bin_pooling_kernel(
            (bo0_big == i).to(input.dtype) * wo0_big
            + (bo1_big == i).to(input.dtype) * wo1_big
        )
        for i in range(self.num_ang_bins)
    ]
    ang_bins: torch.Tensor = torch.cat(per_bin, dim=1)

    out_no_norm = self.PoolingConv(ang_bins)
    out = F.normalize(out_no_norm, dim=1, p=2).clamp_(0, float(self.clipval))
    out = F.normalize(out, dim=1, p=2)
    if self.rootsift:
        out = torch.sqrt(F.normalize(out, p=1) + self.eps)
    return out
def forward(self, input):
    """Compute one SIFT-like descriptor per input patch.

    Gradient magnitudes (weighted by ``self.gk``) are soft-assigned to
    ``self.num_ang_bins`` orientation bins, pooled per bin with ``self.pk``,
    flattened to one row per patch, then L2-normalised, clamped to
    ``[0, self.clipval]`` and L2-normalised again. With ``self.rootsift``
    the result is additionally L1-normalised and square-rooted.

    Args:
        input: tensor of shape ``(B, 1, patch_size, patch_size)``
            (enforced by ``KORNIA_CHECK_SHAPE``).

    Returns:
        Tensor with ``B`` rows, one flattened descriptor per patch.
    """
    side = f"{self.patch_size}"
    KORNIA_CHECK_SHAPE(input, ["B", "1", side, side])
    batch: int = input.shape[0]
    self.pk = self.pk.to(input.dtype).to(input.device)

    grad = spatial_gradient(input, 'diff')
    dx, dy = grad[:, :, 0], grad[:, :, 1]

    magnitude = torch.sqrt(dx * dx + dy * dy + self.eps)
    # Shift by 2*pi so the orientation is positive before binning.
    orientation = torch.atan2(dy, dx + self.eps) + 2.0 * pi
    magnitude = magnitude * self.gk.expand_as(magnitude).type_as(magnitude).to(magnitude.device)

    # Fractional bin position and the two bins that share each pixel.
    n_bins = self.num_ang_bins
    position = float(n_bins) * orientation / (2.0 * pi)
    floor_pos = torch.floor(position)
    frac = position - floor_pos
    lower = floor_pos % n_bins
    upper = (lower + 1) % n_bins
    w_lower = (1.0 - frac) * magnitude  # type: ignore
    w_upper = frac * magnitude

    pooled = [
        self.pk((lower == idx).to(input.dtype) * w_lower
                + (upper == idx).to(input.dtype) * w_upper)
        for idx in range(0, n_bins)
    ]

    desc = torch.cat(pooled, dim=1).view(batch, -1)
    desc = F.normalize(desc, p=2)
    desc = torch.clamp(desc, 0.0, float(self.clipval))
    desc = F.normalize(desc, p=2)
    if self.rootsift:
        desc = torch.sqrt(F.normalize(desc, p=1) + self.eps)
    return desc
def depth_to_normals(depth: torch.Tensor, camera_matrix: torch.Tensor, normalize_points: bool = False) -> torch.Tensor:
    """Compute the normal surface per pixel.

    Args:
        depth (torch.Tensor): image tensor containing a depth value per pixel.
        camera_matrix (torch.Tensor): tensor containing the camera intrinsics.
        normalize_points (bool): whether to normalise the pointcloud. This
            must be set to `True` when the depth is represented as the
            Euclidean ray length from the camera position. Default is `False`.

    Shape:
        - Input: :math:`(B, 1, H, W)` and :math:`(B, 3, 3)`
        - Output: :math:`(B, 3, H, W)`

    Return:
        torch.Tensor: tensor with a normal surface vector per pixel of the
        same resolution as the input.

    Raises:
        TypeError: if ``depth`` or ``camera_matrix`` is not a ``torch.Tensor``.
        ValueError: if ``depth`` is not ``(B, 1, H, W)`` or ``camera_matrix``
            is not ``(B, 3, 3)``.
    """
    if not isinstance(depth, torch.Tensor):
        raise TypeError(
            f"Input depht type is not a torch.Tensor. Got {type(depth)}.")

    # The whole conjunction must be negated: `not a and b` parses as
    # `(not a) and b`, which lets wrong channel counts through and indexes
    # shape[-3] on tensors with too few dimensions.
    if not (len(depth.shape) == 4 and depth.shape[-3] == 1):
        raise ValueError(
            f"Input depth musth have a shape (B, 1, H, W). Got: {depth.shape}")

    if not isinstance(camera_matrix, torch.Tensor):
        raise TypeError(f"Input camera_matrix type is not a torch.Tensor. "
                        f"Got {type(camera_matrix)}.")

    if not (len(camera_matrix.shape) == 3 and camera_matrix.shape[-2:] == (3, 3)):
        raise ValueError(f"Input camera_matrix must have a shape (B, 3, 3). "
                         f"Got: {camera_matrix.shape}.")

    # compute the 3d points from depth
    xyz: torch.Tensor = depth_to_3d(depth, camera_matrix, normalize_points)  # Bx3xHxW

    # compute the pointcloud spatial gradients
    gradients: torch.Tensor = spatial_gradient(xyz)  # Bx3x2xHxW

    # The normal is the cross product of the two tangent directions.
    a, b = gradients[:, :, 0], gradients[:, :, 1]  # Bx3xHxW
    normals: torch.Tensor = torch.cross(a, b, dim=1)  # Bx3xHxW
    return F.normalize(normals, dim=1, p=2)
def forward(self, input):
    """Compute one SIFT-like descriptor per input patch.

    Gradient magnitudes (weighted by ``self.gk``) are soft-assigned to
    ``self.num_ang_bins`` orientation bins, pooled per bin with ``self.pk``,
    flattened to one row per patch, then L2-normalised, clamped to
    ``[0, self.clipval]`` and L2-normalised again. With ``self.rootsift``
    the result is additionally L1-normalised and square-rooted.

    Args:
        input: tensor of shape ``(B, 1, patch_size, patch_size)``.

    Returns:
        Tensor with ``B`` rows, one flattened descriptor per patch.

    Raises:
        TypeError: if ``input`` is not a tensor, or its channel/spatial
            sizes do not match ``(1, patch_size, patch_size)``.
        ValueError: if ``input`` is not 4-dimensional.
    """
    if not isinstance(input, torch.Tensor):
        raise TypeError("Input type is not a torch.Tensor. Got {}".format(type(input)))
    if len(input.shape) != 4:
        raise ValueError("Invalid input shape, we expect Bx1xHxW. Got: {}".format(input.shape))

    n_patches, n_channels, dim_a, dim_b = input.size()
    expected = self.patch_size
    if not (dim_a == expected and dim_b == expected and n_channels == 1):
        raise TypeError("input shape should be must be [Bx1x{}x{}]. "
                        "Got {}".format(self.patch_size, self.patch_size, input.size()))

    self.pk = self.pk.to(input.dtype).to(input.device)

    grad = spatial_gradient(input, 'diff')
    dx, dy = grad[:, :, 0], grad[:, :, 1]

    magnitude = torch.sqrt(dx * dx + dy * dy + self.eps)
    # Shift by 2*pi so the orientation is positive before binning.
    orientation = torch.atan2(dy, dx + self.eps) + 2.0 * pi
    magnitude = magnitude * self.gk.expand_as(magnitude).type_as(magnitude).to(magnitude.device)

    # Fractional bin position and the two bins that share each pixel.
    n_bins = self.num_ang_bins
    position = float(n_bins) * orientation / (2.0 * pi)
    floor_pos = torch.floor(position)
    frac = position - floor_pos
    lower = floor_pos % n_bins
    upper = (lower + 1) % n_bins
    w_lower = (1.0 - frac) * magnitude  # type: ignore
    w_upper = frac * magnitude

    pooled = [
        self.pk((lower == idx).to(input.dtype) * w_lower
                + (upper == idx).to(input.dtype) * w_upper)
        for idx in range(0, n_bins)
    ]

    desc = torch.cat(pooled, dim=1).view(n_patches, -1)
    desc = F.normalize(desc, p=2)
    desc = torch.clamp(desc, 0., float(self.clipval))
    desc = F.normalize(desc, p=2)
    if self.rootsift:
        desc = torch.sqrt(F.normalize(desc, p=1) + self.eps)
    return desc
def __init__(self, img):
    """Precompute coordinates, Sobel gradient and Laplacian of a square image.

    Attributes set: ``img``, ``size``, ``coords_abs``, ``grad`` (gradient
    components moved to the last axis, with the two Sobel components
    swapped), ``grad_norm`` and ``laplace``.

    Args:
        img: 2-D tensor whose two dimensions are equal.

    Raises:
        ValueError: if ``img`` is not 2-D or not square.
    """
    is_square_2d = img.dim() == 2 and img.size()[0] == img.size()[1]
    if not is_square_2d:
        raise ValueError('Image should be single channel square image')

    with torch.no_grad():
        # creating dataset
        side = img.size()[0]
        self.img = img
        self.size = side
        self.coords_abs = generate_coordinates(side)

        # The filters are applied to a (1, 1, side, side) view of the image.
        batched = img.view(1, 1, side, side)

        # better not normalize
        sobel = spatial_gradient(batched, mode='sobel', normalized=False)[0][0]
        # Swap the two gradient components before stacking.
        swapped = torch.stack((sobel[1], sobel[0]), dim=0)
        self.grad_norm = torch.linalg.norm(swapped, dim=0)
        self.grad = swapped.permute(1, 2, 0)

        self.laplace = laplacian(batched, kernel_size=3, normalized=False)[0][0]
def forward(self, input: torch.Tensor) -> torch.Tensor:  # type: ignore
    """Compute a normalised, non-maxima-suppressed Harris corner map.

    Args:
        input: tensor of shape ``(B, C, H, W)``.

    Returns:
        The Harris response, clamped from below at ``1e-6``, passed through
        3x3 non-maxima suppression and divided by its per-channel maximum.

    Raises:
        TypeError: if ``input`` is not a tensor.
        ValueError: if ``input`` is not 4-dimensional.
    """
    if not torch.is_tensor(input):
        raise TypeError("Input type is not a torch.Tensor. Got {}".format(type(input)))
    if len(input.shape) != 4:
        raise ValueError("Invalid input shape, we expect BxCxHxW. Got: {}".format(input.shape))

    # compute the first order gradients with sobel operator
    # TODO: implement support for kernel different than three
    grads: torch.Tensor = spatial_gradient(input)
    ix, iy = grads[:, :, 0], grads[:, :, 1]

    # compute the structure tensor M elements (3x3 Gaussian window, sigma 1)
    window, sigma = (3, 3), (1., 1.)
    sxx: torch.Tensor = gaussian_blur2d(ix * ix, window, sigma)
    syy: torch.Tensor = gaussian_blur2d(iy * iy, window, sigma)
    sxy: torch.Tensor = gaussian_blur2d(ix * iy, window, sigma)

    determinant: torch.Tensor = sxx * syy - sxy * sxy
    trace: torch.Tensor = sxx + syy

    # compute the response map
    response: torch.Tensor = determinant - self.k * trace**2

    # threshold
    # TODO: add as signature parameter ?
    response = torch.clamp(response, min=1e-6)

    # apply non maxima suppression
    response = non_maxima_suppression2d(response, kernel_size=(3, 3))

    # normalize and return
    peak: torch.Tensor = F.adaptive_max_pool2d(response, output_size=1)
    return response / peak
def harris_response(input: torch.Tensor, k: Union[torch.Tensor, float] = 0.04, grads_mode: str = 'sobel', sigmas: Optional[torch.Tensor] = None) -> torch.Tensor:
    r"""Compute the Harris cornerness function.

    No normalization, clamping or non-maxima suppression is applied. The
    response map is

    .. math::
        R = det(M) - k \cdot trace(M)^2

    where :math:`M` is the structure tensor, with each entry smoothed by a
    7x7 Gaussian window (sigma 1):

    .. math::
        M = \sum_{(x,y) \in W}
            \begin{bmatrix}
                I^{2}_x & I_x I_y \\
                I_x I_y & I^{2}_y \\
            \end{bmatrix}

    and :math:`k` is an empirically determined constant,
    :math:`k \in [0.04, 0.06]`.

    Args:
        input (torch.Tensor): 4d tensor.
        k (torch.Tensor or float): the Harris detector free parameter.
        grads_mode (str): can be 'sobel' for standalone use or 'diff' for
            use on a Gaussian pyramid.
        sigmas (optional, torch.Tensor): 1-d tensor of length ``B``. The
            response of each batch item is multiplied by ``sigma ** 4``,
            which is needed for non-maxima suppression across scale pyramid
            levels. See `vlfeat
            <https://github.com/vlfeat/vlfeat/blob/master/vl/covdet.c#L874>`_

    Return:
        torch.Tensor: the response map per channel.

    Raises:
        TypeError: if ``input`` or ``sigmas`` is not a tensor.
        ValueError: if ``input`` is not 4d, or ``sigmas`` is not 1-d with
            ``input.size(0)`` entries.

    Shape:
        - Input: :math:`(B, C, H, W)`
        - Output: :math:`(B, C, H, W)`

    Examples:
        >>> input = torch.zeros(1, 1, 7, 7)
        >>> input[..., 1:6, 1:6] = 1.0  # 5x5 square of ones
        >>> harris_response(input, 0.04).shape
        torch.Size([1, 1, 7, 7])
    """
    # TODO: Recompute doctest
    if not torch.is_tensor(input):
        raise TypeError("Input type is not a torch.Tensor. Got {}".format(type(input)))
    if len(input.shape) != 4:
        raise ValueError("Invalid input shape, we expect BxCxHxW. Got: {}".format(input.shape))

    has_sigmas = sigmas is not None
    if has_sigmas:
        if not torch.is_tensor(sigmas):
            raise TypeError("sigmas type is not a torch.Tensor. Got {}".format(type(sigmas)))
        if len(sigmas.shape) != 1 or sigmas.size(0) != input.size(0):
            raise ValueError("Invalid sigmas shape, we expect B == input.size(0). Got: {}".format(sigmas.shape))

    grads: torch.Tensor = spatial_gradient(input, grads_mode)
    ix, iy = grads[:, :, 0], grads[:, :, 1]

    # compute the structure tensor M elements
    window, std = (7, 7), (1., 1.)
    sxx: torch.Tensor = gaussian_blur2d(ix ** 2, window, std)
    syy: torch.Tensor = gaussian_blur2d(iy ** 2, window, std)
    sxy: torch.Tensor = gaussian_blur2d(ix * iy, window, std)

    determinant: torch.Tensor = sxx * syy - sxy * sxy
    trace: torch.Tensor = sxx + syy

    # compute the response map
    response: torch.Tensor = determinant - k * (trace ** 2)
    if has_sigmas:
        response = response * sigmas.pow(4).view(-1, 1, 1, 1)
    return response
def hessian_response(input: torch.Tensor, grads_mode: str = 'sobel', sigmas: Optional[torch.Tensor] = None) -> torch.Tensor:
    r"""Compute the determinant of the Hessian matrix.

    No normalization or non-maxima suppression is applied, and the
    determinant is returned signed (no absolute value is taken). The
    response map is

    .. math::
        R = det(H)

    where:

    .. math::
        H = \begin{bmatrix}
                I_{xx} & I_{xy} \\
                I_{xy} & I_{yy} \\
            \end{bmatrix}

    Args:
        input (torch.Tensor): 4d tensor.
        grads_mode (str): can be 'sobel' for standalone use or 'diff' for
            use on a Gaussian pyramid.
        sigmas (optional, torch.Tensor): 1-d tensor of length ``B``. The
            response of each batch item is multiplied by ``sigma ** 4``,
            which is needed for non-maxima suppression across scale pyramid
            levels. See `vlfeat
            <https://github.com/vlfeat/vlfeat/blob/master/vl/covdet.c#L874>`_

    Return:
        torch.Tensor: the response map per channel.

    Raises:
        TypeError: if ``input`` or ``sigmas`` is not a tensor.
        ValueError: if ``input`` is not 4d, or ``sigmas`` is not 1-d with
            ``input.size(0)`` entries.

    Shape:
        - Input: :math:`(B, C, H, W)`
        - Output: :math:`(B, C, H, W)`

    Examples:
        >>> input = torch.zeros(1, 1, 7, 7)
        >>> input[..., 1:6, 1:6] = 1.0  # 5x5 square of ones
        >>> hessian_response(input).shape
        torch.Size([1, 1, 7, 7])
    """
    # TODO: Recompute doctest
    if not torch.is_tensor(input):
        raise TypeError("Input type is not a torch.Tensor. Got {}".format(type(input)))
    if len(input.shape) != 4:
        raise ValueError("Invalid input shape, we expect BxCxHxW. Got: {}".format(input.shape))

    has_sigmas = sigmas is not None
    if has_sigmas:
        if not torch.is_tensor(sigmas):
            raise TypeError("sigmas type is not a torch.Tensor. Got {}".format(type(sigmas)))
        if len(sigmas.shape) != 1 or sigmas.size(0) != input.size(0):
            raise ValueError("Invalid sigmas shape, we expect B == input.size(0). Got: {}".format(sigmas.shape))

    # Second-order spatial gradients, stacked along dim 2 as (xx, xy, yy).
    second_order: torch.Tensor = spatial_gradient(input, grads_mode, 2)
    ixx, ixy, iyy = second_order[:, :, 0], second_order[:, :, 1], second_order[:, :, 2]

    response: torch.Tensor = ixx * iyy - ixy ** 2
    if has_sigmas:
        response = response * sigmas.pow(4).view(-1, 1, 1, 1)
    return response
def gftt_response(input: torch.Tensor, grads_mode: str = 'sobel', sigmas: Optional[torch.Tensor] = None) -> torch.Tensor:
    r"""Compute the Shi-Tomasi cornerness function.

    No normalization or non-maxima suppression is applied. The response
    map is

    .. math::
        R = min(eig(M))

    where :math:`M` is the structure tensor, with each entry smoothed by a
    7x7 Gaussian window (sigma 1):

    .. math::
        M = \sum_{(x,y) \in W}
            \begin{bmatrix}
                I^{2}_x & I_x I_y \\
                I_x I_y & I^{2}_y \\
            \end{bmatrix}

    Args:
        input (torch.Tensor): 4d tensor.
        grads_mode (str): can be 'sobel' for standalone use or 'diff' for
            use on a Gaussian pyramid.
        sigmas (optional, torch.Tensor): coefficients of shape ``(B)``. The
            response of each batch item is multiplied by ``sigma ** 4``,
            which is needed for non-maxima suppression across scale pyramid
            levels. The tensor is used as given; this function performs no
            type or shape validation on it. See `vlfeat
            <https://github.com/vlfeat/vlfeat/blob/master/vl/covdet.c#L874>`_

    Return:
        torch.Tensor: the response map per channel.

    Raises:
        TypeError: if ``input`` is not a tensor.
        ValueError: if ``input`` is not 4d.

    Shape:
        - Input: :math:`(B, C, H, W)`
        - Output: :math:`(B, C, H, W)`

    Examples:
        >>> input = torch.zeros(1, 1, 7, 7)
        >>> input[..., 1:6, 1:6] = 1.0  # 5x5 square of ones
        >>> gftt_response(input).shape
        torch.Size([1, 1, 7, 7])
    """
    # TODO: Recompute doctest
    if not torch.is_tensor(input):
        raise TypeError("Input type is not a torch.Tensor. Got {}".format(type(input)))
    if not len(input.shape) == 4:
        raise ValueError("Invalid input shape, we expect BxCxHxW. Got: {}".format(input.shape))

    gradients: torch.Tensor = spatial_gradient(input, grads_mode)
    dx: torch.Tensor = gradients[:, :, 0]
    dy: torch.Tensor = gradients[:, :, 1]

    # compute the structure tensor M elements
    def g(x):
        return gaussian_blur2d(x, (7, 7), (1., 1.))

    dx2: torch.Tensor = g(dx ** 2)
    dy2: torch.Tensor = g(dy ** 2)
    dxy: torch.Tensor = g(dx * dy)

    det_m: torch.Tensor = dx2 * dy2 - dxy * dxy
    trace_m: torch.Tensor = dx2 + dy2

    # Closed-form eigenvalues of the 2x2 matrix. The discriminant root is
    # shared by both, so compute it once; abs() guards against tiny negative
    # values caused by rounding.
    root: torch.Tensor = torch.sqrt((trace_m ** 2 - 4 * det_m).abs())
    e1: torch.Tensor = 0.5 * (trace_m + root)
    e2: torch.Tensor = 0.5 * (trace_m - root)

    scores: torch.Tensor = torch.min(e1, e2)
    if sigmas is not None:
        scores = scores * sigmas.pow(4).view(-1, 1, 1, 1)
    return scores
def canny(
    input: torch.Tensor,
    low_threshold: float = 0.1,
    high_threshold: float = 0.2,
    kernel_size: Tuple[int, int] = (5, 5),
    sigma: Tuple[float, float] = (1, 1),
    hysteresis: bool = True,
    eps: float = 1e-6,
) -> Tuple[torch.Tensor, torch.Tensor]:
    r"""Find edges of the input image and filter them using the Canny algorithm.

    Args:
        input (torch.Tensor): input image tensor with shape :math:`(B,C,H,W)`.
        low_threshold (float): lower threshold for the hysteresis procedure. Default: 0.1.
        high_threshold (float): upper threshold for the hysteresis procedure. Default: 0.2.
        kernel_size (Tuple[int, int]): the size of the kernel for the gaussian blur.
        sigma (Tuple[float, float]): the standard deviation of the kernel for the gaussian blur.
        hysteresis (bool): if True, applies the hysteresis edge tracking.
            Otherwise, the edges are divided between weak (0.5) and strong (1) edges.
        eps (float): regularization number to avoid NaN during backprop. Default: 1e-6.

    Returns:
        Tuple[torch.Tensor, torch.Tensor]:
        - the canny edge magnitudes map, shape of :math:`(B,1,H,W)`.
        - the canny edge detection filtered by thresholds and hysteresis, shape of :math:`(B,1,H,W)`.

    Raises:
        TypeError: if ``input`` is not a tensor.
        ValueError: if ``input`` is not 4d, if ``low_threshold`` exceeds
            ``high_threshold``, or if either threshold is below 0 or above 1.

    Example:
        >>> input = torch.rand(5, 3, 4, 4)
        >>> magnitude, edges = canny(input)  # 5x3x4x4
        >>> magnitude.shape
        torch.Size([5, 1, 4, 4])
        >>> edges.shape
        torch.Size([5, 1, 4, 4])
    """
    if not isinstance(input, torch.Tensor):
        raise TypeError("Input type is not a torch.Tensor. Got {}".format(type(input)))

    if not len(input.shape) == 4:
        raise ValueError("Invalid input shape, we expect BxCxHxW. Got: {}".format(input.shape))

    if low_threshold > high_threshold:
        raise ValueError(
            "Invalid input thresholds. low_threshold should be smaller than the high_threshold. Got: {}>{}".format(
                low_threshold, high_threshold
            )
        )

    # The two conditions are mutually exclusive, so they must be joined with
    # `or`; with `and` the range check could never fire.
    if low_threshold < 0 or low_threshold > 1:
        raise ValueError(
            "Invalid input threshold. low_threshold should be in range (0,1). Got: {}".format(low_threshold)
        )

    if high_threshold < 0 or high_threshold > 1:
        raise ValueError(
            "Invalid input threshold. high_threshold should be in range (0,1). Got: {}".format(high_threshold)
        )

    device: torch.device = input.device
    dtype: torch.dtype = input.dtype

    # To Grayscale
    if input.shape[1] == 3:
        input = rgb_to_grayscale(input)

    # Gaussian filter
    blurred: torch.Tensor = gaussian_blur2d(input, kernel_size, sigma)

    # Compute the gradients
    gradients: torch.Tensor = spatial_gradient(blurred, normalized=False)

    # Unpack the edges
    gx: torch.Tensor = gradients[:, :, 0]
    gy: torch.Tensor = gradients[:, :, 1]

    # Compute gradient magnitude and angle
    magnitude: torch.Tensor = torch.sqrt(gx * gx + gy * gy + eps)
    angle: torch.Tensor = torch.atan2(gy, gx)

    # Radians to Degrees
    angle = rad2deg(angle)

    # Round angle to the nearest 45 degree
    angle = torch.round(angle / 45) * 45

    # Non-maximal suppression
    nms_kernels: torch.Tensor = get_canny_nms_kernel(device, dtype)
    nms_magnitude: torch.Tensor = F.conv2d(magnitude, nms_kernels, padding=nms_kernels.shape[-1] // 2)

    # Get the indices for both directions (along the gradient and opposite to it)
    positive_idx: torch.Tensor = ((angle / 45) % 8).long()
    negative_idx: torch.Tensor = (((angle / 45) + 4) % 8).long()

    # Apply the non-maximum suppression to the different directions
    channel_select_filtered_positive: torch.Tensor = torch.gather(nms_magnitude, 1, positive_idx)
    channel_select_filtered_negative: torch.Tensor = torch.gather(nms_magnitude, 1, negative_idx)

    channel_select_filtered: torch.Tensor = torch.stack(
        [channel_select_filtered_positive, channel_select_filtered_negative], 1
    )

    # A pixel survives only if both selected directional responses are positive.
    is_max: torch.Tensor = channel_select_filtered.min(dim=1)[0] > 0.0

    magnitude = magnitude * is_max

    # Threshold: 0.5 marks weak edges (above low only), 1.0 marks strong edges.
    low: torch.Tensor = magnitude > low_threshold
    high: torch.Tensor = magnitude > high_threshold

    edges: torch.Tensor = low * 0.5 + high * 0.5
    edges = edges.to(dtype)

    # Hysteresis
    if hysteresis:
        # Start from a value no edge map can take so the loop runs at least once.
        edges_old: torch.Tensor = -torch.ones(edges.shape, device=edges.device, dtype=dtype)
        hysteresis_kernels: torch.Tensor = get_hysteresis_kernel(device, dtype)

        # Iterate until the edge map stops changing.
        while ((edges_old - edges).abs() != 0).any():
            # Masks use the input dtype so conv2d keeps receiving tensors of
            # the same dtype as the kernels on every iteration.
            weak: torch.Tensor = (edges == 0.5).to(dtype)
            strong: torch.Tensor = (edges == 1).to(dtype)

            hysteresis_magnitude: torch.Tensor = F.conv2d(
                edges, hysteresis_kernels, padding=hysteresis_kernels.shape[-1] // 2
            )
            hysteresis_magnitude = (hysteresis_magnitude == 1).any(1, keepdim=True).to(dtype)
            hysteresis_magnitude = hysteresis_magnitude * weak + strong

            edges_old = edges.clone()
            edges = hysteresis_magnitude + (hysteresis_magnitude == 0) * weak * 0.5

        edges = hysteresis_magnitude

    return magnitude, edges
def gftt_response(input: torch.Tensor,
                  grads_mode: str = 'sobel',
                  sigmas: Optional[torch.Tensor] = None) -> torch.Tensor:
    r"""Compute the Shi-Tomasi cornerness function.

    No normalization or non-maxima suppression is applied. The response
    map is

    .. math::
        R = min(eig(M))

    where :math:`M` is the structure tensor, with each entry smoothed by a
    7x7 Gaussian window (sigma 1):

    .. math::
        M = \sum_{(x,y) \in W}
            \begin{bmatrix}
                I^{2}_x & I_x I_y \\
                I_x I_y & I^{2}_y \\
            \end{bmatrix}

    Args:
        input: input image with shape :math:`(B, C, H, W)`.
        grads_mode: can be ``'sobel'`` for standalone use or ``'diff'`` for
            use on Gaussian pyramid.
        sigmas: coefficients of shape :math:`(B)`. The response of each
            batch item is multiplied by ``sigma ** 4``, which is needed for
            non-maxima suppression across scale pyramid levels. The tensor
            is used as given; this function does not validate it. See
            `vlfeat <https://github.com/vlfeat/vlfeat/blob/master/vl/covdet.c#L874>`_.

    Return:
        the response map per channel with shape :math:`(B, C, H, W)`.

    Example:
        >>> input = torch.zeros(1, 1, 7, 7)
        >>> input[..., 1:6, 1:6] = 1.0  # 5x5 square of ones
        >>> gftt_response(input).shape
        torch.Size([1, 1, 7, 7])
    """
    # TODO: Recompute doctest
    KORNIA_CHECK_SHAPE(input, ["B", "C", "H", "W"])

    gradients: torch.Tensor = spatial_gradient(input, grads_mode)
    dx: torch.Tensor = gradients[:, :, 0]
    dy: torch.Tensor = gradients[:, :, 1]

    # compute the structure tensor M elements
    dx2: torch.Tensor = gaussian_blur2d(dx**2, (7, 7), (1.0, 1.0))
    dy2: torch.Tensor = gaussian_blur2d(dy**2, (7, 7), (1.0, 1.0))
    dxy: torch.Tensor = gaussian_blur2d(dx * dy, (7, 7), (1.0, 1.0))

    det_m: torch.Tensor = dx2 * dy2 - dxy * dxy
    trace_m: torch.Tensor = dx2 + dy2

    # Closed-form eigenvalues of the 2x2 matrix. The discriminant root is
    # shared by both, so compute it once; abs() guards against tiny negative
    # values caused by rounding.
    root: torch.Tensor = torch.sqrt((trace_m**2 - 4 * det_m).abs())
    e1: torch.Tensor = 0.5 * (trace_m + root)
    e2: torch.Tensor = 0.5 * (trace_m - root)

    scores: torch.Tensor = torch.min(e1, e2)
    if sigmas is not None:
        scores = scores * sigmas.pow(4).view(-1, 1, 1, 1)
    return scores
def harris_response(
    input: torch.Tensor,
    k: Union[torch.Tensor, float] = 0.04,
    grads_mode: str = 'sobel',
    sigmas: Optional[torch.Tensor] = None,
) -> torch.Tensor:
    r"""Compute the Harris cornerness function.

    No normalization, clamping or non-maxima suppression is applied. The
    response map is

    .. math::
        R = det(M) - k \cdot trace(M)^2

    where :math:`M` is the structure tensor, with each entry smoothed by a
    7x7 Gaussian window (sigma 1):

    .. math::
        M = \sum_{(x,y) \in W}
            \begin{bmatrix}
                I^{2}_x & I_x I_y \\
                I_x I_y & I^{2}_y \\
            \end{bmatrix}

    and :math:`k` is an empirically determined constant,
    :math:`k \in [0.04, 0.06]`.

    Args:
        input: input image with shape :math:`(B, C, H, W)`.
        k: the Harris detector free parameter.
        grads_mode: can be ``'sobel'`` for standalone use or ``'diff'`` for
            use on Gaussian pyramid.
        sigmas: 1-d tensor of length :math:`B`. The response of each batch
            item is multiplied by ``sigma ** 4``, which is needed for
            non-maxima suppression across scale pyramid levels. See
            `vlfeat <https://github.com/vlfeat/vlfeat/blob/master/vl/covdet.c#L874>`_.

    Return:
        the response map per channel with shape :math:`(B, C, H, W)`.

    Raises:
        TypeError: if ``sigmas`` is given but is not a tensor.
        ValueError: if ``sigmas`` is not 1-d with ``input.size(0)`` entries.

    Example:
        >>> input = torch.zeros(1, 1, 7, 7)
        >>> input[..., 1:6, 1:6] = 1.0  # 5x5 square of ones
        >>> harris_response(input, 0.04).shape
        torch.Size([1, 1, 7, 7])
    """
    # TODO: Recompute doctest
    KORNIA_CHECK_SHAPE(input, ["B", "C", "H", "W"])

    use_sigmas = sigmas is not None
    if use_sigmas:
        if not isinstance(sigmas, torch.Tensor):
            raise TypeError(
                f"sigmas type is not a torch.Tensor. Got {type(sigmas)}")
        if len(sigmas.shape) != 1 or sigmas.size(0) != input.size(0):
            raise ValueError(
                f"Invalid sigmas shape, we expect B == input.size(0). Got: {sigmas.shape}"
            )

    def smooth(term: torch.Tensor) -> torch.Tensor:
        # Gaussian window that turns gradient products into tensor entries.
        return gaussian_blur2d(term, (7, 7), (1.0, 1.0))

    grads: torch.Tensor = spatial_gradient(input, grads_mode)
    ix, iy = grads[:, :, 0], grads[:, :, 1]

    # compute the structure tensor M elements
    sxx: torch.Tensor = smooth(ix**2)
    syy: torch.Tensor = smooth(iy**2)
    sxy: torch.Tensor = smooth(ix * iy)

    determinant: torch.Tensor = sxx * syy - sxy * sxy
    trace: torch.Tensor = sxx + syy

    # compute the response map
    response: torch.Tensor = determinant - k * (trace**2)
    if use_sigmas:
        response = response * sigmas.pow(4).view(-1, 1, 1, 1)
    return response