def _get_window_grid_kernel3d(d: int, h: int, w: int,
                              device: torch.device = torch.device('cpu')) -> torch.Tensor:
    r"""Helper function, which generates a kernel that returns the coordinate
    residuals to the window center.

    Args:
        d (int): kernel depth.
        h (int): kernel height.
        w (int): kernel width.
        device (torch.device): device on which to generate.

    Returns:
        conv_kernel (torch.Tensor) [3x1xdxhxw]
    """
    grid2d = create_meshgrid(h, w, True, device=device)
    if d > 1:
        z = torch.linspace(-1, 1, d, device=device).view(d, 1, 1, 1)
    else:  # only one channel with index == 0
        z = torch.zeros(1, 1, 1, 1, device=device)
    grid3d = torch.cat([z.repeat(1, h, w, 1).contiguous(),
                        grid2d.repeat(d, 1, 1, 1)], dim=3)
    conv_kernel = grid3d.permute(3, 0, 1, 2).unsqueeze(1)
    return conv_kernel
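# A minimal sanity sketch (not from the source) for the helper above; it only
# assumes torch and the function defined here are in scope.
import torch

kernel3d = _get_window_grid_kernel3d(3, 3, 3)
assert kernel3d.shape == (3, 1, 3, 3, 3)  # channels are the z, x, y residuals
# the center tap coincides with the window center, so its residual is zero
assert torch.all(kernel3d[:, 0, 1, 1, 1] == 0)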
def test_ellipse(self, device, dtype):
    b, c, h, w = 1, 3, 500, 500
    n = 5000
    im = torch.zeros(b, c, h, w, device=device, dtype=dtype)
    t = torch.linspace(0, 1, steps=n, device=device, dtype=dtype)[None].expand(b, n)
    color = torch.tensor([1, 1, 1], device=device, dtype=dtype)[None].expand(b, c)
    lam = 2
    x = lam * (2 * math.pi * t).cos()
    y = (2 * math.pi * t).sin()
    ctr = 200
    radius = 100
    pts = ctr + radius * torch.stack((x, y), dim=-1)
    poly_im = draw_convex_polygon(im, pts, color)
    XY = create_meshgrid(h, w, normalized_coordinates=False, device=device, dtype=dtype)
    inside = (((XY[..., 1] - ctr) ** 2 + ((XY[..., 0] - ctr) / lam) ** 2).sqrt()
              <= radius)[:, None].expand(b, c, h, w)
    ellipse_im = inside * color[..., None, None]
    assert (ellipse_im - poly_im).abs().mean() <= 1e-4
def homo_warp(src_feat, proj_mat, depth_values, src_grid=None, pad=0):
    """
    src_feat: (B, C, H, W)
    proj_mat: (B, 3, 4) equal to "src_proj @ ref_proj_inv"
    depth_values: (B, D), expanded internally to (B, D, H_pad, W_pad)
    out: (B, C, D, H, W)
    """
    if src_grid is None:
        B, C, H, W = src_feat.shape
        device = src_feat.device

        if pad > 0:
            H_pad, W_pad = H + pad * 2, W + pad * 2
        else:
            H_pad, W_pad = H, W

        depth_values = depth_values[..., None, None].repeat(1, 1, H_pad, W_pad)
        D = depth_values.shape[1]

        R = proj_mat[:, :, :3]  # (B, 3, 3)
        T = proj_mat[:, :, 3:]  # (B, 3, 1)
        # create grid from the ref frame
        ref_grid = create_meshgrid(H_pad, W_pad, normalized_coordinates=False,
                                   device=device)  # (1, H, W, 2)
        if pad > 0:
            ref_grid -= pad

        ref_grid = ref_grid.permute(0, 3, 1, 2)  # (1, 2, H, W)
        ref_grid = ref_grid.reshape(1, 2, W_pad * H_pad)  # (1, 2, H*W)
        ref_grid = ref_grid.expand(B, -1, -1)  # (B, 2, H*W)
        ref_grid = torch.cat((ref_grid, torch.ones_like(ref_grid[:, :1])), 1)  # (B, 3, H*W)
        ref_grid_d = ref_grid.repeat(1, 1, D)  # (B, 3, D*H*W)
        src_grid_d = R @ ref_grid_d + T / depth_values.view(B, 1, D * W_pad * H_pad)
        del ref_grid_d, ref_grid, proj_mat, R, T, depth_values  # release (GPU) memory

        src_grid = src_grid_d[:, :2] / src_grid_d[:, 2:]  # divide by depth (B, 2, D*H*W)
        del src_grid_d
        src_grid[:, 0] = src_grid[:, 0] / ((W - 1) / 2) - 1  # scale to -1~1
        src_grid[:, 1] = src_grid[:, 1] / ((H - 1) / 2) - 1  # scale to -1~1
        src_grid = src_grid.permute(0, 2, 1)  # (B, D*H*W, 2)
        src_grid = src_grid.view(B, D, W_pad, H_pad, 2)

    B, D, W_pad, H_pad = src_grid.shape[:4]
    warped_src_feat = F.grid_sample(src_feat, src_grid.view(B, D, W_pad * H_pad, 2),
                                    mode='bilinear', padding_mode='zeros',
                                    align_corners=True)  # (B, C, D, H*W)
    warped_src_feat = warped_src_feat.view(B, -1, D, H_pad, W_pad)
    # src_grid = src_grid.view(B, 1, D, H_pad, W_pad, 2)

    return warped_src_feat, src_grid
def spatial_soft_argmax2d(input: torch.Tensor,
                          temperature: torch.Tensor = torch.tensor(1.0),
                          normalized_coordinates: bool = True,
                          eps: float = 1e-8) -> torch.Tensor:
    r"""Function that computes the Spatial Soft-Argmax 2D of a given input heatmap.

    Returns the index of the maximum 2d coordinates of the given map.
    The output order is x-coord and y-coord.

    Arguments:
        input (torch.Tensor): the input heatmap.
        temperature (torch.Tensor): factor to apply to input. Default is 1.
        normalized_coordinates (bool): whether to return the
          coordinates normalized in the range of [-1, 1]. Otherwise,
          it will return the coordinates in the range of the input shape.
          Default is True.
        eps (float): small value to avoid zero division. Default is 1e-8.

    Shape:
        - Input: :math:`(B, N, H, W)`
        - Output: :math:`(B, N, 2)`

    Examples:
        >>> input = torch.tensor([[[
        ...     [0., 0., 0.],
        ...     [0., 10., 0.],
        ...     [0., 0., 0.]]]])
        >>> kornia.spatial_soft_argmax2d(input, normalized_coordinates=False)
        tensor([[[1.0000, 1.0000]]])
    """
    if not torch.is_tensor(input):
        raise TypeError("Input type is not a torch.Tensor. Got {}".format(
            type(input)))

    if not len(input.shape) == 4:
        raise ValueError("Invalid input shape, we expect BxCxHxW. Got: {}".format(
            input.shape))

    # unpack shapes and create view from input tensor
    batch_size, channels, height, width = input.shape
    x: torch.Tensor = input.view(batch_size, channels, -1)

    # compute softmax along the feature map
    x_soft: torch.Tensor = F.softmax(x * temperature, dim=-1)

    # create coordinates grid
    grid: torch.Tensor = create_meshgrid(height, width, normalized_coordinates)
    grid = grid.to(input.device).to(input.dtype)

    pos_x: torch.Tensor = grid[..., 0].reshape(-1)
    pos_y: torch.Tensor = grid[..., 1].reshape(-1)

    # compute the expected coordinates
    expected_y: torch.Tensor = torch.sum(pos_y * x_soft, dim=-1, keepdim=True)
    expected_x: torch.Tensor = torch.sum(pos_x * x_soft, dim=-1, keepdim=True)
    output: torch.Tensor = torch.cat([expected_x, expected_y], dim=-1)

    return output.view(batch_size, channels, 2)  # BxNx2
def get_grid_dict(patch_size: int = 32) -> Dict[str, torch.Tensor]:
    r"""Get cartesian and polar parametrizations of grid."""
    kgrid = create_meshgrid(height=patch_size, width=patch_size,
                            normalized_coordinates=True)
    x = kgrid[0, :, :, 0]
    y = kgrid[0, :, :, 1]
    rho, phi = cart2pol(x, y)
    grid_dict = {'x': x, 'y': y, 'rho': rho, 'phi': phi}
    return grid_dict
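# Quick usage sketch (not from the source): the returned dict holds four
# patch_size x patch_size maps, one per parametrization channel.
grid_dict = get_grid_dict(8)
assert set(grid_dict) == {'x', 'y', 'rho', 'phi'}
assert grid_dict['rho'].shape == (8, 8)  # radius of each pixel from the patch center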
def depth_to_3d(depth: torch.Tensor, camera_matrix: torch.Tensor,
                normalize_points: bool = False) -> torch.Tensor:
    """Compute a 3d point per pixel given its depth value and the camera intrinsics.

    Args:
        depth: image tensor containing a depth value per pixel with shape :math:`(B, 1, H, W)`.
        camera_matrix: tensor containing the camera intrinsics with shape :math:`(B, 3, 3)`.
        normalize_points: whether to normalise the pointcloud. This must be set to `True` when the depth is
          represented as the Euclidean ray length from the camera position.

    Return:
        tensor with a 3d point per pixel of the same resolution as the input :math:`(B, 3, H, W)`.

    Example:
        >>> depth = torch.rand(1, 1, 4, 4)
        >>> K = torch.eye(3)[None]
        >>> depth_to_3d(depth, K).shape
        torch.Size([1, 3, 4, 4])
    """
    if not isinstance(depth, torch.Tensor):
        raise TypeError(f"Input depth type is not a torch.Tensor. Got {type(depth)}.")

    if not (len(depth.shape) == 4 and depth.shape[-3] == 1):
        raise ValueError(f"Input depth must have a shape (B, 1, H, W). Got: {depth.shape}")

    if not isinstance(camera_matrix, torch.Tensor):
        raise TypeError(f"Input camera_matrix type is not a torch.Tensor. "
                        f"Got {type(camera_matrix)}.")

    if not (len(camera_matrix.shape) == 3 and camera_matrix.shape[-2:] == (3, 3)):
        raise ValueError(f"Input camera_matrix must have a shape (B, 3, 3). "
                         f"Got: {camera_matrix.shape}.")

    # create base coordinates grid
    _, _, height, width = depth.shape
    points_2d: torch.Tensor = create_meshgrid(
        height, width, normalized_coordinates=False)  # 1xHxWx2
    points_2d = points_2d.to(depth.device).to(depth.dtype)

    # depth should come in Bx1xHxW
    points_depth: torch.Tensor = depth.permute(0, 2, 3, 1)  # 1xHxWx1

    # project pixels to camera frame
    camera_matrix_tmp: torch.Tensor = camera_matrix[:, None, None]  # Bx1x1x3x3
    points_3d: torch.Tensor = unproject_points(
        points_2d, points_depth, camera_matrix_tmp, normalize=normalize_points)  # BxHxWx3

    return points_3d.permute(0, 3, 1, 2)  # Bx3xHxW
def _get_window_grid_kernel2d(h: int, w: int) -> torch.Tensor:
    '''Helper function, which generates a kernel with window coordinates,
    residual to window center.

    Args:
        h (int): kernel height
        w (int): kernel width

    Returns:
        conv_kernel (torch.Tensor) [2x1xhxw]
    '''
    window_grid2d = create_meshgrid(h, w, False)
    window_grid2d = normalize_pixel_coordinates(window_grid2d, h, w)
    conv_kernel = window_grid2d.permute(3, 0, 1, 2)
    return conv_kernel
def homography_warp(patch_src: torch.Tensor,
                    src_homo_dst: torch.Tensor,
                    dsize: Tuple[int, int],
                    mode: str = 'bilinear',
                    padding_mode: str = 'zeros',
                    align_corners: bool = False,
                    normalized_coordinates: bool = True) -> torch.Tensor:
    r"""Warp image patches or tensors by normalized 2D homographies.

    See :class:`~kornia.geometry.warp.HomographyWarper` for details.

    Args:
        patch_src (torch.Tensor): The image or tensor to warp. Should be from
          source of shape :math:`(N, C, H, W)`.
        src_homo_dst (torch.Tensor): The homography or stack of homographies
          from destination to source of shape :math:`(N, 3, 3)`.
        dsize (Tuple[int, int]): The height and width of the image to warp.
        mode (str): interpolation mode to calculate output values
          'bilinear' | 'nearest'. Default: 'bilinear'.
        padding_mode (str): padding mode for outside grid values
          'zeros' | 'border' | 'reflection'. Default: 'zeros'.
        align_corners (bool): interpolation flag. Default: False. See
          https://pytorch.org/docs/stable/nn.functional.html#torch.nn.functional.interpolate for detail.
        normalized_coordinates (bool): Whether the homography assumes [-1, 1]
          normalized coordinates or not.

    Return:
        torch.Tensor: Patch sampled at locations from source to destination.

    Example:
        >>> input = torch.rand(1, 3, 32, 32)
        >>> homography = torch.eye(3).view(1, 3, 3)
        >>> output = homography_warp(input, homography, (32, 32))
    """
    if not src_homo_dst.device == patch_src.device:
        raise TypeError("Patch and homography must be on the same device. "
                        "Got patch.device: {} src_H_dst.device: {}.".format(
                            patch_src.device, src_homo_dst.device))

    height, width = dsize
    grid = create_meshgrid(height, width,
                           normalized_coordinates=normalized_coordinates)
    warped_grid = warp_grid(grid, src_homo_dst)

    return F.grid_sample(patch_src, warped_grid, mode=mode,
                         padding_mode=padding_mode, align_corners=align_corners)
def depth_to_3d(depth: torch.Tensor, camera_matrix: torch.Tensor) -> torch.Tensor:
    """Compute a 3d point per pixel given its depth value and the camera intrinsics.

    Args:
        depth (torch.Tensor): image tensor containing a depth value per pixel.
        camera_matrix (torch.Tensor): tensor containing the camera intrinsics.

    Shape:
        - Input: :math:`(B, 1, H, W)` and :math:`(B, 3, 3)`
        - Output: :math:`(B, 3, H, W)`

    Return:
        torch.Tensor: tensor with a 3d point per pixel of the same resolution as the input.
    """
    if not isinstance(depth, torch.Tensor):
        raise TypeError(f"Input depth type is not a torch.Tensor. Got {type(depth)}.")

    if not (len(depth.shape) == 4 and depth.shape[-3] == 1):
        raise ValueError(f"Input depth must have a shape (B, 1, H, W). Got: {depth.shape}")

    if not isinstance(camera_matrix, torch.Tensor):
        raise TypeError(f"Input camera_matrix type is not a torch.Tensor. "
                        f"Got {type(camera_matrix)}.")

    if not (len(camera_matrix.shape) == 3 and camera_matrix.shape[-2:] == (3, 3)):
        raise ValueError(f"Input camera_matrix must have a shape (B, 3, 3). "
                         f"Got: {camera_matrix.shape}.")

    # create base coordinates grid
    batch_size, _, height, width = depth.shape
    points_2d: torch.Tensor = create_meshgrid(
        height, width, normalized_coordinates=False)  # 1xHxWx2
    points_2d = points_2d.to(depth.device).to(depth.dtype)

    # depth should come in Bx1xHxW
    points_depth: torch.Tensor = depth.permute(0, 2, 3, 1)  # 1xHxWx1

    # project pixels to camera frame
    camera_matrix_tmp: torch.Tensor = camera_matrix[:, None, None]  # Bx1x1x3x3
    points_3d: torch.Tensor = unproject_points(points_2d, points_depth,
                                               camera_matrix_tmp, normalize=True)  # BxHxWx3

    return points_3d.permute(0, 3, 1, 2)  # Bx3xHxW
def homo_warp(src_feat, proj_mat, depth_values):
    """
    src_feat: (B, C, H, W)
    proj_mat: (B, 3, 4) equal to "src_proj @ ref_proj_inv"
    depth_values: (B, D, H, W)
    out: (B, C, D, H, W)
    """
    B, C, H, W = src_feat.shape
    D = depth_values.shape[1]
    device = src_feat.device

    R = proj_mat[:, :, :3]  # (B, 3, 3)
    T = proj_mat[:, :, 3:]  # (B, 3, 1)
    # create grid from the ref frame
    ref_grid = create_meshgrid(H, W, normalized_coordinates=False,
                               device=device)  # (1, H, W, 2)
    ref_grid = ref_grid.permute(0, 3, 1, 2)  # (1, 2, H, W)
    ref_grid = ref_grid.reshape(1, 2, H * W)  # (1, 2, H*W)
    ref_grid = ref_grid.expand(B, -1, -1)  # (B, 2, H*W)
    ref_grid = torch.cat((ref_grid, torch.ones_like(ref_grid[:, :1])), 1)  # (B, 3, H*W)
    ref_grid_d = ref_grid.repeat(1, 1, D)  # (B, 3, D*H*W)
    src_grid_d = R @ ref_grid_d + T / depth_values.view(B, 1, D * H * W)
    del ref_grid_d, ref_grid, proj_mat, R, T, depth_values  # release (GPU) memory

    # project negative depth pixels to somewhere outside the image
    negative_depth_mask = src_grid_d[:, 2:] <= 1e-7
    src_grid_d[:, 0:1][negative_depth_mask] = W
    src_grid_d[:, 1:2][negative_depth_mask] = H
    src_grid_d[:, 2:3][negative_depth_mask] = 1

    src_grid = src_grid_d[:, :2] / src_grid_d[:, 2:]  # divide by depth (B, 2, D*H*W)
    del src_grid_d
    src_grid[:, 0] = src_grid[:, 0] / ((W - 1) / 2) - 1  # scale to -1~1
    src_grid[:, 1] = src_grid[:, 1] / ((H - 1) / 2) - 1  # scale to -1~1
    src_grid = src_grid.permute(0, 2, 1)  # (B, D*H*W, 2)
    src_grid = src_grid.view(B, D, H * W, 2)

    warped_src_feat = F.grid_sample(src_feat, src_grid,
                                    mode='bilinear', padding_mode='zeros',
                                    align_corners=True)  # (B, C, D, H*W)
    warped_src_feat = warped_src_feat.view(B, C, D, H, W)

    return warped_src_feat
def __init__(self, height: int, width: int, mode: str = 'bilinear',
             padding_mode: str = 'zeros',
             normalized_coordinates: bool = True) -> None:
    super(HomographyWarper, self).__init__()
    self.width: int = width
    self.height: int = height
    self.mode: str = mode
    self.padding_mode: str = padding_mode
    self.normalized_coordinates: bool = normalized_coordinates

    # create base grid to compute the flow
    self.grid: torch.Tensor = create_meshgrid(
        height, width, normalized_coordinates=normalized_coordinates)
def _get_window_grid_kernel2d(h: int, w: int,
                              device: torch.device = torch.device('cpu')) -> torch.Tensor:
    r"""Helper function, which generates a kernel with window coordinates,
    residual to window center.

    Args:
        h: kernel height.
        w: kernel width.
        device: device on which to generate.

    Returns:
        conv_kernel [2x1xhxw]
    """
    window_grid2d = create_meshgrid(h, w, False, device=device)
    window_grid2d = normalize_pixel_coordinates(window_grid2d, h, w)
    conv_kernel = window_grid2d.permute(3, 0, 1, 2)
    return conv_kernel
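# A small sanity sketch (not from the source): the center tap of the window
# kernel is the window center itself, so its residual is zero.
import torch

kernel2d = _get_window_grid_kernel2d(3, 3)
assert kernel2d.shape == (2, 1, 3, 3)  # x and y residual channels
assert torch.all(kernel2d[:, 0, 1, 1] == 0)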
def __init__(self, patch_size: int = 32, relative: bool = False) -> None:
    super().__init__()
    self.patch_size = patch_size
    self.relative = relative
    self.eps = 1e-8

    # Theta kernel for gradients.
    self.kernel = VonMisesKernel(patch_size=patch_size, coeffs=COEFFS['theta'])

    # Relative gradients.
    kgrid = create_meshgrid(height=patch_size, width=patch_size,
                            normalized_coordinates=True)
    _, phi = cart2pol(kgrid[:, :, :, 0], kgrid[:, :, :, 1])
    self.register_buffer('phi', phi)
def apply_transform(
    self, input: Tensor, params: Dict[str, Tensor], transform: Optional[Tensor] = None
) -> Tensor:
    # create the initial sampling fields
    B, _, H, W = input.shape
    grid = create_meshgrid(H, W, normalized_coordinates=True)
    field_x = grid[..., 0].to(input)  # 1xHxW
    field_y = grid[..., 1].to(input)  # 1xHxW

    # vectorize the random parameters
    center_x = params["center_x"].view(B, 1, 1).to(input)
    center_y = params["center_y"].view(B, 1, 1).to(input)
    gamma = params["gamma"].view(B, 1, 1).to(input)

    # compute and apply the distances with respect to the camera optical center
    distance = ((center_x - field_x) ** 2 + (center_y - field_y) ** 2) ** 0.5
    field_x = field_x + field_x * distance ** gamma  # BxHxW
    field_y = field_y + field_y * distance ** gamma  # BxHxW

    return remap(input, field_x, field_y, normalized_coordinates=True, align_corners=True)
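# Standalone sketch (hypothetical center/gamma values, not from the source) of
# the radial field built in apply_transform above: each normalized coordinate
# is pushed outward by its distance to the optical center raised to gamma.
import torch
from kornia.utils import create_meshgrid

H, W = 5, 5
grid = create_meshgrid(H, W, normalized_coordinates=True)  # 1xHxWx2
center_x, center_y, gamma = 0.0, 0.0, 0.5
distance = ((center_x - grid[..., 0]) ** 2 + (center_y - grid[..., 1]) ** 2) ** 0.5
field_x = grid[..., 0] + grid[..., 0] * distance ** gamma  # 1xHxW
field_y = grid[..., 1] + grid[..., 1] * distance ** gamma  # 1xHxW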
def homo_warp(src_feat, src_proj, ref_proj_inv, depth_values):
    # src_feat: (B, C, H, W)
    # src_proj: (B, 4, 4)
    # ref_proj_inv: (B, 4, 4)
    # depth_values: (B, D)
    # out: (B, C, D, H, W)
    B, C, H, W = src_feat.shape
    D = depth_values.shape[1]
    device = src_feat.device
    dtype = src_feat.dtype

    transform = src_proj @ ref_proj_inv
    R = transform[:, :3, :3]  # (B, 3, 3)
    T = transform[:, :3, 3:]  # (B, 3, 1)
    # create grid from the ref frame
    ref_grid = create_meshgrid(H, W, normalized_coordinates=False)  # (1, H, W, 2)
    ref_grid = ref_grid.to(device).to(dtype)
    ref_grid = ref_grid.permute(0, 3, 1, 2)  # (1, 2, H, W)
    ref_grid = ref_grid.reshape(1, 2, H * W)  # (1, 2, H*W)
    ref_grid = ref_grid.expand(B, -1, -1)  # (B, 2, H*W)
    ref_grid = torch.cat((ref_grid, torch.ones_like(ref_grid[:, :1])), 1)  # (B, 3, H*W)
    ref_grid_d = ref_grid.unsqueeze(2) * depth_values.view(B, 1, D, 1)  # (B, 3, D, H*W)
    ref_grid_d = ref_grid_d.view(B, 3, D * H * W)
    src_grid_d = R @ ref_grid_d + T  # (B, 3, D*H*W)
    del ref_grid_d, ref_grid, transform, R, T  # release (GPU) memory

    src_grid = src_grid_d[:, :2] / src_grid_d[:, -1:]  # divide by depth (B, 2, D*H*W)
    del src_grid_d
    src_grid[:, 0] = src_grid[:, 0] / ((W - 1) / 2) - 1  # scale to -1~1
    src_grid[:, 1] = src_grid[:, 1] / ((H - 1) / 2) - 1  # scale to -1~1
    src_grid = src_grid.permute(0, 2, 1)  # (B, D*H*W, 2)
    src_grid = src_grid.view(B, D, H * W, 2)

    warped_src_feat = F.grid_sample(src_feat, src_grid,
                                    mode='bilinear', padding_mode='zeros',
                                    align_corners=True)  # (B, C, D, H*W)
    warped_src_feat = warped_src_feat.view(B, C, D, H, W)

    return warped_src_feat
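# A minimal check (not from the source) of the homo_warp variant above: with
# identity projections and a single unit-depth plane, the warp should
# reproduce the source features up to interpolation error.
import torch

B, C, H, W = 1, 8, 16, 16
src_feat = torch.rand(B, C, H, W)
eye = torch.eye(4)[None]         # (B, 4, 4) identity src_proj and ref_proj_inv
depth_values = torch.ones(B, 1)  # a single depth hypothesis at z = 1
warped = homo_warp(src_feat, eye, eye, depth_values)  # (B, C, 1, H, W)
assert torch.allclose(warped[:, :, 0], src_feat, atol=1e-5)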
def __init__(self, height: int, width: int, mode: str = 'bilinear',
             padding_mode: str = 'zeros',
             normalized_coordinates: bool = True,
             align_corners: bool = False) -> None:
    super(HomographyWarper, self).__init__()
    self.width: int = width
    self.height: int = height
    self.mode: str = mode
    self.padding_mode: str = padding_mode
    self.normalized_coordinates: bool = normalized_coordinates
    self.align_corners: bool = align_corners

    # create base grid to compute the flow
    self.grid: torch.Tensor = create_meshgrid(
        height, width, normalized_coordinates=normalized_coordinates)

    # initialize the warped destination grid
    self._warped_grid: Optional[torch.Tensor] = None
def _create_meshgrid(height: int, width: int) -> torch.Tensor:
    grid: torch.Tensor = create_meshgrid(
        height, width, normalized_coordinates=False)  # 1xHxWx2
    return convert_points_to_homogeneous(grid)  # append ones to last dim
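# Usage sketch (not from the source): the helper appends a homogeneous ones
# channel, giving a 1xHxWx3 grid ready for projective transforms.
import torch

grid_h = _create_meshgrid(4, 6)
assert grid_h.shape == (1, 4, 6, 3)
assert torch.all(grid_h[..., 2] == 1)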
def conv_soft_argmax2d(input: torch.Tensor,
                       kernel_size: Tuple[int, int] = (3, 3),
                       stride: Tuple[int, int] = (1, 1),
                       padding: Tuple[int, int] = (1, 1),
                       temperature: Union[torch.Tensor, float] = torch.tensor(1.0),
                       normalized_coordinates: bool = True,
                       eps: float = 1e-8,
                       output_value: bool = False) -> Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]:
    r"""Function that computes the convolutional spatial Soft-Argmax 2D over the windows
    of a given input heatmap. The function has two outputs: the argmax coordinates and
    the softmax-pooled heatmap values themselves. On each window, the function computed is

    .. math::
        ij(X) = \frac{\sum_{(i,j) \in X} (i,j) \cdot \exp(x_{ij} / T)}
                     {\sum_{(i,j) \in X} \exp(x_{ij} / T)}

    .. math::
        val(X) = \frac{\sum_{(i,j) \in X} x_{ij} \cdot \exp(x_{ij} / T)}
                      {\sum_{(i,j) \in X} \exp(x_{ij} / T)}

    where :math:`T` is the temperature.

    Args:
        kernel_size (Tuple[int, int]): the size of the window.
        stride (Tuple[int, int]): the stride of the window.
        padding (Tuple[int, int]): input zero padding.
        temperature (torch.Tensor): factor to apply to input. Default is 1.
        normalized_coordinates (bool): whether to return the coordinates normalized
          in the range of [-1, 1]. Otherwise, it will return the coordinates in the
          range of the input shape. Default is True.
        eps (float): small value to avoid zero division. Default is 1e-8.
        output_value (bool): if True, val is also returned; if False, only ij.

    Shape:
        - Input: :math:`(N, C, H_{in}, W_{in})`
        - Output: :math:`(N, C, 2, H_{out}, W_{out})`, :math:`(N, C, H_{out}, W_{out})`, where

          .. math::
              H_{out} = \left\lfloor\frac{H_{in} + 2 \times \text{padding}[0] -
              (\text{kernel\_size}[0] - 1) - 1}{\text{stride}[0]} + 1\right\rfloor

          .. math::
              W_{out} = \left\lfloor\frac{W_{in} + 2 \times \text{padding}[1] -
              (\text{kernel\_size}[1] - 1) - 1}{\text{stride}[1]} + 1\right\rfloor

    Examples::
        >>> input = torch.randn(20, 16, 50, 32)
        >>> nms_coords, nms_val = conv_soft_argmax2d(input, (3, 3), (2, 2), (1, 1), output_value=True)
    """
    if not torch.is_tensor(input):
        raise TypeError("Input type is not a torch.Tensor. Got {}"
                        .format(type(input)))

    if not len(input.shape) == 4:
        raise ValueError("Invalid input shape, we expect BxCxHxW. Got: {}"
                         .format(input.shape))

    if temperature <= 0:
        raise ValueError("Temperature should be positive float or tensor. Got: {}"
                         .format(temperature))

    b, c, h, w = input.shape
    kx, ky = kernel_size
    device: torch.device = input.device
    dtype: torch.dtype = input.dtype
    input = input.view(b * c, 1, h, w)

    center_kernel: torch.Tensor = _get_center_kernel2d(kx, ky, device).to(dtype)
    window_kernel: torch.Tensor = _get_window_grid_kernel2d(kx, ky, device).to(dtype)

    # applies exponential normalization trick
    # https://timvieira.github.io/blog/post/2014/02/11/exp-normalize-trick/
    # https://github.com/pytorch/pytorch/blob/bcb0bb7e0e03b386ad837015faba6b4b16e3bfb9/aten/src/ATen/native/SoftMax.cpp#L44
    x_max = F.adaptive_max_pool2d(input, (1, 1))

    # max is detached to prevent undesired backprop loops in the graph
    x_exp = ((input - x_max.detach()) / temperature).exp()

    # F.avg_pool2d(.., divisor_override=1.0) - proper way for sum pool in PyTorch 1.2.
    # Not available yet in version 1.0, so let's do it manually
    pool_coef: float = float(kx * ky)

    # softmax denominator
    den = pool_coef * F.avg_pool2d(x_exp, kernel_size, stride=stride, padding=padding) + eps

    x_softmaxpool = pool_coef * F.avg_pool2d(x_exp * input, kernel_size,
                                             stride=stride, padding=padding) / den
    x_softmaxpool = x_softmaxpool.view(b, c, x_softmaxpool.size(2), x_softmaxpool.size(3))

    # We also need to output the coordinates
    # Pooled window center coordinates
    grid_global: torch.Tensor = create_meshgrid(h, w, False, device).to(dtype).permute(0, 3, 1, 2)

    grid_global_pooled = F.conv2d(grid_global, center_kernel, stride=stride, padding=padding)

    # Coordinates of maxima residual to window center
    # prepare kernel
    coords_max: torch.Tensor = F.conv2d(x_exp, window_kernel, stride=stride, padding=padding)
    coords_max = coords_max / den.expand_as(coords_max)
    coords_max = coords_max + grid_global_pooled.expand_as(coords_max)
    # [:,:, 0, ...] is x
    # [:,:, 1, ...] is y

    if normalized_coordinates:
        coords_max = normalize_pixel_coordinates(coords_max.permute(0, 2, 3, 1), h, w)
        coords_max = coords_max.permute(0, 3, 1, 2)

    # Back B*C -> (b, c)
    coords_max = coords_max.view(b, c, 2, coords_max.size(2), coords_max.size(3))

    if output_value:
        return coords_max, x_softmaxpool
    return coords_max
def distance_transform(image: torch.Tensor, kernel_size: int = 3, h: float = 0.35) -> torch.Tensor:
    r"""Approximates the Manhattan distance transform of images using cascaded convolution operations.

    The value at each pixel in the output represents the distance to the nearest non-zero pixel in the
    input image. It uses the method described in :cite:`pham2021dtlayer`. The transformation is applied
    independently across the channel dimension of the images.

    Args:
        image: Image with shape :math:`(B,C,H,W)`.
        kernel_size: size of the convolution kernel.
        h: value that influences the approximation of the min function.

    Returns:
        tensor with shape :math:`(B,C,H,W)`.

    Example:
        >>> tensor = torch.zeros(1, 1, 5, 5)
        >>> tensor[:,:, 1, 2] = 1
        >>> dt = kornia.contrib.distance_transform(tensor)
    """
    if not isinstance(image, torch.Tensor):
        raise TypeError(f"image type is not a torch.Tensor. Got {type(image)}")

    if not len(image.shape) == 4:
        raise ValueError(f"Invalid image shape, we expect BxCxHxW. Got: {image.shape}")

    if kernel_size % 2 == 0:
        raise ValueError("Kernel size must be an odd number.")

    # n_iters is set such that the DT will be able to propagate from any corner of the image to its far,
    # diagonally opposite corner
    n_iters: int = math.ceil(max(image.shape[2], image.shape[3]) / math.floor(kernel_size / 2))
    grid = create_meshgrid(kernel_size, kernel_size, normalized_coordinates=False,
                           device=image.device, dtype=image.dtype)

    grid -= math.floor(kernel_size / 2)
    kernel = torch.hypot(grid[0, :, :, 0], grid[0, :, :, 1])
    kernel = torch.exp(kernel / -h).unsqueeze(0)

    out = torch.zeros_like(image)

    # It is possible to avoid cloning the image if boundary = image, but this would require
    # modifying the image tensor.
    boundary = image.clone()
    signal_ones = torch.ones_like(boundary)

    for i in range(n_iters):
        cdt = filter2d(boundary, kernel, border_type='replicate')
        cdt = -h * torch.log(cdt)

        # We are calculating log(0) above.
        cdt = torch.nan_to_num(cdt, posinf=0.0)

        mask = torch.where(cdt > 0, 1.0, 0.0)
        if mask.sum() == 0:
            break

        offset: int = i * kernel_size // 2
        out += (offset + cdt) * mask
        boundary = torch.where(mask == 1, signal_ones, boundary)

    return out
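# Quick sanity sketch (not from the source) for distance_transform above: with
# a single seed pixel, the approximate distance at the seed itself stays zero.
import torch

seed = torch.zeros(1, 1, 5, 5)
seed[:, :, 1, 2] = 1
dt = distance_transform(seed)
assert dt.shape == (1, 1, 5, 5)
assert dt[0, 0, 1, 2] == 0  # the seed is at distance zero from itself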
def undistort_image(image: torch.Tensor, K: torch.Tensor, dist: torch.Tensor) -> torch.Tensor:
    r"""Compensate an image for lens distortion.

    Radial :math:`(k_1, k_2, k_3, k_4, k_5, k_6)`,
    tangential :math:`(p_1, p_2)`, thin prism :math:`(s_1, s_2, s_3, s_4)`, and tilt
    :math:`(\tau_x, \tau_y)` distortion models are considered in this function.

    Args:
        image: Input image with shape :math:`(*, C, H, W)`.
        K: Intrinsic camera matrix with shape :math:`(*, 3, 3)`.
        dist: Distortion coefficients
            :math:`(k_1,k_2,p_1,p_2[,k_3[,k_4,k_5,k_6[,s_1,s_2,s_3,s_4[,\tau_x,\tau_y]]]])`. This is
            a vector with 4, 5, 8, 12 or 14 elements with shape :math:`(*, n)`.

    Returns:
        Undistorted image with shape :math:`(*, C, H, W)`.

    Example:
        >>> img = torch.rand(1, 3, 5, 5)
        >>> K = torch.eye(3)[None]
        >>> dist_coeff = torch.rand(4)
        >>> out = undistort_image(img, K, dist_coeff)
        >>> out.shape
        torch.Size([1, 3, 5, 5])
    """
    if len(image.shape) < 2:
        raise ValueError(f"Image shape is invalid. Got: {image.shape}.")

    if K.shape[-2:] != (3, 3):
        raise ValueError(f'K matrix shape is invalid. Got {K.shape}.')

    if dist.shape[-1] not in [4, 5, 8, 12, 14]:
        raise ValueError(f'Invalid number of distortion coefficients. Got {dist.shape[-1]}.')

    if not image.is_floating_point():
        raise ValueError(f'Invalid input image data type. Input should be float. Got {image.dtype}.')

    B, _, rows, cols = image.shape

    # Create point coordinates for each pixel of the image
    xy_grid: torch.Tensor = create_meshgrid(rows, cols, False, image.device, image.dtype)
    pts = xy_grid.reshape(-1, 2)  # (rows*cols)x2 matrix of pixel coordinates

    # Distort points and define maps
    ptsd: torch.Tensor = distort_points(pts, K, dist)  # Bx(rows*cols)x2
    mapx: torch.Tensor = ptsd[..., 0].reshape(B, rows, cols)  # B x rows x cols, float
    mapy: torch.Tensor = ptsd[..., 1].reshape(B, rows, cols)  # B x rows x cols, float

    # Remap image to undistort
    out = remap(image, mapx, mapy, align_corners=True)

    return out
def warp_image_tps(
    image: torch.Tensor,
    kernel_centers: torch.Tensor,
    kernel_weights: torch.Tensor,
    affine_weights: torch.Tensor,
    align_corners: bool = False,
) -> torch.Tensor:
    r"""Warp an image tensor according to the thin plate spline transform defined by kernel centers,
    kernel weights, and affine weights.

    The transform is applied to each pixel coordinate in the output image to obtain a point in the input
    image for interpolation of the output pixel. So the TPS parameters should correspond to a warp from
    output space to input space.

    The input `image` is a :math:`(B, C, H, W)` tensor. The kernel centers, kernel weights and affine
    weights are the same as in `warp_points_tps`.

    Args:
        image (torch.Tensor): input image tensor :math:`(B, C, H, W)`.
        kernel_centers (torch.Tensor): kernel center points :math:`(B, K, 2)`.
        kernel_weights (torch.Tensor): tensor of kernel weights :math:`(B, K, 2)`.
        affine_weights (torch.Tensor): tensor of affine weights :math:`(B, 3, 2)`.
        align_corners (bool): interpolation flag used by `grid_sample`. Default: False.

    Returns:
        torch.Tensor: warped image tensor :math:`(B, C, H, W)`.

    Example:
        >>> points_src = torch.rand(1, 5, 2)
        >>> points_dst = torch.rand(1, 5, 2)
        >>> image = torch.rand(1, 3, 32, 32)
        >>> # note that we are getting the reverse transform: dst -> src
        >>> kernel_weights, affine_weights = get_tps_transform(points_dst, points_src)
        >>> warped_image = warp_image_tps(image, points_src, kernel_weights, affine_weights)

    .. note::
        This function is often used in conjunction with :func:`get_tps_transform`.
    """
    if not isinstance(image, torch.Tensor):
        raise TypeError(f"Input image is not torch.Tensor. Got {type(image)}")

    if not isinstance(kernel_centers, torch.Tensor):
        raise TypeError(f"Input kernel_centers is not torch.Tensor. Got {type(kernel_centers)}")

    if not isinstance(kernel_weights, torch.Tensor):
        raise TypeError(f"Input kernel_weights is not torch.Tensor. Got {type(kernel_weights)}")

    if not isinstance(affine_weights, torch.Tensor):
        raise TypeError(f"Input affine_weights is not torch.Tensor. Got {type(affine_weights)}")

    if not len(image.shape) == 4:
        raise ValueError(f"Invalid shape for image, expected BxCxHxW. Got {image.shape}")

    if not len(kernel_centers.shape) == 3:
        raise ValueError(f"Invalid shape for kernel_centers, expected BxNx2. Got {kernel_centers.shape}")

    if not len(kernel_weights.shape) == 3:
        raise ValueError(f"Invalid shape for kernel_weights, expected BxNx2. Got {kernel_weights.shape}")

    if not len(affine_weights.shape) == 3:
        raise ValueError(f"Invalid shape for affine_weights, expected BxNx2. Got {affine_weights.shape}")

    device, dtype = image.device, image.dtype
    batch_size, _, h, w = image.shape
    coords: torch.Tensor = create_meshgrid(h, w, device=device).to(dtype=dtype)
    coords = coords.reshape(-1, 2).expand(batch_size, -1, -1)
    warped: torch.Tensor = warp_points_tps(coords, kernel_centers, kernel_weights, affine_weights)
    warped = warped.view(-1, h, w, 2)
    warped_image: torch.Tensor = nn.functional.grid_sample(image, warped, align_corners=align_corners)

    return warped_image
def warp_perspective(
    src: torch.Tensor,
    M: torch.Tensor,
    dsize: Tuple[int, int],
    mode: str = 'bilinear',
    padding_mode: str = 'zeros',
    align_corners: Optional[bool] = None,
) -> torch.Tensor:
    r"""Applies a perspective transformation to an image.

    .. image:: https://kornia-tutorials.readthedocs.io/en/latest/_images/warp_perspective_10_2.png

    The function warp_perspective transforms the source image using
    the specified matrix:

    .. math::
        \text{dst}(x, y) = \text{src} \left(
        \frac{M^{-1}_{11} x + M^{-1}_{12} y + M^{-1}_{13}}{M^{-1}_{31} x + M^{-1}_{32} y + M^{-1}_{33}},
        \frac{M^{-1}_{21} x + M^{-1}_{22} y + M^{-1}_{23}}{M^{-1}_{31} x + M^{-1}_{32} y + M^{-1}_{33}}
        \right)

    Args:
        src: input image with shape :math:`(B, C, H, W)`.
        M: transformation matrix with shape :math:`(B, 3, 3)`.
        dsize: size of the output image (height, width).
        mode: interpolation mode to calculate output values ``'bilinear'`` | ``'nearest'``.
        padding_mode: padding mode for outside grid values ``'zeros'`` | ``'border'`` | ``'reflection'``.
        align_corners (bool, optional): interpolation flag.

    Returns:
        the warped input image :math:`(B, C, H, W)`.

    Example:
        >>> img = torch.rand(1, 4, 5, 6)
        >>> H = torch.eye(3)[None]
        >>> out = warp_perspective(img, H, (4, 2), align_corners=True)
        >>> print(out.shape)
        torch.Size([1, 4, 4, 2])

    .. note::
        This function is often used in conjunction with :func:`get_perspective_transform`.

    .. note::
        See a working example `here <https://kornia-tutorials.readthedocs.io/en/
        latest/warp_perspective.html>`_.
    """
    if not isinstance(src, torch.Tensor):
        raise TypeError("Input src type is not a torch.Tensor. Got {}".format(type(src)))

    if not isinstance(M, torch.Tensor):
        raise TypeError("Input M type is not a torch.Tensor. Got {}".format(type(M)))

    if not len(src.shape) == 4:
        raise ValueError("Input src must be a BxCxHxW tensor. Got {}".format(src.shape))

    if not (len(M.shape) == 3 and M.shape[-2:] == (3, 3)):
        raise ValueError("Input M must be a Bx3x3 tensor. Got {}".format(M.shape))

    # TODO: remove the statement below in kornia v0.6
    if align_corners is None:
        message: str = (
            "The align_corners default value has been changed. By default now is set True "
            "in order to match cv2.warpPerspective. In case you want to keep your previous "
            "behaviour set it to False. This warning will disappear in kornia > v0.6."
        )
        warnings.warn(message)
        # set default value for align corners
        align_corners = True

    B, C, H, W = src.size()
    h_out, w_out = dsize

    # we normalize the 3x3 transformation matrix and convert to 3x4
    dst_norm_trans_src_norm: torch.Tensor = normalize_homography(M, (H, W), (h_out, w_out))  # Bx3x3

    src_norm_trans_dst_norm = _torch_inverse_cast(dst_norm_trans_src_norm)  # Bx3x3

    # this piece of code substitutes F.affine_grid since it does not support 3x3
    grid = (create_meshgrid(h_out, w_out, normalized_coordinates=True,
                            device=src.device).to(src.dtype).repeat(B, 1, 1, 1))
    grid = transform_points(src_norm_trans_dst_norm[:, None, None], grid)

    return F.grid_sample(src, grid, align_corners=align_corners, mode=mode, padding_mode=padding_mode)
def spvs_coarse(data, config):
    """
    Update:
        data (dict): {
            "conf_matrix_gt": [N, hw0, hw1],
            'spv_b_ids': [M]
            'spv_i_ids': [M]
            'spv_j_ids': [M]
            'spv_w_pt0_i': [N, hw0, 2], in original image resolution
            'spv_pt1_i': [N, hw1, 2], in original image resolution
        }

    NOTE:
        - for the scannet dataset, there are 3 kinds of resolution {i, c, f}
        - for the megadepth dataset, there are 4 kinds of resolution {i, i_resize, c, f}
    """
    # 1. misc
    device = data['image0'].device
    N, _, H0, W0 = data['image0'].shape
    _, _, H1, W1 = data['image1'].shape
    scale = config['LOFTR']['RESOLUTION'][0]
    scale0 = scale * data['scale0'][:, None] if 'scale0' in data else scale
    scale1 = scale * data['scale1'][:, None] if 'scale1' in data else scale
    h0, w0, h1, w1 = map(lambda x: x // scale, [H0, W0, H1, W1])

    # 2. warp grids
    # create kpts in meshgrid and resize them to image resolution
    grid_pt0_c = create_meshgrid(h0, w0, False, device).reshape(1, h0 * w0, 2).repeat(N, 1, 1)  # [N, hw, 2]
    grid_pt0_i = scale0 * grid_pt0_c
    grid_pt1_c = create_meshgrid(h1, w1, False, device).reshape(1, h1 * w1, 2).repeat(N, 1, 1)
    grid_pt1_i = scale1 * grid_pt1_c

    # mask padded region to (0, 0), so no need to manually mask conf_matrix_gt
    if 'mask0' in data:
        grid_pt0_i = mask_pts_at_padded_regions(grid_pt0_i, data['mask0'])
        grid_pt1_i = mask_pts_at_padded_regions(grid_pt1_i, data['mask1'])

    # warp kpts bi-directionally and resize them to coarse-level resolution
    # (no depth consistency check, since it leads to worse results experimentally)
    # (unhandled edge case: points with 0-depth will be warped to the left-up corner)
    _, w_pt0_i = warp_kpts(grid_pt0_i, data['depth0'], data['depth1'], data['T_0to1'], data['K0'], data['K1'])
    _, w_pt1_i = warp_kpts(grid_pt1_i, data['depth1'], data['depth0'], data['T_1to0'], data['K1'], data['K0'])
    w_pt0_c = w_pt0_i / scale1
    w_pt1_c = w_pt1_i / scale0

    # 3. check if mutual nearest neighbor
    w_pt0_c_round = w_pt0_c[:, :, :].round().long()
    nearest_index1 = w_pt0_c_round[..., 0] + w_pt0_c_round[..., 1] * w1
    w_pt1_c_round = w_pt1_c[:, :, :].round().long()
    nearest_index0 = w_pt1_c_round[..., 0] + w_pt1_c_round[..., 1] * w0

    # corner case: out of boundary
    def out_bound_mask(pt, w, h):
        return (pt[..., 0] < 0) + (pt[..., 0] >= w) + (pt[..., 1] < 0) + (pt[..., 1] >= h)
    nearest_index1[out_bound_mask(w_pt0_c_round, w1, h1)] = 0
    nearest_index0[out_bound_mask(w_pt1_c_round, w0, h0)] = 0

    loop_back = torch.stack([nearest_index0[_b][_i] for _b, _i in enumerate(nearest_index1)], dim=0)
    correct_0to1 = loop_back == torch.arange(h0 * w0, device=device)[None].repeat(N, 1)
    correct_0to1[:, 0] = False  # ignore the top-left corner

    # 4. construct a gt conf_matrix
    conf_matrix_gt = torch.zeros(N, h0 * w0, h1 * w1, device=device)
    b_ids, i_ids = torch.where(correct_0to1 != 0)
    j_ids = nearest_index1[b_ids, i_ids]

    conf_matrix_gt[b_ids, i_ids, j_ids] = 1
    data.update({'conf_matrix_gt': conf_matrix_gt})

    # 5. save coarse matches (gt) for training fine level
    if len(b_ids) == 0:
        # this won't affect fine-level loss calculation
        b_ids = torch.tensor([0], device=device)
        i_ids = torch.tensor([0], device=device)
        j_ids = torch.tensor([0], device=device)

    data.update({
        'spv_b_ids': b_ids,
        'spv_i_ids': i_ids,
        'spv_j_ids': j_ids
    })

    # 6. save intermediate results (for fast fine-level computation)
    data.update({
        'spv_w_pt0_i': w_pt0_i,
        'spv_pt1_i': grid_pt1_i
    })
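# A minimal sketch (not from the source) of the mutual-nearest-neighbour
# round-trip used in step 3 above, with a synthetic identity warp so every
# coarse cell should map back to itself.
import torch
from kornia.utils import create_meshgrid

h, w = 4, 5
grid = create_meshgrid(h, w, False).reshape(1, h * w, 2)  # [1, hw, 2]
w_pt0_c_round = grid.round().long()  # pretend warp 0->1 is the identity
w_pt1_c_round = grid.round().long()  # pretend warp 1->0 is the identity
nearest_index1 = w_pt0_c_round[..., 0] + w_pt0_c_round[..., 1] * w
nearest_index0 = w_pt1_c_round[..., 0] + w_pt1_c_round[..., 1] * w
loop_back = torch.stack([nearest_index0[b][i] for b, i in enumerate(nearest_index1)], dim=0)
assert (loop_back == torch.arange(h * w)[None]).all()  # every cell is its own mutual NN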