def symmetrical_epipolar_distance(pts1: torch.Tensor,
                                  pts2: torch.Tensor,
                                  Fm: torch.Tensor,
                                  squared: bool = True,
                                  eps: float = 1e-8) -> torch.Tensor:
    r"""Return symmetrical epipolar distance for correspondences given the fundamental matrix.

    Args:
        pts1: correspondences from the left images with shape
          (B, N, 2 or 3). If they are not homogeneous, converted automatically.
        pts2: correspondences from the right images with shape
          (B, N, 2 or 3). If they are not homogeneous, converted automatically.
        Fm: Fundamental matrices with shape :math:`(B, 3, 3)`. Called Fm to
          avoid ambiguity with torch.nn.functional.
        squared: if True (default), the squared distance is returned.
        eps: Small constant for safe sqrt.

    Returns:
        the computed Symmetrical distance with shape :math:`(B, N)`.
    """
    if not isinstance(Fm, torch.Tensor):
        raise TypeError(f"Fm type is not a torch.Tensor. Got {type(Fm)}")

    if (len(Fm.shape) != 3) or not Fm.shape[-2:] == (3, 3):
        raise ValueError(f"Fm must be a (*, 3, 3) tensor. Got {Fm.shape}")

    if pts1.size(-1) == 2:
        pts1 = convert_points_to_homogeneous(pts1)

    if pts2.size(-1) == 2:
        pts2 = convert_points_to_homogeneous(pts2)

    # From Hartley and Zisserman, symmetric epipolar distance (11.10)
    # sed = (x'^T F x)^2 * (1 / ((Fx)_1^2 + (Fx)_2^2) + 1 / ((F^T x')_1^2 + (F^T x')_2^2))

    # line1_in_2: torch.Tensor = (Fm @ pts1.permute(0, 2, 1)).permute(0, 2, 1)
    # line2_in_1: torch.Tensor = (Fm.permute(0, 2, 1) @ pts2.permute(0, 2, 1)).permute(0, 2, 1)
    # Instead we can just transpose F once and switch the order of multiplication
    F_t: torch.Tensor = Fm.permute(0, 2, 1)
    line1_in_2: torch.Tensor = pts1 @ F_t
    line2_in_1: torch.Tensor = pts2 @ Fm

    # numerator = (x'^T F x)^2
    numerator: torch.Tensor = (pts2 * line1_in_2).sum(2).pow(2)

    # denominator_inv = 1 / ((Fx)_1^2 + (Fx)_2^2) + 1 / ((F^T x')_1^2 + (F^T x')_2^2)
    denominator_inv: torch.Tensor = (
        1.0 / (line1_in_2[..., :2].norm(2, dim=2).pow(2))
        + 1.0 / (line2_in_1[..., :2].norm(2, dim=2).pow(2)))
    out: torch.Tensor = numerator * denominator_inv

    if squared:
        return out
    return (out + eps).sqrt()
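
# Usage sketch (illustrative only): random tensors stand in for real
# correspondences and a real fundamental matrix, and `_demo_...` is a
# hypothetical helper, not part of any library API. Assumes `torch` and
# the function above are in scope.
def _demo_symmetrical_epipolar_distance() -> None:
    pts1 = torch.rand(1, 8, 2)  # B=1, N=8 points in the left image
    pts2 = torch.rand(1, 8, 2)  # corresponding points in the right image
    Fm = torch.rand(1, 3, 3)    # stand-in fundamental matrix
    dist = symmetrical_epipolar_distance(pts1, pts2, Fm, squared=False)
    assert dist.shape == (1, 8)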
def compute_correspond_epilines(points: torch.Tensor, F_mat: torch.Tensor) -> torch.Tensor:
    r"""Compute the corresponding epipolar line for a given set of points.

    Args:
        points: tensor containing the set of points to project in the shape of :math:`(B, N, 2)`.
        F_mat: the fundamental matrix to use for projecting the points in the shape of :math:`(B, 3, 3)`.

    Returns:
        a tensor with shape :math:`(B, N, 3)` containing a vector of the epipolar
        lines corresponding to the points in the other image. Each line is described as
        :math:`ax + by + c = 0`, with the vector encoded as :math:`(a, b, c)`.
    """
    if not (len(points.shape) == 3 and points.shape[2] == 2):
        raise AssertionError(points.shape)
    if not (len(F_mat.shape) == 3 and F_mat.shape[-2:] == (3, 3)):
        raise AssertionError(F_mat.shape)

    points_h: torch.Tensor = convert_points_to_homogeneous(points)

    # project points and retrieve lines components
    a, b, c = torch.chunk(F_mat @ points_h.permute(0, 2, 1), dim=1, chunks=3)

    # compute normal and compose equation line
    nu: torch.Tensor = a * a + b * b
    nu = torch.where(nu > 0.0, 1.0 / torch.sqrt(nu), torch.ones_like(nu))

    line = torch.cat([a * nu, b * nu, c * nu], dim=1)  # Bx3xN

    return line.permute(0, 2, 1)  # BxNx3
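
# Usage sketch (illustrative only): because the returned lines are
# normalized so that (a, b) has unit norm, |a*x + b*y + c| is directly the
# point-to-line distance. `_demo_...` is a hypothetical helper.
def _demo_compute_correspond_epilines() -> None:
    points = torch.rand(1, 5, 2)  # B=1, N=5 points in one image
    F_mat = torch.rand(1, 3, 3)   # stand-in fundamental matrix
    lines = compute_correspond_epilines(points, F_mat)  # (1, 5, 3)
    # the (a, b) part of each line is unit-normalized:
    assert torch.allclose(lines[..., :2].norm(dim=-1), torch.ones(1, 5))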
def symmetric_epipolar_distance(pts0, pts1, E, K0, K1):
    """Squared symmetric epipolar distance.

    This can be seen as a biased estimation of the reprojection error.

    Args:
        pts0 (torch.Tensor): [N, 2] points in the first image.
        pts1 (torch.Tensor): [N, 2] points in the second image.
        E (torch.Tensor): [3, 3] essential matrix.
        K0 (torch.Tensor): [3, 3] intrinsics of the first camera.
        K1 (torch.Tensor): [3, 3] intrinsics of the second camera.

    Returns:
        torch.Tensor: [N,] squared symmetric epipolar distances.
    """
    # normalize the pixel coordinates with the intrinsics: (p - c) / f
    pts0 = (pts0 - K0[[0, 1], [2, 2]][None]) / K0[[0, 1], [0, 1]][None]
    pts1 = (pts1 - K1[[0, 1], [2, 2]][None]) / K1[[0, 1], [0, 1]][None]
    pts0 = convert_points_to_homogeneous(pts0)
    pts1 = convert_points_to_homogeneous(pts1)

    Ep0 = pts0 @ E.T  # [N, 3]
    p1Ep0 = torch.sum(pts1 * Ep0, -1)  # [N,]
    Etp1 = pts1 @ E  # [N, 3]

    d = p1Ep0**2 * (1.0 / (Ep0[:, 0]**2 + Ep0[:, 1]**2)
                    + 1.0 / (Etp1[:, 0]**2 + Etp1[:, 1]**2))  # [N,]
    return d
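
# Usage sketch (illustrative only): identity intrinsics mean the input
# coordinates are already normalized, and a random E is used purely for a
# shape check. `_demo_...` is a hypothetical helper.
def _demo_symmetric_epipolar_distance() -> None:
    pts0 = torch.rand(10, 2)
    pts1 = torch.rand(10, 2)
    E = torch.rand(3, 3)   # stand-in essential matrix
    K0 = K1 = torch.eye(3)
    d = symmetric_epipolar_distance(pts0, pts1, E, K0, K1)
    assert d.shape == (10,)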
def transform_points(trans_01: torch.Tensor,
                     points_1: torch.Tensor) -> torch.Tensor:
    r"""Function that applies transformations to a set of points.

    Args:
        trans_01 (torch.Tensor): tensor for transformations of shape
          :math:`(B, D+1, D+1)`.
        points_1 (torch.Tensor): tensor of points of shape :math:`(B, N, D)`.
    Returns:
        torch.Tensor: tensor of N-dimensional points.

    Shape:
        - Output: :math:`(B, N, D)`

    Examples:

        >>> points_1 = torch.rand(2, 4, 3)  # BxNx3
        >>> trans_01 = torch.eye(4).view(1, 4, 4)  # Bx4x4
        >>> points_0 = transform_points(trans_01, points_1)  # BxNx3
    """
    check_is_tensor(trans_01)
    check_is_tensor(points_1)
    if not (trans_01.device == points_1.device and trans_01.dtype == points_1.dtype):
        raise TypeError(
            "Tensor must be in the same device and dtype. "
            f"Got trans_01 with ({trans_01.device}, {trans_01.dtype}) and "
            f"points_1 with ({points_1.device}, {points_1.dtype})")
    if not trans_01.shape[0] == points_1.shape[0] and trans_01.shape[0] != 1:
        raise ValueError(
            "Input batch size must be the same for both tensors or 1")
    if not trans_01.shape[-1] == (points_1.shape[-1] + 1):
        raise ValueError("Last input dimensions must differ by one unit")

    # We reshape to BxNxD in case we get more dimensions, e.g., MxBxNxD
    shape_inp = list(points_1.shape)
    points_1 = points_1.reshape(-1, points_1.shape[-2], points_1.shape[-1])
    trans_01 = trans_01.reshape(-1, trans_01.shape[-2], trans_01.shape[-1])
    # We expand trans_01 to match the dimensions needed for bmm
    trans_01 = torch.repeat_interleave(
        trans_01, repeats=points_1.shape[0] // trans_01.shape[0], dim=0)
    # to homogeneous
    points_1_h = convert_points_to_homogeneous(points_1)  # BxNxD+1
    # transform coordinates
    points_0_h = torch.bmm(points_1_h, trans_01.permute(0, 2, 1))
    points_0_h = torch.squeeze(points_0_h, dim=-1)
    # to euclidean
    points_0 = convert_points_from_homogeneous(points_0_h)  # BxNxD
    # reshape to the input shape
    shape_inp[-2] = points_0.shape[-2]
    shape_inp[-1] = points_0.shape[-1]
    points_0 = points_0.reshape(shape_inp)
    return points_0
def project_to_image(project, points):
    """
    Project points to image
    Args:
        project [torch.tensor(..., 3, 4)]: Projection matrix
        points [torch.Tensor(..., 3)]: 3D points
    Returns:
        points_img [torch.Tensor(..., 2)]: Points in image
        points_depth [torch.Tensor(...)]: Depth of each point
    """
    # Reshape tensors to expected shape
    points = convert_points_to_homogeneous(points)
    points = points.unsqueeze(dim=-1)
    project = project.unsqueeze(dim=1)

    # Transform points to image and get depths
    points_t = project @ points
    points_t = points_t.squeeze(dim=-1)
    points_img = convert_points_from_homogeneous(points_t)
    points_depth = points_t[..., -1] - project[..., 2, 3]

    return points_img, points_depth
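
# Usage sketch (illustrative only): shapes follow the unsqueeze logic above,
# i.e. one (3, 4) projection per batch element applied to (B, N, 3) points.
# `_demo_...` is a hypothetical helper.
def _demo_project_to_image() -> None:
    project = torch.rand(2, 3, 4)  # B=2 projection matrices
    points = torch.rand(2, 5, 3)   # B=2, N=5 3D points
    points_img, points_depth = project_to_image(project, points)
    assert points_img.shape == (2, 5, 2)
    assert points_depth.shape == (2, 5)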
def transform_points(trans_01: torch.Tensor,
                     points_1: torch.Tensor) -> torch.Tensor:
    r"""Function that applies transformations to a set of points.

    Args:
        trans_01 (torch.Tensor): tensor for transformations of shape
          :math:`(B, D+1, D+1)`.
        points_1 (torch.Tensor): tensor of points of shape :math:`(B, N, D)`.
    Returns:
        torch.Tensor: tensor of N-dimensional points.

    Shape:
        - Output: :math:`(B, N, D)`

    Examples:

        >>> points_1 = torch.rand(2, 4, 3)  # BxNx3
        >>> trans_01 = torch.eye(4).view(1, 4, 4)  # Bx4x4
        >>> points_0 = kornia.transform_points(trans_01, points_1)  # BxNx3
    """
    if not torch.is_tensor(trans_01) or not torch.is_tensor(points_1):
        raise TypeError("Input type is not a torch.Tensor")
    if not trans_01.device == points_1.device:
        raise TypeError("Tensor must be in the same device")
    if not trans_01.shape[0] == points_1.shape[0] and trans_01.shape[0] != 1:
        raise ValueError(
            "Input batch size must be the same for both tensors or 1")
    if not trans_01.shape[-1] == (points_1.shape[-1] + 1):
        raise ValueError("Last input dimensions must differ by one unit")
    # to homogeneous
    points_1_h = convert_points_to_homogeneous(points_1)  # BxNxD+1
    # transform coordinates
    points_0_h = torch.matmul(
        trans_01.unsqueeze(1), points_1_h.unsqueeze(-1))
    points_0_h = torch.squeeze(points_0_h, dim=-1)
    # to euclidean
    points_0 = convert_points_from_homogeneous(points_0_h)  # BxNxD
    return points_0
def unproject_points(point_2d: torch.Tensor,
                     depth: torch.Tensor,
                     camera_matrix: torch.Tensor,
                     normalize: bool = False) -> torch.Tensor:
    r"""Unprojects a 2d point in 3d.

    Transform coordinates in the pixel frame to the camera frame.

    Args:
        point_2d: tensor containing the 2d points to be projected to world
            coordinates. The shape of the tensor can be :math:`(*, 2)`.
        depth: tensor containing the depth value of each 2d point. The tensor
            shape must be equal to point_2d :math:`(*, 1)`.
        camera_matrix: tensor containing the intrinsic camera matrix.
            The tensor shape must be :math:`(*, 3, 3)`.
        normalize: whether to normalize the pointcloud. This must be set to
            `True` when the depth is represented as the Euclidean ray length
            from the camera position.

    Returns:
        tensor of (x, y, z) world coordinates with shape :math:`(*, 3)`.
    """
    if not isinstance(point_2d, torch.Tensor):
        raise TypeError(
            "Input point_2d type is not a torch.Tensor. Got {}".format(
                type(point_2d)))

    if not isinstance(depth, torch.Tensor):
        raise TypeError(
            "Input depth type is not a torch.Tensor. Got {}".format(
                type(depth)))

    if not isinstance(camera_matrix, torch.Tensor):
        raise TypeError(
            "Input camera_matrix type is not a torch.Tensor. Got {}".format(
                type(camera_matrix)))

    if not (point_2d.device == depth.device == camera_matrix.device):
        raise ValueError("Input tensors must be all in the same device.")

    if not point_2d.shape[-1] == 2:
        raise ValueError("Input points_2d must be in the shape of (*, 2)."
                         " Got {}".format(point_2d.shape))

    if not depth.shape[-1] == 1:
        raise ValueError("Input depth must be in the shape of (*, 1)."
                         " Got {}".format(depth.shape))

    if not camera_matrix.shape[-2:] == (3, 3):
        raise ValueError(
            "Input camera_matrix must be in the shape of (*, 3, 3).")

    # projection eq. K_inv * [u v 1]'
    # x = (u - cx) * Z / fx
    # y = (v - cy) * Z / fy

    # unpack coordinates
    u_coord: torch.Tensor = point_2d[..., 0]
    v_coord: torch.Tensor = point_2d[..., 1]

    # unpack intrinsics
    fx: torch.Tensor = camera_matrix[..., 0, 0]
    fy: torch.Tensor = camera_matrix[..., 1, 1]
    cx: torch.Tensor = camera_matrix[..., 0, 2]
    cy: torch.Tensor = camera_matrix[..., 1, 2]

    # projective
    x_coord: torch.Tensor = (u_coord - cx) / fx
    y_coord: torch.Tensor = (v_coord - cy) / fy

    xyz: torch.Tensor = torch.stack([x_coord, y_coord], dim=-1)
    xyz = convert_points_to_homogeneous(xyz)

    if normalize:
        xyz = F.normalize(xyz, dim=-1, p=2.0)

    return xyz * depth
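
# Usage sketch (illustrative only): the principal point unprojects onto the
# optical axis, so the result is (0, 0, depth). The pinhole K is made up for
# illustration and `_demo_...` is a hypothetical helper.
def _demo_unproject_points() -> None:
    camera_matrix = torch.tensor([[500.0, 0.0, 250.0],
                                  [0.0, 500.0, 250.0],
                                  [0.0, 0.0, 1.0]])
    point_2d = torch.tensor([[250.0, 250.0]])  # the principal point
    depth = torch.tensor([[2.0]])
    xyz = unproject_points(point_2d, depth, camera_matrix)
    assert torch.allclose(xyz, torch.tensor([[0.0, 0.0, 2.0]]))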
def _create_meshgrid(height: int, width: int) -> torch.Tensor:
    grid: torch.Tensor = create_meshgrid(
        height, width, normalized_coordinates=False)  # 1xHxWx2
    return convert_points_to_homogeneous(grid)  # append ones to last dim
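
# Usage sketch (illustrative only): assumes `create_meshgrid` is kornia's
# helper returning a 1xHxWx2 grid of pixel coordinates, which the wrapper
# above extends with a homogeneous one. `_demo_...` is a hypothetical helper.
def _demo_create_meshgrid() -> None:
    grid = _create_meshgrid(height=4, width=6)
    assert grid.shape == (1, 4, 6, 3)        # homogeneous: (x, y, 1)
    assert bool((grid[..., -1] == 1).all())  # appended ones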
def solve_pnp_dlt(
    world_points: torch.Tensor,
    img_points: torch.Tensor,
    intrinsics: torch.Tensor,
    weights: Optional[torch.Tensor] = None,
    svd_eps: float = 1e-4,
) -> torch.Tensor:
    r"""This function attempts to solve the Perspective-n-Point (PnP) problem
    using Direct Linear Transform (DLT).

    Given a batch (where batch size is :math:`B`) of :math:`N` 3D points
    (where :math:`N \geq 6`) in the world space, a batch of :math:`N`
    corresponding 2D points in the image space and a batch of intrinsic
    matrices, this function tries to estimate a batch of world to camera
    transformation matrices.

    This implementation needs at least 6 points (i.e. :math:`N \geq 6`)
    to provide solutions.

    This function cannot be used if all the 3D world points (of any element
    of the batch) lie on a line or if all the 3D world points (of any element
    of the batch) lie on a plane. This function attempts to check for these
    conditions and throws an AssertionError if found. Do note that this check
    is sensitive to the value of the svd_eps parameter.

    Another bad condition occurs when the camera and the points lie on a
    twisted cubic. However, this function does not check for this condition.

    Args:
        world_points : A tensor with shape :math:`(B, N, 3)` representing
          the points in the world space.
        img_points : A tensor with shape :math:`(B, N, 2)` representing
          the points in the image space.
        intrinsics : A tensor with shape :math:`(B, 3, 3)` representing
          the intrinsic matrices.
        weights : This parameter is not used currently and is just a
          placeholder for API consistency.
        svd_eps : A small float value to avoid numerical precision issues.

    Returns:
        A tensor with shape :math:`(B, 3, 4)` representing the estimated
        world to camera transformation matrices (also known as the extrinsic
        matrices).

    Example:
        >>> world_points = torch.tensor([[
        ...     [ 5. , -5. ,  0. ], [ 0. ,  0. ,  1.5],
        ...     [ 2.5,  3. ,  6. ], [ 9. , -2. ,  3. ],
        ...     [-4. ,  5. ,  2. ], [-5. ,  5. ,  1. ],
        ... ]], dtype=torch.float64)
        >>>
        >>> img_points = torch.tensor([[
        ...     [1409.1504, -800.936 ], [ 407.0207, -182.1229],
        ...     [ 392.7021,  177.9428], [1016.838 ,   -2.9416],
        ...     [ -63.1116,  142.9204], [-219.3874,   99.666 ],
        ... ]], dtype=torch.float64)
        >>>
        >>> intrinsics = torch.tensor([[
        ...     [ 500.,    0.,  250.],
        ...     [   0.,  500.,  250.],
        ...     [   0.,    0.,    1.],
        ... ]], dtype=torch.float64)
        >>>
        >>> print(world_points.shape, img_points.shape, intrinsics.shape)
        torch.Size([1, 6, 3]) torch.Size([1, 6, 2]) torch.Size([1, 3, 3])
        >>>
        >>> pred_world_to_cam = kornia.geometry.solve_pnp_dlt(world_points, img_points, intrinsics)
        >>>
        >>> print(pred_world_to_cam.shape)
        torch.Size([1, 3, 4])
        >>>
        >>> pred_world_to_cam
        tensor([[[ 0.9392, -0.3432, -0.0130,  1.6734],
                 [ 0.3390,  0.9324, -0.1254, -4.3634],
                 [ 0.0552,  0.1134,  0.9920,  3.7785]]], dtype=torch.float64)
    """
    # This function was implemented based on ideas inspired from multiple references.
    # ============
    # References:
    # ============
    # 1. https://team.inria.fr/lagadic/camera_localization/tutorial-pose-dlt-opencv.html
    # 2. https://github.com/opencv/opencv/blob/68d15fc62edad980f1ffa15ee478438335f39cc3/modules/calib3d/src/calibration.cpp  # noqa: E501
    # 3. http://rpg.ifi.uzh.ch/docs/teaching/2020/03_camera_calibration.pdf
    # 4. http://www.cs.cmu.edu/~16385/s17/Slides/11.3_Pose_Estimation.pdf
    # 5. https://www.ece.mcmaster.ca/~shirani/vision/hartley_ch7.pdf

    if not isinstance(world_points, torch.Tensor):
        raise AssertionError(
            f"world_points is not an instance of torch.Tensor. "
            f"Type of world_points is {type(world_points)}")

    if not isinstance(img_points, torch.Tensor):
        raise AssertionError(
            f"img_points is not an instance of torch.Tensor. "
            f"Type of img_points is {type(img_points)}")

    if not isinstance(intrinsics, torch.Tensor):
        raise AssertionError(
            f"intrinsics is not an instance of torch.Tensor. "
            f"Type of intrinsics is {type(intrinsics)}")

    if (weights is not None) and (not isinstance(weights, torch.Tensor)):
        raise AssertionError(
            f"If weights is not None, then weights should be an instance "
            f"of torch.Tensor. Type of weights is {type(weights)}")

    if type(svd_eps) is not float:
        raise AssertionError(
            f"Type of svd_eps is not float. Got {type(svd_eps)}")

    accepted_dtypes = (torch.float32, torch.float64)

    if world_points.dtype not in accepted_dtypes:
        raise AssertionError(
            f"world_points must have one of the following dtypes {accepted_dtypes}. "
            f"Currently it has {world_points.dtype}.")

    if img_points.dtype not in accepted_dtypes:
        raise AssertionError(
            f"img_points must have one of the following dtypes {accepted_dtypes}. "
            f"Currently it has {img_points.dtype}.")

    if intrinsics.dtype not in accepted_dtypes:
        raise AssertionError(
            f"intrinsics must have one of the following dtypes {accepted_dtypes}. "
            f"Currently it has {intrinsics.dtype}.")

    if (len(world_points.shape) != 3) or (world_points.shape[2] != 3):
        raise AssertionError(
            f"world_points must be of shape (B, N, 3). Got shape {world_points.shape}.")

    if (len(img_points.shape) != 3) or (img_points.shape[2] != 2):
        raise AssertionError(
            f"img_points must be of shape (B, N, 2). Got shape {img_points.shape}.")

    if (len(intrinsics.shape) != 3) or (intrinsics.shape[1:] != (3, 3)):
        raise AssertionError(
            f"intrinsics must be of shape (B, 3, 3). Got shape {intrinsics.shape}.")

    if world_points.shape[1] != img_points.shape[1]:
        raise AssertionError(
            "world_points and img_points must have equal number of points.")

    if (world_points.shape[0] != img_points.shape[0]) or (world_points.shape[0] != intrinsics.shape[0]):
        raise AssertionError(
            "world_points, img_points and intrinsics must have the same batch size.")

    if world_points.shape[1] < 6:
        raise AssertionError(
            f"At least 6 points are required to use this function. "
            f"Got {world_points.shape[1]} points.")

    B, N = world_points.shape[:2]

    # Getting normalized world points.
    world_points_norm, world_transform_norm = _mean_isotropic_scale_normalize(world_points)

    # Checking if world_points_norm (of any element of the batch) has rank = 3. This
    # function cannot be used if all world points (of any element of the batch) lie
    # on a line or if all world points (of any element of the batch) lie on a plane.
    _, s, _ = torch.svd(world_points_norm)
    if torch.any(s[:, -1] < svd_eps):
        raise AssertionError(
            f"The last singular value of one/more of the elements of the batch is smaller "
            f"than {svd_eps}. This function cannot be used if all world_points (of any "
            f"element of the batch) lie on a line or if all world_points (of any "
            f"element of the batch) lie on a plane.")

    intrinsics_inv = torch.inverse(intrinsics)
    world_points_norm_h = convert_points_to_homogeneous(world_points_norm)

    # Transforming img_points with intrinsics_inv to get img_points_inv
    img_points_inv = transform_points(intrinsics_inv, img_points)

    # Normalizing img_points_inv
    img_points_norm, img_transform_norm = _mean_isotropic_scale_normalize(img_points_inv)
    inv_img_transform_norm = torch.inverse(img_transform_norm)

    # Setting up the system (the matrix A in Ax=0)
    system = torch.zeros((B, 2 * N, 12),
                         dtype=world_points.dtype,
                         device=world_points.device)
    system[:, 0::2, 0:4] = world_points_norm_h
    system[:, 1::2, 4:8] = world_points_norm_h
    system[:, 0::2, 8:12] = world_points_norm_h * (-1) * img_points_norm[..., 0:1]
    system[:, 1::2, 8:12] = world_points_norm_h * (-1) * img_points_norm[..., 1:2]

    # Getting the solution vectors.
    _, _, v = torch.svd(system)
    solution = v[..., -1]

    # Reshaping the solution vectors to the correct shape.
    solution = solution.reshape(B, 3, 4)

    # Creating solution_4x4
    solution_4x4 = eye_like(4, solution)
    solution_4x4[:, :3, :] = solution

    # De-normalizing the solution
    intermediate = torch.bmm(solution_4x4, world_transform_norm)
    solution = torch.bmm(inv_img_transform_norm, intermediate[:, :3, :])

    # We obtained one solution for each element of the batch. We may
    # need to multiply each solution with a scalar. This is because
    # if x is a solution to Ax=0, then cx is also a solution. We can
    # find the required scalars by using the properties of
    # rotation matrices. We do this in two parts:

    # First, we fix the sign by making sure that the determinant of
    # each rotation matrix is non-negative (since the determinant of
    # a rotation matrix should be 1).
    det = torch.det(solution[:, :3, :3])
    ones = torch.ones_like(det)
    sign_fix = torch.where(det < 0, ones * -1, ones)
    solution = solution * sign_fix[:, None, None]

    # Then, we make sure that the norm of the 0th column of each rotation
    # matrix is 1. Do note that the norm of any column of a rotation
    # matrix should be 1. Here we use the 0th column to calculate norm_col.
    # We then multiply solution with mul_factor.
    norm_col = torch.norm(input=solution[:, :3, 0], p=2, dim=1)
    mul_factor = (1 / norm_col)[:, None, None]
    temp = solution * mul_factor

    # To make sure that the rotation matrix would be orthogonal, we apply
    # QR decomposition.
    ortho, right = linalg_qr(temp[:, :3, :3])

    # We may need to fix the signs of the columns of the ortho matrix.
    # If right[i, j, j] is negative, then we need to flip the signs of
    # the column ortho[i, :, j]. The below code performs the necessary
    # operations in a vectorized way.
    mask = eye_like(3, ortho)
    col_sign_fix = torch.sign(mask * right)
    rot_mat = torch.bmm(ortho, col_sign_fix)

    # Preparing the final output.
    pred_world_to_cam = torch.cat([rot_mat, temp[:, :3, 3:4]], dim=-1)

    # TODO: Implement algorithm to refine the solution.

    return pred_world_to_cam
def unproject_points(
        point_2d: torch.Tensor,
        depth: torch.Tensor,
        camera_matrix: torch.Tensor,
        normalize: Optional[bool] = False) -> torch.Tensor:
    r"""Unprojects a 2d point in 3d.

    Transform coordinates in the pixel frame to the camera frame.

    Args:
        point_2d (torch.Tensor): tensor containing the 2d points to be
            projected to world coordinates. The shape of the tensor can be
            :math:`(*, 2)`.
        depth (torch.Tensor): tensor containing the depth value of each 2d
            point. The tensor shape must be equal to point_2d :math:`(*, 1)`.
        camera_matrix (torch.Tensor): tensor containing the intrinsic camera
            matrix. The tensor shape must be :math:`(*, 3, 3)`.
        normalize (Optional[bool]): whether to normalize the pointcloud. This
            must be set to `True` when the depth is represented as the
            Euclidean ray length from the camera position. Default is `False`.

    Returns:
        torch.Tensor: tensor of (x, y, z) world coordinates with shape
        :math:`(*, 3)`.
    """
    if not torch.is_tensor(point_2d):
        raise TypeError("Input point_2d type is not a torch.Tensor. Got {}"
                        .format(type(point_2d)))

    if not torch.is_tensor(depth):
        raise TypeError("Input depth type is not a torch.Tensor. Got {}"
                        .format(type(depth)))

    if not torch.is_tensor(camera_matrix):
        raise TypeError("Input camera_matrix type is not a torch.Tensor. Got {}"
                        .format(type(camera_matrix)))

    if not (point_2d.device == depth.device == camera_matrix.device):
        raise ValueError("Input tensors must be all in the same device.")

    if not point_2d.shape[-1] == 2:
        raise ValueError("Input points_2d must be in the shape of (*, 2)."
                         " Got {}".format(point_2d.shape))

    if not depth.shape[-1] == 1:
        raise ValueError("Input depth must be in the shape of (*, 1)."
                         " Got {}".format(depth.shape))

    if not camera_matrix.shape[-2:] == (3, 3):
        raise ValueError(
            "Input camera_matrix must be in the shape of (*, 3, 3).")

    # projection eq. K_inv * [u v 1]'
    # inverse the camera matrix
    camera_matrix_inv: torch.Tensor = torch.inverse(camera_matrix)

    # compute ray from center to camera
    uvw: torch.Tensor = convert_points_to_homogeneous(point_2d)

    # apply inverse intrinsics to points
    xyz: torch.Tensor = torch.matmul(
        camera_matrix_inv.view(-1, 3, 3), uvw.view(-1, 3, 1))

    # back to input shape and normalize if specified
    xyz_norm: torch.Tensor = xyz.view((*point_2d.shape[:-1], 3))

    if normalize:
        xyz_norm = F.normalize(xyz_norm, dim=-1, p=2)

    # apply depth
    return xyz_norm * depth
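
# Usage sketch (illustrative only): this variant inverts the full intrinsic
# matrix instead of unpacking fx, fy, cx, cy, but the result matches the
# closed form x = (u - cx) * z / fx = (300 - 250) * 2 / 500 = 0.2.
# The pinhole K is made up and `_demo_...` is a hypothetical helper.
def _demo_unproject_points_full_inverse() -> None:
    camera_matrix = torch.tensor([[500.0, 0.0, 250.0],
                                  [0.0, 500.0, 250.0],
                                  [0.0, 0.0, 1.0]])
    point_2d = torch.tensor([[300.0, 250.0]])
    depth = torch.tensor([[2.0]])
    xyz = unproject_points(point_2d, depth, camera_matrix)
    assert torch.allclose(xyz, torch.tensor([[0.2, 0.0, 2.0]]))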