Example #1
def symmetrical_epipolar_distance(pts1: torch.Tensor,
                                  pts2: torch.Tensor,
                                  Fm: torch.Tensor,
                                  squared: bool = True,
                                  eps: float = 1e-8) -> torch.Tensor:
    r"""Return symmetrical epipolar distance for correspondences given the fundamental matrix.

    Args:
       pts1: correspondences from the left images with shape
         (B, N, 2 or 3). If not homogeneous, they are converted automatically.
       pts2: correspondences from the right images with shape
         (B, N, 2 or 3). If not homogeneous, they are converted automatically.
       Fm: Fundamental matrices with shape :math:`(B, 3, 3)`. Called Fm to
         avoid ambiguity with torch.nn.functional.
       squared: if True (default), the squared distance is returned.
       eps: Small constant for safe sqrt.

    Returns:
        the computed symmetrical epipolar distance with shape :math:`(B, N)`.

    """
    if not isinstance(Fm, torch.Tensor):
        raise TypeError(f"Fm type is not a torch.Tensor. Got {type(Fm)}")

    if (len(Fm.shape) != 3) or not Fm.shape[-2:] == (3, 3):
        raise ValueError(f"Fm must be a (B, 3, 3) tensor. Got {Fm.shape}")

    if pts1.size(-1) == 2:
        pts1 = convert_points_to_homogeneous(pts1)

    if pts2.size(-1) == 2:
        pts2 = convert_points_to_homogeneous(pts2)

    # From Hartley and Zisserman, symmetric epipolar distance (11.10)
    # sed = (x'^T F x)^2 * (1 / ((F x)_1^2 + (F x)_2^2) + 1 / ((F^T x')_1^2 + (F^T x')_2^2))

    # line1_in_2: torch.Tensor = (F @ pts1.permute(0,2,1)).permute(0,2,1)
    # line2_in_1: torch.Tensor = (F.permute(0,2,1) @ pts2.permute(0,2,1)).permute(0,2,1)

    # Instead we can just transpose F once and switch the order of multiplication
    F_t: torch.Tensor = Fm.permute(0, 2, 1)
    line1_in_2: torch.Tensor = pts1 @ F_t
    line2_in_1: torch.Tensor = pts2 @ Fm

    # numerator = (x'^T F x) ** 2
    numerator: torch.Tensor = (pts2 * line1_in_2).sum(2).pow(2)

    # denominator_inv = 1 / ((F x)_1^2 + (F x)_2^2) + 1 / ((F^T x')_1^2 + (F^T x')_2^2)
    denominator_inv: torch.Tensor = 1.0 / (line1_in_2[..., :2].norm(
        2, dim=2).pow(2)) + 1.0 / (line2_in_1[..., :2].norm(2, dim=2).pow(2))
    out: torch.Tensor = numerator * denominator_inv
    if squared:
        return out
    return (out + eps).sqrt()
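
A minimal usage sketch (assuming the function above and kornia's convert_points_to_homogeneous are in scope; the tensor values are illustrative only):

import torch

pts1 = torch.rand(1, 10, 2)   # B=1, N=10 points in the left image
pts2 = torch.rand(1, 10, 2)   # corresponding points in the right image
Fm = torch.rand(1, 3, 3)      # fundamental matrix (random, for illustration)
dist = symmetrical_epipolar_distance(pts1, pts2, Fm)  # squared distances, shape (1, 10)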
Example #2
def compute_correspond_epilines(points: torch.Tensor,
                                F_mat: torch.Tensor) -> torch.Tensor:
    r"""Compute the corresponding epipolar line for a given set of points.

    Args:
        points: tensor containing the set of points to project in the shape of :math:`(B, N, 2)`.
        F_mat: the fundamental matrix to use for projecting the points, in the shape of :math:`(B, 3, 3)`.

    Returns:
        a tensor with shape :math:`(B, N, 3)` containing the epipolar lines in
        the other image that correspond to the input points. Each line is
        described as :math:`ax + by + c = 0` and encoded as the vector :math:`(a, b, c)`.

    """
    if not (len(points.shape) == 3 and points.shape[2] == 2):
        raise AssertionError(points.shape)
    if not (len(F_mat.shape) == 3 and F_mat.shape[-2:] == (3, 3)):
        raise AssertionError(F_mat.shape)

    points_h: torch.Tensor = convert_points_to_homogeneous(points)

    # project points and retrieve lines components
    a, b, c = torch.chunk(F_mat @ points_h.permute(0, 2, 1), dim=1, chunks=3)

    # compute normal and compose equation line
    nu: torch.Tensor = a * a + b * b
    nu = torch.where(nu > 0.0, 1.0 / torch.sqrt(nu), torch.ones_like(nu))

    line = torch.cat([a * nu, b * nu, c * nu], dim=1)  # Bx3xN
    return line.permute(0, 2, 1)  # BxNx3
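
A short usage sketch (random values, for illustration; for a true correspondence :math:`(x, y)` in the other image, :math:`ax + by + c` evaluates to approximately zero):

points = torch.rand(1, 8, 2)   # B=1, N=8 points in one image
F_mat = torch.rand(1, 3, 3)    # fundamental matrix (random, for illustration)
lines = compute_correspond_epilines(points, F_mat)  # (1, 8, 3), rows encode (a, b, c)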
Example #3
def symmetric_epipolar_distance(pts0, pts1, E, K0, K1):
    """Squared symmetric epipolar distance.
    This can be seen as a biased estimation of the reprojection error.
    Args:
        pts0 (torch.Tensor): [N, 2]
        E (torch.Tensor): [3, 3]
    """
    # convert pixel coordinates to normalized camera coordinates: (u - c) / f
    pts0 = (pts0 - K0[[0, 1], [2, 2]][None]) / K0[[0, 1], [0, 1]][None]
    pts1 = (pts1 - K1[[0, 1], [2, 2]][None]) / K1[[0, 1], [0, 1]][None]
    pts0 = convert_points_to_homogeneous(pts0)
    pts1 = convert_points_to_homogeneous(pts1)

    Ep0 = pts0 @ E.T  # [N, 3]
    p1Ep0 = torch.sum(pts1 * Ep0, -1)  # [N,]
    Etp1 = pts1 @ E  # [N, 3]

    d = p1Ep0**2 * (1.0 / (Ep0[:, 0]**2 + Ep0[:, 1]**2) + 1.0 /
                    (Etp1[:, 0]**2 + Etp1[:, 1]**2))  # N
    return d
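
A usage sketch with assumed pinhole intrinsics (values are illustrative; E would normally come from a relative pose or a matcher):

pts0 = torch.rand(100, 2) * 640   # pixel coordinates in image 0
pts1 = torch.rand(100, 2) * 640   # pixel coordinates in image 1
E = torch.rand(3, 3)              # essential matrix (random, for illustration)
K0 = K1 = torch.tensor([[500., 0., 320.],
                        [0., 500., 240.],
                        [0., 0., 1.]])
d = symmetric_epipolar_distance(pts0, pts1, E, K0, K1)  # shape (100,)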
Example #4
def transform_points(trans_01: torch.Tensor,
                     points_1: torch.Tensor) -> torch.Tensor:
    r"""Function that applies transformations to a set of points.

    Args:
        trans_01 (torch.Tensor): tensor for transformations of shape
          :math:`(B, D+1, D+1)`.
        points_1 (torch.Tensor): tensor of points of shape :math:`(B, N, D)`.
    Returns:
        torch.Tensor: tensor of transformed points with shape :math:`(B, N, D)`.

    Shape:
        - Output: :math:`(B, N, D)`

    Examples:

        >>> points_1 = torch.rand(2, 4, 3)  # BxNx3
        >>> trans_01 = torch.eye(4).view(1, 4, 4)  # Bx4x4
        >>> points_0 = transform_points(trans_01, points_1)  # BxNx3
    """
    check_is_tensor(trans_01)
    check_is_tensor(points_1)
    if not (trans_01.device == points_1.device
            and trans_01.dtype == points_1.dtype):
        raise TypeError(
            "Tensors must be on the same device and have the same dtype. "
            f"Got trans_01 with ({trans_01.device}, {trans_01.dtype}) and "
            f"points_1 with ({points_1.device}, {points_1.dtype})")
    if not trans_01.shape[0] == points_1.shape[0] and trans_01.shape[0] != 1:
        raise ValueError(
            "Input batch size must be the same for both tensors or 1")
    if not trans_01.shape[-1] == (points_1.shape[-1] + 1):
        raise ValueError("Last input dimensions must differ by one unit")

    # We reshape to BxNxD in case we get more dimensions, e.g., MxBxNxD
    shape_inp = list(points_1.shape)
    points_1 = points_1.reshape(-1, points_1.shape[-2], points_1.shape[-1])
    trans_01 = trans_01.reshape(-1, trans_01.shape[-2], trans_01.shape[-1])
    # We expand trans_01 to match the dimensions needed for bmm
    trans_01 = torch.repeat_interleave(trans_01,
                                       repeats=points_1.shape[0] //
                                       trans_01.shape[0],
                                       dim=0)
    # to homogeneous
    points_1_h = convert_points_to_homogeneous(points_1)  # BxNxD+1
    # transform coordinates
    points_0_h = torch.bmm(points_1_h, trans_01.permute(0, 2, 1))
    points_0_h = torch.squeeze(points_0_h, dim=-1)
    # to euclidean
    points_0 = convert_points_from_homogeneous(points_0_h)  # BxNxD
    # reshape to the input shape
    shape_inp[-2] = points_0.shape[-2]
    shape_inp[-1] = points_0.shape[-1]
    points_0 = points_0.reshape(shape_inp)
    return points_0
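
Because of the reshape-and-repeat logic above, leading dimensions beyond the batch are also accepted, e.g. (M, B, N, D) points with a single transform. A minimal sketch, assuming the helper functions are in scope:

points_1 = torch.rand(5, 2, 4, 3)        # MxBxNx3
trans_01 = torch.eye(4).view(1, 4, 4)    # one transform broadcast to all 10 point sets
points_0 = transform_points(trans_01, points_1)  # restored to shape (5, 2, 4, 3)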
Example #5
def project_to_image(project, points):
    """
    Project points to image
    Args:
        project [torch.Tensor(..., 3, 4)]: Projection matrix
        points [torch.Tensor(..., 3)]: 3D points
    Returns:
        points_img [torch.Tensor(..., 2)]: Points in image
        points_depth [torch.Tensor(...)]: Depth of each point
    """
    # Reshape tensors to expected shape
    points = convert_points_to_homogeneous(points)
    points = points.unsqueeze(dim=-1)
    project = project.unsqueeze(dim=1)

    # Transform points to image and get depths
    points_t = project @ points
    points_t = points_t.squeeze(dim=-1)
    points_img = convert_points_from_homogeneous(points_t)
    points_depth = points_t[..., -1] - project[..., 2, 3]

    return points_img, points_depth
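
A minimal sketch with a batched 3x4 projection matrix (random values, for illustration only):

P = torch.rand(2, 3, 4)       # B=2 projection matrices, i.e. K @ [R|t]
pts3d = torch.rand(2, 50, 3)  # B=2, N=50 points in 3D
pts_img, pts_depth = project_to_image(P, pts3d)
# pts_img: (2, 50, 2) image coordinates, pts_depth: (2, 50) depths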
Example #6
def transform_points(trans_01: torch.Tensor,
                     points_1: torch.Tensor) -> torch.Tensor:
    r"""Function that applies transformations to a set of points.

    Args:
        trans_01 (torch.Tensor): tensor for transformations of shape
          :math:`(B, D+1, D+1)`.
        points_1 (torch.Tensor): tensor of points of shape :math:`(B, N, D)`.
    Returns:
        torch.Tensor: tensor of transformed points with shape :math:`(B, N, D)`.

    Shape:
        - Output: :math:`(B, N, D)`

    Examples:

        >>> points_1 = torch.rand(2, 4, 3)  # BxNx3
        >>> trans_01 = torch.eye(4).view(1, 4, 4)  # Bx4x4
        >>> points_0 = kornia.transform_points(trans_01, points_1)  # BxNx3
    """
    if not torch.is_tensor(trans_01) or not torch.is_tensor(points_1):
        raise TypeError("Input type is not a torch.Tensor")
    if not trans_01.device == points_1.device:
        raise TypeError("Tensor must be in the same device")
    if not trans_01.shape[0] == points_1.shape[0] and trans_01.shape[0] != 1:
        raise ValueError(
            "Input batch size must be the same for both tensors or 1")
    if not trans_01.shape[-1] == (points_1.shape[-1] + 1):
        raise ValueError("Last input dimensions must differe by one unit")
    # to homogeneous
    points_1_h = convert_points_to_homogeneous(points_1)  # BxNxD+1
    # transform coordinates
    points_0_h = torch.matmul(trans_01.unsqueeze(1), points_1_h.unsqueeze(-1))
    points_0_h = torch.squeeze(points_0_h, dim=-1)
    # to euclidean
    points_0 = convert_points_from_homogeneous(points_0_h)  # BxNxD
    return points_0
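
Unlike Example #4, this earlier variant handles plain (B, N, D) inputs only, but it works for any D, e.g. 2D points with 3x3 transforms. A minimal sketch:

points_1 = torch.rand(1, 6, 2)         # BxNx2
trans_01 = torch.eye(3).view(1, 3, 3)  # Bx3x3 homogeneous 2D transform
points_0 = transform_points(trans_01, points_1)  # BxNx2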
Example #7
def unproject_points(point_2d: torch.Tensor,
                     depth: torch.Tensor,
                     camera_matrix: torch.Tensor,
                     normalize: bool = False) -> torch.Tensor:
    r"""Unprojects a 2d point in 3d.

    Transform coordinates in the pixel frame to the camera frame.

    Args:
        point_2d: tensor containing the 2d points to be projected to
            world coordinates. The shape of the tensor can be :math:`(*, 2)`.
        depth: tensor containing the depth value of each 2d
            point. The tensor shape must match point_2d: :math:`(*, 1)`.
        camera_matrix: tensor containing the intrinsics camera
            matrix. The tensor shape must be :math:`(*, 3, 3)`.
        normalize: whether to normalize the pointcloud. This
            must be set to `True` when the depth is represented as the Euclidean
            ray length from the camera position.

    Returns:
        tensor of (x, y, z) world coordinates with shape :math:`(*, 3)`.
    """
    if not isinstance(point_2d, torch.Tensor):
        raise TypeError(
            "Input point_2d type is not a torch.Tensor. Got {}".format(
                type(point_2d)))

    if not isinstance(depth, torch.Tensor):
        raise TypeError(
            "Input depth type is not a torch.Tensor. Got {}".format(
                type(depth)))

    if not isinstance(camera_matrix, torch.Tensor):
        raise TypeError(
            "Input camera_matrix type is not a torch.Tensor. Got {}".format(
                type(camera_matrix)))

    if not (point_2d.device == depth.device == camera_matrix.device):
        raise ValueError("Input tensors must be all in the same device.")

    if not point_2d.shape[-1] == 2:
        raise ValueError("Input points_2d must be in the shape of (*, 2)."
                         " Got {}".format(point_2d.shape))

    if not depth.shape[-1] == 1:
        raise ValueError("Input depth must be in the shape of (*, 1)."
                         " Got {}".format(depth.shape))

    if not camera_matrix.shape[-2:] == (3, 3):
        raise ValueError(
            "Input camera_matrix must be in the shape of (*, 3, 3).")

    # projection eq. K_inv * [u v 1]'
    # x = (u - cx) * Z / fx
    # y = (v - cy) * Z / fy

    # unpack coordinates
    u_coord: torch.Tensor = point_2d[..., 0]
    v_coord: torch.Tensor = point_2d[..., 1]

    # unpack intrinsics
    fx: torch.Tensor = camera_matrix[..., 0, 0]
    fy: torch.Tensor = camera_matrix[..., 1, 1]
    cx: torch.Tensor = camera_matrix[..., 0, 2]
    cy: torch.Tensor = camera_matrix[..., 1, 2]

    # projective
    x_coord: torch.Tensor = (u_coord - cx) / fx
    y_coord: torch.Tensor = (v_coord - cy) / fy

    xyz: torch.Tensor = torch.stack([x_coord, y_coord], dim=-1)
    xyz = convert_points_to_homogeneous(xyz)

    if normalize:
        xyz = F.normalize(xyz, dim=-1, p=2.0)

    return xyz * depth
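
A quick sanity check with assumed intrinsics: the principal point at depth 2 unprojects to (0, 0, 2).

K = torch.tensor([[500., 0., 320.],
                  [0., 500., 240.],
                  [0., 0., 1.]])
uv = torch.tensor([[320., 240.]])  # pixel at the principal point, shape (1, 2)
z = torch.tensor([[2.0]])          # depth, shape (1, 1)
xyz = unproject_points(uv, z, K)   # tensor([[0., 0., 2.]])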
Example #8
def _create_meshgrid(height: int, width: int) -> torch.Tensor:
    grid: torch.Tensor = create_meshgrid(
        height, width, normalized_coordinates=False)  # 1xHxWx2
    return convert_points_to_homogeneous(grid)  # append ones to last dim
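
For example (assuming kornia's create_meshgrid is in scope), a 2x3 grid yields a tensor of shape (1, 2, 3, 3) whose last channel is all ones:

grid = _create_meshgrid(2, 3)
print(grid.shape)     # torch.Size([1, 2, 3, 3])
print(grid[..., 2])   # all ones, appended by convert_points_to_homogeneous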
Example #9
def solve_pnp_dlt(
    world_points: torch.Tensor,
    img_points: torch.Tensor,
    intrinsics: torch.Tensor,
    weights: Optional[torch.Tensor] = None,
    svd_eps: float = 1e-4,
) -> torch.Tensor:
    r"""This function attempts to solve the Perspective-n-Point (PnP)
    problem using Direct Linear Transform (DLT).

    Given a batch (where batch size is :math:`B`) of :math:`N` 3D points
    (where :math:`N \geq 6`) in the world space, a batch of :math:`N`
    corresponding 2D points in the image space and a batch of
    intrinsic matrices, this function tries to estimate a batch of
    world to camera transformation matrices.

    This implementation needs at least 6 points (i.e. :math:`N \geq 6`) to
    provide solutions.

    This function cannot be used if all the 3D world points (of any element
    of the batch) lie on a line or if all the 3D world points (of any element
    of the batch) lie on a plane. This function attempts to check for these
    conditions and throws an AssertionError if found. Do note that this check
    is sensitive to the value of the svd_eps parameter.

    Another bad condition occurs when the camera and the points lie on a
    twisted cubic. However, this function does not check for this condition.

    Args:
        world_points : A tensor with shape :math:`(B, N, 3)` representing
          the points in the world space.
        img_points : A tensor with shape :math:`(B, N, 2)` representing
          the points in the image space.
        intrinsics : A tensor with shape :math:`(B, 3, 3)` representing
          the intrinsic matrices.
        weights : This parameter is not used currently and is just a
          placeholder for API consistency.
        svd_eps : A small float value to avoid numerical precision issues.

    Returns:
        A tensor with shape :math:`(B, 3, 4)` representing the estimated world to
        camera transformation matrices (also known as the extrinsic matrices).

    Example:
        >>> world_points = torch.tensor([[
        ...     [ 5. , -5. ,  0. ], [ 0. ,  0. ,  1.5],
        ...     [ 2.5,  3. ,  6. ], [ 9. , -2. ,  3. ],
        ...     [-4. ,  5. ,  2. ], [-5. ,  5. ,  1. ],
        ... ]], dtype=torch.float64)
        >>>
        >>> img_points = torch.tensor([[
        ...     [1409.1504, -800.936 ], [ 407.0207, -182.1229],
        ...     [ 392.7021,  177.9428], [1016.838 ,   -2.9416],
        ...     [ -63.1116,  142.9204], [-219.3874,   99.666 ],
        ... ]], dtype=torch.float64)
        >>>
        >>> intrinsics = torch.tensor([[
        ...     [ 500.,    0.,  250.],
        ...     [   0.,  500.,  250.],
        ...     [   0.,    0.,    1.],
        ... ]], dtype=torch.float64)
        >>>
        >>> print(world_points.shape, img_points.shape, intrinsics.shape)
        torch.Size([1, 6, 3]) torch.Size([1, 6, 2]) torch.Size([1, 3, 3])
        >>>
        >>> pred_world_to_cam = kornia.geometry.solve_pnp_dlt(world_points, img_points, intrinsics)
        >>>
        >>> print(pred_world_to_cam.shape)
        torch.Size([1, 3, 4])
        >>>
        >>> pred_world_to_cam
        tensor([[[ 0.9392, -0.3432, -0.0130,  1.6734],
                 [ 0.3390,  0.9324, -0.1254, -4.3634],
                 [ 0.0552,  0.1134,  0.9920,  3.7785]]], dtype=torch.float64)
    """
    # This function was implemented based on ideas inspired by multiple references.
    # ============
    # References:
    # ============
    # 1. https://team.inria.fr/lagadic/camera_localization/tutorial-pose-dlt-opencv.html
    # 2. https://github.com/opencv/opencv/blob/68d15fc62edad980f1ffa15ee478438335f39cc3/modules/calib3d/src/calibration.cpp # noqa: E501
    # 3. http://rpg.ifi.uzh.ch/docs/teaching/2020/03_camera_calibration.pdf
    # 4. http://www.cs.cmu.edu/~16385/s17/Slides/11.3_Pose_Estimation.pdf
    # 5. https://www.ece.mcmaster.ca/~shirani/vision/hartley_ch7.pdf

    if not isinstance(world_points, torch.Tensor):
        raise AssertionError(
            f"world_points is not an instance of torch.Tensor. Type of world_points is {type(world_points)}"
        )

    if not isinstance(img_points, torch.Tensor):
        raise AssertionError(
            f"img_points is not an instance of torch.Tensor. Type of img_points is {type(img_points)}"
        )

    if not isinstance(intrinsics, torch.Tensor):
        raise AssertionError(
            f"intrinsics is not an instance of torch.Tensor. Type of intrinsics is {type(intrinsics)}"
        )

    if (weights is not None) and (not isinstance(weights, torch.Tensor)):
        raise AssertionError(
            f"If weights is not None, then weights should be an instance "
            f"of torch.Tensor. Type of weights is {type(weights)}")

    if type(svd_eps) is not float:
        raise AssertionError(
            f"Type of svd_eps is not float. Got {type(svd_eps)}")

    accepted_dtypes = (torch.float32, torch.float64)

    if world_points.dtype not in accepted_dtypes:
        raise AssertionError(
            f"world_points must have one of the following dtypes {accepted_dtypes}. "
            f"Currently it has {world_points.dtype}.")

    if img_points.dtype not in accepted_dtypes:
        raise AssertionError(
            f"img_points must have one of the following dtypes {accepted_dtypes}. "
            f"Currently it has {img_points.dtype}.")

    if intrinsics.dtype not in accepted_dtypes:
        raise AssertionError(
            f"intrinsics must have one of the following dtypes {accepted_dtypes}. "
            f"Currently it has {intrinsics.dtype}.")

    if (len(world_points.shape) != 3) or (world_points.shape[2] != 3):
        raise AssertionError(
            f"world_points must be of shape (B, N, 3). Got shape {world_points.shape}."
        )

    if (len(img_points.shape) != 3) or (img_points.shape[2] != 2):
        raise AssertionError(
            f"img_points must be of shape (B, N, 2). Got shape {img_points.shape}."
        )

    if (len(intrinsics.shape) != 3) or (intrinsics.shape[1:] != (3, 3)):
        raise AssertionError(
            f"intrinsics must be of shape (B, 3, 3). Got shape {intrinsics.shape}."
        )

    if world_points.shape[1] != img_points.shape[1]:
        raise AssertionError(
            "world_points and img_points must have equal number of points.")

    if (world_points.shape[0] != img_points.shape[0]) or (
            world_points.shape[0] != intrinsics.shape[0]):
        raise AssertionError(
            "world_points, img_points and intrinsics must have the same batch size."
        )

    if world_points.shape[1] < 6:
        raise AssertionError(
            f"At least 6 points are required to use this function. "
            f"Got {world_points.shape[1]} points.")

    B, N = world_points.shape[:2]

    # Getting normalized world points.
    world_points_norm, world_transform_norm = _mean_isotropic_scale_normalize(
        world_points)

    # Checking if world_points_norm (of any element of the batch) has rank = 3. This
    # function cannot be used if all world points (of any element of the batch) lie
    # on a line or if all world points (of any element of the batch) lie on a plane.
    _, s, _ = torch.svd(world_points_norm)
    if torch.any(s[:, -1] < svd_eps):
        raise AssertionError(
            f"The last singular value of one/more of the elements of the batch is smaller "
            f"than {svd_eps}. This function cannot be used if all world_points (of any "
            f"element of the batch) lie on a line or if all world_points (of any "
            f"element of the batch) lie on a plane.")

    intrinsics_inv = torch.inverse(intrinsics)
    world_points_norm_h = convert_points_to_homogeneous(world_points_norm)

    # Transforming img_points with intrinsics_inv to get img_points_inv
    img_points_inv = transform_points(intrinsics_inv, img_points)

    # Normalizing img_points_inv
    img_points_norm, img_transform_norm = _mean_isotropic_scale_normalize(
        img_points_inv)
    inv_img_transform_norm = torch.inverse(img_transform_norm)

    # Setting up the system (the matrix A in Ax=0)
    system = torch.zeros((B, 2 * N, 12),
                         dtype=world_points.dtype,
                         device=world_points.device)
    system[:, 0::2, 0:4] = world_points_norm_h
    system[:, 1::2, 4:8] = world_points_norm_h
    system[:, 0::2,
           8:12] = world_points_norm_h * (-1) * img_points_norm[..., 0:1]
    system[:, 1::2,
           8:12] = world_points_norm_h * (-1) * img_points_norm[..., 1:2]

    # Getting the solution vectors.
    _, _, v = torch.svd(system)
    solution = v[..., -1]

    # Reshaping the solution vectors to the correct shape.
    solution = solution.reshape(B, 3, 4)

    # Creating solution_4x4
    solution_4x4 = eye_like(4, solution)
    solution_4x4[:, :3, :] = solution

    # De-normalizing the solution
    intermediate = torch.bmm(solution_4x4, world_transform_norm)
    solution = torch.bmm(inv_img_transform_norm, intermediate[:, :3, :])

    # We obtained one solution for each element of the batch. We may
    # need to multiply each solution with a scalar. This is because
    # if x is a solution to Ax=0, then cx is also a solution. We can
    # find the required scalars by using the properties of
    # rotation matrices. We do this in two parts:

    # First, we fix the sign by making sure that the determinants of
    # all the rotation matrices are non-negative (since the determinant
    # of a rotation matrix should be 1).
    det = torch.det(solution[:, :3, :3])
    ones = torch.ones_like(det)
    sign_fix = torch.where(det < 0, ones * -1, ones)
    solution = solution * sign_fix[:, None, None]

    # Then, we make sure that norm of the 0th columns of the rotation
    # matrices are 1. Do note that the norm of any column of a rotation
    # matrix should be 1. Here we use the 0th column to calculate norm_col.
    # We then multiply solution with mul_factor.
    norm_col = torch.norm(input=solution[:, :3, 0], p=2, dim=1)
    mul_factor = (1 / norm_col)[:, None, None]
    temp = solution * mul_factor

    # To make sure that the rotation matrix would be orthogonal, we apply
    # QR decomposition.
    ortho, right = linalg_qr(temp[:, :3, :3])

    # We may need to fix the signs of the columns of the ortho matrix.
    # If right[i, j, j] is negative, then we need to flip the signs of
    # the column ortho[i, :, j]. The code below performs this sign fix
    # in a vectorized manner.
    mask = eye_like(3, ortho)
    col_sign_fix = torch.sign(mask * right)
    rot_mat = torch.bmm(ortho, col_sign_fix)

    # Preparing the final output.
    pred_world_to_cam = torch.cat([rot_mat, temp[:, :3, 3:4]], dim=-1)

    # TODO: Implement algorithm to refine the solution.

    return pred_world_to_cam
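
A reprojection check for the docstring example above (a sketch, assuming convert_points_to_homogeneous is in scope):

world_h = convert_points_to_homogeneous(world_points)  # (1, 6, 4)
proj = intrinsics @ pred_world_to_cam                  # (1, 3, 4) full projection
img_h = world_h @ proj.transpose(1, 2)                 # (1, 6, 3) homogeneous pixels
img_reproj = img_h[..., :2] / img_h[..., 2:3]          # should be close to img_points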
Example #10
def unproject_points(
        point_2d: torch.Tensor,
        depth: torch.Tensor,
        camera_matrix: torch.Tensor,
        normalize: Optional[bool] = False) -> torch.Tensor:
    r"""Unprojects a 2d point in 3d.

    Transform coordinates in the pixel frame to the camera frame.

    Args:
        point_2d (torch.Tensor): tensor containing the 2d points to be projected
            to world coordinates. The shape of the tensor can be :math:`(*, 2)`.
        depth (torch.Tensor): tensor containing the depth value of each 2d
            point. The tensor shape must match point_2d: :math:`(*, 1)`.
        camera_matrix (torch.Tensor): tensor containing the intrinsics camera
            matrix. The tensor shape must be :math:`(*, 3, 3)`.
        normalize (Optional[bool]): whether to normalize the pointcloud. This
            must be set to `True` when the depth is represented as the Euclidean
            ray length from the camera position. Default is `False`.

    Returns:
        torch.Tensor: tensor of (x, y, z) world coordinates with shape
        :math:`(*, 3)`.
    """
    if not torch.is_tensor(point_2d):
        raise TypeError("Input point_2d type is not a torch.Tensor. Got {}"
                        .format(type(point_2d)))
    if not torch.is_tensor(depth):
        raise TypeError("Input depth type is not a torch.Tensor. Got {}"
                        .format(type(depth)))
    if not torch.is_tensor(camera_matrix):
        raise TypeError("Input camera_matrix type is not a torch.Tensor. Got {}"
                        .format(type(camera_matrix)))
    if not (point_2d.device == depth.device == camera_matrix.device):
        raise ValueError("Input tensors must be all in the same device.")
    if not point_2d.shape[-1] == 2:
        raise ValueError("Input points_2d must be in the shape of (*, 2)."
                         " Got {}".format(point_2d.shape))
    if not depth.shape[-1] == 1:
        raise ValueError("Input depth must be in the shape of (*, 1)."
                         " Got {}".format(depth.shape))
    if not camera_matrix.shape[-2:] == (3, 3):
        raise ValueError(
            "Input camera_matrix must be in the shape of (*, 3, 3).")
    # projection eq. K_inv * [u v 1]'
    # inverse the camera matrix
    camera_matrix_inv: torch.Tensor = torch.inverse(camera_matrix)

    # compute ray from center to camera
    uvw: torch.Tensor = convert_points_to_homogeneous(point_2d)

    # apply inverse intrinsics to points
    xyz: torch.Tensor = torch.matmul(
        camera_matrix_inv.view(-1, 3, 3), uvw.view(-1, 3, 1))

    # back to input shape and normalize if specified
    xyz_norm: torch.Tensor = xyz.view((*point_2d.shape[:-1], 3))

    if normalize:
        xyz_norm = F.normalize(xyz_norm, dim=-1, p=2)

    # apply depth
    return xyz_norm * depth
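
A sketch of the normalize=True path, where depth is interpreted as Euclidean ray length (identity intrinsics assumed for simplicity; F is torch.nn.functional as in the code above):

K = torch.eye(3)
uv = torch.tensor([[3., 4.]])     # shape (1, 2); the ray direction is (3, 4, 1)
ray_len = torch.tensor([[10.]])   # distance from the camera center, shape (1, 1)
xyz = unproject_points(uv, ray_len, K, normalize=True)
# xyz lies at Euclidean distance 10 from the camera center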