Example #1
def reproject_disparity_to_3D(disparity_tensor: torch.Tensor, Q_matrix: torch.Tensor) -> torch.Tensor:
    r"""Reproject the disparity tensor to a 3D point cloud.

    Args:
        disparity_tensor: Disparity tensor of shape :math:`(B, H, W, 1)`.
        Q_matrix: Tensor of Q matrices of shapes :math:`(B, 4, 4)`.

    Returns:
        The 3D point cloud of shape :math:`(B, H, W, 3)`
    """
    _check_Q_matrix(Q_matrix)
    _check_disparity_tensor(disparity_tensor)

    batch_size, rows, cols, _ = disparity_tensor.shape
    dtype = disparity_tensor.dtype
    device = disparity_tensor.device

    uv = create_meshgrid(rows, cols, normalized_coordinates=False, device=device, dtype=dtype)
    uv = uv.expand(batch_size, -1, -1, -1)
    v, u = torch.unbind(uv, dim=-1)
    v, u = torch.unsqueeze(v, -1), torch.unsqueeze(u, -1)
    uvd = torch.stack((u, v, disparity_tensor), 1).reshape(batch_size, 3, -1).permute(0, 2, 1)
    points = transform_points(Q_matrix, uvd).reshape(batch_size, rows, cols, 3)

    # Final check that everything went well.
    if not points.shape == (batch_size, rows, cols, 3):
        raise StereoException(
            f"Something went wrong in `reproject_disparity_to_3D`. Expected the final output"
            f"to be of shape {(batch_size, rows, cols, 3)}."
            f"But the computed point cloud had shape {points.shape}. "
            f"Please ensure input are correct. If this is an error, please submit an issue."
        )
    return points
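
For reference, a minimal self-contained sketch of the core reprojection step, using only kornia's public create_meshgrid and transform_points (the shapes and the identity Q matrix are illustrative placeholders; the _check_* helpers and StereoException above are assumed to live in the surrounding module):

import torch
from kornia.geometry import transform_points
from kornia.utils import create_meshgrid

batch_size, rows, cols = 2, 4, 6
disparity = torch.rand(batch_size, rows, cols, 1)               # (B, H, W, 1)
Q = torch.eye(4)[None].repeat(batch_size, 1, 1)                 # (B, 4, 4), placeholder Q matrix

uv = create_meshgrid(rows, cols, normalized_coordinates=False)  # (1, H, W, 2)
uv = uv.expand(batch_size, -1, -1, -1)
x_coords, y_coords = uv.unbind(-1)                              # pixel coordinates
uvd = torch.stack((x_coords, y_coords, disparity[..., 0]), dim=-1).reshape(batch_size, -1, 3)

points = transform_points(Q, uvd).reshape(batch_size, rows, cols, 3)  # (B, H, W, 3)
print(points.shape)  # torch.Size([2, 4, 6, 3])
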
Example #2
    def track_next_frame(self, x: torch.Tensor) -> Tuple[torch.Tensor, bool]:
        """The frame `x` is prewarped according to the previous frame homography, matched with fast_matcher
        verified with ransac."""
        if self.previous_homography is None:  # tracking has not started yet
            return self.no_match()
        Hwarp = self.previous_homography.clone()[None]
        # make a bit of border for safety
        Hwarp[:, 0:2, 0:2] = Hwarp[:, 0:2, 0:2] / 0.8
        Hwarp[:, 0:2, 2] -= 10.0
        Hinv = torch.inverse(Hwarp)
        h, w = self.target.shape[2:]
        frame_warped = warp_perspective(x, Hinv, (h, w))
        input_dict: Dict[str, torch.Tensor] = {
            "image0": self.target,
            "image1": frame_warped
        }
        for k, v in self.target_fast_representation.items():
            input_dict[f'{k}0'] = v

        match_dict = self.fast_matcher(input_dict)
        keypoints0 = match_dict['keypoints0'][match_dict['batch_indexes'] == 0]
        keypoints1 = match_dict['keypoints1'][match_dict['batch_indexes'] == 0]
        keypoints1 = transform_points(Hwarp, keypoints1)

        if len(keypoints0) < self.minimum_inliers_num:
            self.reset_tracking()
            return self.no_match()
        H, inliers = self.ransac(keypoints0, keypoints1)
        if inliers.sum().item() < self.minimum_inliers_num:
            self.reset_tracking()
            return self.no_match()
        self.previous_homography = H.clone()
        return H, True
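
A minimal sketch of the coordinate bookkeeping above: keypoints found in the prewarped frame are mapped back to original-frame coordinates with the same Hwarp (the homography and keypoints below are illustrative stand-ins for real tracker state and matches):

import torch
from kornia.geometry import transform_points

Hwarp = torch.eye(3)[None]            # (1, 3, 3), previous-frame homography (placeholder)
Hwarp[:, 0:2, 0:2] /= 0.8             # widen the warp a bit, as in the tracker
Hwarp[:, 0:2, 2] -= 10.0

keypoints_warped = torch.rand(1, 5, 2) * 100.0              # matches found in the prewarped frame
keypoints_orig = transform_points(Hwarp, keypoints_warped)  # back to original-frame pixels
print(keypoints_orig.shape)  # torch.Size([1, 5, 2])
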
Example #3
    def warp_grid(self, dst_homo_src: torch.Tensor) -> torch.Tensor:
        r"""Computes the grid to warp the coordinates grid by an homography.

        Args:
            dst_homo_src (torch.Tensor): Homography or homographies (stacked) to
                              transform all points in the grid. Shape of the
                              homography has to be :math:`(N, 3, 3)`.

        Returns:
            torch.Tensor: the transformed grid of shape :math:`(N, H, W, 2)`.
        """
        batch_size: int = dst_homo_src.shape[0]
        device: torch.device = dst_homo_src.device
        dtype: torch.dtype = dst_homo_src.dtype
        # expand grid to match the input batch size
        grid: torch.Tensor = self.grid.expand(batch_size, -1, -1,
                                              -1)  # NxHxWx2
        if len(dst_homo_src.shape) == 3:  # local homography case
            dst_homo_src = dst_homo_src.view(batch_size, 1, 3, 3)  # Nx1x3x3
        # perform the actual grid transformation,
        # the grid is copied to input device and casted to the same type
        flow: torch.Tensor = transform_points(
            dst_homo_src,
            grid.to(device).to(dtype))  # NxHxWx2
        return flow.view(batch_size, self.height, self.width, 2)  # NxHxWx2
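
The same grid warp can be reproduced outside the class with kornia's public helpers (identity homography and small sizes used purely for illustration):

import torch
from kornia.geometry import transform_points
from kornia.utils import create_meshgrid

N, H, W = 2, 3, 4
dst_homo_src = torch.eye(3)[None].repeat(N, 1, 1)          # (N, 3, 3)
grid = create_meshgrid(H, W, normalized_coordinates=True)  # (1, H, W, 2)
grid = grid.expand(N, -1, -1, -1)                          # (N, H, W, 2)

flow = transform_points(dst_homo_src, grid.reshape(N, -1, 2)).reshape(N, H, W, 2)
print(flow.shape)  # torch.Size([2, 3, 4, 2])
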
Example #4
def pixel2cam(depth: torch.Tensor, intrinsics_inv: torch.Tensor,
              pixel_coords: torch.Tensor) -> torch.Tensor:
    r"""Transform coordinates in the pixel frame to the camera frame.

    Args:
        depth (torch.Tensor): the source depth maps. Shape must be Bx1xHxW.
        intrinsics_inv (torch.Tensor): the inverse intrinsics camera matrix.
          Shape must be Bx4x4.
        pixel_coords (torch.Tensor): the grid with the homogeneous camera
          coordinates. Shape must be BxHxWx3.

    Returns:
        torch.Tensor: tensor of (x, y, z) camera coordinates with shape BxHxWx3.
    """
    if not (len(depth.shape) == 4 and depth.shape[1] == 1):
        raise ValueError("Input depth has to be in the shape of "
                         "Bx1xHxW. Got {}".format(depth.shape))
    if not len(intrinsics_inv.shape) == 3:
        raise ValueError("Input intrinsics_inv has to be in the shape of "
                         "Bx4x4. Got {}".format(intrinsics_inv.shape))
    if not (len(pixel_coords.shape) == 4 and pixel_coords.shape[3] == 3):
        raise ValueError("Input pixel_coords has to be in the shape of "
                         "BxHxWx3. Got {}".format(pixel_coords.shape))
    cam_coords: torch.Tensor = transform_points(intrinsics_inv[:, None],
                                                pixel_coords)
    return cam_coords * depth.permute(0, 2, 3, 1)
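
A minimal self-contained sketch of the same unprojection, with a toy 4x4 intrinsics matrix and a constant depth map (values are illustrative only):

import torch
from kornia.geometry import convert_points_to_homogeneous, transform_points
from kornia.utils import create_meshgrid

B, H, W = 1, 4, 5
depth = torch.ones(B, 1, H, W)                    # (B, 1, H, W)
K = torch.eye(4)[None]                            # (B, 4, 4), toy intrinsics
K[:, 0, 0] = K[:, 1, 1] = 100.0
K[:, 0, 2], K[:, 1, 2] = W / 2.0, H / 2.0
intrinsics_inv = torch.inverse(K)

uv = create_meshgrid(H, W, normalized_coordinates=False)                # (1, H, W, 2)
pixel_coords = convert_points_to_homogeneous(uv).expand(B, -1, -1, -1)  # (B, H, W, 3)

cam_coords = transform_points(intrinsics_inv, pixel_coords.reshape(B, -1, 3)).reshape(B, H, W, 3)
cam_coords = cam_coords * depth.permute(0, 2, 3, 1)                     # scale rays by depth
print(cam_coords.shape)  # torch.Size([1, 4, 5, 3])
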
Example #5
    def warp_grid(self, src_homo_dst: torch.Tensor) -> torch.Tensor:
        r"""Compute the grid to warp the coordinates grid by the homography/ies.

        Args:
            src_homo_dst (torch.Tensor): Homography or homographies (stacked) to
              transform all points in the grid. Shape of the homography
              has to be :math:`(1, 3, 3)` or :math:`(N, 1, 3, 3)`.
              The homography assumes normalized coordinates [-1, 1] if
              normalized_coordinates is True.

        Returns:
            torch.Tensor: the transformed grid of shape :math:`(N, H, W, 2)`.
        """
        batch_size: int = src_homo_dst.shape[0]
        device: torch.device = src_homo_dst.device
        dtype: torch.dtype = src_homo_dst.dtype
        # expand grid to match the input batch size
        grid: torch.Tensor = self.grid.expand(batch_size, -1, -1,
                                              -1)  # NxHxWx2
        if len(src_homo_dst.shape) == 3:  # local homography case
            src_homo_dst = src_homo_dst.view(batch_size, 1, 3, 3)  # Nx1x3x3
        # perform the actual grid transformation,
        # the grid is copied to input device and casted to the same type
        flow: torch.Tensor = transform_points(
            src_homo_dst,
            grid.to(device).to(dtype))  # NxHxWx2
        return flow.view(batch_size, self.height, self.width, 2)  # NxHxWx2
Example #6
def cam2pixel(cam_coords_src: torch.Tensor, dst_proj_src: torch.Tensor, eps: float = 1e-12) -> torch.Tensor:
    r"""Transform coordinates in the camera frame to the pixel frame.

    Args:
        cam_coords_src: (x, y, z) coordinates defined in the first camera coordinates system. Shape must be BxHxWx3.
        dst_proj_src: the projection matrix between the
          reference and the non reference camera frame. Shape must be Bx4x4.
        eps: small value to avoid division by zero error.

    Returns:
        tensor of shape BxHxWx2 with (u, v) pixel coordinates.
    """
    if not (len(cam_coords_src.shape) == 4 and cam_coords_src.shape[3] == 3):
        raise ValueError(
            "Input cam_coords_src has to be in the shape of " "BxHxWx3. Got {}".format(cam_coords_src.shape)
        )
    if not (len(dst_proj_src.shape) == 3 and dst_proj_src.shape[-2:] == (4, 4)):
        raise ValueError("Input dst_proj_src has to be in the shape of " "Bx4x4. Got {}".format(dst_proj_src.shape))
    # apply projection matrix to points
    point_coords: torch.Tensor = transform_points(dst_proj_src[:, None], cam_coords_src)
    x_coord: torch.Tensor = point_coords[..., 0]
    y_coord: torch.Tensor = point_coords[..., 1]
    z_coord: torch.Tensor = point_coords[..., 2]

    # compute pixel coordinates
    u_coord: torch.Tensor = x_coord / (z_coord + eps)
    v_coord: torch.Tensor = y_coord / (z_coord + eps)

    # stack and return the coordinates, that's the actual flow
    pixel_coords_dst: torch.Tensor = torch.stack([u_coord, v_coord], dim=-1)
    return pixel_coords_dst  # BxHxWx2
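
The projection boils down to a 4x4 transform followed by a perspective divide; a minimal sketch with an identity extrinsic matrix and random camera-frame points (illustrative values only):

import torch
from kornia.geometry import transform_points

B, H, W = 1, 3, 4
cam_coords_src = torch.rand(B, H, W, 3) + torch.tensor([0.0, 0.0, 1.0])  # keep z positive
dst_proj_src = torch.eye(4)[None]                                         # (B, 4, 4), toy projection

pts = transform_points(dst_proj_src, cam_coords_src.reshape(B, -1, 3)).reshape(B, H, W, 3)
u = pts[..., 0] / (pts[..., 2] + 1e-12)
v = pts[..., 1] / (pts[..., 2] + 1e-12)
pixel_coords = torch.stack([u, v], dim=-1)                                # (B, H, W, 2)
print(pixel_coords.shape)  # torch.Size([1, 3, 4, 2])
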
Example #7
def _transform_boxes(boxes: torch.Tensor, M: torch.Tensor) -> torch.Tensor:
    """Transforms 3D and 2D in kornia format by applying the transformation matrix M. Boxes and the transformation
    matrix could be batched or not.

    Args:
        boxes: 2D quadrilaterals or 3D hexahedrons in kornia format.
        M: the transformation matrix of shape :math:`(3, 3)` or :math:`(B, 3, 3)` for 2D and :math:`(4, 4)` or
            :math:`(B, 4, 4)` for 3D hexahedron.
    """
    M = M if M.is_floating_point() else M.float()

    # Work with batch as kornia.transform_points only supports a batch of points.
    boxes_per_batch, n_points_per_box, coordinates_dimension = boxes.shape[-3:]
    points = boxes.view(-1, n_points_per_box * boxes_per_batch,
                        coordinates_dimension)
    M = M if M.ndim == 3 else M.unsqueeze(0)

    if points.shape[0] != M.shape[0]:
        raise ValueError(
            f"Batch size mismatch. Got {points.shape[0]} for boxes and {M.shape[0]} for the transformation matrix."
        )

    transformed_boxes: torch.Tensor = transform_points(M, points)
    transformed_boxes = transformed_boxes.view_as(boxes)
    return transformed_boxes
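
A minimal sketch of the same flatten-transform-reshape pattern for a batch of 2D boxes (one batch of two quadrilaterals and a pure translation, purely illustrative):

import torch
from kornia.geometry import transform_points

boxes = torch.tensor([[[0., 0.], [2., 0.], [2., 1.], [0., 1.]],
                      [[1., 1.], [3., 1.], [3., 4.], [1., 4.]]])[None]  # (1, 2, 4, 2)
M = torch.eye(3)[None]                                                   # (1, 3, 3)
M[:, 0, 2], M[:, 1, 2] = 5.0, -2.0                                       # translate by (+5, -2)

n_boxes, n_pts, dim = boxes.shape[-3:]
points = boxes.view(-1, n_boxes * n_pts, dim)        # flatten all corners into one point batch
out = transform_points(M, points).view_as(boxes)     # (1, 2, 4, 2)
print(out[0, 0])  # first box shifted by (+5, -2)
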
Example #8
def cam2pixel(cam_coords_src: torch.Tensor, dst_proj_src: torch.Tensor, eps: Optional[float] = 1e-6) -> torch.Tensor:
    r"""Transform coordinates in the camera frame to the pixel frame.

    Args:
        cam_coords_src (torch.Tensor): (x, y, z) coordinates defined in the first
          camera coordinates system. Shape must be BxHxWx3.
        dst_proj_src (torch.Tensor): the projection matrix between the
          reference and the non reference camera frame. Shape must be Bx4x4.

    Returns:
        torch.Tensor: array of [-1, 1] coordinates of shape BxHxWx2.
    """
    if not (len(cam_coords_src.shape) == 4 and cam_coords_src.shape[3] == 3):
        raise ValueError(
            "Input cam_coords_src has to be in the shape of " "BxHxWx3. Got {}".format(cam_coords_src.shape)
        )
    if not (len(dst_proj_src.shape) == 3 and dst_proj_src.shape[-2:] == (4, 4)):
        raise ValueError("Input dst_proj_src has to be in the shape of " "Bx4x4. Got {}".format(dst_proj_src.shape))
    b, h, w, _ = cam_coords_src.shape
    # apply projection matrix to points
    point_coords: torch.Tensor = transform_points(dst_proj_src[:, None], cam_coords_src)
    x_coord: torch.Tensor = point_coords[..., 0]
    y_coord: torch.Tensor = point_coords[..., 1]
    z_coord: torch.Tensor = point_coords[..., 2]

    # compute pixel coordinates
    u_coord: torch.Tensor = x_coord / (z_coord + eps)
    v_coord: torch.Tensor = y_coord / (z_coord + eps)

    # stack and return the coordinates, that's the actual flow
    pixel_coords_dst: torch.Tensor = torch.stack([u_coord, v_coord], dim=-1)
    return pixel_coords_dst  # BxHxWx2
Example #9
    def inverse_keypoints(self,
                          input: torch.Tensor,
                          module: nn.Module,
                          param: Optional[ParamItem] = None) -> torch.Tensor:
        if isinstance(module, GeometricAugmentationBase2D):
            transform = module.compute_inverse_transformation(
                module.get_transformation_matrix(
                    input, None if param is None else cast(Dict, param.data)))
            input = transform_points(
                torch.as_tensor(transform,
                                device=input.device,
                                dtype=input.dtype), input)
        return input
Example #10
    def apply_to_keypoints(self,
                           input: torch.Tensor,
                           module: nn.Module,
                           param: Optional[ParamItem] = None) -> torch.Tensor:
        if param is not None:
            _param = cast(Dict[str, torch.Tensor], param.data)
        else:
            _param = None  # type: ignore

        if isinstance(module, GeometricAugmentationBase2D) and _param is None:
            raise ValueError(
                f"Transformation matrix for {module} has not been computed.")
        if isinstance(module,
                      GeometricAugmentationBase2D) and _param is not None:
            input = transform_points(
                module.get_transformation_matrix(input, _param), input)
        else:
            pass  # No need to update anything
        return input
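
Both hooks above reduce to applying the module's transformation matrix (or its inverse) to the keypoints with transform_points. A minimal round-trip sketch with a toy matrix standing in for a real augmentation module:

import torch
from kornia.geometry import transform_points

keypoints = torch.rand(1, 6, 2) * 32.0        # (B, N, 2) keypoints in pixel coordinates
T = torch.tensor([[[2.0, 0.0, 4.0],
                   [0.0, 2.0, 1.0],
                   [0.0, 0.0, 1.0]]])          # (B, 3, 3), toy geometric transform

warped = transform_points(T, keypoints)                 # forward, as in apply_to_keypoints
restored = transform_points(torch.inverse(T), warped)   # inverse, as in inverse_keypoints
print(torch.allclose(restored, keypoints, atol=1e-5))   # True
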
Example #11
def _mean_isotropic_scale_normalize(
        points: torch.Tensor,
        eps: float = 1e-8) -> Tuple[torch.Tensor, torch.Tensor]:
    r"""Normalizes points.

    Args:
       points : Tensor containing the points to be normalized with shape :math:`(B, N, D)`.
       eps : Small value to avoid division by zero error.

    Returns:
       Tuple containing the normalized points in the shape :math:`(B, N, D)` and the transformation matrix
       in the shape :math:`(B, D+1, D+1)`.

    """
    if not isinstance(points, torch.Tensor):
        raise AssertionError(
            f"points is not an instance of torch.Tensor. Type of points is {type(points)}"
        )

    if len(points.shape) != 3:
        raise AssertionError(
            f"points must be of shape (B, N, D). Got shape {points.shape}.")

    x_mean = torch.mean(points, dim=1, keepdim=True)  # Bx1xD
    scale = (points - x_mean).norm(dim=-1, p=2).mean(dim=-1)  # B

    D_int = points.shape[-1]
    D_float = torch.tensor(points.shape[-1],
                           dtype=torch.float64,
                           device=points.device)
    scale = torch.sqrt(D_float) / (scale + eps)  # B
    transform = eye_like(D_int + 1, points)  # (B, D+1, D+1)

    idxs = torch.arange(D_int, dtype=torch.int64, device=points.device)
    transform[:, idxs, idxs] = transform[:, idxs, idxs] * scale[:, None]
    transform[:, idxs,
              D_int] = transform[:, idxs, D_int] + (-scale[:, None] *
                                                    x_mean[:, 0, idxs])

    points_norm = transform_points(transform, points)  # BxNxD

    return (points_norm, transform)
Example #12
def normalize_points(points: torch.Tensor,
                     eps: float = 1e-8) -> Tuple[torch.Tensor, torch.Tensor]:
    r"""Normalizes points (isotropic).

    Computes the transformation matrix such that the two principal moments of the set of points
    are equal to unity, forming an approximately symmetric circular cloud of points of radius 1
    about the origin. Reference: Hartley/Zisserman, Sec. 4.4.4, p. 107.

    This operation is an essential step before applying the DLT algorithm so that the result
    can be considered optimal.

    Args:
       points: Tensor containing the points to be normalized with shape :math:`(B, N, 2)`.
       eps: epsilon value to avoid numerical instabilities.

    Returns:
       tuple containing the normalized points in the shape :math:`(B, N, 2)` and the transformation matrix
       in the shape :math:`(B, 3, 3)`.

    """
    if len(points.shape) != 3:
        raise AssertionError(points.shape)
    if points.shape[-1] != 2:
        raise AssertionError(points.shape)

    x_mean = torch.mean(points, dim=1, keepdim=True)  # Bx1x2

    scale = (points - x_mean).norm(dim=-1, p=2).mean(dim=-1)  # B
    scale = torch.sqrt(torch.tensor(2.0)) / (scale + eps)  # B

    ones, zeros = torch.ones_like(scale), torch.zeros_like(scale)

    transform = torch.stack([
        scale, zeros, -scale * x_mean[..., 0, 0], zeros, scale,
        -scale * x_mean[..., 0, 1], zeros, zeros, ones
    ],
                            dim=-1)  # Bx9

    transform = transform.view(-1, 3, 3)  # Bx3x3
    points_norm = transform_points(transform, points)  # BxNx2

    return (points_norm, transform)
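
A small sketch that checks the normalization property on random data (assuming this snippet mirrors the routine shipped as kornia.geometry.epipolar.normalize_points): after normalization the centroid sits at the origin and the mean distance to it is approximately sqrt(2).

import torch
from kornia.geometry.epipolar import normalize_points

points = torch.rand(2, 10, 2) * 100.0               # (B, N, 2) raw pixel coordinates
points_norm, transform = normalize_points(points)   # (B, N, 2), (B, 3, 3)

mean_dist = points_norm.norm(dim=-1).mean(dim=-1)
print(mean_dist)  # roughly tensor([1.4142, 1.4142])
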
Example #13
    def transform_grid(self, voxel_grid, grid_to_lidar, lidar_to_cam,
                       cam_to_img):
        """
        Transforms voxel sampling grid into frustum sampling grid
        Args:
            voxel_grid: (B, X, Y, Z, 3), Voxel sampling grid
            grid_to_lidar: (4, 4), Voxel grid to LiDAR unprojection matrix
            lidar_to_cam: (B, 4, 4), LiDAR to camera frame transformation
            cam_to_img: (B, 3, 4), Camera projection matrix
        Returns:
            frustum_grid: (B, X, Y, Z, 3), Frustum sampling grid
        """
        B = lidar_to_cam.shape[0]

        # Create transformation matrices
        V_G = grid_to_lidar  # Voxel Grid -> LiDAR (4, 4)
        C_V = lidar_to_cam  # LiDAR -> Camera (B, 4, 4)
        I_C = cam_to_img  # Camera -> Image (B, 3, 4)
        trans = C_V @ V_G

        # Reshape to match dimensions
        trans = trans.reshape(B, 1, 1, 4, 4)
        voxel_grid = voxel_grid.repeat_interleave(repeats=B, dim=0)

        # Transform to camera frame
        camera_grid = transform_points(trans_01=trans, points_1=voxel_grid)

        # Project to image
        I_C = I_C.reshape(B, 1, 1, 3, 4)
        image_grid, image_depths = transform_utils.project_to_image(
            project=I_C, points=camera_grid)

        # Convert depths to depth bins
        image_depths = transform_utils.bin_depths(depth_map=image_depths,
                                                  **self.disc_cfg)

        # Stack to form frustum grid
        image_depths = image_depths.unsqueeze(-1)
        frustum_grid = torch.cat((image_grid, image_depths), dim=-1)
        return frustum_grid
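
The kornia-specific part of this pipeline is the chained 4x4 transform applied to the voxel grid; a minimal sketch of just that step (identity matrices and a tiny grid, with no camera projection or depth binning):

import torch
from kornia.geometry import transform_points

B, X, Y, Z = 2, 4, 4, 3
voxel_grid = torch.rand(1, X, Y, Z, 3).repeat(B, 1, 1, 1, 1)   # (B, X, Y, Z, 3)

grid_to_lidar = torch.eye(4)                                    # (4, 4)
lidar_to_cam = torch.eye(4)[None].repeat(B, 1, 1)               # (B, 4, 4)
trans = lidar_to_cam @ grid_to_lidar                            # (B, 4, 4)

camera_grid = transform_points(trans, voxel_grid.reshape(B, -1, 3)).reshape(B, X, Y, Z, 3)
print(camera_grid.shape)  # torch.Size([2, 4, 4, 3, 3])
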
Example #14
def warp_grid3d(grid: torch.Tensor, src_homo_dst: torch.Tensor) -> torch.Tensor:
    r"""Compute the grid to warp the coordinates grid by the homography/ies.

    Args:
        grid: Unwrapped grid of the shape :math:`(1, D, H, W, 3)`.
        src_homo_dst (torch.Tensor): Homography or homographies (stacked) to
          transform all points in the grid. Shape of the homography
          has to be :math:`(1, 4, 4)` or :math:`(N, 1, 4, 4)`.

    Returns:
        torch.Tensor: the transformed grid of shape :math:`(N, D, H, W, 3)`.
    """
    batch_size: int = src_homo_dst.size(0)
    _, depth, height, width, _ = grid.size()
    # expand grid to match the input batch size
    grid = grid.expand(batch_size, -1, -1, -1, -1)  # NxDxHxWx3
    if len(src_homo_dst.shape) == 3:  # local homography case
        src_homo_dst = src_homo_dst.view(batch_size, 1, 4, 4)  # Nx1x4x4
    # perform the actual grid transformation,
    # the grid is copied to input device and casted to the same type
    flow: torch.Tensor = transform_points(src_homo_dst, grid.to(src_homo_dst))  # NxDxHxWx3
    return flow.view(batch_size, depth, height, width, 3)  # NxDxHxWx3
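
A minimal usage sketch with a grid from kornia.utils.create_meshgrid3d and an identity 4x4 homography (assuming warp_grid3d above is in scope; sizes are illustrative):

import torch
from kornia.utils import create_meshgrid3d

N, D, H, W = 2, 2, 3, 4
grid = create_meshgrid3d(D, H, W, normalized_coordinates=True)   # (1, D, H, W, 3)
src_homo_dst = torch.eye(4)[None].repeat(N, 1, 1)                # (N, 4, 4)

flow = warp_grid3d(grid, src_homo_dst)                           # (N, D, H, W, 3)
print(flow.shape)  # torch.Size([2, 2, 3, 4, 3])
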
Example #15
def solve_pnp_dlt(
    world_points: torch.Tensor,
    img_points: torch.Tensor,
    intrinsics: torch.Tensor,
    weights: Optional[torch.Tensor] = None,
    svd_eps: float = 1e-4,
) -> torch.Tensor:
    r"""This function attempts to solve the Perspective-n-Point (PnP)
    problem using Direct Linear Transform (DLT).

    Given a batch (where batch size is :math:`B`) of :math:`N` 3D points
    (where :math:`N \geq 6`) in the world space, a batch of :math:`N`
    corresponding 2D points in the image space and a batch of
    intrinsic matrices, this function tries to estimate a batch of
    world to camera transformation matrices.

    This implementation needs at least 6 points (i.e. :math:`N \geq 6`) to
    provide solutions.

    This function cannot be used if all the 3D world points (of any element
    of the batch) lie on a line or if all the 3D world points (of any element
    of the batch) lie on a plane. This function attempts to check for these
    conditions and throws an AssertionError if found. Do note that this check
    is sensitive to the value of the svd_eps parameter.

    Another bad condition occurs when the camera and the points lie on a
    twisted cubic. However, this function does not check for this condition.

    Args:
        world_points : A tensor with shape :math:`(B, N, 3)` representing
          the points in the world space.
        img_points : A tensor with shape :math:`(B, N, 2)` representing
          the points in the image space.
        intrinsics : A tensor with shape :math:`(B, 3, 3)` representing
          the intrinsic matrices.
        weights : This parameter is not used currently and is just a
          placeholder for API consistency.
        svd_eps : A small float value to avoid numerical precision issues.

    Returns:
        A tensor with shape :math:`(B, 3, 4)` representing the estimated world to
        camera transformation matrices (also known as the extrinsic matrices).

    Example:
        >>> world_points = torch.tensor([[
        ...     [ 5. , -5. ,  0. ], [ 0. ,  0. ,  1.5],
        ...     [ 2.5,  3. ,  6. ], [ 9. , -2. ,  3. ],
        ...     [-4. ,  5. ,  2. ], [-5. ,  5. ,  1. ],
        ... ]], dtype=torch.float64)
        >>>
        >>> img_points = torch.tensor([[
        ...     [1409.1504, -800.936 ], [ 407.0207, -182.1229],
        ...     [ 392.7021,  177.9428], [1016.838 ,   -2.9416],
        ...     [ -63.1116,  142.9204], [-219.3874,   99.666 ],
        ... ]], dtype=torch.float64)
        >>>
        >>> intrinsics = torch.tensor([[
        ...     [ 500.,    0.,  250.],
        ...     [   0.,  500.,  250.],
        ...     [   0.,    0.,    1.],
        ... ]], dtype=torch.float64)
        >>>
        >>> print(world_points.shape, img_points.shape, intrinsics.shape)
        torch.Size([1, 6, 3]) torch.Size([1, 6, 2]) torch.Size([1, 3, 3])
        >>>
        >>> pred_world_to_cam = kornia.geometry.solve_pnp_dlt(world_points, img_points, intrinsics)
        >>>
        >>> print(pred_world_to_cam.shape)
        torch.Size([1, 3, 4])
        >>>
        >>> pred_world_to_cam
        tensor([[[ 0.9392, -0.3432, -0.0130,  1.6734],
                 [ 0.3390,  0.9324, -0.1254, -4.3634],
                 [ 0.0552,  0.1134,  0.9920,  3.7785]]], dtype=torch.float64)
    """
    # This function was implemented based on ideas inspired from multiple references.
    # ============
    # References:
    # ============
    # 1. https://team.inria.fr/lagadic/camera_localization/tutorial-pose-dlt-opencv.html
    # 2. https://github.com/opencv/opencv/blob/68d15fc62edad980f1ffa15ee478438335f39cc3/modules/calib3d/src/calibration.cpp # noqa: E501
    # 3. http://rpg.ifi.uzh.ch/docs/teaching/2020/03_camera_calibration.pdf
    # 4. http://www.cs.cmu.edu/~16385/s17/Slides/11.3_Pose_Estimation.pdf
    # 5. https://www.ece.mcmaster.ca/~shirani/vision/hartley_ch7.pdf

    if not isinstance(world_points, torch.Tensor):
        raise AssertionError(
            f"world_points is not an instance of torch.Tensor. Type of world_points is {type(world_points)}"
        )

    if not isinstance(img_points, torch.Tensor):
        raise AssertionError(
            f"img_points is not an instance of torch.Tensor. Type of img_points is {type(img_points)}"
        )

    if not isinstance(intrinsics, torch.Tensor):
        raise AssertionError(
            f"intrinsics is not an instance of torch.Tensor. Type of intrinsics is {type(intrinsics)}"
        )

    if (weights is not None) and (not isinstance(weights, torch.Tensor)):
        raise AssertionError(
            f"If weights is not None, then weights should be an instance "
            f"of torch.Tensor. Type of weights is {type(weights)}")

    if type(svd_eps) is not float:
        raise AssertionError(
            f"Type of svd_eps is not float. Got {type(svd_eps)}")

    accepted_dtypes = (torch.float32, torch.float64)

    if world_points.dtype not in accepted_dtypes:
        raise AssertionError(
            f"world_points must have one of the following dtypes {accepted_dtypes}. "
            f"Currently it has {world_points.dtype}.")

    if img_points.dtype not in accepted_dtypes:
        raise AssertionError(
            f"img_points must have one of the following dtypes {accepted_dtypes}. "
            f"Currently it has {img_points.dtype}.")

    if intrinsics.dtype not in accepted_dtypes:
        raise AssertionError(
            f"intrinsics must have one of the following dtypes {accepted_dtypes}. "
            f"Currently it has {intrinsics.dtype}.")

    if (len(world_points.shape) != 3) or (world_points.shape[2] != 3):
        raise AssertionError(
            f"world_points must be of shape (B, N, 3). Got shape {world_points.shape}."
        )

    if (len(img_points.shape) != 3) or (img_points.shape[2] != 2):
        raise AssertionError(
            f"img_points must be of shape (B, N, 2). Got shape {img_points.shape}."
        )

    if (len(intrinsics.shape) != 3) or (intrinsics.shape[1:] != (3, 3)):
        raise AssertionError(
            f"intrinsics must be of shape (B, 3, 3). Got shape {intrinsics.shape}."
        )

    if world_points.shape[1] != img_points.shape[1]:
        raise AssertionError(
            "world_points and img_points must have equal number of points.")

    if (world_points.shape[0] != img_points.shape[0]) or (
            world_points.shape[0] != intrinsics.shape[0]):
        raise AssertionError(
            "world_points, img_points and intrinsics must have the same batch size."
        )

    if world_points.shape[1] < 6:
        raise AssertionError(
            f"At least 6 points are required to use this function. "
            f"Got {world_points.shape[1]} points.")

    B, N = world_points.shape[:2]

    # Getting normalized world points.
    world_points_norm, world_transform_norm = _mean_isotropic_scale_normalize(
        world_points)

    # Checking if world_points_norm (of any element of the batch) has rank = 3. This
    # function cannot be used if all world points (of any element of the batch) lie
    # on a line or if all world points (of any element of the batch) lie on a plane.
    _, s, _ = torch.svd(world_points_norm)
    if torch.any(s[:, -1] < svd_eps):
        raise AssertionError(
            f"The last singular value of one/more of the elements of the batch is smaller "
            f"than {svd_eps}. This function cannot be used if all world_points (of any "
            f"element of the batch) lie on a line or if all world_points (of any "
            f"element of the batch) lie on a plane.")

    intrinsics_inv = torch.inverse(intrinsics)
    world_points_norm_h = convert_points_to_homogeneous(world_points_norm)

    # Transforming img_points with intrinsics_inv to get img_points_inv
    img_points_inv = transform_points(intrinsics_inv, img_points)

    # Normalizing img_points_inv
    img_points_norm, img_transform_norm = _mean_isotropic_scale_normalize(
        img_points_inv)
    inv_img_transform_norm = torch.inverse(img_transform_norm)

    # Setting up the system (the matrix A in Ax=0)
    system = torch.zeros((B, 2 * N, 12),
                         dtype=world_points.dtype,
                         device=world_points.device)
    system[:, 0::2, 0:4] = world_points_norm_h
    system[:, 1::2, 4:8] = world_points_norm_h
    system[:, 0::2,
           8:12] = world_points_norm_h * (-1) * img_points_norm[..., 0:1]
    system[:, 1::2,
           8:12] = world_points_norm_h * (-1) * img_points_norm[..., 1:2]

    # Getting the solution vectors.
    _, _, v = torch.svd(system)
    solution = v[..., -1]

    # Reshaping the solution vectors to the correct shape.
    solution = solution.reshape(B, 3, 4)

    # Creating solution_4x4
    solution_4x4 = eye_like(4, solution)
    solution_4x4[:, :3, :] = solution

    # De-normalizing the solution
    intermediate = torch.bmm(solution_4x4, world_transform_norm)
    solution = torch.bmm(inv_img_transform_norm, intermediate[:, :3, :])

    # We obtained one solution for each element of the batch. We may
    # need to multiply each solution with a scalar. This is because
    # if x is a solution to Ax=0, then cx is also a solution. We can
    # find the required scalars by using the properties of
    # rotation matrices. We do this in two parts:

    # First, we fix the sign by making sure that the determinants of
    # all the rotation matrices are non-negative (since the determinant
    # of a rotation matrix should be 1).
    det = torch.det(solution[:, :3, :3])
    ones = torch.ones_like(det)
    sign_fix = torch.where(det < 0, ones * -1, ones)
    solution = solution * sign_fix[:, None, None]

    # Then, we make sure that the norm of the 0th column of each rotation
    # matrix is 1. Do note that the norm of any column of a rotation
    # matrix should be 1. Here we use the 0th column to calculate norm_col.
    # We then multiply solution with mul_factor.
    norm_col = torch.norm(input=solution[:, :3, 0], p=2, dim=1)
    mul_factor = (1 / norm_col)[:, None, None]
    temp = solution * mul_factor

    # To make sure that the rotation matrix would be orthogonal, we apply
    # QR decomposition.
    ortho, right = linalg_qr(temp[:, :3, :3])

    # We may need to fix the signs of the columns of the ortho matrix.
    # If right[i, j, j] is negative, then we need to flip the signs of
    # the column ortho[i, :, j]. The code below performs the necessary
    # operations in a batched way.
    mask = eye_like(3, ortho)
    col_sign_fix = torch.sign(mask * right)
    rot_mat = torch.bmm(ortho, col_sign_fix)

    # Preparing the final output.
    pred_world_to_cam = torch.cat([rot_mat, temp[:, :3, 3:4]], dim=-1)

    # TODO: Implement algorithm to refine the solution.

    return pred_world_to_cam
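
One way to sanity-check the estimate is to synthesize noise-free correspondences from known extrinsics and verify that reprojecting through the recovered pose matches the observed image points (the pose, intrinsics and point cloud below are illustrative):

import torch
import kornia

torch.manual_seed(0)
B, N = 1, 8
world_points = torch.rand(B, N, 3, dtype=torch.float64) * 4.0
world_points[..., 2] += 5.0                                       # keep points in front of the camera
intrinsics = torch.tensor([[[500., 0., 250.], [0., 500., 250.], [0., 0., 1.]]], dtype=torch.float64)

gt = torch.eye(4, dtype=torch.float64)[None]                       # ground-truth extrinsics
gt[:, 0, 3], gt[:, 1, 3] = 0.5, -0.3                               # small translation, identity rotation
cam = kornia.geometry.transform_points(gt, world_points)           # (B, N, 3)
img_points = kornia.geometry.transform_points(intrinsics, cam[..., :2] / cam[..., 2:3])

pred = kornia.geometry.solve_pnp_dlt(world_points, img_points, intrinsics)   # (B, 3, 4)
bottom = torch.tensor([[[0., 0., 0., 1.]]], dtype=torch.float64)
cam_pred = kornia.geometry.transform_points(torch.cat([pred, bottom], dim=1), world_points)
reproj = kornia.geometry.transform_points(intrinsics, cam_pred[..., :2] / cam_pred[..., 2:3])
print(torch.allclose(reproj, img_points, atol=1e-3))  # should print True for this noise-free setup
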
Example #16
def warp_perspective(
    src: torch.Tensor,
    M: torch.Tensor,
    dsize: Tuple[int, int],
    mode: str = 'bilinear',
    padding_mode: str = 'zeros',
    align_corners: Optional[bool] = None,
) -> torch.Tensor:
    r"""Applies a perspective transformation to an image.

    .. image:: https://kornia-tutorials.readthedocs.io/en/latest/_images/warp_perspective_10_2.png

    The function warp_perspective transforms the source image using
    the specified matrix:

    .. math::
        \text{dst} (x, y) = \text{src} \left(
        \frac{M^{-1}_{11} x + M^{-1}_{12} y + M^{-1}_{13}}{M^{-1}_{31} x + M^{-1}_{32} y + M^{-1}_{33}} ,
        \frac{M^{-1}_{21} x + M^{-1}_{22} y + M^{-1}_{23}}{M^{-1}_{31} x + M^{-1}_{32} y + M^{-1}_{33}}
        \right )

    Args:
        src: input image with shape :math:`(B, C, H, W)`.
        M: transformation matrix with shape :math:`(B, 3, 3)`.
        dsize: size of the output image (height, width).
        mode: interpolation mode to calculate output values ``'bilinear'`` | ``'nearest'``.
        padding_mode: padding mode for outside grid values ``'zeros'`` | ``'border'`` | ``'reflection'``.
        align_corners: interpolation flag.

    Returns:
        the warped input image :math:`(B, C, H, W)`.

    Example:
       >>> img = torch.rand(1, 4, 5, 6)
       >>> H = torch.eye(3)[None]
       >>> out = warp_perspective(img, H, (4, 2), align_corners=True)
       >>> print(out.shape)
       torch.Size([1, 4, 4, 2])

    .. note::
        This function is often used in conjunction with :func:`get_perspective_transform`.

    .. note::
        See a working example `here <https://kornia-tutorials.readthedocs.io/en/
        latest/warp_perspective.html>`_.
    """
    if not isinstance(src, torch.Tensor):
        raise TypeError("Input src type is not a torch.Tensor. Got {}".format(
            type(src)))

    if not isinstance(M, torch.Tensor):
        raise TypeError("Input M type is not a torch.Tensor. Got {}".format(
            type(M)))

    if not len(src.shape) == 4:
        raise ValueError("Input src must be a BxCxHxW tensor. Got {}".format(
            src.shape))

    if not (len(M.shape) == 3 and M.shape[-2:] == (3, 3)):
        raise ValueError("Input M must be a Bx3x3 tensor. Got {}".format(
            M.shape))

    # TODO: remove the statement below in kornia v0.6
    if align_corners is None:
        message: str = (
            "The align_corners default value has been changed. By default now is set True "
            "in order to match cv2.warpPerspective. In case you want to keep your previous "
            "behaviour set it to False. This warning will disappear in kornia > v0.6."
        )
        warnings.warn(message)
        # set default value for align corners
        align_corners = True

    B, C, H, W = src.size()
    h_out, w_out = dsize

    # we normalize the 3x3 transformation matrix and convert to 3x4
    dst_norm_trans_src_norm: torch.Tensor = normalize_homography(
        M, (H, W), (h_out, w_out))  # Bx3x3

    src_norm_trans_dst_norm = _torch_inverse_cast(
        dst_norm_trans_src_norm)  # Bx3x3

    # this piece of code substitutes F.affine_grid since it does not support 3x3
    grid = (create_meshgrid(h_out,
                            w_out,
                            normalized_coordinates=True,
                            device=src.device).to(src.dtype).repeat(
                                B, 1, 1, 1))
    grid = transform_points(src_norm_trans_dst_norm[:, None, None], grid)

    return F.grid_sample(src,
                         grid,
                         align_corners=align_corners,
                         mode=mode,
                         padding_mode=padding_mode)
Example #17
def perspective_transform_lafs(trans_01: torch.Tensor,
                               lafs_1: torch.Tensor) -> torch.Tensor:
    r"""Function that applies perspective transformations to a set of local affine frames (LAFs).

    Args:
        trans_01: tensor for perspective transformations of shape :math:`(B, 3, 3)`.
        lafs_1: tensor of lafs of shape :math:`(B, N, 2, 3)`.

    Returns:
        tensor of N-dimensional points of shape :math:`(B, N, 2, 3)`.

    Examples:
        >>> rng = torch.manual_seed(0)
        >>> lafs_1 = torch.rand(2, 4, 2, 3)  # BxNx2x3
        >>> lafs_1
        tensor([[[[0.4963, 0.7682, 0.0885],
                  [0.1320, 0.3074, 0.6341]],
        <BLANKLINE>
                 [[0.4901, 0.8964, 0.4556],
                  [0.6323, 0.3489, 0.4017]],
        <BLANKLINE>
                 [[0.0223, 0.1689, 0.2939],
                  [0.5185, 0.6977, 0.8000]],
        <BLANKLINE>
                 [[0.1610, 0.2823, 0.6816],
                  [0.9152, 0.3971, 0.8742]]],
        <BLANKLINE>
        <BLANKLINE>
                [[[0.4194, 0.5529, 0.9527],
                  [0.0362, 0.1852, 0.3734]],
        <BLANKLINE>
                 [[0.3051, 0.9320, 0.1759],
                  [0.2698, 0.1507, 0.0317]],
        <BLANKLINE>
                 [[0.2081, 0.9298, 0.7231],
                  [0.7423, 0.5263, 0.2437]],
        <BLANKLINE>
                 [[0.5846, 0.0332, 0.1387],
                  [0.2422, 0.8155, 0.7932]]]])
        >>> trans_01 = torch.eye(3).repeat(2, 1, 1)  # Bx3x3
        >>> trans_01.shape
        torch.Size([2, 3, 3])
        >>> lafs_0 = perspective_transform_lafs(trans_01, lafs_1)  # BxNx2x3
    """
    raise_error_if_laf_is_not_valid(lafs_1)
    if not torch.is_tensor(trans_01):
        raise TypeError("Input type is not a torch.Tensor")

    if not trans_01.device == lafs_1.device:
        raise TypeError("Tensor must be in the same device")

    if not trans_01.shape[0] == lafs_1.shape[0]:
        raise ValueError("Input batch size must be the same for both tensors")

    if (not (trans_01.shape[-1] == 3)) or (not (trans_01.shape[-2] == 3)):
        raise ValueError("Transformation should be homography")

    bs, n, _, _ = lafs_1.size()
    # First, we convert LAF to points
    threepts_1 = laf_to_three_points(lafs_1)
    points_1 = threepts_1.permute(0, 1, 3, 2).reshape(bs, n * 3, 2)

    # Then, transform the points
    points_0 = transform_points(trans_01, points_1)

    # Back to LAF format
    threepts_0 = points_0.view(bs, n, 3, 2).permute(0, 1, 3, 2)
    return laf_from_three_points(threepts_0)
Example #18
def undistort_points(points: torch.Tensor, K: torch.Tensor,
                     dist: torch.Tensor) -> torch.Tensor:
    r"""Compensate for lens distortion a set of 2D image points.

    Radial :math:`(k_1, k_2, k_3, k_4, k_5, k_6)`,
    tangential :math:`(p_1, p_2)`, thin prism :math:`(s_1, s_2, s_3, s_4)`, and tilt :math:`(\tau_x, \tau_y)`
    distortion models are considered in this function.

    Args:
        points: Input image points with shape :math:`(*, N, 2)`.
        K: Intrinsic camera matrix with shape :math:`(*, 3, 3)`.
        dist: Distortion coefficients
            :math:`(k_1,k_2,p_1,p_2[,k_3[,k_4,k_5,k_6[,s_1,s_2,s_3,s_4[,\tau_x,\tau_y]]]])`. This is
            a vector with 4, 5, 8, 12 or 14 elements with shape :math:`(*, n)`.

    Returns:
        Undistorted 2D points with shape :math:`(*, N, 2)`.

    Example:
        >>> _ = torch.manual_seed(0)
        >>> x = torch.rand(1, 4, 2)
        >>> K = torch.eye(3)[None]
        >>> dist = torch.rand(1, 4)
        >>> undistort_points(x, K, dist)
        tensor([[[-0.1513, -0.1165],
                 [ 0.0711,  0.1100],
                 [-0.0697,  0.0228],
                 [-0.1843, -0.1606]]])
    """
    if points.dim() < 2 or points.shape[-1] != 2:
        raise ValueError(f'points shape is invalid. Got {points.shape}.')

    if K.shape[-2:] != (3, 3):
        raise ValueError(f'K matrix shape is invalid. Got {K.shape}.')

    if dist.shape[-1] not in [4, 5, 8, 12, 14]:
        raise ValueError(
            f"Invalid number of distortion coefficients. Got {dist.shape[-1]}")

    # Adding zeros to obtain vector with 14 coeffs.
    if dist.shape[-1] < 14:
        dist = torch.nn.functional.pad(dist, [0, 14 - dist.shape[-1]])

    # Convert 2D points from pixels to normalized camera coordinates
    cx: torch.Tensor = K[..., 0:1, 2]  # principal point in x (Bx1)
    cy: torch.Tensor = K[..., 1:2, 2]  # principal point in y (Bx1)
    fx: torch.Tensor = K[..., 0:1, 0]  # focal in x (Bx1)
    fy: torch.Tensor = K[..., 1:2, 1]  # focal in y (Bx1)
    # This is equivalent to K^-1 [u,v,1]^T
    x: torch.Tensor = (points[..., 0] - cx) / fx  # (BxN - Bx1)/Bx1 -> BxN
    y: torch.Tensor = (points[..., 1] - cy) / fy  # (BxN - Bx1)/Bx1 -> BxN

    # Compensate for tilt distortion
    if torch.any(dist[..., 12] != 0) or torch.any(dist[..., 13] != 0):
        inv_tilt = tilt_projection(dist[..., 12], dist[..., 13], True)

        # Transposed untilt points (instead of [x,y,1]^T, we obtain [x,y,1])
        x, y = transform_points(inv_tilt, torch.stack([x, y],
                                                      dim=-1)).unbind(-1)

    # Iteratively undistort points
    x0, y0 = x, y
    for _ in range(5):
        r2 = x * x + y * y

        inv_rad_poly = (1 + dist[..., 5:6] * r2 + dist[..., 6:7] * r2 * r2 +
                        dist[..., 7:8] * r2**3) / (1 + dist[..., 0:1] * r2 +
                                                   dist[..., 1:2] * r2 * r2 +
                                                   dist[..., 4:5] * r2**3)
        deltaX = (2 * dist[..., 2:3] * x * y + dist[..., 3:4] *
                  (r2 + 2 * x * x) + dist[..., 8:9] * r2 +
                  dist[..., 9:10] * r2 * r2)
        deltaY = (dist[..., 2:3] * (r2 + 2 * y * y) +
                  2 * dist[..., 3:4] * x * y + dist[..., 10:11] * r2 +
                  dist[..., 11:12] * r2 * r2)

        x = (x0 - deltaX) * inv_rad_poly
        y = (y0 - deltaY) * inv_rad_poly

    # Convert points from normalized camera coordinates to pixel coordinates
    x = fx * x + cx
    y = fy * y + cy

    return torch.stack([x, y], -1)
Example #19
def warp_frame_depth(
    image_src: torch.Tensor,
    depth_dst: torch.Tensor,
    src_trans_dst: torch.Tensor,
    camera_matrix: torch.Tensor,
    normalize_points: bool = False,
) -> torch.Tensor:
    """Warp a tensor from a source to destination frame by the depth in the destination.

    Compute 3d points from the depth, transform them using given transformation, then project the point cloud to an
    image plane.

    Args:
        image_src: image tensor in the source frame with shape :math:`(B,D,H,W)`.
        depth_dst: depth tensor in the destination frame with shape :math:`(B,1,H,W)`.
        src_trans_dst: transformation matrix from destination to source with shape :math:`(B,4,4)`.
        camera_matrix: tensor containing the camera intrinsics with shape :math:`(B,3,3)`.
        normalize_points: whether to normalise the pointcloud. This must be set to ``True`` when the depth
           is represented as the Euclidean ray length from the camera position.

    Return:
        the warped tensor in the source frame with shape :math:`(B,D,H,W)`.
    """
    if not isinstance(image_src, torch.Tensor):
        raise TypeError(
            f"Input image_src type is not a torch.Tensor. Got {type(image_src)}."
        )

    if not len(image_src.shape) == 4:
        raise ValueError(
            f"Input image_src musth have a shape (B, D, H, W). Got: {image_src.shape}"
        )

    if not isinstance(depth_dst, torch.Tensor):
        raise TypeError(
            f"Input depht_dst type is not a torch.Tensor. Got {type(depth_dst)}."
        )

    if not (len(depth_dst.shape) == 4 and depth_dst.shape[-3] == 1):
        raise ValueError(
            f"Input depth_dst must have a shape (B, 1, H, W). Got: {depth_dst.shape}"
        )

    if not isinstance(src_trans_dst, torch.Tensor):
        raise TypeError(f"Input src_trans_dst type is not a torch.Tensor. "
                        f"Got {type(src_trans_dst)}.")

    if not (len(src_trans_dst.shape) == 3 and src_trans_dst.shape[-2:] == (4, 4)):
        raise ValueError(f"Input src_trans_dst must have a shape (B, 4, 4). "
                         f"Got: {src_trans_dst.shape}.")

    if not isinstance(camera_matrix, torch.Tensor):
        raise TypeError(f"Input camera_matrix type is not a torch.Tensor. "
                        f"Got {type(camera_matrix)}.")

    if not (len(camera_matrix.shape) == 3 and camera_matrix.shape[-2:] == (3, 3)):
        raise ValueError(f"Input camera_matrix must have a shape (B, 3, 3). "
                         f"Got: {camera_matrix.shape}.")
    # unproject the destination points to the 3D camera frame
    points_3d_dst: torch.Tensor = depth_to_3d(depth_dst, camera_matrix,
                                              normalize_points)  # Bx3xHxW

    # transform points from the destination to the source frame
    points_3d_dst = points_3d_dst.permute(0, 2, 3, 1)  # BxHxWx3

    # apply transformation to the 3d points
    points_3d_src = transform_points(src_trans_dst[:, None],
                                     points_3d_dst)  # BxHxWx3

    # project back to pixels
    camera_matrix_tmp: torch.Tensor = camera_matrix[:, None, None]  # Bx1x1x3x3
    points_2d_src: torch.Tensor = project_points(points_3d_src,
                                                 camera_matrix_tmp)  # BxHxWx2

    # normalize points between [-1 / 1]
    height, width = depth_dst.shape[-2:]
    points_2d_src_norm: torch.Tensor = normalize_pixel_coordinates(
        points_2d_src, height, width)  # BxHxWx2

    return F.grid_sample(image_src, points_2d_src_norm,
                         align_corners=True)  # type: ignore
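
A minimal usage sketch with an identity pose and a constant depth map (assuming the function is available as kornia.geometry.depth.warp_frame_depth, which is where this snippet lives in recent kornia releases; all values are illustrative):

import torch
from kornia.geometry.depth import warp_frame_depth

B, C, H, W = 1, 3, 8, 10
image_src = torch.rand(B, C, H, W)
depth_dst = torch.ones(B, 1, H, W) * 2.0                  # constant depth
src_trans_dst = torch.eye(4)[None]                        # identity pose: no camera motion
camera_matrix = torch.tensor([[[50., 0., 5.], [0., 50., 4.], [0., 0., 1.]]])

warped = warp_frame_depth(image_src, depth_dst, src_trans_dst, camera_matrix)
print(warped.shape)  # torch.Size([1, 3, 8, 10])
print(torch.allclose(warped, image_src, atol=1e-4))  # identity pose -> (nearly) identity warp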