Example #1
0
    def project_kitti_box_to_image(
            box: Box, p_left: np.ndarray,
            imsize: Tuple[int, int]) -> Union[None, Tuple[int, int, int, int]]:
        """Projects 3D box into KITTI image FOV.

        Args:
            box: 3D box in KITTI reference frame.
            p_left: <np.float: 3, 4>. Projection matrix.
            imsize: (width, height). Image size.

        Returns: (xmin, ymin, xmax, ymax). Bounding box in image plane or None if box is not in the image.

        """
        # Create a new box.
        box = box.copy()

        # KITTI defines the box center as the bottom center of the object.
        # We use the true center, so we need to adjust half height in negative y direction.
        box.translate(np.array([0, -box.wlh[2] / 2, 0]))

        # Check that some corners are inside the image.
        corners = np.array(
            [corner for corner in box.corners().T if corner[2] > 0]).T
        if len(corners) == 0:
            return None

        # Project corners that are in front of the camera to 2d to get bbox in pixel coords.
        imcorners = view_points(corners, p_left, normalize=True)[:2]
        bbox = (np.min(imcorners[0]), np.min(imcorners[1]),
                np.max(imcorners[0]), np.max(imcorners[1]))

        # Crop bbox to prevent it extending outside image.
        bbox_crop = tuple(max(0, b) for b in bbox)
        bbox_crop = (
            min(imsize[0], bbox_crop[0]),
            min(imsize[0], bbox_crop[1]),
            min(imsize[0], bbox_crop[2]),
            min(imsize[1], bbox_crop[3]),
        )

        # Detect if a cropped box is empty.
        if bbox_crop[0] >= bbox_crop[2] or bbox_crop[1] >= bbox_crop[3]:
            return None

        return bbox_crop
Example #2
0
    def box_nuscenes_to_kitti(
        box: Box,
        velo_to_cam_rot: Quaternion,
        velo_to_cam_trans: np.ndarray,
        r0_rect: Quaternion,
        kitti_to_nu_lidar_inv: Quaternion = Quaternion(axis=(0, 0, 1),
                                                       angle=np.pi /
                                                       2).inverse,
    ) -> Box:
        """Transform from nuScenes lidar frame to KITTI reference frame.

        Args:
            box: Instance in nuScenes lidar frame.
            velo_to_cam_rot: Quaternion to rotate from lidar to camera frame.
            velo_to_cam_trans: <np.float: 3>. Translate from lidar to camera frame.
            r0_rect: Quaternion to rectify camera frame.
            kitti_to_nu_lidar_inv: Quaternion to rotate nuScenes to KITTI LIDAR.

        Returns: Box instance in KITTI reference frame.

        """
        # Copy box to avoid side-effects.
        box = box.copy()

        # Rotate to KITTI lidar.
        box.rotate(kitti_to_nu_lidar_inv)

        # Transform to KITTI camera.
        box.rotate(velo_to_cam_rot)
        box.translate(velo_to_cam_trans)

        # Rotate to KITTI rectified camera.
        box.rotate(r0_rect)

        # KITTI defines the box center as the bottom center of the object.
        # We use the true center, so we need to adjust half height in y direction.
        box.translate(np.array([0, box.wlh[2] / 2, 0]))

        return box
Example #3
0
    def get_boxes(self,
                  token: str,
                  filter_classes: List[str] = None,
                  max_dist: float = None) -> List[Box]:
        """Load up all the boxes associated with a sample.
            Boxes are in nuScenes lidar frame.

        Args:
            token: KittiDB unique id.
            filter_classes: List of Kitti classes to use or None to use all.
            max_dist: List of Kitti classes to use or None to use all.

        Returns: Boxes in nuScenes lidar reference frame.

        """
        # Get transforms for this sample
        transforms = self.get_transforms(token, root=self.root)

        boxes = []
        if token.startswith("test_"):
            # No boxes to return for the test set.
            return boxes

        with open(KittiDB.get_filepath(token, "label_2", root=self.root),
                  "r") as f:
            for line in f:
                # Parse this line into box information.
                parsed_line = self.parse_label_line(line)

                if parsed_line["name"] in {"DontCare", "Misc"}:
                    continue

                center = parsed_line["xyz_camera"]
                wlh = parsed_line["wlh"]
                yaw_camera = parsed_line["yaw_camera"]
                name = parsed_line["name"]
                score = parsed_line["score"]

                # Optional: Filter classes.
                if filter_classes is not None and name not in filter_classes:
                    continue

                # The Box class coord system is oriented the same way as as KITTI LIDAR: x forward, y left, z up.
                # For orientation confer: http://www.cvlibs.net/datasets/kitti/setup.php.

                # 1: Create box in Box coordinate system with center at origin.
                # The second quaternion in yaw_box transforms the coordinate frame from the object frame
                # to KITTI camera frame. The equivalent cannot be naively done afterwards, as it's a rotation
                # around the local object coordinate frame, rather than the camera frame.
                quat_box = Quaternion(axis=(0, 1, 0),
                                      angle=yaw_camera) * Quaternion(
                                          axis=(1, 0, 0), angle=np.pi / 2)
                box = Box([0.0, 0.0, 0.0], wlh, quat_box, name=name)

                # 2: Translate: KITTI defines the box center as the bottom center of the vehicle. We use true center,
                # so we need to add half height in negative y direction, (since y points downwards), to adjust. The
                # center is already given in camera coord system.
                box.translate(center + np.array([0, -wlh[2] / 2, 0]))

                # 3: Transform to KITTI LIDAR coord system. First transform from rectified camera to camera, then
                # camera to KITTI lidar.

                box.rotate(Quaternion(matrix=transforms["r0_rect"]).inverse)
                box.translate(-transforms["velo_to_cam"]["T"])
                box.rotate(
                    Quaternion(matrix=transforms["velo_to_cam"]["R"]).inverse)
                # 4: Transform to nuScenes LIDAR coord system.
                box.rotate(self.kitti_to_nu_lidar)

                # Set score or NaN.
                box.score = score

                # Set dummy velocity.
                box.velocity = np.array((0.0, 0.0, 0.0))

                # Optional: Filter by max_dist
                if max_dist is not None:
                    dist = np.sqrt(np.sum(box.center[:2]**2))
                    if dist > max_dist:
                        continue

                boxes.append(box)

        return boxes