Example #1
def select_annotation_boxes(sample_token, lyftd: LyftDataset, box_vis_level: BoxVisibility = BoxVisibility.ALL,
                            camera_type=['CAM_FRONT', 'CAM_BACK', 'CAM_FRONT_LEFT', 'CAM_FRONT_RIGHT', 'CAM_BACK_RIGHT',
                                         'CAM_BACK_LEFT']) -> (str, str, Box):
    """
    Select annotations that is a camera image defined by box_vis_level


    :param sample_token:
    :param box_vis_level:BoxVisbility.ALL or BoxVisibility.ANY
    :param camera_type: a list of camera that the token should be selected from
    :return: yield (sample_token,cam_token, Box)
    """
    sample_record = lyftd.get('sample', sample_token)

    cams = [key for key in sample_record["data"].keys() if "CAM" in key]
    cams = [cam for cam in cams if cam in camera_type]
    for ann_token in sample_record['anns']:
        for cam in cams:
            cam_token = sample_record["data"][cam]
            _, boxes_in_sensor_coord, cam_intrinsic = lyftd.get_sample_data(
                cam_token, box_vis_level=box_vis_level, selected_anntokens=[ann_token]
            )
            if len(boxes_in_sensor_coord) > 0:
                assert len(boxes_in_sensor_coord) == 1
                box_in_world_coord = lyftd.get_box(boxes_in_sensor_coord[0].token)
                yield sample_token, cam_token, box_in_world_coord
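
A minimal usage sketch for the generator above (not from the original source); the dataset paths and the CAM_FRONT-only filter are placeholder assumptions:

from lyft_dataset_sdk.lyftdataset import LyftDataset
from lyft_dataset_sdk.utils.geometry_utils import BoxVisibility

# Hypothetical paths; adjust to wherever the Lyft data and json tables live.
lyftd = LyftDataset(data_path="/data/lyft", json_path="/data/lyft/train_data", verbose=False)
first_sample_token = lyftd.scene[0]["first_sample_token"]

for sample_token, cam_token, box in select_annotation_boxes(
        first_sample_token, lyftd, box_vis_level=BoxVisibility.ALL,
        camera_type=['CAM_FRONT']):
    # Each yielded box is in world coordinates; cam_token identifies the image it is visible in.
    print(cam_token, box.center, box.wlh)
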
Example #2
def select_annotation_boxes(sample_token, lyftd: LyftDataset, box_vis_level: BoxVisibility = BoxVisibility.ALL,
                            camera_type=['CAM_FRONT', 'CAM_BACK','CAM_FRONT_LEFT','CAM_FRONT_RIGHT','CAM_BACK_RIGHT','CAM_BACK_LEFT']) -> (str, str, Box):
    """
    Select annotations that is a camera image defined by box_vis_level


    :param sample_token:
    :param box_vis_level:BoxVisbility.ALL or BoxVisibility.ANY
    :param camera_type: a list of camera that the token should be selected from
    :return: yield (sample_token,cam_token, Box)
    """
    sample_record = lyftd.get('sample', sample_token)

    cams = [key for key in sample_record["data"].keys() if "CAM" in key]
    cams = [cam for cam in cams if cam in camera_type]
    for cam in cams:

        # This step selects all the annotations in a camera image that matches box_vis_level
        cam_token = sample_record["data"][cam]
        image_filepath, boxes_in_sensor_coord, cam_intrinsic = lyftd.get_sample_data(
            cam_token, box_vis_level=box_vis_level, selected_anntokens=sample_record['anns']
        )

        sd_record = lyftd.get('sample_data', cam_token)
        img_width = sd_record['width']  # typically 1920
        img_height = sd_record['height']  # typically 1080

        # Each row holds the 4 values of the 2D bounding box returned by
        # get_2d_corners_from_projected_box_coordinates for one annotation.
        CORNER_NUMBER = 4
        corner_list = np.empty((len(boxes_in_sensor_coord), CORNER_NUMBER))
        for idx, box_in_sensor_coord in enumerate(boxes_in_sensor_coord):
            # For perspective projection, normalization should be set to True
            box_corners_on_image = view_points(box_in_sensor_coord.corners(), view=cam_intrinsic, normalize=True)

            corners_2d = get_2d_corners_from_projected_box_coordinates(box_corners_on_image)
            corner_list[idx, :] = corners_2d

        yield image_filepath, cam_token, corner_list, boxes_in_sensor_coord, img_width, img_height
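
A hedged sketch of how this variant might be consumed to draw the projected boxes; lyftd and first_sample_token are assumed to be set up as in the sketch after Example #1, and corner_list rows are assumed to hold (x_min, y_min, x_max, y_max):

import matplotlib.pyplot as plt
import matplotlib.patches as patches
from PIL import Image

for image_filepath, cam_token, corner_list, boxes, img_w, img_h in select_annotation_boxes(
        first_sample_token, lyftd, camera_type=['CAM_FRONT']):
    fig, ax = plt.subplots(1, 1, figsize=(12, 7))
    ax.imshow(Image.open(image_filepath))
    # Assumed corner layout: (x_min, y_min, x_max, y_max) per row of corner_list.
    for x_min, y_min, x_max, y_max in corner_list:
        ax.add_patch(patches.Rectangle((x_min, y_min), x_max - x_min, y_max - y_min,
                                       fill=False, edgecolor='red'))
    plt.show()
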
class KittiConverter:
    def __init__(self, store_dir: str = "~/lyft_kitti/train/"):
        """

        Args:
            store_dir: Where to write the KITTI-style annotations.
        """
        self.store_dir = Path(store_dir).expanduser()

        # Create store_dir.
        if not self.store_dir.is_dir():
            self.store_dir.mkdir(parents=True)

    def nuscenes_gt_to_kitti(
        self,
        lyft_dataroot: str,
        table_folder: str,
        store_dataroot: str,
        lidar_name: str = "LIDAR_TOP",
        get_all_detections: bool = False,
        parallel_n_jobs: int = 8,
        samples_count: Optional[int] = None,
    ) -> None:
        """Converts nuScenes GT formatted annotations to KITTI format.
        Args:
            lyft_dataroot: folder with tables (json files).
            table_folder: folder with tables (json files).
            lidar_name: Name of the lidar sensor.
                Only one lidar allowed at this moment.
            get_all_detections: If True, will write all
                bboxes in PointCloud and use only FrontCamera.
            parallel_n_jobs: Number of threads to parralel processing.
            samples_count: Number of samples to convert.
        """
        self.lyft_dataroot = lyft_dataroot
        self.table_folder = table_folder
        self.lidar_name = lidar_name
        self.get_all_detections = get_all_detections
        self.samples_count = samples_count
        self.parallel_n_jobs = parallel_n_jobs
        self.store_dir = Path(store_dataroot)

        if not self.store_dir.is_dir():
            self.store_dir.mkdir(parents=True)

        # Load the dataset.
        self.lyft_ds = LyftDataset(self.lyft_dataroot, self.table_folder)

        self.kitti_to_nu_lidar = Quaternion(axis=(0, 0, 1), angle=np.pi)
        self.kitti_to_nu_lidar_inv = self.kitti_to_nu_lidar.inverse

        # Get assignment of scenes to splits.
        split_logs = [
            self.lyft_ds.get("log", scene["log_token"])["logfile"]
            for scene in self.lyft_ds.scene
        ]
        if self.get_all_detections:
            self.cams_to_see = ["CAM_FRONT"]
        else:
            self.cams_to_see = [
                "CAM_FRONT",
                "CAM_FRONT_LEFT",
                "CAM_FRONT_RIGHT",
                "CAM_BACK",
                "CAM_BACK_LEFT",
                "CAM_BACK_RIGHT",
            ]

        # Create output folders.
        self.label_folder = self.store_dir.joinpath("label_2")
        self.calib_folder = self.store_dir.joinpath("calib")
        self.image_folder = self.store_dir.joinpath("image_2")
        self.lidar_folder = self.store_dir.joinpath("velodyne")
        for folder in [
                self.label_folder, self.calib_folder, self.image_folder,
                self.lidar_folder
        ]:
            if not folder.is_dir():
                folder.mkdir(parents=True)

        # Use only the samples from the current split.
        sample_tokens = self._split_to_samples(split_logs)
        if self.samples_count is not None:
            sample_tokens = sample_tokens[:self.samples_count]

        with parallel_backend("threading", n_jobs=self.parallel_n_jobs):
            Parallel()(delayed(self.process_token_to_kitti)(sample_token)
                       for sample_token in tqdm(sample_tokens))

    def process_token_to_kitti(self, sample_token: str) -> None:
        # Get sample data.
        sample = self.lyft_ds.get("sample", sample_token)
        sample_annotation_tokens = sample["anns"]

        lidar_token = sample["data"][self.lidar_name]
        sd_record_lid = self.lyft_ds.get("sample_data", lidar_token)
        cs_record_lid = self.lyft_ds.get(
            "calibrated_sensor", sd_record_lid["calibrated_sensor_token"])
        for cam_name in self.cams_to_see:
            cam_front_token = sample["data"][cam_name]
            if self.get_all_detections:
                token_to_write = sample_token
            else:
                token_to_write = cam_front_token

            # Retrieve sensor records.
            sd_record_cam = self.lyft_ds.get("sample_data", cam_front_token)
            cs_record_cam = self.lyft_ds.get(
                "calibrated_sensor", sd_record_cam["calibrated_sensor_token"])
            cam_height = sd_record_cam["height"]
            cam_width = sd_record_cam["width"]
            imsize = (cam_width, cam_height)

            # Combine transformations and convert to KITTI format.
            # Note: cam uses same conventions in KITTI and nuScenes.
            lid_to_ego = transform_matrix(cs_record_lid["translation"],
                                          Quaternion(
                                              cs_record_lid["rotation"]),
                                          inverse=False)
            ego_to_cam = transform_matrix(cs_record_cam["translation"],
                                          Quaternion(
                                              cs_record_cam["rotation"]),
                                          inverse=True)
            velo_to_cam = np.dot(ego_to_cam, lid_to_ego)

            # Convert from KITTI to nuScenes LIDAR coordinates, where we apply velo_to_cam.
            velo_to_cam_kitti = np.dot(
                velo_to_cam, self.kitti_to_nu_lidar.transformation_matrix)

            # Currently not used.
            imu_to_velo_kitti = np.zeros((3, 4))  # Dummy values.
            r0_rect = Quaternion(axis=[1, 0, 0], angle=0)  # Dummy values.

            # Projection matrix.
            p_left_kitti = np.zeros((3, 4))
            # Cameras are always rectified.
            p_left_kitti[:3, :3] = cs_record_cam["camera_intrinsic"]

            # Create KITTI style transforms.
            velo_to_cam_rot = velo_to_cam_kitti[:3, :3]
            velo_to_cam_trans = velo_to_cam_kitti[:3, 3]

            # Check that the rotation has the same format as in KITTI.
            if self.lyft_ds.get(
                    "sensor",
                    cs_record_cam["sensor_token"])["channel"] == "CAM_FRONT":
                expected_kitti_velo_to_cam_rot = np.array([[0, -1, 0],
                                                           [0, 0, -1],
                                                           [1, 0, 0]])
                assert (
                    velo_to_cam_rot.round(0) == expected_kitti_velo_to_cam_rot
                ).all(), velo_to_cam_rot.round(0)
            assert (velo_to_cam_trans[1:3] < 0).all()

            # Retrieve the token from the lidar.
            # Note that this may be confusing as the filename of the camera will
            # include the timestamp of the lidar,
            # not the camera.
            filename_cam_full = sd_record_cam["filename"]
            filename_lid_full = sd_record_lid["filename"]

            # Convert image (jpg to png).
            src_im_path = self.lyft_ds.data_path.joinpath(filename_cam_full)
            dst_im_path = self.image_folder.joinpath(f"{token_to_write}.png")
            if not dst_im_path.exists():
                im = Image.open(src_im_path)
                im.save(dst_im_path, "PNG")

            # Convert lidar.
            # Note that we are only using a single sweep, instead of the commonly used n sweeps.
            src_lid_path = self.lyft_ds.data_path.joinpath(filename_lid_full)
            dst_lid_path = self.lidar_folder.joinpath(f"{token_to_write}.bin")

            pcl = LidarPointCloud.from_file(Path(src_lid_path))
            # In KITTI lidar frame.
            pcl.rotate(self.kitti_to_nu_lidar_inv.rotation_matrix)
            with open(dst_lid_path, "wb") as lid_file:
                pcl.points.T.tofile(lid_file)

            # Create calibration file.
            kitti_transforms = dict()
            kitti_transforms["P0"] = np.zeros((3, 4))  # Dummy values.
            kitti_transforms["P1"] = np.zeros((3, 4))  # Dummy values.
            kitti_transforms["P2"] = p_left_kitti  # Left camera transform.
            kitti_transforms["P3"] = np.zeros((3, 4))  # Dummy values.
            # Cameras are already rectified.
            kitti_transforms["R0_rect"] = r0_rect.rotation_matrix
            kitti_transforms["Tr_velo_to_cam"] = np.hstack(
                (velo_to_cam_rot, velo_to_cam_trans.reshape(3, 1)))
            kitti_transforms["Tr_imu_to_velo"] = imu_to_velo_kitti
            calib_path = self.calib_folder.joinpath(f"{token_to_write}.txt")

            with open(calib_path, "w") as calib_file:
                for (key, val) in kitti_transforms.items():
                    val = val.flatten()
                    val_str = "%.12e" % val[0]
                    for v in val[1:]:
                        val_str += " %.12e" % v
                    calib_file.write("%s: %s\n" % (key, val_str))

            # Write label file.
            label_path = self.label_folder.joinpath(f"{token_to_write}.txt")
            if label_path.exists():
                print("Skipping existing file: %s" % label_path)
                continue
            with open(label_path, "w") as label_file:
                for sample_annotation_token in sample_annotation_tokens:
                    sample_annotation = self.lyft_ds.get(
                        "sample_annotation", sample_annotation_token)

                    # Get box in LIDAR frame.
                    _, box_lidar_nusc, _ = self.lyft_ds.get_sample_data(
                        lidar_token,
                        box_vis_level=BoxVisibility.NONE,
                        selected_anntokens=[sample_annotation_token])
                    box_lidar_nusc = box_lidar_nusc[0]

                    # Truncated: Set all objects to 0 which means untruncated.
                    truncated = 0.0

                    # Occluded: Set all objects to full visibility as this information is
                    # not available in nuScenes.
                    occluded = 0

                    detection_name = sample_annotation["category_name"]

                    # Convert from nuScenes to KITTI box format.
                    box_cam_kitti = KittiDB.box_nuscenes_to_kitti(
                        box_lidar_nusc, Quaternion(matrix=velo_to_cam_rot),
                        velo_to_cam_trans, r0_rect)

                    # Project 3d box to 2d box in image, ignore box if it does not fall inside.
                    bbox_2d = KittiDB.project_kitti_box_to_image(box_cam_kitti,
                                                                 p_left_kitti,
                                                                 imsize=imsize)
                    if bbox_2d is None and not self.get_all_detections:
                        continue
                    elif bbox_2d is None and self.get_all_detections:
                        # default KITTI bbox
                        bbox_2d = (-1.0, -1.0, -1.0, -1.0)

                    # Set dummy score so we can use this file as result.
                    box_cam_kitti.score = 0

                    # Convert box to output string format.
                    output = KittiDB.box_to_string(
                        name=detection_name,
                        box=box_cam_kitti,
                        bbox_2d=bbox_2d,
                        truncation=truncated,
                        occlusion=occluded,
                    )

                    # Write to disk.
                    label_file.write(output + "\n")

    def render_kitti(self,
                     render_2d: bool = False,
                     store_dataroot: str = "~/lyft_kitti/train/"):
        """Renders the annotations in the KITTI dataset from a lidar and a camera view.
        Args:
            render_2d: Whether to render 2d boxes (only works for camera data).
        Returns:
        """
        if render_2d:
            print("Rendering 2d boxes from KITTI format")
        else:
            print("Rendering 3d boxes projected from 3d KITTI format")

        self.store_dir = Path(store_dataroot)

        # Load the KITTI dataset.
        kitti = KittiDB(root=self.store_dir)

        # Create output folder.
        render_dir = self.store_dir.joinpath("render")
        if not render_dir.is_dir():
            render_dir.mkdir(parents=True)

        # Render each image.
        tokens = kitti.tokens
        i = 0

        # currently supports only single thread processing
        for token in tqdm(tokens):

            for sensor in ["lidar", "camera"]:
                out_path = render_dir.joinpath(f"{token}_{sensor}.png")
                kitti.render_sample_data(token,
                                         sensor_modality=sensor,
                                         out_path=out_path,
                                         render_2d=render_2d)
                # Close the windows to avoid a warning of too many open windows.
                plt.close()

            # Render only the first few samples.
            if i > 10:
                break
            i += 1

    def _split_to_samples(self, split_logs: List[str]) -> List[str]:
        """Convenience function to get the samples in a particular split.

        Args:
            split_logs: A list of the log names in this split.

        Returns: The list of samples.

        """
        samples = []
        for sample in self.lyft_ds.sample:
            scene = self.lyft_ds.get("scene", sample["scene_token"])
            log = self.lyft_ds.get("log", scene["log_token"])
            logfile = log["logfile"]
            if logfile in split_logs:
                samples.append(sample["token"])
        return samples
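
A minimal driver for the converter class above; every path here is a placeholder and the keyword values are illustrative choices:

converter = KittiConverter(store_dir="~/lyft_kitti/train/")
converter.nuscenes_gt_to_kitti(
    lyft_dataroot="/data/lyft",               # raw data (images, lidar, maps)
    table_folder="/data/lyft/train_data",     # json tables
    store_dataroot="/data/lyft_kitti/train",  # KITTI-style output folder
    get_all_detections=True,
    parallel_n_jobs=4,
    samples_count=100,
)
converter.render_kitti(render_2d=False, store_dataroot="/data/lyft_kitti/train")
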
Example #4
def render_sample_data(sample_data_token: str,
                       with_anns: bool = True,
                       box_vis_level: BoxVisibility = BoxVisibility.ANY,
                       axes_limit: float = 40,
                       ax: Axes = None,
                       num_sweeps: int = 1,
                       out_path: str = None,
                       underlay_map: bool = False,
                       detections: list = [],
                       categories: list = [],
                       valLyft: LyftDataset = None):
    """Render sample data onto axis.

    Args:
        sample_data_token: Sample_data token.
        with_anns: Whether to draw annotations.
        box_vis_level: If sample_data is an image, this sets required visibility for boxes.
        axes_limit: Axes limit for lidar and radar (measured in meters).
        ax: Axes onto which to render.
        num_sweeps: Number of sweeps for lidar and radar.
        out_path: Optional path to save the rendered figure to disk.
        underlay_map: When set to true, LIDAR data is plotted onto the map. This can be slow.

    """

    # Get sensor modality.
    sd_record = valLyft.get("sample_data", sample_data_token)
    sensor_modality = sd_record["sensor_modality"]

    if sensor_modality == "camera":
        # Load boxes and image.
        data_path, _, camera_intrinsic = valLyft.get_sample_data(
            sample_data_token, box_vis_level=box_vis_level)
        data = Image.open(data_path)

        # Init axes.
        if ax is None:
            _, ax = plt.subplots(1, 1, figsize=(9, 16))

        # Show image.
        ax.imshow(data)
        #categories = ['car', 'pedestrian', 'truck', 'bicycle', 'bus', 'other_vehicle', 'motorcycle', 'emergency_vehicle', 'animal']
        # Show boxes.
        if with_anns:
            boxes = []
            for c1, detection in enumerate(detections):
                cat = categories[c1]
                box = Box(detection[:3],
                          detection[3:6],
                          Quaternion(np.array(detection[6:10])),
                          name=cat)
                boxes.append(box)
            for box in boxes:
                c = np.array(get_color(box.name)) / 255.0
                box.render(ax,
                           view=camera_intrinsic,
                           normalize=True,
                           colors=(c, c, c))

        # Limit visible range.
        ax.set_xlim(0, data.size[0])
        ax.set_ylim(data.size[1], 0)

    ax.axis("off")
    ax.set_title(sd_record["channel"])
    ax.set_aspect("equal")

    if out_path is not None:
        num = len([name for name in os.listdir(out_path)])
        out_path = out_path + str(num).zfill(
            5) + "_" + sample_data_token + ".png"
        plt.savefig(out_path)
        plt.close("all")
        return out_path
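
A hypothetical call to the function above; lyft_data and cam_token are assumed to be set up as in the script that follows, detections/categories are placeholder model outputs, and out_path must point to an existing directory:

# Each detection is assumed to hold [x, y, z, w, l, h, qw, qx, qy, qz, ...].
saved_path = render_sample_data(cam_token,
                                with_anns=True,
                                detections=detections,
                                categories=categories,
                                out_path="./renders/",
                                valLyft=lyft_data)
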
lyft_data.render_sample_3d_interactive(sample['token'])

# Sample data
sample_data = sample['data']
# print("sample data keys are, ", sample_data.keys())

# One sensor in sample data
# lidar: type 1
lidar_top_channel = 'LIDAR_TOP'
lidar_token = sample_data[lidar_top_channel]
lidar_record = lyft_data.get('sample_data', lidar_token)
# print("lidar record keys: \n", lidar_record.keys())
# start = time.time()
# lyft_data.render_sample_data(lidar_token, out_path=cfg.data.image)
# print("lidar render time: {:.3f}".format(time.time() - start))
data_path, boxes, camera_intrinsic = lyft_data.get_sample_data(lidar_token)
pc = LidarPointCloud.from_file(data_path)
cs_record = lyft_data.get("calibrated_sensor",
                          lidar_record["calibrated_sensor_token"])
# Points live in the point sensor frame
# Transform the points to the ego vehicle frame
pc.rotate(Quaternion(cs_record["rotation"]).rotation_matrix)
pc.translate(np.array(cs_record["translation"]))

# cam: type 2
cam_front_channel = 'CAM_FRONT'
cam_token = sample_data[cam_front_channel]
# start = time.time()
# lyft_data.render_sample_data(cam_token, out_path=cfg.data.image)
# print("image render time: {:.3f}".format(time.time() - start))
cam_record = lyft_data.get('sample_data', cam_token)
cam_cs_record = lyft_data.get("calibrated_sensor",
                              cam_record["calibrated_sensor_token"])
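
As a hedged continuation (not in the original), the ego-frame point cloud built above could be moved into the camera frame and projected onto the image plane, using the camera calibration record just fetched:

from lyft_dataset_sdk.utils.geometry_utils import view_points

# Ego frame -> camera frame: invert the camera's calibrated_sensor transform.
pc.translate(-np.array(cam_cs_record["translation"]))
pc.rotate(Quaternion(cam_cs_record["rotation"]).rotation_matrix.T)

# Perspective projection onto the image plane; points with depth <= 0 lie behind
# the camera and could be filtered out using `depths`.
depths = pc.points[2, :]
points_2d = view_points(pc.points[:3, :],
                        np.array(cam_cs_record["camera_intrinsic"]),
                        normalize=True)
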
Example #6
def convert(dir_output: Path, lidar_range: int, is_train: bool):

    dir_output.mkdir(exist_ok=True)

    dataset_type = 'train' if is_train else 'test'

    list_token = list()
    list_x = list()
    list_y = list()
    list_z = list()
    list_length = list()
    list_width = list()
    list_height = list()
    list_rotate = list()
    list_name = list()

    if is_train:
        dataset_df = pd.read_csv('../../dataset/train.csv')
    else:
        dataset_df = pd.read_csv('../../dataset/sample_submission.csv')

    tokens = dataset_df['Id'].tolist()

    # tokens = tokens[:200]

    list_jobs = list()

    num_core = cpu_count()
    random.shuffle(tokens)
    num_batch = -(-len(tokens) // num_core)  # ceiling division: tokens per worker process

    img_size = 2048

    for c in range(num_core):

        p = Process(target=process_job,
                    args=(dataset_type, dir_output,
                          tokens[num_batch * c:num_batch * (c + 1)],
                          lidar_range, img_size, c))
        list_jobs.append(p)

    for job in list_jobs:
        job.start()

    for job in list_jobs:
        job.join()

    if is_train:

        level5data = LyftDataset(
            data_path='../../dataset/{}'.format(dataset_type),
            json_path='../../dataset/{}/data/'.format(dataset_type),
            verbose=True)

        for token in tqdm(tokens):

            my_sample = level5data.get('sample', token)
            my_sample_data = level5data.get('sample_data',
                                            my_sample['data']['LIDAR_TOP'])

            try:
                _, boxes, _ = level5data.get_sample_data(
                    my_sample_data['token'])
            except Exception:
                print('failed to load: {}'.format(token))
                continue

            for box in boxes:

                # Map the box center from metric lidar coordinates in
                # [-lidar_range, lidar_range] to pixel coordinates on the img_size grid.
                x = round((box.center[0] + lidar_range) * (img_size - 1) /
                          (lidar_range * 2))
                y = round((box.center[1] + lidar_range) * (img_size - 1) /
                          (lidar_range * 2))
                z = round((box.center[2] + lidar_range) * (img_size - 1) /
                          (lidar_range * 2))

                rotate = -box.orientation.yaw_pitch_roll[0]

                if rotate > math.pi / 2:
                    rotate -= math.pi
                if rotate < -math.pi / 2:
                    rotate += math.pi

                width = box.wlh[0] * (img_size - 1) / (lidar_range * 2)
                length = box.wlh[1] * (img_size - 1) / (lidar_range * 2)
                height = box.wlh[2] * (img_size - 1) / (lidar_range * 2)

                list_token.append(token)
                list_x.append(x)
                list_y.append(y)
                list_z.append(z)
                list_length.append(length)
                list_width.append(width)
                list_height.append(height)
                list_rotate.append(rotate)
                list_name.append(box.name)

        result = pd.DataFrame()
        result['token'] = list_token
        result['x'] = list_x
        result['y'] = list_y
        result['z'] = list_z
        result['length'] = list_length
        result['width'] = list_width
        result['height'] = list_height
        result['rotate'] = list_rotate
        result['name'] = list_name

        result.to_csv(dir_output.parent / 'coordinates.csv',
                      index=False,
                      float_format='%.4f')
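
A possible entry point for the converter above (argument values are illustrative; process_job and the ../../dataset CSVs are assumed to exist):

from pathlib import Path

if __name__ == '__main__':
    convert(dir_output=Path('../../dataset/bev_train'),
            lidar_range=100,
            is_train=True)
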