Example #1
def generateXYZRGB(ds: LyftDataset, sample_token: str, config: dict,
                   folder: str):

    sample = ds.get("sample", sample_token)

    sample_lidar_token = sample["data"]["LIDAR_TOP"]
    lidar_data = ds.get("sample_data", sample_lidar_token)
    lidar_filepath = ds.get_sample_data_path(sample_lidar_token)

    ego_pose = ds.get("ego_pose", lidar_data["ego_pose_token"])
    calibrated_sensor = ds.get("calibrated_sensor",
                               lidar_data["calibrated_sensor_token"])

    global_from_car = transform_matrix(ego_pose['translation'],
                                       Quaternion(ego_pose['rotation']),
                                       inverse=False)

    car_from_sensor = transform_matrix(calibrated_sensor['translation'],
                                       Quaternion(
                                           calibrated_sensor['rotation']),
                                       inverse=False)

    try:
        lidar_pointcloud = LidarPointCloud.from_file(lidar_filepath)
        pc = copy.deepcopy(lidar_pointcloud)
        lidar_pointcloud.transform(car_from_sensor)

    except Exception as e:
        print("Failed to load Lidar Pointcloud for {}: {}:".format(
            sample_token, e))
        return

    XYZRGB = getXYZRGB(ds, pc, sample, lidar_data)
    # save the colored point cloud array alongside the lidar token
    np.savez(folder + sample_lidar_token + ".xyzrgb", XYZRGB)
Example #2
def get_sensor_to_world_transform_matrix_from_sample_data_token(sample_data_token, lyftd: LyftDataset):
    sd_record = lyftd.get("sample_data", sample_data_token)
    cs_record = lyftd.get("calibrated_sensor", sd_record["calibrated_sensor_token"])
    sensor_record = lyftd.get("sensor", cs_record["sensor_token"])
    pose_record = lyftd.get("ego_pose", sd_record["ego_pose_token"])

    sensor_to_ego_mtx = transform_matrix(translation=np.array(cs_record["translation"]),
                                         rotation=Quaternion(cs_record["rotation"]))

    ego_to_world_mtx = transform_matrix(translation=np.array(pose_record["translation"]),
                                        rotation=Quaternion(pose_record["rotation"]))

    return np.dot(ego_to_world_mtx, sensor_to_ego_mtx)
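A minimal usage sketch for the matrix above, assuming the lyftd handle and a sample_data_token as in the function signature; LidarPointCloud.transform applies a 4x4 homogeneous matrix in place.

# Hedged sketch: move the raw lidar cloud straight into world coordinates.
lidar_path = lyftd.get_sample_data_path(sample_data_token)
pc = LidarPointCloud.from_file(lidar_path)
sensor_to_world = get_sensor_to_world_transform_matrix_from_sample_data_token(
    sample_data_token, lyftd)
pc.transform(sensor_to_world)  # pc.points[:3, :] are now in the global/world frame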
Example #3
    def __getitem__(self, item):
        sample_idx = item // len(CAMS)
        sample = self.lyft_data.sample[sample_idx]

        sd_lidar = self.lyft_data.get('sample_data',
                                      sample['data']['LIDAR_TOP'])
        cs_lidar = self.lyft_data.get('calibrated_sensor',
                                      sd_lidar['calibrated_sensor_token'])

        pc = LidarPointCloud.from_file(self.lyft_data.data_path /
                                       sd_lidar['filename'])

        cam = CAMS[item % len(CAMS)]
        cam_token = sample['data'][cam]
        sd_cam = self.lyft_data.get('sample_data', cam_token)
        cs_cam = self.lyft_data.get('calibrated_sensor',
                                    sd_cam['calibrated_sensor_token'])

        img = Image.open(str(self.lyft_data.data_path / sd_cam["filename"]))
        img = transform(img)

        lidar_2_ego = transform_matrix(cs_lidar['translation'],
                                       Quaternion(cs_lidar['rotation']),
                                       inverse=False)
        ego_2_cam = transform_matrix(cs_cam['translation'],
                                     Quaternion(cs_cam['rotation']),
                                     inverse=True)
        cam_2_bev = Quaternion(axis=[1, 0, 0],
                               angle=-np.pi / 2).transformation_matrix
        # lidar_2_cam = ego_2_cam @ lidar_2_ego
        lidar_2_bevcam = cam_2_bev @ ego_2_cam @ lidar_2_ego

        points = view_points(pc.points[:3, :], lidar_2_bevcam, normalize=False)
        points = clip_points(points)

        points = pd.DataFrame(np.swapaxes(points, 0, 1),
                              columns=['x', 'y', 'z'])
        points = PyntCloud(points)
        voxelgrid_id = points.add_structure("voxelgrid",
                                            size_x=0.1,
                                            size_y=0.1,
                                            size_z=0.2,
                                            regular_bounding_box=False)
        voxelgrid = points.structures[voxelgrid_id]

        occ_map = voxelgrid.get_feature_vector(mode='binary')
        padded_occ = np.zeros((800, 700, 30))
        padded_occ[:occ_map.shape[0], :occ_map.shape[1],
                   :occ_map.shape[2]] = occ_map

        return img, padded_occ
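A brief, hedged usage sketch for the dataset item above; the class name LyftBevDataset and its constructor arguments are hypothetical, only the return signature comes from the code.

# Hedged sketch: pull one element and inspect the returned shapes.
dataset = LyftBevDataset(lyft_data=level5data)  # hypothetical constructor
img, padded_occ = dataset[0]                    # first sample, first camera in CAMS
print(img.shape)                                # depends on the `transform` pipeline
print(padded_occ.shape)                         # (800, 700, 30) occupancy volume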
Example #4
def prepare_training_data_for_scene(level5data, first_sample_token, output_folder, bev_shape, voxel_size, z_offset, box_scale, classes):
    """
    Given a first sample token (in a scene), output rasterized input volumes and targets in birds-eye-view perspective.

    """
    sample_token = first_sample_token

    while sample_token:

        sample = level5data.get("sample", sample_token)

        sample_lidar_token = sample["data"]["LIDAR_TOP"]
        lidar_data = level5data.get("sample_data", sample_lidar_token)
        lidar_filepath = level5data.get_sample_data_path(sample_lidar_token)

        ego_pose = level5data.get("ego_pose", lidar_data["ego_pose_token"])
        calibrated_sensor = level5data.get("calibrated_sensor", lidar_data["calibrated_sensor_token"])

        global_from_car = transform_matrix(ego_pose['translation'],
                                           Quaternion(ego_pose['rotation']), inverse=False)

        car_from_sensor = transform_matrix(calibrated_sensor['translation'], Quaternion(calibrated_sensor['rotation']),
                                           inverse=False)

        try:
            lidar_pointcloud = LidarPointCloud.from_file(lidar_filepath)
            lidar_pointcloud.transform(car_from_sensor)
        except Exception as e:
            print("Failed to load Lidar Pointcloud for {}: {}:".format(sample_token, e))
            sample_token = sample["next"]
            continue

        bev = create_voxel_pointcloud(lidar_pointcloud.points, bev_shape, voxel_size=voxel_size, z_offset=z_offset)
        bev = normalize_voxel_intensities(bev)

        boxes = level5data.get_boxes(sample_lidar_token)

        target = np.zeros_like(bev)

        move_boxes_to_car_space(boxes, ego_pose)
        scale_boxes(boxes, box_scale)
        draw_boxes(target, voxel_size, boxes=boxes, classes=classes, z_offset=z_offset)

        bev_im = np.round(bev * 255).astype(np.uint8)
        target_im = target[:, :, 0]  # take one channel only

        cv2.imwrite(os.path.join(output_folder, "{}_input.png".format(sample_token)), bev_im)
        cv2.imwrite(os.path.join(output_folder, "{}_target.png".format(sample_token)), target_im)

        sample_token = sample["next"]
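A hedged driver sketch for the function above: rasterize every scene starting from its first sample token; the train_df dataframe with a first_sample_token column is an assumption borrowed from the later examples on this page.

# Hedged sketch: run the rasterizer over all scenes listed in train_df.
for first_token in train_df.first_sample_token.values:
    prepare_training_data_for_scene(level5data, first_token, output_folder,
                                    bev_shape, voxel_size, z_offset,
                                    box_scale, classes)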
Example #5
    def load_position_raw(self, sample_lidar_token):
        lidar_data = self.level5data.get("sample_data", sample_lidar_token)

        ego_pose = self.level5data.get("ego_pose", lidar_data["ego_pose_token"])
        calibrated_sensor = self.level5data.get("calibrated_sensor", lidar_data["calibrated_sensor_token"])

        # Homogeneous transformation matrix from car frame to world frame.
        global_from_car = transform_matrix(ego_pose['translation'],
                                        Quaternion(ego_pose['rotation']), inverse=False)

        return global_from_car
Example #6
def prepare_test_data_for_scene(first_sample_token, output_folder, level5data, bev_shape, voxel_size, z_offset, box_scale):
    """
    Given a first sample token (in a scene), output rasterized input volumes in birds-eye-view perspective.

    """
    # get the first sample token for the scene
    sample_token = first_sample_token
    
    while sample_token: 
        sample = level5data.get("sample", sample_token)
        sample_lidar_token = sample["data"]["LIDAR_TOP"]
        lidar_data = level5data.get("sample_data", sample_lidar_token)
        lidar_filepath = level5data.get_sample_data_path(sample_lidar_token)

        ego_pose = level5data.get("ego_pose", lidar_data["ego_pose_token"])
        calibrated_sensor = level5data.get("calibrated_sensor", lidar_data["calibrated_sensor_token"])


        global_from_car = transform_matrix(ego_pose['translation'],
                                           Quaternion(ego_pose['rotation']), inverse=False)

        car_from_sensor = transform_matrix(calibrated_sensor['translation'], Quaternion(calibrated_sensor['rotation']),
                                            inverse=False)

        try:
            lidar_pointcloud = LidarPointCloud.from_file(lidar_filepath)
            lidar_pointcloud.transform(car_from_sensor)
        except Exception as e:
            print ("Failed to load Lidar Pointcloud for {}: {}:".format(sample_token, e))
            sample_token = sample["next"]
            continue
        
        bev = create_voxel_pointcloud(lidar_pointcloud.points, bev_shape, voxel_size=voxel_size, z_offset=z_offset)
        bev = normalize_voxel_intensities(bev)
        bev_im = np.round(bev*255).astype(np.uint8)
        cv2.imwrite(os.path.join(output_folder, "{}_input.png".format(sample_token)), bev_im)
        
        # go to the next sample token
        sample_token = sample["next"]
Example #7
    def load_cloud_raw(self, sample_lidar_token):  
        lidar_data = self.level5data.get("sample_data", sample_lidar_token)  
        lidar_filepath = self.level5data.get_sample_data_path(sample_lidar_token)
        ego_pose = self.level5data.get("ego_pose", lidar_data["ego_pose_token"])
        calibrated_sensor = self.level5data.get("calibrated_sensor", lidar_data["calibrated_sensor_token"])

        # Homogeneous transformation matrix from sensor coordinate frame to ego car frame.
        car_from_sensor = transform_matrix(calibrated_sensor['translation'], Quaternion(calibrated_sensor['rotation']),
                                            inverse=False)

        lidar_pointcloud = LidarPointCloud.from_file(lidar_filepath)

        # The lidar pointcloud is defined in the sensor's reference frame.
        # We want it in the car's reference frame, so we transform each point
        lidar_pointcloud.transform(car_from_sensor)

        return lidar_pointcloud.points[:3,:].T    
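A quick, hedged sanity check on load_cloud_raw; `loader` stands for whatever object holds level5data and is a hypothetical name.

# Hedged sketch: after the sensor-to-car transform the cloud should be roughly ego-centred.
points = loader.load_cloud_raw(sample_lidar_token)  # (N, 3) array in the car frame
print(points[:, 0].mean(), points[:, 1].mean())     # x/y means should sit near 0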
Example #8
def make_box_db(train_df,l5d):
    
    first_samples = train_df.first_sample_token.values
    box_db = {}
    box_scale = cfg.DATA.BOX_SCALE
    num_sweeps = cfg.DATA.NUM_SWEEPS
    min_distance = cfg.DATA.MIN_DISTANCE

    for sample_token in tqdm(first_samples):
        
        while sample_token:

            sample = l5d.get('sample',sample_token)
            sample_lidar_token = sample['data']['LIDAR_TOP']
            lidar_data = l5d.get('sample_data',sample_lidar_token)
            
            ego_pose = l5d.get("ego_pose", lidar_data["ego_pose_token"])
            calibrated_sensor = l5d.get("calibrated_sensor", lidar_data["calibrated_sensor_token"])
            car_from_sensor = transform_matrix(calibrated_sensor['translation'], Quaternion(calibrated_sensor['rotation']),
                                                inverse=False)

            try:
                lidar_pointcloud = LidarPointCloud.from_file_multisweep(l5d,sample,'LIDAR_TOP','LIDAR_TOP',num_sweeps=num_sweeps,min_distance=min_distance)[0]
                lidar_pointcloud.transform(car_from_sensor)
            except Exception as e:
                print ("Failed to load Lidar Pointcloud for {}: {}:".format(sample_token, e))
                sample_token = sample["next"]
                continue


            boxes = l5d.get_boxes(sample_lidar_token)
            move_boxes_to_car_space(boxes, ego_pose)
            scale_boxes(boxes, box_scale)
            for box in boxes:
                point_mask = points_in_box(box,lidar_pointcloud.points[:3,:])
                box_points = lidar_pointcloud.points[:,point_mask]
                if box.name not in box_db:
                    box_db[box.name] = [{'lidar':box_points,'box':box}]
                else:
                    box_db[box.name].append({'lidar':box_points,'box':box})

            sample_token = sample['next']
        
    pickle.dump(box_db,open(cfg.DATA.BOX_DB_FILE,'wb'))
Example #9
def get_pointcloud(sample_token, level5data):
    """Get sample of lidar point cloud
    Transform it to car coordinates
    """
    sample = level5data.get("sample", sample_token)
    sample_lidar_token = sample["data"]["LIDAR_TOP"]
    lidar_data = level5data.get("sample_data", sample_lidar_token)
    lidar_filepath = level5data.get_sample_data_path(sample_lidar_token)
    lidar_pointcloud = LidarPointCloud.from_file(lidar_filepath)

    # sensor (lidar) token
    calibrated_sensor = level5data.get("calibrated_sensor", lidar_data["calibrated_sensor_token"])

    # Homogeneous transformation matrix from sensor coordinate frame to ego car frame.
    car_from_sensor = transform_matrix(calibrated_sensor['translation'], Quaternion(calibrated_sensor['rotation']),
                                        inverse=False)
    # The lidar pointcloud is defined in the sensor's reference frame.
    # We want it in the car's reference frame, so we transform each point
    lidar_pointcloud.transform(car_from_sensor)

    return lidar_pointcloud
Example #10
    def process_token_to_kitti(self, sample_token: str) -> None:
        # Get sample data.
        sample = self.lyft_ds.get("sample", sample_token)
        sample_annotation_tokens = sample["anns"]

        lidar_token = sample["data"][self.lidar_name]
        sd_record_lid = self.lyft_ds.get("sample_data", lidar_token)
        cs_record_lid = self.lyft_ds.get(
            "calibrated_sensor", sd_record_lid["calibrated_sensor_token"])
        for cam_name in self.cams_to_see:
            cam_front_token = sample["data"][cam_name]
            if self.get_all_detections:
                token_to_write = sample_token
            else:
                token_to_write = cam_front_token

            # Retrieve sensor records.
            sd_record_cam = self.lyft_ds.get("sample_data", cam_front_token)
            cs_record_cam = self.lyft_ds.get(
                "calibrated_sensor", sd_record_cam["calibrated_sensor_token"])
            cam_height = sd_record_cam["height"]
            cam_width = sd_record_cam["width"]
            imsize = (cam_width, cam_height)

            # Combine transformations and convert to KITTI format.
            # Note: cam uses same conventions in KITTI and nuScenes.
            lid_to_ego = transform_matrix(cs_record_lid["translation"],
                                          Quaternion(
                                              cs_record_lid["rotation"]),
                                          inverse=False)
            ego_to_cam = transform_matrix(cs_record_cam["translation"],
                                          Quaternion(
                                              cs_record_cam["rotation"]),
                                          inverse=True)
            velo_to_cam = np.dot(ego_to_cam, lid_to_ego)

            # Convert from KITTI to nuScenes LIDAR coordinates, where we apply velo_to_cam.
            velo_to_cam_kitti = np.dot(
                velo_to_cam, self.kitti_to_nu_lidar.transformation_matrix)

            # Currently not used.
            imu_to_velo_kitti = np.zeros((3, 4))  # Dummy values.
            r0_rect = Quaternion(axis=[1, 0, 0], angle=0)  # Dummy values.

            # Projection matrix.
            p_left_kitti = np.zeros((3, 4))
            # Cameras are always rectified.
            p_left_kitti[:3, :3] = cs_record_cam["camera_intrinsic"]

            # Create KITTI style transforms.
            velo_to_cam_rot = velo_to_cam_kitti[:3, :3]
            velo_to_cam_trans = velo_to_cam_kitti[:3, 3]

            # Check that the rotation has the same format as in KITTI.
            if self.lyft_ds.get(
                    "sensor",
                    cs_record_cam["sensor_token"])["channel"] == "CAM_FRONT":
                expected_kitti_velo_to_cam_rot = np.array([[0, -1, 0],
                                                           [0, 0, -1],
                                                           [1, 0, 0]])
                assert (
                    velo_to_cam_rot.round(0) == expected_kitti_velo_to_cam_rot
                ).all(), velo_to_cam_rot.round(0)
            assert (velo_to_cam_trans[1:3] < 0).all()

            # Retrieve the token from the lidar.
            # Note that this may be confusing as the filename of the camera will
            # include the timestamp of the lidar,
            # not the camera.
            filename_cam_full = sd_record_cam["filename"]
            filename_lid_full = sd_record_lid["filename"]

            # Convert image (jpg to png).
            src_im_path = self.lyft_ds.data_path.joinpath(filename_cam_full)
            dst_im_path = self.image_folder.joinpath(f"{token_to_write}.png")
            if not dst_im_path.exists():
                im = Image.open(src_im_path)
                im.save(dst_im_path, "PNG")

            # Convert lidar.
            # Note that we are only using a single sweep, instead of the commonly used n sweeps.
            src_lid_path = self.lyft_ds.data_path.joinpath(filename_lid_full)
            dst_lid_path = self.lidar_folder.joinpath(f"{token_to_write}.bin")

            pcl = LidarPointCloud.from_file(Path(src_lid_path))
            # In KITTI lidar frame.
            pcl.rotate(self.kitti_to_nu_lidar_inv.rotation_matrix)
            with open(dst_lid_path, "w") as lid_file:
                pcl.points.T.tofile(lid_file)

            # Add to tokens.
            # tokens.append(token_to_write)

            # Create calibration file.
            kitti_transforms = dict()
            kitti_transforms["P0"] = np.zeros((3, 4))  # Dummy values.
            kitti_transforms["P1"] = np.zeros((3, 4))  # Dummy values.
            kitti_transforms["P2"] = p_left_kitti  # Left camera transform.
            kitti_transforms["P3"] = np.zeros((3, 4))  # Dummy values.
            # Cameras are already rectified.
            kitti_transforms["R0_rect"] = r0_rect.rotation_matrix
            kitti_transforms["Tr_velo_to_cam"] = np.hstack(
                (velo_to_cam_rot, velo_to_cam_trans.reshape(3, 1)))
            kitti_transforms["Tr_imu_to_velo"] = imu_to_velo_kitti
            calib_path = self.calib_folder.joinpath(f"{token_to_write}.txt")

            with open(calib_path, "w") as calib_file:
                for (key, val) in kitti_transforms.items():
                    val = val.flatten()
                    val_str = "%.12e" % val[0]
                    for v in val[1:]:
                        val_str += " %.12e" % v
                    calib_file.write("%s: %s\n" % (key, val_str))

            # Write label file.
            label_path = self.label_folder.joinpath(f"{token_to_write}.txt")
            if label_path.exists():
                print("Skipping existing file: %s" % label_path)
                continue
            with open(label_path, "w") as label_file:
                for sample_annotation_token in sample_annotation_tokens:
                    sample_annotation = self.lyft_ds.get(
                        "sample_annotation", sample_annotation_token)

                    # Get box in LIDAR frame.
                    _, box_lidar_nusc, _ = self.lyft_ds.get_sample_data(
                        lidar_token,
                        box_vis_level=BoxVisibility.NONE,
                        selected_anntokens=[sample_annotation_token])
                    box_lidar_nusc = box_lidar_nusc[0]

                    # Truncated: Set all objects to 0 which means untruncated.
                    truncated = 0.0

                    # Occluded: Set all objects to full visibility as this information is
                    # not available in nuScenes.
                    occluded = 0

                    detection_name = sample_annotation["category_name"]

                    # Convert from nuScenes to KITTI box format.
                    box_cam_kitti = KittiDB.box_nuscenes_to_kitti(
                        box_lidar_nusc, Quaternion(matrix=velo_to_cam_rot),
                        velo_to_cam_trans, r0_rect)

                    # Project 3d box to 2d box in image, ignore box if it does not fall inside.
                    bbox_2d = KittiDB.project_kitti_box_to_image(box_cam_kitti,
                                                                 p_left_kitti,
                                                                 imsize=imsize)
                    if bbox_2d is None and not self.get_all_detections:
                        continue
                    elif bbox_2d is None and self.get_all_detections:
                        # default KITTI bbox
                        bbox_2d = (-1.0, -1.0, -1.0, -1.0)

                    # Set dummy score so we can use this file as result.
                    box_cam_kitti.score = 0

                    # Convert box to output string format.
                    output = KittiDB.box_to_string(
                        name=detection_name,
                        box=box_cam_kitti,
                        bbox_2d=bbox_2d,
                        truncation=truncated,
                        occlusion=occluded,
                    )

                    # Write to disk.
                    label_file.write(output + "\n")
Example #11
def fill_trainval_infos(data_path, lyft, train_scenes, val_scenes, test=False):
    train_lyft_infos = []
    val_lyft_infos = []
    progress_bar = tqdm.tqdm(total=len(lyft.sample),
                             desc='create_info',
                             dynamic_ncols=True)

    ref_chans = ["LIDAR_TOP", "LIDAR_FRONT_LEFT", "LIDAR_FRONT_RIGHT"]

    for index, sample in enumerate(lyft.sample):
        progress_bar.update()

        ref_info = {}
        for ref_chan in ref_chans:
            if ref_chan not in sample["data"]:
                continue
            ref_sd_token = sample["data"][ref_chan]
            ref_sd_rec = lyft.get("sample_data", ref_sd_token)
            ref_cs_token = ref_sd_rec["calibrated_sensor_token"]
            ref_cs_rec = lyft.get("calibrated_sensor", ref_cs_token)

            ref_to_car = transform_matrix(
                ref_cs_rec["translation"],
                Quaternion(ref_cs_rec["rotation"]),
                inverse=False,
            )

            ref_from_car = transform_matrix(
                ref_cs_rec["translation"],
                Quaternion(ref_cs_rec["rotation"]),
                inverse=True,
            )

            ref_lidar_path = lyft.get_sample_data_path(ref_sd_token)

            ref_info[ref_chan] = {
                "lidar_path":
                Path(ref_lidar_path).relative_to(data_path).__str__(),
                "ref_from_car": ref_from_car,
                "ref_to_car": ref_to_car,
            }

            if ref_chan == "LIDAR_TOP":
                ref_boxes, ref_pose_rec = get_sample_data(lyft, ref_sd_token)
                ref_time = 1e-6 * ref_sd_rec["timestamp"]
                car_from_global = transform_matrix(
                    ref_pose_rec["translation"],
                    Quaternion(ref_pose_rec["rotation"]),
                    inverse=True,
                )

        info = {
            "ref_info": ref_info,
            'token': sample['token'],
            'car_from_global': car_from_global,
            'timestamp': ref_time,
        }

        if not test:
            annotations = [
                lyft.get("sample_annotation", token)
                for token in sample["anns"]
            ]

            locs = np.array([b.center for b in ref_boxes]).reshape(-1, 3)
            dims = np.array([b.wlh for b in ref_boxes]).reshape(-1,
                                                                3)[:,
                                                                   [1, 0, 2]]
            rots = np.array([quaternion_yaw(b.orientation)
                             for b in ref_boxes]).reshape(-1, 1)
            velocity = np.array([b.velocity for b in ref_boxes]).reshape(-1, 3)
            names = np.array([b.name for b in ref_boxes])
            tokens = np.array([b.token for b in ref_boxes]).reshape(-1, 1)
            gt_boxes = np.concatenate([locs, dims, rots], axis=1)

            assert len(annotations) == len(gt_boxes)

            info["gt_boxes"] = gt_boxes
            info["gt_boxes_velocity"] = velocity
            info["gt_names"] = names
            info["gt_boxes_token"] = tokens

        if sample["scene_token"] in train_scenes:
            train_lyft_infos.append(info)
        else:
            val_lyft_infos.append(info)

    progress_bar.close()
    return train_lyft_infos, val_lyft_infos
Example #12
lidar_filepath = level5data.get_sample_data_path(sample_lidar_token)

print(lidar_filepath)
print(os.path.exists(lidar_filepath))

# Look up the ego_pose record that corresponds to lidar_data in the ego_pose table
ego_pose = level5data.get("ego_pose", lidar_data["ego_pose_token"])

# Fetch the calibrated_sensor record; all of these tokens can be found in sample_data.json, see https://www.kaggle.com/c/3d-object-detection-for-autonomous-vehicles/discussion/110257#latest-636505
calibrated_sensor = level5data.get("calibrated_sensor",
                                   lidar_data["calibrated_sensor_token"])

# Coordinate-frame conversion: homogeneous transformation matrix from the car frame to the world frame.
global_from_car = transform_matrix(ego_pose['translation'],
                                   Quaternion(ego_pose['rotation']),
                                   inverse=False)
# Homogeneous transformation matrix from the lidar (sensor) coordinate frame to the ego car frame.
car_from_sensor = transform_matrix(calibrated_sensor['translation'],
                                   Quaternion(calibrated_sensor['rotation']),
                                   inverse=False)

# The loaded coordinates are in the sensor's reference frame, so they are not centered on the car
lidar_pointcloud = LidarPointCloud.from_file(lidar_filepath)

# The lidar pointcloud is defined in the sensor's reference frame.
# We want it in the car's reference frame, so we transform each point; the points then become
# centered on the origin (a histogram of X and Y, the coordinates along the car's axes, shows both centered around 0)
lidar_pointcloud.transform(car_from_sensor)
print(lidar_pointcloud.nbr_points())
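The histogram check mentioned in the comments above, as a hedged sketch; matplotlib is assumed to be available.

# Hedged sketch: x/y of the car-frame cloud should both be centered around 0.
import matplotlib.pyplot as plt

plt.hist(lidar_pointcloud.points[0], bins=100, alpha=0.5, label="x (car frame)")
plt.hist(lidar_pointcloud.points[1], bins=100, alpha=0.5, label="y (car frame)")
plt.legend()
plt.show()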
Example #13
def _fill_trainval_infos(lyft, train_scenes, val_scenes, test=False):
    from lyft_dataset_sdk.utils.geometry_utils import transform_matrix

    train_lyft_infos = []
    val_lyft_infos = []

    ref_chans = ["LIDAR_TOP", "LIDAR_FRONT_LEFT", "LIDAR_FRONT_RIGHT"]

    for sample in tqdm(lyft.sample):
        ref_info = {}
        for ref_chan in ref_chans:
            if ref_chan not in sample["data"]:
                continue
            ref_sd_token = sample["data"][ref_chan]
            ref_sd_rec = lyft.get("sample_data", ref_sd_token)
            ref_cs_token = ref_sd_rec["calibrated_sensor_token"]
            ref_cs_rec = lyft.get("calibrated_sensor", ref_cs_token)

            ref_to_car = transform_matrix(
                ref_cs_rec["translation"],
                Quaternion(ref_cs_rec["rotation"]),
                inverse=False,
            )

            ref_from_car = transform_matrix(
                ref_cs_rec["translation"],
                Quaternion(ref_cs_rec["rotation"]),
                inverse=True,
            )

            ref_lidar_path = lyft.get_sample_data_path(ref_sd_token)

            ref_info[ref_chan] = {
                "lidar_path": ref_lidar_path,
                "ref_from_car": ref_from_car,
                "ref_to_car": ref_to_car,
            }

            if ref_chan == "LIDAR_TOP":
                ref_boxes, ref_pose_rec = get_sample_data(lyft, ref_sd_token)
                ref_time = 1e-6 * ref_sd_rec["timestamp"]
                car_from_global = transform_matrix(
                    ref_pose_rec["translation"],
                    Quaternion(ref_pose_rec["rotation"]),
                    inverse=True,
                )

        info = {
            "ref_info": ref_info,
            "token": sample["token"],
            "car_from_global": car_from_global,
            "timestamp": ref_time,
        }

        if not test:
            annotations = [
                lyft.get("sample_annotation", token) for token in sample["anns"]
            ]

            locs = np.array([b.center for b in ref_boxes]).reshape(-1, 3)
            dims = np.array([b.wlh for b in ref_boxes]).reshape(-1, 3)
            rots = np.array([quaternion_yaw(b.orientation) for b in ref_boxes]).reshape(
                -1, 1
            )
            velocity = np.array([b.velocity for b in ref_boxes]).reshape(-1, 3)
            names = np.array([b.name for b in ref_boxes])
            tokens = np.array([b.token for b in ref_boxes]).reshape(-1, 1)
            gt_boxes = np.concatenate([locs, dims, -rots - np.pi / 2], axis=1)

            assert len(annotations) == len(gt_boxes)

            info["gt_boxes"] = gt_boxes
            info["gt_boxes_velocity"] = velocity
            info["gt_names"] = names
            info["gt_boxes_token"] = tokens

        if sample["scene_token"] in train_scenes:
            train_lyft_infos.append(info)
        else:
            val_lyft_infos.append(info)

    return train_lyft_infos, val_lyft_infos
Example #14
def make_prediction_boxes(model, dataloader, num_inputs, batch_size,
                          level5data):

    classes = cfg.DATA.CLASSES
    height = cfg.DATA.BEV_SHAPE[0]
    width = cfg.DATA.BEV_SHAPE[1]
    device = cfg.TRAIN.DEVICE
    bev_shape = cfg.DATA.BEV_SHAPE
    voxel_size = cfg.DATA.VOXEL_SIZE
    z_offset = cfg.DATA.Z_OFFSET
    # box_scale undoes the 0.8 target scaling further down; the cfg key mirrors make_box_db above
    box_scale = cfg.DATA.BOX_SCALE

    # we quantize to uint8 here to conserve memory. we're allocating >20GB of memory otherwise.
    predictions = np.zeros((num_inputs, 1 + len(classes), height, width),
                           dtype=np.uint8)  # [N,C,H,W]

    sample_tokens = []
    progress_bar = tqdm(dataloader)

    # evaluate samples with loaded model - predictions are gathered in 'predictions'
    with torch.no_grad():
        model.eval()
        for ii, (X, target, batch_sample_tokens) in enumerate(progress_bar):

            offset = ii * batch_size
            sample_tokens.extend(batch_sample_tokens)

            X = X.to(device)  # [N, 1, H, W]
            prediction = model(X)  # [N, 2, H, W]

            prediction = F.softmax(prediction, dim=1)

            prediction_cpu = prediction.cpu().numpy()
            predictions[offset:offset + batch_size] = np.round(
                prediction_cpu * 255).astype(np.uint8)

    predictions_non_class0 = 255 - predictions[:, 0]  # [N,H,W]
    background_threshold = 255 // 2

    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
    predictions_opened = np.zeros((predictions_non_class0.shape),
                                  dtype=np.uint8)  # [N,H,W]

    for i, p in enumerate(tqdm(predictions_non_class0)):
        thresholded_p = (p > background_threshold).astype(np.uint8)
        predictions_opened[i] = cv2.morphologyEx(thresholded_p, cv2.MORPH_OPEN,
                                                 kernel)  # [H,W]

    detection_boxes = []
    detection_scores = []
    detection_classes = []

    for i in tqdm(range(len(predictions))):
        prediction_opened = predictions_opened[i]  # [H,W]
        probability_non_class0 = predictions_non_class0[i]  # [H,W]
        class_probability = predictions[i]  # [C,H,W]

        sample_boxes = []
        sample_detection_scores = []
        sample_detection_classes = []

        contours, hierarchy = cv2.findContours(prediction_opened,
                                               cv2.RETR_EXTERNAL,
                                               cv2.CHAIN_APPROX_NONE)

        for cnt in contours:
            rect = cv2.minAreaRect(cnt)
            box = cv2.boxPoints(rect)

            # Let's take the center pixel value as the confidence value
            box_center_index = np.int0(np.mean(box, axis=0))

            for class_index in range(len(classes)):
                box_center_value = class_probability[class_index + 1,
                                                     box_center_index[1],
                                                     box_center_index[0]]

                # Let's remove candidates with very low probability
                if box_center_value < 0.01:
                    continue

                box_center_class = classes[class_index]

                box_detection_score = box_center_value
                sample_detection_classes.append(box_center_class)
                sample_detection_scores.append(box_detection_score)
                sample_boxes.append(box)

        detection_boxes.append(np.array(sample_boxes))
        detection_scores.append(sample_detection_scores)
        detection_classes.append(sample_detection_classes)

    pred_box3ds = []
    height_dict = cfg.DATA.AVG_CAT_HEIGHT

    # This could use some refactoring..
    for (sample_token, sample_boxes, sample_detection_scores,
         sample_detection_class) in tqdm(zip(sample_tokens, detection_boxes,
                                             detection_scores,
                                             detection_classes),
                                         total=len(sample_tokens)):
        sample_boxes = sample_boxes.reshape(-1, 2)  # (N, 4, 2) -> (N*4, 2)
        sample_boxes = sample_boxes.transpose(1, 0)  # (N*4, 2) -> (2, N*4)

        # Add Z dimension
        sample_boxes = np.vstack((
            sample_boxes,
            np.zeros(sample_boxes.shape[1]),
        ))  # (2, N*4) -> (3, N*4)

        sample = level5data.get("sample", sample_token)
        sample_lidar_token = sample["data"]["LIDAR_TOP"]
        lidar_data = level5data.get("sample_data", sample_lidar_token)
        lidar_filepath = level5data.get_sample_data_path(sample_lidar_token)
        ego_pose = level5data.get("ego_pose", lidar_data["ego_pose_token"])
        ego_translation = np.array(ego_pose['translation'])

        global_from_car = transform_matrix(ego_pose['translation'],
                                           Quaternion(ego_pose['rotation']),
                                           inverse=False)

        car_from_voxel = np.linalg.inv(
            create_transformation_matrix_to_voxel_space(
                bev_shape, voxel_size, (0, 0, z_offset)))

        global_from_voxel = np.dot(global_from_car, car_from_voxel)
        sample_boxes = transform_points(sample_boxes, global_from_voxel)

        # We don't know at where the boxes are in the scene on the z-axis (up-down), let's assume all of them are at
        # the same height as the ego vehicle.
        sample_boxes[2, :] = ego_pose["translation"][2]

        # (3, N*4) -> (N, 4, 3)
        sample_boxes = sample_boxes.transpose(1, 0).reshape(-1, 4, 3)

        box_height = np.array([height_dict[name] for name in sample_detection_class])

        # Note: Each of these boxes describes the ground corners of a 3D box.
        # To get the center of the box in 3D, we'll have to add half the height to it.
        sample_boxes_centers = sample_boxes.mean(axis=1)
        sample_boxes_centers[:, 2] += box_height / 2

        # Width and length are arbitrary - we don't know which way the vehicles are pointing from our prediction segmentation
        # It doesn't matter for evaluation, so no need to worry about that here.
        # Note: We scaled our targets to 0.8 of the actual size, so we need to adjust for that
        sample_lengths = np.linalg.norm(
            sample_boxes[:, 0, :] - sample_boxes[:, 1, :],
            axis=1) * 1 / box_scale
        sample_widths = np.linalg.norm(
            sample_boxes[:, 1, :] - sample_boxes[:, 2, :],
            axis=1) * 1 / box_scale

        sample_boxes_dimensions = np.zeros_like(sample_boxes_centers)
        sample_boxes_dimensions[:, 0] = sample_widths
        sample_boxes_dimensions[:, 1] = sample_lengths
        sample_boxes_dimensions[:, 2] = box_height

        for i in range(len(sample_boxes)):
            translation = sample_boxes_centers[i]
            size = sample_boxes_dimensions[i]
            class_name = sample_detection_class[i]
            ego_distance = float(np.linalg.norm(ego_translation - translation))

            # Determine the rotation of the box
            v = (sample_boxes[i, 0] - sample_boxes[i, 1])
            v /= np.linalg.norm(v)
            r = R.from_dcm([
                [v[0], -v[1], 0],
                [v[1], v[0], 0],
                [0, 0, 1],
            ])
            quat = r.as_quat()
            # XYZW -> WXYZ order of elements
            quat = quat[[3, 0, 1, 2]]

            detection_score = float(sample_detection_scores[i])

            box3d = Box3D(sample_token=sample_token,
                          translation=list(translation),
                          size=list(size),
                          rotation=list(quat),
                          name=class_name,
                          score=detection_score)
            pred_box3ds.append(box3d)

    return pred_box3ds
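A self-contained, hedged sketch of the box-rotation recovery used above: build the rotation matrix from one ground-edge direction and convert it to a WXYZ quaternion (from_matrix is the newer SciPy name for from_dcm).

# Hedged sketch of the yaw-to-quaternion step in make_prediction_boxes.
import numpy as np
from scipy.spatial.transform import Rotation as R

v = np.array([1.0, 1.0, 0.0])           # direction of one ground edge of the box
v /= np.linalg.norm(v)
r = R.from_matrix([[v[0], -v[1], 0.0],
                   [v[1],  v[0], 0.0],
                   [0.0,   0.0,  1.0]])
quat_wxyz = r.as_quat()[[3, 0, 1, 2]]   # SciPy returns XYZW; reorder to WXYZ
print(quat_wxyz)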
Example #15
    def __getitem__(self,ind):
        
        """
        This function takes an index into the list of tokens and creates the pillar tensor, 
        scatter tensor and training targets for the model. 

        self.num_sweeps lidar point clouds are aggregated and transformed to the current
        reference coordinates. The resulting point cloud is then used to create the pillar
        and scatter tensors (pillars, indices).
        """

        token = self.tokens[ind]
        # get the reference pose for the sample at token and create transformation matrix from the global
        # coordinate system to the reference car's coordinate system
        ref_pose   = self.data_dict[token]['ego_pose']
        ref_car_from_global = transform_matrix(ref_pose['translation'],Quaternion(ref_pose['rotation']),
                                               inverse=True)
        agg_pc = np.zeros((4,0))
        curr_token = token

        # aggregate num_sweeps previous lidars and move them to the reference car's coordinate system 
        for _ in range(self.num_sweeps):
            
            if not self.data_dict[curr_token]['lidar_fp']:
                curr_token = self.data_dict[curr_token]['prev_token']
                continue 
            # get the current point cloud for curr_token
            lidar_fp = pathlib.Path(self.data_dict[curr_token]['lidar_fp'])
            curr_pc  = LidarPointCloud.from_file(lidar_fp)
            curr_pose = self.data_dict[curr_token]['ego_pose']
            curr_sensor = self.data_dict[curr_token]['cal_sensor']

            # create transformation matrices from current sensor to current car and from
            # current car to global
            global_from_curr_car = transform_matrix(curr_pose['translation'],Quaternion(curr_pose['rotation']),
                                                     inverse=False)
            curr_car_from_curr_sensor = transform_matrix(curr_sensor['translation'],
                                                        Quaternion(curr_sensor['rotation']),
                                                        inverse=False)
            # combine transformation matrices into a single matrix that moves the point cloud 
            # from current sensor -> current car -> global -> reference car
            transmat = reduce(np.dot,[ref_car_from_global,global_from_curr_car,curr_car_from_curr_sensor])
            curr_pc.transform(transmat)
            curr_pc.remove_close(self.min_dist)
            # aggregate the resulting point cloud
            agg_pc = np.hstack((agg_pc,curr_pc.points))
            curr_token = self.data_dict[curr_token]['prev_token']
            if not curr_token:
                break

        # create pillar tensor and scatter tensor
        lidar_points = agg_pc.transpose([1,0])
        pillar = np.zeros((cfg.DATA.MAX_PILLARS,cfg.DATA.MAX_POINTS_PER_PILLAR,9))
        indices = np.zeros((cfg.DATA.MAX_PILLARS,3))
        
        pillars.create_pillars(lidar_points,pillar,
                            indices,cfg.DATA.MAX_POINTS_PER_PILLAR,
                            cfg.DATA.MAX_PILLARS,cfg.DATA.X_STEP,cfg.DATA.Y_STEP,
                            cfg.DATA.X_MIN,cfg.DATA.Y_MIN,cfg.DATA.Z_MIN,
                            cfg.DATA.X_MAX,cfg.DATA.Y_MAX,cfg.DATA.Z_MAX,
                            cfg.DATA.CANVAS_HEIGHT)

        pillar = pillar.transpose([2,0,1])
        pillar_size = pillar.shape
        pillar = torch.from_numpy(pillar).float()
        if self.data_mean is not None:
            # subtract the data mean from the pillar tensor
            pillar = pillar.reshape(-1) - self.data_mean
            pillar = pillar.reshape(pillar_size)
        indices = torch.from_numpy(indices).long()
        if self.training:
            # get the ground truth boxes of the token 
            box_fp = self.data_dict[token]['boxes']
            boxes = pickle.load(open(box_fp,'rb'))
            gt_centers,gt_corners = boxes_to_image_space(boxes)

            # create training labels
            c_target,r_target = create_target(self.anchor_corners,gt_corners,
                                    self.anchor_centers,gt_centers,
                                    self.anchor_boxes,boxes)
            c_target = torch.from_numpy(c_target).float()
            r_target = torch.from_numpy(r_target).float()
            gc.collect()
            return (pillar,indices,c_target,r_target)
        
        return (pillar,indices)
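A hedged usage sketch for the dataset above; `pillar_dataset` is a hypothetical, already-constructed instance of the class whose __getitem__ is shown, with training enabled.

# Hedged sketch: wrap the dataset in a DataLoader and pull one training batch.
from torch.utils.data import DataLoader

loader = DataLoader(pillar_dataset, batch_size=2, shuffle=True)
pillar, indices, c_target, r_target = next(iter(loader))
print(pillar.shape)   # (B, 9, MAX_PILLARS, MAX_POINTS_PER_PILLAR)
print(indices.shape)  # (B, MAX_PILLARS, 3)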
Example #16
    def process_token_to_kitti(self, sample_token: str) -> None:
        rotate = False
        # Get sample data.
        sample = self.lyft_ds.get("sample", sample_token)
        sample_annotation_tokens = sample["anns"]

        norm = np.array([[0, 1, 0], [-1, 0, 0], [0, 0, 1]])
        norm_4 = np.eye(4)
        norm_4[:3, :3] = norm

        lidar_token = sample["data"][self.lidar_name]
        sd_record_lid = self.lyft_ds.get("sample_data", lidar_token)
        cs_record_lid = self.lyft_ds.get(
            "calibrated_sensor", sd_record_lid["calibrated_sensor_token"])
        ego_record_lid = self.lyft_ds.get("ego_pose",
                                          sd_record_lid["ego_pose_token"])

        for idx, cam_name in enumerate(self.cams_to_see):
            cam_front_token = sample["data"][cam_name]
            if self.get_all_detections:
                token_to_write = sample_token
            else:
                token_to_write = cam_front_token
            sample_name = "%06d" % self.tokens.index(token_to_write)

            # Retrieve sensor records.
            sd_record_cam = self.lyft_ds.get("sample_data", cam_front_token)
            cs_record_cam = self.lyft_ds.get(
                "calibrated_sensor", sd_record_cam["calibrated_sensor_token"])
            ego_record_cam = self.lyft_ds.get("ego_pose",
                                              sd_record_cam["ego_pose_token"])
            cam_height = sd_record_cam["height"]
            cam_width = sd_record_cam["width"]
            imsize = (cam_width, cam_height)

            # Combine transformations and convert to KITTI format.
            # Note: cam uses same conventions in KITTI and nuScenes.
            lid_to_ego = transform_matrix(cs_record_lid["translation"],
                                          Quaternion(
                                              cs_record_lid["rotation"]),
                                          inverse=False)
            lid_ego_to_world = transform_matrix(
                ego_record_lid["translation"],
                Quaternion(ego_record_lid["rotation"]),
                inverse=False)
            world_to_cam_ego = transform_matrix(
                ego_record_cam["translation"],
                Quaternion(ego_record_cam["rotation"]),
                inverse=True)
            ego_to_cam = transform_matrix(cs_record_cam["translation"],
                                          Quaternion(
                                              cs_record_cam["rotation"]),
                                          inverse=True)
            velo_to_cam = np.dot(
                ego_to_cam,
                np.dot(world_to_cam_ego, np.dot(lid_ego_to_world, lid_to_ego)))

            # Convert from KITTI to nuScenes LIDAR coordinates, where we apply velo_to_cam.
            velo_to_cam_kitti = np.dot(
                velo_to_cam, self.kitti_to_nu_lidar.transformation_matrix)

            # Currently not used.
            imu_to_velo_kitti = np.zeros((3, 4))  # Dummy values.
            r0_rect = Quaternion(axis=[1, 0, 0], angle=0)  # Dummy values.

            # Projection matrix.
            p_left_kitti = np.zeros((3, 4))
            # Cameras are always rectified.
            p_left_kitti[:3, :3] = cs_record_cam["camera_intrinsic"]

            # Create KITTI style transforms.
            velo_to_cam_rot = velo_to_cam_kitti[:3, :3]
            velo_to_cam_trans = velo_to_cam_kitti[:3, 3]

            # Check that the rotation has the same format as in KITTI.
            if self.lyft_ds.get(
                    "sensor",
                    cs_record_cam["sensor_token"])["channel"] == "CAM_FRONT":
                expected_kitti_velo_to_cam_rot = np.array([[0, -1, 0],
                                                           [0, 0, -1],
                                                           [1, 0, 0]])
                assert (
                    velo_to_cam_rot.round(0) == expected_kitti_velo_to_cam_rot
                ).all(), velo_to_cam_rot.round(0)

            # Retrieve the token from the lidar.
            # Note that this may be confusing as the filename of the camera will
            # include the timestamp of the lidar,
            # not the camera.
            filename_cam_full = sd_record_cam["filename"]
            filename_lid_full = sd_record_lid["filename"]

            # Convert image (jpg to png).
            src_im_path = self.lyft_ds.data_path.joinpath(filename_cam_full)
            dst_im_path = self.image_folder.joinpath(f"{sample_name}.png")
            if not dst_im_path.exists():
                im = Image.open(src_im_path)
                im.save(dst_im_path, "PNG")

            # Convert lidar.
            # Note that we are only using a single sweep, instead of the commonly used n sweeps.
            src_lid_path = self.lyft_ds.data_path.joinpath(filename_lid_full)
            dst_lid_path = self.lidar_folder.joinpath(f"{sample_name}.bin")

            pcl = LidarPointCloud.from_file(Path(src_lid_path))
            # In KITTI lidar frame.
            pcl.rotate(self.kitti_to_nu_lidar_inv.rotation_matrix)
            if rotate:
                pcl = np.dot(norm_4,
                             pcl.points).T.astype(np.float32).reshape(-1)
            with open(dst_lid_path, "w") as lid_file:
                pcl.points.T.tofile(lid_file)

            # Add to tokens.
            # tokens.append(token_to_write)

            # Create calibration file.
            kitti_transforms = dict()
            kitti_transforms["P0"] = np.zeros((3, 4))  # Dummy values.
            kitti_transforms["P1"] = np.zeros((3, 4))  # Dummy values.
            kitti_transforms["P2"] = p_left_kitti  # Left camera transform.
            kitti_transforms["P3"] = np.zeros((3, 4))  # Dummy values.
            # Cameras are already rectified.
            kitti_transforms["R0_rect"] = r0_rect.rotation_matrix
            if rotate:
                velo_to_cam_rot = np.dot(velo_to_cam_rot, norm.T)
            kitti_transforms["Tr_velo_to_cam"] = np.hstack(
                (velo_to_cam_rot, velo_to_cam_trans.reshape(3, 1)))
            kitti_transforms["Tr_imu_to_velo"] = imu_to_velo_kitti
            calib_path = self.calib_folder.joinpath(f"{sample_name}.txt")

            with open(calib_path, "w") as calib_file:
                for (key, val) in kitti_transforms.items():
                    val = val.flatten()
                    val_str = "%.12e" % val[0]
                    for v in val[1:]:
                        val_str += " %.12e" % v
                    calib_file.write("%s: %s\n" % (key, val_str))

            # Write label file.
            label_path = self.label_folder.joinpath(f"{sample_name}.txt")
            if label_path.exists():
                # print("Skipping existing file: %s" % label_path)
                continue

            objects = []
            for sample_annotation_token in sample_annotation_tokens:
                sample_annotation = self.lyft_ds.get("sample_annotation",
                                                     sample_annotation_token)

                # Get box in LIDAR frame.
                _, box_lidar_nusc, _ = self.lyft_ds.get_sample_data(
                    lidar_token,
                    box_vis_level=BoxVisibility.NONE,
                    selected_anntokens=[sample_annotation_token])
                box_lidar_nusc = box_lidar_nusc[0]

                # Truncated: Set all objects to 0 which means untruncated.
                truncated = 0.0

                # Occluded: Set all objects to full visibility as this information is
                # not available in nuScenes.
                occluded = 0

                obj = dict()
                # store the occlusion flag on the object so it can be written to the label file below
                obj["occluded"] = occluded

                obj["detection_name"] = sample_annotation["category_name"]
                if obj["detection_name"] is None or obj[
                        "detection_name"] not in CLASS_MAP.keys():
                    continue

                obj["detection_name"] = CLASS_MAP[obj["detection_name"]]

                # Convert from nuScenes to KITTI box format.
                obj["box_cam_kitti"] = KittiDB.box_nuscenes_to_kitti(
                    box_lidar_nusc, Quaternion(matrix=velo_to_cam_rot),
                    velo_to_cam_trans, r0_rect)

                # Project 3d box to 2d box in image, ignore box if it does not fall inside.
                bbox_2d = project_to_2d(obj["box_cam_kitti"], p_left_kitti,
                                        cam_height, cam_width)
                if bbox_2d is None:
                    continue
                obj["bbox_2d"] = bbox_2d["bbox"]
                obj["truncated"] = bbox_2d["truncated"]

                # Set dummy score so we can use this file as result.
                obj["box_cam_kitti"].score = 0

                v = np.dot(obj["box_cam_kitti"].rotation_matrix,
                           np.array([1, 0, 0]))
                rot_y = -np.arctan2(v[2], v[0])
                obj["alpha"] = -np.arctan2(
                    obj["box_cam_kitti"].center[0],
                    obj["box_cam_kitti"].center[2]) + rot_y
                obj["depth"] = np.linalg.norm(
                    np.array(obj["box_cam_kitti"].center[:3]))
                objects.append(obj)

            objects = postprocessing(objects, cam_height, cam_width)

            with open(label_path, "w") as label_file:
                for obj in objects:
                    # Convert box to output string format.
                    output = box_to_string(name=obj["detection_name"],
                                           box=obj["box_cam_kitti"],
                                           bbox_2d=obj["bbox_2d"],
                                           truncation=obj["truncated"],
                                           occlusion=obj["occluded"],
                                           alpha=obj["alpha"])
                    label_file.write(output + '\n')
            with open(self.split_file, "a") as f:
                f.write(sample_name + "\n")
Example #17
def main():
    # get data
    level5data = LyftDataset(data_path = '.', json_path='../../input/train_data', verbose=True)
    classes = ["car", "motorcycle", "bus", "bicycle", "truck", "pedestrian", "other_vehicle", "animal", "emergency_vehicle"]

    df = pd.read_csv('host_scenes.csv')
    host_count_df = df.groupby("host")['scene_token'].count()
    print(host_count_df)

    sample_token = df.first_sample_token.values[0]
    sample = level5data.get("sample", sample_token)
    sample_lidar_token = sample["data"]["LIDAR_TOP"]
    lidar_data = level5data.get("sample_data", sample_lidar_token)
    lidar_filepath = level5data.get_sample_data_path(sample_lidar_token)

    # car and sensor coords
    ego_pose = level5data.get("ego_pose", lidar_data["ego_pose_token"])
    print("ego_pose: ", ego_pose)
    calibrated_sensor = level5data.get("calibrated_sensor", lidar_data["calibrated_sensor_token"])
    
    # Homogeneous transformation matrix from car frame to world frame.
    global_from_car = transform_matrix(ego_pose['translation'],
                                    Quaternion(ego_pose['rotation']), inverse=False)

    # Homogeneous transformation matrix from sensor coordinate frame to ego car frame.
    car_from_sensor = transform_matrix(calibrated_sensor['translation'], Quaternion(calibrated_sensor['rotation']),
                                        inverse=False)
    lidar_pointcloud = LidarPointCloud.from_file(lidar_filepath)

    # The lidar pointcloud is defined in the sensor's reference frame.
    # We want it in the car's reference frame, so we transform each point
    lidar_pointcloud.transform(car_from_sensor)

    # Define hyper parameters 
    #bev_shape = (768, 768, 3)
    #voxel_size = (0.2,0.2,1.5)
    bev_shape = (1024, 1024, 3)
    voxel_size = (0.2,0.2,1.5)

    z_offset = -2.0
    box_scale = 0.8

    #map_mask = level5data.map[0]["mask"]
    #print(map_mask)
    #plot_semantic_map(map_mask, ego_pose)
    #sample_token = train_df.first_sample_token.values[10]
    #bev = plot_bev_and_boxes(sample_token, level5data)

    train_data_folder = os.path.join(OUTPUT_ROOT, "bev_data_1024")
    validation_data_folder = os.path.join(OUTPUT_ROOT, "bev_data_1024")
    os.makedirs(train_data_folder, exist_ok=True)
    os.makedirs(validation_data_folder, exist_ok=True)

    # test on a single scene
    prepare_training_data_for_scene(df.first_sample_token.values[50], 
                                    train_data_folder, 
                                    level5data, 
                                    classes,
                                    bev_shape, voxel_size, z_offset, box_scale)
    test_prepare_training_data(df.first_sample_token.values[50], train_data_folder)

    # get for all scenes
    first_samples = df.first_sample_token.values
    print(first_samples)

    for sample in first_samples:
        print(sample)
        prepare_training_data_for_scene(sample, 
                                    train_data_folder, 
                                    level5data, 
                                    classes,
                                    bev_shape, voxel_size, z_offset, box_scale)
    # multiprocessing option
    #for df, data_folder in [(train_df, train_data_folder), (validation_df, validation_data_folder)]:
    #    prepare_data_pool(df, data_folder, bev_shape, voxel_size, z_offset, box_scale)
    print('Mission accomplished!')
Example #18
def fill_trainval_infos(data_path,
                        lyft,
                        train_scenes,
                        val_scenes,
                        test=False,
                        max_sweeps=10):
    train_lyft_infos = []
    val_lyft_infos = []
    progress_bar = tqdm.tqdm(total=len(lyft.sample),
                             desc='create_info',
                             dynamic_ncols=True)

    # ref_chans = ["LIDAR_TOP", "LIDAR_FRONT_LEFT", "LIDAR_FRONT_RIGHT"]
    ref_chan = "LIDAR_TOP"

    for index, sample in enumerate(lyft.sample):
        progress_bar.update()

        ref_info = {}
        ref_sd_token = sample["data"][ref_chan]
        ref_sd_rec = lyft.get("sample_data", ref_sd_token)
        ref_cs_token = ref_sd_rec["calibrated_sensor_token"]
        ref_cs_rec = lyft.get("calibrated_sensor", ref_cs_token)

        ref_to_car = transform_matrix(
            ref_cs_rec["translation"],
            Quaternion(ref_cs_rec["rotation"]),
            inverse=False,
        )

        ref_from_car = transform_matrix(
            ref_cs_rec["translation"],
            Quaternion(ref_cs_rec["rotation"]),
            inverse=True,
        )

        ref_lidar_path = lyft.get_sample_data_path(ref_sd_token)

        ref_boxes, ref_pose_rec = get_sample_data(lyft, ref_sd_token)
        ref_time = 1e-6 * ref_sd_rec["timestamp"]
        car_from_global = transform_matrix(
            ref_pose_rec["translation"],
            Quaternion(ref_pose_rec["rotation"]),
            inverse=True,
        )

        car_to_global = transform_matrix(
            ref_pose_rec["translation"],
            Quaternion(ref_pose_rec["rotation"]),
            inverse=False,
        )

        info = {
            "lidar_path": str(Path(ref_lidar_path).relative_to(data_path)),
            "ref_from_car": ref_from_car,
            "ref_to_car": ref_to_car,
            "token": sample["token"],
            "car_from_global": car_from_global,
            "car_to_global": car_to_global,
            "timestamp": ref_time,
            "sweeps": [],
        }

        sample_data_token = sample['data'][ref_chan]
        curr_sd_rec = lyft.get('sample_data', sample_data_token)
        sweeps = []

        while len(sweeps) < max_sweeps - 1:
            if curr_sd_rec['prev'] == '':
                if len(sweeps) == 0:
                    sweep = {
                        'lidar_path': str(Path(ref_lidar_path).relative_to(data_path)),
                        'sample_data_token': curr_sd_rec['token'],
                        'transform_matrix': None,
                        'time_lag': curr_sd_rec['timestamp'] * 0,  # the key frame itself has zero time lag
                    }
                    sweeps.append(sweep)
                else:
                    sweeps.append(sweeps[-1])
            else:
                curr_sd_rec = lyft.get('sample_data', curr_sd_rec['prev'])

                # Get past pose
                current_pose_rec = lyft.get('ego_pose',
                                            curr_sd_rec['ego_pose_token'])
                global_from_car = transform_matrix(
                    current_pose_rec['translation'],
                    Quaternion(current_pose_rec['rotation']),
                    inverse=False,
                )

                # Homogeneous transformation matrix from sensor coordinate frame to ego car frame.
                current_cs_rec = lyft.get(
                    'calibrated_sensor',
                    curr_sd_rec['calibrated_sensor_token'])
                car_from_current = transform_matrix(
                    current_cs_rec['translation'],
                    Quaternion(current_cs_rec['rotation']),
                    inverse=False,
                )

                tm = reduce(np.dot, [
                    ref_from_car, car_from_global, global_from_car,
                    car_from_current
                ])
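                # tm maps points from the current sweep's sensor frame, through the current car
                # and global frames, into the reference (key-frame) sensor frame.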

                lidar_path = lyft.get_sample_data_path(curr_sd_rec['token'])

                time_lag = ref_time - 1e-6 * curr_sd_rec['timestamp']

                sweep = {
                    'lidar_path': str(Path(lidar_path).relative_to(data_path)),
                    'sample_data_token': curr_sd_rec['token'],
                    'transform_matrix': tm,
                    'global_from_car': global_from_car,
                    'car_from_current': car_from_current,
                    'time_lag': time_lag,
                }
                sweeps.append(sweep)

        info['sweeps'] = sweeps

        if not test:
            annotations = [
                lyft.get("sample_annotation", token)
                for token in sample["anns"]
            ]

            locs = np.array([b.center for b in ref_boxes]).reshape(-1, 3)
            dims = np.array([b.wlh for b in ref_boxes]).reshape(-1, 3)[:, [1, 0, 2]]  # (w, l, h) -> (l, w, h)
            rots = np.array([quaternion_yaw(b.orientation)
                             for b in ref_boxes]).reshape(-1, 1)
            velocity = np.array([b.velocity for b in ref_boxes]).reshape(-1, 3)
            names = np.array([b.name for b in ref_boxes])
            tokens = np.array([b.token for b in ref_boxes]).reshape(-1, 1)
            gt_boxes = np.concatenate([locs, dims, rots], axis=1)

            assert len(annotations) == len(gt_boxes)

            info["gt_boxes"] = gt_boxes
            info["gt_boxes_velocity"] = velocity
            info["gt_names"] = names
            info["gt_boxes_token"] = tokens

        if sample["scene_token"] in train_scenes:
            train_lyft_infos.append(info)
        else:
            val_lyft_infos.append(info)

    progress_bar.close()
    return train_lyft_infos, val_lyft_infos
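quaternion_yaw is used above but not defined in this snippet. A common implementation, given here only as a hedged sketch, projects the rotated x-axis onto the ground plane and measures its heading:

import numpy as np
from pyquaternion import Quaternion

def quaternion_yaw(q: Quaternion) -> float:
    # Rotate the x-axis by q and take its angle in the x-y plane.
    v = np.dot(q.rotation_matrix, np.array([1.0, 0.0, 0.0]))
    return float(np.arctan2(v[1], v[0]))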
Example #19
0
def create_submit():    
    pred_box3ds = []

    # This could use some refactoring..
    for (sample_token, sample_boxes, sample_detection_scores, sample_detection_class) in tqdm_notebook(
            zip(sample_tokens, detection_boxes, detection_scores, detection_classes), total=len(sample_tokens)):
        sample_boxes = sample_boxes.reshape(-1, 2)  # (N, 4, 2) -> (N*4, 2)
        sample_boxes = sample_boxes.transpose(1, 0)  # (N*4, 2) -> (2, N*4)

        # Add Z dimension
        sample_boxes = np.vstack((sample_boxes, np.zeros(sample_boxes.shape[1]),))  # (2, N*4) -> (3, N*4)

        sample = level5data.get("sample", sample_token)
        sample_lidar_token = sample["data"]["LIDAR_TOP"]
        lidar_data = level5data.get("sample_data", sample_lidar_token)
        lidar_filepath = level5data.get_sample_data_path(sample_lidar_token)
        ego_pose = level5data.get("ego_pose", lidar_data["ego_pose_token"])
        ego_translation = np.array(ego_pose['translation'])

        global_from_car = transform_matrix(ego_pose['translation'],
                                           Quaternion(ego_pose['rotation']), inverse=False)

        car_from_voxel = np.linalg.inv(create_transformation_matrix_to_voxel_space(bev_shape, voxel_size, (0, 0, z_offset)))
        global_from_voxel = np.dot(global_from_car, car_from_voxel)
        sample_boxes = transform_points(sample_boxes, global_from_voxel)

        # We don't know where the boxes are on the z-axis (up-down) in the scene; assume they are all at
        # the same height as the ego vehicle.
        sample_boxes[2, :] = ego_pose["translation"][2]

        # (3, N*4) -> (N, 4, 3)
        sample_boxes = sample_boxes.transpose(1, 0).reshape(-1, 4, 3)

        # empirical heights
        box_height = np.array([class_heights[cls] for cls in sample_detection_class])

        # To get the center of the box in 3D, we'll have to add half the height to it.
        sample_boxes_centers = sample_boxes.mean(axis=1)
        sample_boxes_centers[:, 2] += box_height / 2

        # Width and length are arbitrary - we don't know which way the vehicles are pointing from our segmentation prediction.
        # It doesn't matter for evaluation, so no need to worry about that here.
        # Note: we scaled our targets to 0.8 of the actual size, so we need to adjust for that.
        sample_lengths = np.linalg.norm(sample_boxes[:, 0, :] - sample_boxes[:, 1, :], axis=1) * 1 / box_scale
        sample_widths = np.linalg.norm(sample_boxes[:, 1, :] - sample_boxes[:, 2, :], axis=1) * 1 / box_scale

        sample_boxes_dimensions = np.zeros_like(sample_boxes_centers)
        sample_boxes_dimensions[:, 0] = sample_widths
        sample_boxes_dimensions[:, 1] = sample_lengths
        sample_boxes_dimensions[:, 2] = box_height

        for i in range(len(sample_boxes)):
            translation = sample_boxes_centers[i]
            size = sample_boxes_dimensions[i]
            class_name = sample_detection_class[i]
            ego_distance = float(np.linalg.norm(ego_translation - translation))

            # Determine the rotation of the box
            v = (sample_boxes[i, 0] - sample_boxes[i, 1])
            v /= np.linalg.norm(v)
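            # Build a pure yaw rotation whose x-axis is aligned with the box edge direction v.
            # Note: Rotation.from_dcm was renamed to from_matrix in SciPy 1.4 and removed in 1.6.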
            r = R.from_dcm([
                [v[0], -v[1], 0],
                [v[1], v[0], 0],
                [0, 0, 1],
            ])
            quat = r.as_quat()
            # XYZW -> WXYZ order of elements
            quat = quat[[3, 0, 1, 2]]

            detection_score = float(sample_detection_scores[i])

            box3d = Box3D(
                sample_token=sample_token,
                translation=list(translation),
                size=list(size),
                rotation=list(quat),
                name=class_name,
                score=detection_score
            )
            pred_box3ds.append(box3d)
    sub = {}
    for i in tqdm_notebook(range(len(pred_box3ds))):
        #     yaw = -np.arctan2(pred_box3ds[i].rotation[2], pred_box3ds[i].rotation[0])
        yaw = 2 * np.arccos(pred_box3ds[i].rotation[0])  # rotation angle from the quaternion's w component (assumes a yaw-only rotation)
        pred = str(pred_box3ds[i].score / 255) + ' ' + str(pred_box3ds[i].center_x) + ' ' + \
            str(pred_box3ds[i].center_y) + ' ' + str(pred_box3ds[i].center_z) + ' ' + \
            str(pred_box3ds[i].width) + ' ' \
            + str(pred_box3ds[i].length) + ' ' + str(pred_box3ds[i].height) + ' ' + str(yaw) + ' ' \
            + str(pred_box3ds[i].name) + ' '
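        # Each prediction is appended as: "confidence center_x center_y center_z width length height yaw class_name "
        # (the stored score is rescaled from 0-255 to 0-1).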

        if pred_box3ds[i].sample_token in sub.keys():
            sub[pred_box3ds[i].sample_token] += pred
        else:
            sub[pred_box3ds[i].sample_token] = pred

    sample_sub = pd.read_csv(base_path + 'sample_submission.csv')
    for token in set(sample_sub.Id.values).difference(sub.keys()):
        print(token)
        sub[token] = ''

    sub = pd.DataFrame(list(sub.items()))
    sub.columns = sample_sub.columns
    sub.head()  # notebook-style previews; no effect when run as a script
    sub.tail()
    sub.to_csv(f'{submit_name}.csv', index=False)
    def process_token_to_groundtruth(self, sample_tokens, sample_n_in_seqs):
        ### write the ground-truth pose of each sample to its scene folder

        started = False

        # loop through all the samples
        for sample_token, sample_n in zip(sample_tokens, sample_n_in_seqs):
            # Get sample data.
            sample = self.lyft_ds.get("sample", sample_token)

            # Get lidar pose data
            lidar_token = sample["data"][self.lidar_name]
            sd_record_lid = self.lyft_ds.get("sample_data", lidar_token)
            cs_record_lid = self.lyft_ds.get("calibrated_sensor", sd_record_lid["calibrated_sensor_token"])
            ego_record_lid = self.lyft_ds.get("ego_pose", sd_record_lid["ego_pose_token"])
            # cam_front_token = sample["data"]["CAM_FRONT"]
            # sd_record_cam = self.lyft_ds.get("sample_data", cam_front_token)
            # cs_record_cam = self.lyft_ds.get("calibrated_sensor", sd_record_cam["calibrated_sensor_token"])
            # ego_record_cam = self.lyft_ds.get("ego_pose", sd_record_cam["ego_pose_token"])

            lid_to_ego = transform_matrix(
                cs_record_lid["translation"], Quaternion(cs_record_lid["rotation"]), inverse=False
            )
            lid_ego_to_world = transform_matrix(
                ego_record_lid["translation"], Quaternion(ego_record_lid["rotation"]), inverse=False
            )            
            # world_to_cam_ego = transform_matrix(
            #     ego_record_cam["translation"], Quaternion(ego_record_cam["rotation"]), inverse=True
            # )
            # ego_to_cam = transform_matrix(
            #     cs_record_cam["translation"], Quaternion(cs_record_cam["rotation"]), inverse=True
            # )
            # velo_to_cam = np.dot(ego_to_cam, np.dot(world_to_cam_ego, np.dot(lid_ego_to_world, lid_to_ego)))

            # # Convert from KITTI to nuScenes LIDAR coordinates, where we apply velo_to_cam.
            # velo_to_cam_kitti = np.dot(velo_to_cam, self.kitti_to_nu_lidar.transformation_matrix)


            velo_pose = np.dot(lid_ego_to_world, lid_to_ego)
            velo_pose_kitti = np.dot(velo_pose, self.kitti_to_nu_lidar.transformation_matrix)
            
            # If this is the start of a new sequence, update the scene name and reset the first pose
            if sample_n == 0:             
                ### get scene information
                scene = self.lyft_ds.get("scene", sample["scene_token"] )
                scene_name = scene["name"]
                scene_folder = self.store_dir.joinpath(scene_name)
                if not scene_folder.is_dir():
                    scene_folder.mkdir(parents=True)
                if started:
                    gt_file.close() 
                gt_path = scene_folder.joinpath("groundtruth.txt")
                gt_file = open(gt_path, "w")
                car_pose = np.identity(4)
                first_pose = velo_pose
                started = True
            else:
                car_pose = np.dot(np.linalg.inv(first_pose), velo_pose)

            # Write to disk.
            gt_file.write(" ".join(str(value) for pose_row in car_pose[0:3, :] for value in pose_row))
            gt_file.write('\n')

        # Close the ground-truth file of the last scene.
        if started:
            gt_file.close()
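Each line written above holds the 12 entries of the top 3x4 block of the cumulative pose (row-major, relative to the first lidar pose of the scene). A small sketch for reading such a groundtruth.txt back into 4x4 matrices; load_groundtruth_poses is a hypothetical helper name:

import numpy as np

def load_groundtruth_poses(gt_path):
    # Each line: 12 floats = the first three rows of a 4x4 pose matrix, row-major.
    poses = []
    with open(gt_path) as f:
        for line in f:
            rows = np.array(line.split(), dtype=float).reshape(3, 4)
            poses.append(np.vstack([rows, [0.0, 0.0, 0.0, 1.0]]))
    return poses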
Example #21
0
    # Add Z dimension
    sample_boxes = np.vstack((
        sample_boxes,
        np.zeros(sample_boxes.shape[1]),
    ))  # (2, N*4) -> (3, N*4)

    sample = level5data.get("sample", sample_token)
    sample_lidar_token = sample["data"]["LIDAR_TOP"]
    lidar_data = level5data.get("sample_data", sample_lidar_token)
    lidar_filepath = level5data.get_sample_data_path(sample_lidar_token)
    ego_pose = level5data.get("ego_pose", lidar_data["ego_pose_token"])
    ego_translation = np.array(ego_pose['translation'])

    global_from_car = transform_matrix(ego_pose['translation'],
                                       Quaternion(ego_pose['rotation']),
                                       inverse=False)

    car_from_voxel = np.linalg.inv(
        create_transformation_matrix_to_voxel_space(bev_shape, voxel_size,
                                                    (0, 0, z_offset)))

    global_from_voxel = np.dot(global_from_car, car_from_voxel)
    sample_boxes = transform_points(sample_boxes, global_from_voxel)

    # We don't know at where the boxes are in the scene on the z-axis (up-down), let's assume all of them are at
    # the same height as the ego vehicle.
    sample_boxes[2, :] = ego_pose["translation"][2]

    # (3, N*4) -> (N, 4, 3)
    sample_boxes = sample_boxes.transpose(1, 0).reshape(-1, 4, 3)
        data_dict[token] = {
            'lidar_fp': None,
            'ego_pose': None,
            'cal_sensor': None,
            'boxes': None,
            'prev_token': prev_token
        }
        token = sample['next']
        continue
    # get the lidar, ego and sensor objects for the token
    lidar_data = l5d.get('sample_data', lidar_token)
    ego_pose = l5d.get('ego_pose', lidar_data['ego_pose_token'])
    cal_sensor = l5d.get('calibrated_sensor',
                         lidar_data['calibrated_sensor_token'])
    car_from_sensor = transform_matrix(cal_sensor['translation'],
                                       Quaternion(cal_sensor['rotation']),
                                       inverse=False)
    lidar_pointcloud.transform(car_from_sensor)
    lidar_points = lidar_pointcloud.points[:3, :]
    boxes = l5d.get_boxes(lidar_token)
    # collect the ground truth boxes
    canv_boxes = move_boxes_to_canvas_space(boxes, ego_pose,
                                            lidar_points)
    boxes_fp = osp.join(box_dir, token + '_boxes.pkl')
    pickle.dump(canv_boxes, open(boxes_fp, 'wb'))
    prev_token = sample['prev']
    data_dict[token] = {
        'lidar_fp': str(lidar_filepath),
        'ego_pose': ego_pose,
        'cal_sensor': cal_sensor,
        'boxes': boxes_fp,
Example #23
0
    def from_file_multisweep(
            cls,
            lyftd,
            sample_rec: Dict,
            chan: str,
            ref_chan: str,
            num_sweeps: int = 26,
            min_distance: float = 1.0) -> Tuple["PointCloud", np.ndarray]:
        """Return a point cloud that aggregates multiple sweeps.
        As every sweep is in a different coordinate frame, we need to map the coordinates to a single reference frame.
        As every sweep has a different timestamp, we need to account for that in the transformations and timestamps.

        Args:
            lyftd: A LyftDataset instance.
            sample_rec: The current sample.
            chan: The sensor channel (e.g. lidar or radar) from which we track back n sweeps to aggregate the point cloud.
            ref_chan: The reference channel of the current sample_rec that the point clouds are mapped to.
            num_sweeps: Number of sweeps to aggregate.
            min_distance: Distance below which points are discarded.

        Returns: (all_pc, all_times). The aggregated point cloud and timestamps.

        """

        # Init
        points = np.zeros((cls.nbr_dims(), 0))
        all_pc = cls(points)
        all_times = np.zeros((1, 0))

        # Get reference pose and timestamp
        ref_sd_token = sample_rec["data"][ref_chan]
        ref_sd_rec = lyftd.get("sample_data", ref_sd_token)
        ref_pose_rec = lyftd.get("ego_pose", ref_sd_rec["ego_pose_token"])
        ref_cs_rec = lyftd.get("calibrated_sensor",
                               ref_sd_rec["calibrated_sensor_token"])
        ref_time = 1e-6 * ref_sd_rec["timestamp"]

        # Homogeneous transform from ego car frame to reference frame
        ref_from_car = transform_matrix(ref_cs_rec["translation"],
                                        Quaternion(ref_cs_rec["rotation"]),
                                        inverse=True)

        # Homogeneous transformation matrix from global to _current_ ego car frame
        car_from_global = transform_matrix(ref_pose_rec["translation"],
                                           Quaternion(
                                               ref_pose_rec["rotation"]),
                                           inverse=True)

        # Aggregate current and previous sweeps.
        sample_data_token = sample_rec["data"][chan]
        current_sd_rec = lyftd.get("sample_data", sample_data_token)
        for _ in range(num_sweeps):
            # Load up the pointcloud.
            current_pc = cls.from_file(lyftd.data_path /
                                       current_sd_rec["filename"])

            # Get past pose.
            current_pose_rec = lyftd.get("ego_pose",
                                         current_sd_rec["ego_pose_token"])
            global_from_car = transform_matrix(
                current_pose_rec["translation"],
                Quaternion(current_pose_rec["rotation"]),
                inverse=False)

            # Homogeneous transformation matrix from sensor coordinate frame to ego car frame.
            current_cs_rec = lyftd.get(
                "calibrated_sensor", current_sd_rec["calibrated_sensor_token"])
            car_from_current = transform_matrix(
                current_cs_rec["translation"],
                Quaternion(current_cs_rec["rotation"]),
                inverse=False)

            # Fuse four transformation matrices into one and perform transform.
            trans_matrix = reduce(np.dot, [
                ref_from_car, car_from_global, global_from_car,
                car_from_current
            ])
            current_pc.transform(trans_matrix)

            # Remove close points and add timevector.
            current_pc.remove_close(min_distance)
            time_lag = ref_time - 1e-6 * current_sd_rec["timestamp"]  # positive difference
            times = time_lag * np.ones((1, current_pc.nbr_points()))
            all_times = np.hstack((all_times, times))

            # Merge with key pc.
            all_pc.points = np.hstack((all_pc.points, current_pc.points))

            # Abort if there are no previous sweeps.
            if current_sd_rec["prev"] == "":
                break
            else:
                current_sd_rec = lyftd.get("sample_data",
                                           current_sd_rec["prev"])

        return all_pc, all_times
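Assuming this method is bound to a LidarPointCloud-style class, as in the Lyft SDK, a hedged usage sketch that aggregates sweeps into the key-frame lidar frame could look like:

# level5data and my_sample_token are assumed to exist in the calling code.
sample_rec = level5data.get("sample", my_sample_token)
all_pc, all_times = LidarPointCloud.from_file_multisweep(
    level5data, sample_rec, chan="LIDAR_TOP", ref_chan="LIDAR_TOP", num_sweeps=10)
print(all_pc.points.shape, all_times.shape)  # (nbr_dims, N) points and (1, N) time lags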
    def prepare_training_data_for_scene(sample_token,
                                        level5data,
                                        output_folder,
                                        bev_shape=bev_shape,
                                        voxel_size=voxel_size,
                                        z_offset=z_offset,
                                        box_scale=box_scale):
        """
        Given a sample token (in a scene), output rasterized input volumes in birds-eye-view perspective.

        """
        if os.path.exists(
                os.path.join(output_folder,
                             "{}_target.png".format(sample_token))):
            print("already processed, skipping")

        else:

            sample = level5data.get("sample", sample_token)

            sample_lidar_token = sample["data"]["LIDAR_TOP"]
            lidar_data = level5data.get("sample_data", sample_lidar_token)
            lidar_filepath = level5data.get_sample_data_path(
                sample_lidar_token)

            ego_pose = level5data.get("ego_pose", lidar_data["ego_pose_token"])
            calibrated_sensor = level5data.get(
                "calibrated_sensor", lidar_data["calibrated_sensor_token"])

            global_from_car = transform_matrix(ego_pose['translation'],
                                               Quaternion(
                                                   ego_pose['rotation']),
                                               inverse=False)

            car_from_sensor = transform_matrix(
                calibrated_sensor['translation'],
                Quaternion(calibrated_sensor['rotation']),
                inverse=False)

            try:
                # generate the input image, i.e. the rasterized bird's-eye view of the lidar point cloud
                lidar_pointcloud = LidarPointCloud.from_file(lidar_filepath)
                lidar_pointcloud.transform(car_from_sensor)
                bev = create_voxel_pointcloud(lidar_pointcloud.points,
                                              bev_shape,
                                              voxel_size=voxel_size,
                                              z_offset=z_offset)
                bev = normalize_voxel_intensities(bev)

                bev_im = np.round(bev * 255).astype(np.uint8)

                cv2.imwrite(
                    os.path.join(output_folder,
                                 "{}_input.png".format(sample_token)), bev_im)

                # generate the target image, i.e. the bird's-eye-view label mask
                boxes = level5data.get_boxes(sample_lidar_token)
                target = np.zeros_like(bev)

                move_boxes_to_car_space(boxes, ego_pose)
                scale_boxes(boxes, box_scale)
                draw_boxes(target,
                           voxel_size,
                           boxes=boxes,
                           classes=classes,
                           z_offset=z_offset)
                target_im = target[:, :, 0]  # take one channel only

                cv2.imwrite(
                    os.path.join(output_folder,
                                 "{}_target.png".format(sample_token)),
                    target_im)

                # generate the semantic map crop around the ego vehicle (an extra input channel to improve performance)
                map_mask = level5data.map[0]["mask"]
                semantic_im = get_semantic_map_around_ego(
                    map_mask, ego_pose, voxel_size[0], target_im.shape)
                semantic_im = np.round(semantic_im * 255).astype(np.uint8)
                cv2.imwrite(
                    os.path.join(output_folder,
                                 "{}_map.png".format(sample_token)),
                    semantic_im)

            except Exception as e:
                print("Failed to load Lidar Pointcloud for {}: {}:".format(
                    sample_token, e))
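create_voxel_pointcloud and normalize_voxel_intensities are helpers from the Lyft reference kit and are not shown here. The normalization step is typically just a clipped division by a fixed maximum intensity; a rough sketch, assuming the reference-kit behaviour:

def normalize_voxel_intensities(bev, max_intensity=16):
    # Scale voxel intensities / hit counts into [0, 1].
    return (bev / max_intensity).clip(0, 1)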
def main():
    # get data
    #level5data = LyftDataset(data_path='../input/', json_path='../input/train_data', verbose=True) # local laptop
    level5data = LyftDataset(data_path='.', json_path='../../input/train_data', verbose=True) # server
    classes = ["car", "motorcycle", "bus", "bicycle", "truck", "pedestrian", "other_vehicle", "animal", "emergency_vehicle"]

    df = pd.read_csv('host_scenes.csv')
    host_count_df = df.groupby("host")['scene_token'].count()
    print(host_count_df)

    sample_token = df.first_sample_token.values[0]
    sample = level5data.get("sample", sample_token)
    sample_lidar_token = sample["data"]["LIDAR_TOP"]
    lidar_data = level5data.get("sample_data", sample_lidar_token)
    lidar_filepath = level5data.get_sample_data_path(sample_lidar_token)

    # car and sensor coords
    ego_pose = level5data.get("ego_pose", lidar_data["ego_pose_token"])
    print("ego_pose: ", ego_pose)
    calibrated_sensor = level5data.get("calibrated_sensor", lidar_data["calibrated_sensor_token"])
    
    # Homogeneous transformation matrix from car frame to world frame.
    global_from_car = transform_matrix(ego_pose['translation'],
                                    Quaternion(ego_pose['rotation']), inverse=False)

    # Homogeneous transformation matrix from sensor coordinate frame to ego car frame
    car_from_sensor = transform_matrix(calibrated_sensor['translation'], Quaternion(calibrated_sensor['rotation']),
                                        inverse=False)
    lidar_pointcloud = LidarPointCloud.from_file(lidar_filepath)    
    lidar_pointcloud.transform(car_from_sensor)

    # Define hyperparameters
    bev_shape = (768, 768, 3)
    voxel_size = (0.2, 0.2, 1.5)
    #bev_shape = (2048, 2048, 3)
    #voxel_size = (0.1, 0.1, 1.5)
    z_offset = -2.0
    box_scale = 0.8

    test_coco_targets_for_sample(sample_token=sample_token, output_folder=OUTPUT_ROOT, level5data=level5data, 
                                 classes=classes, bev_shape=bev_shape, voxel_size=voxel_size, 
                                 z_offset=z_offset, box_scale=box_scale)

    train_data_folder = os.path.join(OUTPUT_ROOT, "coco_768")
    os.makedirs(train_data_folder, exist_ok=True)

    """
    # test on a single scene
    prepare_coco_targets_for_scene(df.first_sample_token.values[50], 
                                   train_data_folder, 
                                   level5data, 
                                   classes,
                                   bev_shape, voxel_size, z_offset, box_scale)

    test_prepare_coco_targets(df.first_sample_token.values[50], 
                              train_data_folder, 
                              level5data, 
                              bev_shape, voxel_size, z_offset)  
    """
    # get for all scenes
    first_samples = df.first_sample_token.values
    
    for sample in first_samples:
        print(sample)
        prepare_coco_targets_for_scene(sample, 
                                       train_data_folder, 
                                       level5data, 
                                       classes,
                                       bev_shape, voxel_size, z_offset, box_scale)
    
    print('Mission accomplished!')
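For completeness, such a script is presumably invoked via a standard entry-point guard (assumed, not shown in the original):

if __name__ == '__main__':
    main()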