def render_kitti(self, render_2d: bool = False) -> None:
    """Renders the annotations in the KITTI dataset from a lidar and a camera view.

    :param render_2d: Whether to render 2d boxes (only works for camera data).
    """
    if render_2d:
        print("Rendering 2d boxes from KITTI format")
    else:
        print("Rendering 3d boxes projected from 3d KITTI format")

    # Load the KITTI dataset.
    kitti = KittiDB(root=self.store_dir)

    # Create output folder.
    render_dir = self.store_dir.joinpath("render")
    if not render_dir.is_dir():
        render_dir.mkdir(parents=True)

    # Render each image.
    tokens = kitti.tokens
    # Currently supports only single thread processing.
    for token in tqdm(tokens):
        for sensor in ["lidar", "camera"]:
            out_path = render_dir.joinpath("{}_{}.png".format(token, sensor))
            kitti.render_sample_data(token, sensor_modality=sensor, out_path=out_path, render_2d=render_2d)
            # Close the windows to avoid a warning of too many open windows.
            plt.close()
def render_kitti(self, render_2d: bool = False, store_dataroot: str = "~/lyft_kitti/train/") -> None:
    """Renders the annotations in the KITTI dataset from a lidar and a camera view.

    Args:
        render_2d: Whether to render 2d boxes (only works for camera data).
        store_dataroot: Directory containing the converted KITTI-style dataset.
    """
    if render_2d:
        print("Rendering 2d boxes from KITTI format")
    else:
        print("Rendering 3d boxes projected from 3d KITTI format")

    # Expand "~" so the default path resolves to the user's home directory.
    self.store_dir = Path(store_dataroot).expanduser()

    # Load the KITTI dataset.
    kitti = KittiDB(root=self.store_dir)

    # Create output folder.
    render_dir = self.store_dir.joinpath("render")
    if not render_dir.is_dir():
        render_dir.mkdir(parents=True)

    # Render each image; only the first few tokens are rendered here.
    tokens = kitti.tokens
    i = 0
    # Currently supports only single thread processing.
    for token in tqdm(tokens):
        for sensor in ["lidar", "camera"]:
            out_path = render_dir.joinpath(f"{token}_{sensor}.png")
            kitti.render_sample_data(token, sensor_modality=sensor, out_path=out_path, render_2d=render_2d)
            # Close the windows to avoid a warning of too many open windows.
            plt.close()
        if i > 10:
            break
        i += 1
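A hypothetical usage sketch for the rendering entry point above, assuming these methods live on a converter class that can be constructed without arguments (the class name KittiConverter and its no-argument constructor are assumptions, not taken from the source):

    # Hypothetical invocation; adapt the class name and constructor to the actual code base.
    converter = KittiConverter()
    converter.render_kitti(render_2d=True, store_dataroot="~/lyft_kitti/train/")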
def process_token_to_kitti(self, sample_token: str) -> None:
    """Converts a single Lyft sample into KITTI-style image, lidar, calibration, and label files."""
    # Get sample data.
    sample = self.lyft_ds.get("sample", sample_token)
    sample_annotation_tokens = sample["anns"]

    lidar_token = sample["data"][self.lidar_name]
    sd_record_lid = self.lyft_ds.get("sample_data", lidar_token)
    cs_record_lid = self.lyft_ds.get("calibrated_sensor", sd_record_lid["calibrated_sensor_token"])

    for cam_name in self.cams_to_see:
        cam_front_token = sample["data"][cam_name]
        if self.get_all_detections:
            token_to_write = sample_token
        else:
            token_to_write = cam_front_token

        # Retrieve sensor records.
        sd_record_cam = self.lyft_ds.get("sample_data", cam_front_token)
        cs_record_cam = self.lyft_ds.get("calibrated_sensor", sd_record_cam["calibrated_sensor_token"])
        cam_height = sd_record_cam["height"]
        cam_width = sd_record_cam["width"]
        imsize = (cam_width, cam_height)

        # Combine transformations and convert to KITTI format.
        # Note: cam uses same conventions in KITTI and nuScenes.
        lid_to_ego = transform_matrix(
            cs_record_lid["translation"], Quaternion(cs_record_lid["rotation"]), inverse=False)
        ego_to_cam = transform_matrix(
            cs_record_cam["translation"], Quaternion(cs_record_cam["rotation"]), inverse=True)
        velo_to_cam = np.dot(ego_to_cam, lid_to_ego)

        # Convert from KITTI to nuScenes LIDAR coordinates, where we apply velo_to_cam.
        velo_to_cam_kitti = np.dot(velo_to_cam, self.kitti_to_nu_lidar.transformation_matrix)

        # Currently not used.
        imu_to_velo_kitti = np.zeros((3, 4))  # Dummy values.
        r0_rect = Quaternion(axis=[1, 0, 0], angle=0)  # Dummy values.

        # Projection matrix.
        p_left_kitti = np.zeros((3, 4))
        # Cameras are always rectified.
        p_left_kitti[:3, :3] = cs_record_cam["camera_intrinsic"]

        # Create KITTI style transforms.
        velo_to_cam_rot = velo_to_cam_kitti[:3, :3]
        velo_to_cam_trans = velo_to_cam_kitti[:3, 3]

        # Check that the rotation has the same format as in KITTI.
        if self.lyft_ds.get("sensor", cs_record_cam["sensor_token"])["channel"] == "CAM_FRONT":
            expected_kitti_velo_to_cam_rot = np.array([[0, -1, 0], [0, 0, -1], [1, 0, 0]])
            assert (velo_to_cam_rot.round(0) == expected_kitti_velo_to_cam_rot).all(), velo_to_cam_rot.round(0)
            assert (velo_to_cam_trans[1:3] < 0).all()

        # Retrieve the token from the lidar.
        # Note that this may be confusing as the filename of the camera will
        # include the timestamp of the lidar, not the camera.
        filename_cam_full = sd_record_cam["filename"]
        filename_lid_full = sd_record_lid["filename"]

        # Convert image (jpg to png).
        src_im_path = self.lyft_ds.data_path.joinpath(filename_cam_full)
        dst_im_path = self.image_folder.joinpath(f"{token_to_write}.png")
        if not dst_im_path.exists():
            im = Image.open(src_im_path)
            im.save(dst_im_path, "PNG")

        # Convert lidar.
        # Note that we are only using a single sweep, instead of the commonly used n sweeps.
        src_lid_path = self.lyft_ds.data_path.joinpath(filename_lid_full)
        dst_lid_path = self.lidar_folder.joinpath(f"{token_to_write}.bin")
        pcl = LidarPointCloud.from_file(Path(src_lid_path))
        # In KITTI lidar frame.
        pcl.rotate(self.kitti_to_nu_lidar_inv.rotation_matrix)
        # Write the raw float32 points; open in binary mode so tofile() does not corrupt the data.
        with open(dst_lid_path, "wb") as lid_file:
            pcl.points.T.tofile(lid_file)

        # Add to tokens.
        # tokens.append(token_to_write)

        # Create calibration file.
        kitti_transforms = dict()
        kitti_transforms["P0"] = np.zeros((3, 4))  # Dummy values.
        kitti_transforms["P1"] = np.zeros((3, 4))  # Dummy values.
        kitti_transforms["P2"] = p_left_kitti  # Left camera transform.
        kitti_transforms["P3"] = np.zeros((3, 4))  # Dummy values.
        # Cameras are already rectified.
        kitti_transforms["R0_rect"] = r0_rect.rotation_matrix
        kitti_transforms["Tr_velo_to_cam"] = np.hstack((velo_to_cam_rot, velo_to_cam_trans.reshape(3, 1)))
        kitti_transforms["Tr_imu_to_velo"] = imu_to_velo_kitti

        calib_path = self.calib_folder.joinpath(f"{token_to_write}.txt")
        with open(calib_path, "w") as calib_file:
            for key, val in kitti_transforms.items():
                val = val.flatten()
                val_str = "%.12e" % val[0]
                for v in val[1:]:
                    val_str += " %.12e" % v
                calib_file.write("%s: %s\n" % (key, val_str))

        # Write label file.
        label_path = self.label_folder.joinpath(f"{token_to_write}.txt")
        if label_path.exists():
            print("Skipping existing file: %s" % label_path)
            continue

        with open(label_path, "w") as label_file:
            for sample_annotation_token in sample_annotation_tokens:
                sample_annotation = self.lyft_ds.get("sample_annotation", sample_annotation_token)

                # Get box in LIDAR frame.
                _, box_lidar_nusc, _ = self.lyft_ds.get_sample_data(
                    lidar_token, box_vis_level=BoxVisibility.NONE,
                    selected_anntokens=[sample_annotation_token])
                box_lidar_nusc = box_lidar_nusc[0]

                # Truncated: Set all objects to 0 which means untruncated.
                truncated = 0.0

                # Occluded: Set all objects to full visibility as this information is
                # not available in nuScenes.
                occluded = 0

                detection_name = sample_annotation["category_name"]

                # Convert from nuScenes to KITTI box format.
                box_cam_kitti = KittiDB.box_nuscenes_to_kitti(
                    box_lidar_nusc, Quaternion(matrix=velo_to_cam_rot), velo_to_cam_trans, r0_rect)

                # Project 3d box to 2d box in image, ignore box if it does not fall inside.
                bbox_2d = KittiDB.project_kitti_box_to_image(box_cam_kitti, p_left_kitti, imsize=imsize)
                if bbox_2d is None and not self.get_all_detections:
                    continue
                elif bbox_2d is None and self.get_all_detections:
                    # Default KITTI bbox.
                    bbox_2d = (-1.0, -1.0, -1.0, -1.0)

                # Set dummy score so we can use this file as result.
                box_cam_kitti.score = 0

                # Convert box to output string format.
                output = KittiDB.box_to_string(
                    name=detection_name,
                    box=box_cam_kitti,
                    bbox_2d=bbox_2d,
                    truncation=truncated,
                    occlusion=occluded,
                )

                # Write to disk.
                label_file.write(output + "\n")
def process_token_to_kitti(self, sample_token: str) -> None:
    """Converts a single Lyft sample into KITTI-style image, lidar, calibration, and label files."""
    rotate = False
    # Get sample data.
    sample = self.lyft_ds.get("sample", sample_token)
    sample_annotation_tokens = sample["anns"]

    # Optional normalization rotation (only applied when `rotate` is True).
    norm = np.array([[0, 1, 0], [-1, 0, 0], [0, 0, 1]])
    norm_4 = np.eye(4)
    norm_4[:3, :3] = norm

    lidar_token = sample["data"][self.lidar_name]
    sd_record_lid = self.lyft_ds.get("sample_data", lidar_token)
    cs_record_lid = self.lyft_ds.get("calibrated_sensor", sd_record_lid["calibrated_sensor_token"])
    ego_record_lid = self.lyft_ds.get("ego_pose", sd_record_lid["ego_pose_token"])

    for idx, cam_name in enumerate(self.cams_to_see):
        cam_front_token = sample["data"][cam_name]
        if self.get_all_detections:
            token_to_write = sample_token
        else:
            token_to_write = cam_front_token
        sample_name = "%06d" % self.tokens.index(token_to_write)

        # Retrieve sensor records.
        sd_record_cam = self.lyft_ds.get("sample_data", cam_front_token)
        cs_record_cam = self.lyft_ds.get("calibrated_sensor", sd_record_cam["calibrated_sensor_token"])
        ego_record_cam = self.lyft_ds.get("ego_pose", sd_record_cam["ego_pose_token"])
        cam_height = sd_record_cam["height"]
        cam_width = sd_record_cam["width"]
        imsize = (cam_width, cam_height)

        # Combine transformations and convert to KITTI format.
        # Note: cam uses same conventions in KITTI and nuScenes.
        lid_to_ego = transform_matrix(
            cs_record_lid["translation"], Quaternion(cs_record_lid["rotation"]), inverse=False)
        lid_ego_to_world = transform_matrix(
            ego_record_lid["translation"], Quaternion(ego_record_lid["rotation"]), inverse=False)
        world_to_cam_ego = transform_matrix(
            ego_record_cam["translation"], Quaternion(ego_record_cam["rotation"]), inverse=True)
        ego_to_cam = transform_matrix(
            cs_record_cam["translation"], Quaternion(cs_record_cam["rotation"]), inverse=True)
        # Go lidar -> ego (at lidar time) -> world -> ego (at camera time) -> camera.
        velo_to_cam = np.dot(ego_to_cam, np.dot(world_to_cam_ego, np.dot(lid_ego_to_world, lid_to_ego)))

        # Convert from KITTI to nuScenes LIDAR coordinates, where we apply velo_to_cam.
        velo_to_cam_kitti = np.dot(velo_to_cam, self.kitti_to_nu_lidar.transformation_matrix)

        # Currently not used.
        imu_to_velo_kitti = np.zeros((3, 4))  # Dummy values.
        r0_rect = Quaternion(axis=[1, 0, 0], angle=0)  # Dummy values.

        # Projection matrix.
        p_left_kitti = np.zeros((3, 4))
        # Cameras are always rectified.
        p_left_kitti[:3, :3] = cs_record_cam["camera_intrinsic"]

        # Create KITTI style transforms.
        velo_to_cam_rot = velo_to_cam_kitti[:3, :3]
        velo_to_cam_trans = velo_to_cam_kitti[:3, 3]

        # Check that the rotation has the same format as in KITTI.
        if self.lyft_ds.get("sensor", cs_record_cam["sensor_token"])["channel"] == "CAM_FRONT":
            expected_kitti_velo_to_cam_rot = np.array([[0, -1, 0], [0, 0, -1], [1, 0, 0]])
            assert (velo_to_cam_rot.round(0) == expected_kitti_velo_to_cam_rot).all(), velo_to_cam_rot.round(0)

        # Retrieve the token from the lidar.
        # Note that this may be confusing as the filename of the camera will
        # include the timestamp of the lidar, not the camera.
        filename_cam_full = sd_record_cam["filename"]
        filename_lid_full = sd_record_lid["filename"]

        # Convert image (jpg to png).
        src_im_path = self.lyft_ds.data_path.joinpath(filename_cam_full)
        dst_im_path = self.image_folder.joinpath(f"{sample_name}.png")
        if not dst_im_path.exists():
            im = Image.open(src_im_path)
            im.save(dst_im_path, "PNG")

        # Convert lidar.
        # Note that we are only using a single sweep, instead of the commonly used n sweeps.
        src_lid_path = self.lyft_ds.data_path.joinpath(filename_lid_full)
        dst_lid_path = self.lidar_folder.joinpath(f"{sample_name}.bin")
        pcl = LidarPointCloud.from_file(Path(src_lid_path))
        # In KITTI lidar frame.
        pcl.rotate(self.kitti_to_nu_lidar_inv.rotation_matrix)
        if rotate:
            # Apply the optional normalization rotation to the points (inactive by default).
            pcl.points = np.dot(norm_4, pcl.points).astype(np.float32)
        # Write the raw float32 points; open in binary mode so tofile() does not corrupt the data.
        with open(dst_lid_path, "wb") as lid_file:
            pcl.points.T.tofile(lid_file)

        # Add to tokens.
        # tokens.append(token_to_write)

        # Create calibration file.
        kitti_transforms = dict()
        kitti_transforms["P0"] = np.zeros((3, 4))  # Dummy values.
        kitti_transforms["P1"] = np.zeros((3, 4))  # Dummy values.
        kitti_transforms["P2"] = p_left_kitti  # Left camera transform.
        kitti_transforms["P3"] = np.zeros((3, 4))  # Dummy values.
        # Cameras are already rectified.
        kitti_transforms["R0_rect"] = r0_rect.rotation_matrix
        if rotate:
            velo_to_cam_rot = np.dot(velo_to_cam_rot, norm.T)
        kitti_transforms["Tr_velo_to_cam"] = np.hstack((velo_to_cam_rot, velo_to_cam_trans.reshape(3, 1)))
        kitti_transforms["Tr_imu_to_velo"] = imu_to_velo_kitti

        calib_path = self.calib_folder.joinpath(f"{sample_name}.txt")
        with open(calib_path, "w") as calib_file:
            for key, val in kitti_transforms.items():
                val = val.flatten()
                val_str = "%.12e" % val[0]
                for v in val[1:]:
                    val_str += " %.12e" % v
                calib_file.write("%s: %s\n" % (key, val_str))

        # Write label file.
        label_path = self.label_folder.joinpath(f"{sample_name}.txt")
        if label_path.exists():
            # print("Skipping existing file: %s" % label_path)
            continue

        objects = []
        for sample_annotation_token in sample_annotation_tokens:
            sample_annotation = self.lyft_ds.get("sample_annotation", sample_annotation_token)

            # Get box in LIDAR frame.
            _, box_lidar_nusc, _ = self.lyft_ds.get_sample_data(
                lidar_token, box_vis_level=BoxVisibility.NONE,
                selected_anntokens=[sample_annotation_token])
            box_lidar_nusc = box_lidar_nusc[0]

            obj = dict()

            # Occluded: Set all objects to full visibility as this information is
            # not available in nuScenes.
            obj["occluded"] = 0

            obj["detection_name"] = sample_annotation["category_name"]
            if obj["detection_name"] is None or obj["detection_name"] not in CLASS_MAP.keys():
                continue
            obj["detection_name"] = CLASS_MAP[obj["detection_name"]]

            # Convert from nuScenes to KITTI box format.
            obj["box_cam_kitti"] = KittiDB.box_nuscenes_to_kitti(
                box_lidar_nusc, Quaternion(matrix=velo_to_cam_rot), velo_to_cam_trans, r0_rect)

            # Project 3d box to 2d box in image, ignore box if it does not fall inside.
            bbox_2d = project_to_2d(obj["box_cam_kitti"], p_left_kitti, cam_height, cam_width)
            if bbox_2d is None:
                continue
            obj["bbox_2d"] = bbox_2d["bbox"]
            obj["truncated"] = bbox_2d["truncated"]

            # Set dummy score so we can use this file as result.
            obj["box_cam_kitti"].score = 0

            # Derive the global yaw (rot_y) and the observation angle (alpha) in the camera frame.
            v = np.dot(obj["box_cam_kitti"].rotation_matrix, np.array([1, 0, 0]))
            rot_y = -np.arctan2(v[2], v[0])
            obj["alpha"] = -np.arctan2(obj["box_cam_kitti"].center[0], obj["box_cam_kitti"].center[2]) + rot_y
            obj["depth"] = np.linalg.norm(np.array(obj["box_cam_kitti"].center[:3]))
            objects.append(obj)

        objects = postprocessing(objects, cam_height, cam_width)

        with open(label_path, "w") as label_file:
            for obj in objects:
                # Convert box to output string format.
                output = box_to_string(
                    name=obj["detection_name"],
                    box=obj["box_cam_kitti"],
                    bbox_2d=obj["bbox_2d"],
                    truncation=obj["truncated"],
                    occlusion=obj["occluded"],
                    alpha=obj["alpha"],
                )
                label_file.write(output + "\n")

        with open(self.split_file, "a") as f:
            f.write(sample_name + "\n")
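For reference, the alpha written to the label file above follows the usual KITTI relation between the observation angle and the global yaw, alpha = rot_y - arctan2(x, z) for a box center (x, y, z) in the camera frame. A tiny worked example with made-up numbers:

    import numpy as np

    center = np.array([4.0, 1.0, 20.0])  # camera frame: x right, y down, z forward (made-up values)
    rot_y = 0.3                           # made-up yaw of the box around the camera y-axis
    alpha = -np.arctan2(center[0], center[2]) + rot_y
    print(round(alpha, 3))                # 0.103 rad for these values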