def select_annotation_boxes(sample_token, lyftd: LyftDataset,
                            box_vis_level: BoxVisibility = BoxVisibility.ALL,
                            camera_type=['CAM_FRONT', 'CAM_BACK', 'CAM_FRONT_LEFT', 'CAM_FRONT_RIGHT',
                                         'CAM_BACK_RIGHT', 'CAM_BACK_LEFT']) -> (str, str, Box):
    """
    Select annotations that are visible in a camera image, as defined by box_vis_level.

    :param sample_token: token of the sample whose annotations are selected
    :param lyftd: LyftDataset instance
    :param box_vis_level: BoxVisibility.ALL or BoxVisibility.ANY
    :param camera_type: a list of camera channels that the annotations should be selected from
    :return: yields (sample_token, cam_token, Box), with the Box in world coordinates
    """
    sample_record = lyftd.get('sample', sample_token)

    cams = [key for key in sample_record["data"].keys() if "CAM" in key]
    cams = [cam for cam in cams if cam in camera_type]
    for ann_token in sample_record['anns']:
        for cam in cams:
            cam_token = sample_record["data"][cam]
            _, boxes_in_sensor_coord, cam_intrinsic = lyftd.get_sample_data(
                cam_token, box_vis_level=box_vis_level, selected_anntokens=[ann_token]
            )
            if len(boxes_in_sensor_coord) > 0:
                assert len(boxes_in_sensor_coord) == 1
                box_in_world_coord = lyftd.get_box(boxes_in_sensor_coord[0].token)
                yield sample_token, cam_token, box_in_world_coord
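

# Minimal usage sketch (illustrative, not part of the original code): gather the world-frame boxes
# of one sample that are fully visible in any of the front cameras. Assumes `lyftd` is an
# initialized LyftDataset and `some_sample_token` is a valid sample token.
def front_camera_boxes(lyftd: LyftDataset, some_sample_token: str):
    boxes = []
    for _, cam_token, box in select_annotation_boxes(
            some_sample_token, lyftd,
            box_vis_level=BoxVisibility.ALL,
            camera_type=['CAM_FRONT', 'CAM_FRONT_LEFT', 'CAM_FRONT_RIGHT']):
        channel = lyftd.get('sample_data', cam_token)['channel']
        boxes.append((channel, box))
    return boxes
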
def select_annotation_boxes(sample_token, lyftd: LyftDataset,
                            box_vis_level: BoxVisibility = BoxVisibility.ALL,
                            camera_type=['CAM_FRONT', 'CAM_BACK', 'CAM_FRONT_LEFT', 'CAM_FRONT_RIGHT',
                                         'CAM_BACK_RIGHT', 'CAM_BACK_LEFT']):
    """
    Select annotations that are visible in a camera image, as defined by box_vis_level.

    :param sample_token: token of the sample whose annotations are selected
    :param lyftd: LyftDataset instance
    :param box_vis_level: BoxVisibility.ALL or BoxVisibility.ANY
    :param camera_type: a list of camera channels that the annotations should be selected from
    :return: yields (image_filepath, cam_token, corner_list, boxes_in_sensor_coord, img_width, img_height)
    """
    sample_record = lyftd.get('sample', sample_token)

    cams = [key for key in sample_record["data"].keys() if "CAM" in key]
    cams = [cam for cam in cams if cam in camera_type]
    for cam in cams:
        # This step selects all the annotations in a camera image that match box_vis_level.
        cam_token = sample_record["data"][cam]
        image_filepath, boxes_in_sensor_coord, cam_intrinsic = lyftd.get_sample_data(
            cam_token, box_vis_level=box_vis_level, selected_anntokens=sample_record['anns']
        )

        sd_record = lyftd.get('sample_data', cam_token)
        img_width = sd_record['width']    # typically 1920
        img_height = sd_record['height']  # typically 1080

        CORNER_NUMBER = 4
        corner_list = np.empty((len(boxes_in_sensor_coord), CORNER_NUMBER))
        for idx, box_in_sensor_coord in enumerate(boxes_in_sensor_coord):
            # For perspective projection onto the image plane, normalization must be set to True.
            box_corners_on_image = view_points(box_in_sensor_coord.corners(), view=cam_intrinsic, normalize=True)
            corners_2d = get_2d_corners_from_projected_box_coordinates(box_corners_on_image)
            corner_list[idx, :] = corners_2d

        yield image_filepath, cam_token, corner_list, boxes_in_sensor_coord, img_width, img_height
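

# Minimal usage sketch (illustrative): print the projected 2D corners for every camera image of a
# sample. The (xmin, ymin, xmax, ymax) layout of each corner_list row is an assumption about
# get_2d_corners_from_projected_box_coordinates, which is defined elsewhere in the original code.
def print_projected_corners(lyftd: LyftDataset, some_sample_token: str):
    for image_filepath, cam_token, corner_list, boxes, img_width, img_height in \
            select_annotation_boxes(some_sample_token, lyftd, box_vis_level=BoxVisibility.ALL):
        print(f"{image_filepath}: {len(boxes)} boxes in a {img_width}x{img_height} image")
        for (xmin, ymin, xmax, ymax), box in zip(corner_list, boxes):
            print(f"  {box.name}: ({xmin:.1f}, {ymin:.1f}) -> ({xmax:.1f}, {ymax:.1f})")
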
class KittiConverter:
    def __init__(self, store_dir: str = "~/lyft_kitti/train/"):
        """
        Args:
            store_dir: Where to write the KITTI-style annotations.
        """
        self.store_dir = Path(store_dir).expanduser()

        # Create store_dir.
        if not self.store_dir.is_dir():
            self.store_dir.mkdir(parents=True)

    def nuscenes_gt_to_kitti(
        self,
        lyft_dataroot: str,
        table_folder: str,
        store_dataroot: str,
        lidar_name: str = "LIDAR_TOP",
        get_all_detections: bool = False,
        parallel_n_jobs: int = 8,
        samples_count: Optional[int] = None,
    ) -> None:
        """Converts nuScenes GT formatted annotations to KITTI format.

        Args:
            lyft_dataroot: Root folder of the Lyft dataset.
            table_folder: Folder with the dataset tables (json files).
            store_dataroot: Folder to write the KITTI-format output to.
            lidar_name: Name of the lidar sensor. Only one lidar is allowed at this moment.
            get_all_detections: If True, will write all bboxes in PointCloud and use only FrontCamera.
            parallel_n_jobs: Number of threads used for parallel processing.
            samples_count: Number of samples to convert.
        """
        self.lyft_dataroot = lyft_dataroot
        self.table_folder = table_folder
        self.lidar_name = lidar_name
        self.get_all_detections = get_all_detections
        self.samples_count = samples_count
        self.parallel_n_jobs = parallel_n_jobs
        self.store_dir = Path(store_dataroot).expanduser()
        if not self.store_dir.is_dir():
            self.store_dir.mkdir(parents=True)

        # Select subset of the data to look at.
        self.lyft_ds = LyftDataset(self.lyft_dataroot, self.table_folder)

        self.kitti_to_nu_lidar = Quaternion(axis=(0, 0, 1), angle=np.pi)
        self.kitti_to_nu_lidar_inv = self.kitti_to_nu_lidar.inverse

        # Get assignment of scenes to splits.
        split_logs = [
            self.lyft_ds.get("log", scene["log_token"])["logfile"]
            for scene in self.lyft_ds.scene
        ]
        if self.get_all_detections:
            self.cams_to_see = ["CAM_FRONT"]
        else:
            self.cams_to_see = [
                "CAM_FRONT",
                "CAM_FRONT_LEFT",
                "CAM_FRONT_RIGHT",
                "CAM_BACK",
                "CAM_BACK_LEFT",
                "CAM_BACK_RIGHT",
            ]

        # Create output folders.
        self.label_folder = self.store_dir.joinpath("label_2")
        self.calib_folder = self.store_dir.joinpath("calib")
        self.image_folder = self.store_dir.joinpath("image_2")
        self.lidar_folder = self.store_dir.joinpath("velodyne")
        for folder in [self.label_folder, self.calib_folder, self.image_folder, self.lidar_folder]:
            if not folder.is_dir():
                folder.mkdir(parents=True)

        # Use only the samples from the current split.
        sample_tokens = self._split_to_samples(split_logs)
        if self.samples_count is not None:
            sample_tokens = sample_tokens[:self.samples_count]

        with parallel_backend("threading", n_jobs=self.parallel_n_jobs):
            Parallel()(delayed(self.process_token_to_kitti)(sample_token)
                       for sample_token in tqdm(sample_tokens))

    def process_token_to_kitti(self, sample_token: str) -> None:
        # Get sample data.
        sample = self.lyft_ds.get("sample", sample_token)
        sample_annotation_tokens = sample["anns"]

        lidar_token = sample["data"][self.lidar_name]
        sd_record_lid = self.lyft_ds.get("sample_data", lidar_token)
        cs_record_lid = self.lyft_ds.get("calibrated_sensor", sd_record_lid["calibrated_sensor_token"])

        for cam_name in self.cams_to_see:
            cam_front_token = sample["data"][cam_name]
            if self.get_all_detections:
                token_to_write = sample_token
            else:
                token_to_write = cam_front_token

            # Retrieve sensor records.
            sd_record_cam = self.lyft_ds.get("sample_data", cam_front_token)
            cs_record_cam = self.lyft_ds.get("calibrated_sensor", sd_record_cam["calibrated_sensor_token"])
            cam_height = sd_record_cam["height"]
            cam_width = sd_record_cam["width"]
            imsize = (cam_width, cam_height)

            # Combine transformations and convert to KITTI format.
            # Note: cam uses same conventions in KITTI and nuScenes.
            lid_to_ego = transform_matrix(
                cs_record_lid["translation"], Quaternion(cs_record_lid["rotation"]), inverse=False
            )
            ego_to_cam = transform_matrix(
                cs_record_cam["translation"], Quaternion(cs_record_cam["rotation"]), inverse=True
            )
            velo_to_cam = np.dot(ego_to_cam, lid_to_ego)

            # Convert from KITTI to nuScenes LIDAR coordinates, where we apply velo_to_cam.
            velo_to_cam_kitti = np.dot(velo_to_cam, self.kitti_to_nu_lidar.transformation_matrix)

            # Currently not used.
            imu_to_velo_kitti = np.zeros((3, 4))  # Dummy values.
            r0_rect = Quaternion(axis=[1, 0, 0], angle=0)  # Dummy values.

            # Projection matrix.
            p_left_kitti = np.zeros((3, 4))
            # Cameras are always rectified.
            p_left_kitti[:3, :3] = cs_record_cam["camera_intrinsic"]

            # Create KITTI style transforms.
            velo_to_cam_rot = velo_to_cam_kitti[:3, :3]
            velo_to_cam_trans = velo_to_cam_kitti[:3, 3]

            # Check that the rotation has the same format as in KITTI.
            if self.lyft_ds.get("sensor", cs_record_cam["sensor_token"])["channel"] == "CAM_FRONT":
                expected_kitti_velo_to_cam_rot = np.array([[0, -1, 0], [0, 0, -1], [1, 0, 0]])
                assert (velo_to_cam_rot.round(0) == expected_kitti_velo_to_cam_rot).all(), velo_to_cam_rot.round(0)
                assert (velo_to_cam_trans[1:3] < 0).all()

            # Retrieve the token from the lidar.
            # Note that this may be confusing as the filename of the camera will
            # include the timestamp of the lidar, not the camera.
            filename_cam_full = sd_record_cam["filename"]
            filename_lid_full = sd_record_lid["filename"]

            # Convert image (jpg to png).
            src_im_path = self.lyft_ds.data_path.joinpath(filename_cam_full)
            dst_im_path = self.image_folder.joinpath(f"{token_to_write}.png")
            if not dst_im_path.exists():
                im = Image.open(src_im_path)
                im.save(dst_im_path, "PNG")

            # Convert lidar.
            # Note that we are only using a single sweep, instead of the commonly used n sweeps.
            src_lid_path = self.lyft_ds.data_path.joinpath(filename_lid_full)
            dst_lid_path = self.lidar_folder.joinpath(f"{token_to_write}.bin")
            pcl = LidarPointCloud.from_file(Path(src_lid_path))
            # In KITTI lidar frame.
            pcl.rotate(self.kitti_to_nu_lidar_inv.rotation_matrix)
            with open(dst_lid_path, "w") as lid_file:
                pcl.points.T.tofile(lid_file)

            # Add to tokens.
            # tokens.append(token_to_write)

            # Create calibration file.
            kitti_transforms = dict()
            kitti_transforms["P0"] = np.zeros((3, 4))  # Dummy values.
            kitti_transforms["P1"] = np.zeros((3, 4))  # Dummy values.
            kitti_transforms["P2"] = p_left_kitti  # Left camera transform.
            kitti_transforms["P3"] = np.zeros((3, 4))  # Dummy values.
            # Cameras are already rectified.
            kitti_transforms["R0_rect"] = r0_rect.rotation_matrix
            kitti_transforms["Tr_velo_to_cam"] = np.hstack((velo_to_cam_rot, velo_to_cam_trans.reshape(3, 1)))
            kitti_transforms["Tr_imu_to_velo"] = imu_to_velo_kitti
            calib_path = self.calib_folder.joinpath(f"{token_to_write}.txt")
            with open(calib_path, "w") as calib_file:
                for (key, val) in kitti_transforms.items():
                    val = val.flatten()
                    val_str = "%.12e" % val[0]
                    for v in val[1:]:
                        val_str += " %.12e" % v
                    calib_file.write("%s: %s\n" % (key, val_str))

            # Write label file.
            label_path = self.label_folder.joinpath(f"{token_to_write}.txt")
            if label_path.exists():
                print("Skipping existing file: %s" % label_path)
                continue

            with open(label_path, "w") as label_file:
                for sample_annotation_token in sample_annotation_tokens:
                    sample_annotation = self.lyft_ds.get("sample_annotation", sample_annotation_token)

                    # Get box in LIDAR frame.
                    _, box_lidar_nusc, _ = self.lyft_ds.get_sample_data(
                        lidar_token, box_vis_level=BoxVisibility.NONE,
                        selected_anntokens=[sample_annotation_token]
                    )
                    box_lidar_nusc = box_lidar_nusc[0]

                    # Truncated: Set all objects to 0 which means untruncated.
                    truncated = 0.0

                    # Occluded: Set all objects to full visibility as this information is
                    # not available in nuScenes.
                    occluded = 0

                    detection_name = sample_annotation["category_name"]

                    # Convert from nuScenes to KITTI box format.
                    box_cam_kitti = KittiDB.box_nuscenes_to_kitti(
                        box_lidar_nusc, Quaternion(matrix=velo_to_cam_rot), velo_to_cam_trans, r0_rect
                    )

                    # Project 3d box to 2d box in image, ignore box if it does not fall inside.
                    bbox_2d = KittiDB.project_kitti_box_to_image(box_cam_kitti, p_left_kitti, imsize=imsize)
                    if bbox_2d is None and not self.get_all_detections:
                        continue
                    elif bbox_2d is None and self.get_all_detections:
                        # Default KITTI bbox.
                        bbox_2d = (-1.0, -1.0, -1.0, -1.0)

                    # Set dummy score so we can use this file as result.
                    box_cam_kitti.score = 0

                    # Convert box to output string format.
                    output = KittiDB.box_to_string(
                        name=detection_name,
                        box=box_cam_kitti,
                        bbox_2d=bbox_2d,
                        truncation=truncated,
                        occlusion=occluded,
                    )

                    # Write to disk.
                    label_file.write(output + "\n")

    def render_kitti(self, render_2d: bool = False, store_dataroot: str = "~/lyft_kitti/train/"):
        """Renders the annotations in the KITTI dataset from a lidar and a camera view.

        Args:
            render_2d: Whether to render 2d boxes (only works for camera data).
            store_dataroot: Folder with the KITTI-format data to render.
        """
        if render_2d:
            print("Rendering 2d boxes from KITTI format")
        else:
            print("Rendering 3d boxes projected from 3d KITTI format")

        self.store_dir = Path(store_dataroot).expanduser()
        # Load the KITTI dataset.
        kitti = KittiDB(root=self.store_dir)

        # Create output folder.
        render_dir = self.store_dir.joinpath("render")
        if not render_dir.is_dir():
            render_dir.mkdir(parents=True)

        # Render each image.
        tokens = kitti.tokens

        i = 0  # Currently supports only single-thread processing.
        for token in tqdm(tokens):
            for sensor in ["lidar", "camera"]:
                out_path = render_dir.joinpath(f"{token}_{sensor}.png")
                kitti.render_sample_data(token, sensor_modality=sensor, out_path=out_path, render_2d=render_2d)
                # Close the windows to avoid a warning of too many open windows.
                plt.close()
            if i > 10:
                break
            i += 1

    def _split_to_samples(self, split_logs: List[str]) -> List[str]:
        """Convenience function to get the samples in a particular split.

        Args:
            split_logs: A list of the log names in this split.

        Returns:
            The list of samples.
        """
        samples = []
        for sample in self.lyft_ds.sample:
            scene = self.lyft_ds.get("scene", sample["scene_token"])
            log = self.lyft_ds.get("log", scene["log_token"])
            logfile = log["logfile"]
            if logfile in split_logs:
                samples.append(sample["token"])
        return samples
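

# Minimal usage sketch (illustrative, not part of the original class). The paths below are
# placeholders for a local dataset layout; samples_count keeps the run to a quick smoke test.
converter = KittiConverter(store_dir="~/lyft_kitti/train/")
converter.nuscenes_gt_to_kitti(
    lyft_dataroot="/data/lyft/train",         # root folder of the Lyft dataset (placeholder)
    table_folder="/data/lyft/train/data",     # folder with the json tables (placeholder)
    store_dataroot="/data/lyft_kitti/train",  # where the KITTI-style files are written
    samples_count=10,                         # convert only the first 10 samples
)
converter.render_kitti(render_2d=False, store_dataroot="/data/lyft_kitti/train")
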
def render_sample_data(sample_data_token: str,
                       with_anns: bool = True,
                       box_vis_level: BoxVisibility = BoxVisibility.ANY,
                       axes_limit: float = 40,
                       ax: Axes = None,
                       num_sweeps: int = 1,
                       out_path: str = None,
                       underlay_map: bool = False,
                       detections: list = [],
                       categories: list = [],
                       valLyft: LyftDataset = None):
    """Render sample data onto axis.

    Args:
        sample_data_token: Sample_data token.
        with_anns: Whether to draw annotations.
        box_vis_level: If sample_data is an image, this sets required visibility for boxes.
        axes_limit: Axes limit for lidar and radar (measured in meters).
        ax: Axes onto which to render.
        num_sweeps: Number of sweeps for lidar and radar.
        out_path: Optional path to save the rendered figure to disk.
        underlay_map: When set to true, LIDAR data is plotted onto the map. This can be slow.
        detections: List of predicted boxes; the first ten values of each entry are read as
            center (x, y, z), size (w, l, h) and orientation quaternion (w, x, y, z).
        categories: Category name for each detection, used to pick the box color.
        valLyft: LyftDataset instance to read the sample data from.

    Returns:
        The path of the saved figure when out_path is given.
    """
    # Get sensor modality.
    sd_record = valLyft.get("sample_data", sample_data_token)
    sensor_modality = sd_record["sensor_modality"]

    if sensor_modality == "camera":
        # Load boxes and image.
        data_path, _, camera_intrinsic = valLyft.get_sample_data(sample_data_token, box_vis_level=box_vis_level)
        data = Image.open(data_path)

        # Init axes.
        if ax is None:
            _, ax = plt.subplots(1, 1, figsize=(9, 16))

        # Show image.
        ax.imshow(data)

        # categories = ['car', 'pedestrian', 'truck', 'bicycle', 'bus', 'other_vehicle', 'motorcycle',
        #               'emergency_vehicle', 'animal']
        # Show boxes.
        if with_anns:
            boxes = []
            for c1, detection in enumerate(detections):
                cat = categories[c1]
                box = Box(detection[:3], detection[3:6], Quaternion(np.array(detection[6:10])), name=cat)
                boxes.append(box)
            for box in boxes:
                c = np.array(get_color(box.name)) / 255.0
                box.render(ax, view=camera_intrinsic, normalize=True, colors=(c, c, c))

        # Limit visible range.
        ax.set_xlim(0, data.size[0])
        ax.set_ylim(data.size[1], 0)

    ax.axis("off")
    ax.set_title(sd_record["channel"])
    ax.set_aspect("equal")

    if out_path is not None:
        num = len([name for name in os.listdir(out_path)])
        out_path = out_path + str(num).zfill(5) + "_" + sample_data_token + ".png"
        plt.savefig(out_path)
        plt.close("all")
        return out_path
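

# Minimal usage sketch (illustrative, not from the original code): render one dummy prediction onto
# the front camera image of a sample. Since render_sample_data projects the boxes with the camera
# intrinsic directly, the detection values are assumed to already be in the camera frame, and the
# numbers below are placeholders. `out_dir` must end with a path separator (e.g. "./renders/")
# because the function concatenates the file name onto it.
def render_front_cam_predictions(val_lyft: LyftDataset, sample_token: str, out_dir: str):
    sample = val_lyft.get("sample", sample_token)
    cam_token = sample["data"]["CAM_FRONT"]
    detections = [[0.0, 1.0, 15.0, 2.0, 4.5, 1.7, 1.0, 0.0, 0.0, 0.0]]  # one dummy box, ~15 m ahead
    categories = ["car"]
    return render_sample_data(cam_token, detections=detections, categories=categories,
                              valLyft=val_lyft, out_path=out_dir)
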
lyft_data.render_sample_3d_interactive(sample['token'])

# Sample data
sample_data = sample['data']
# print("sample data keys are, ", sample_data.keys())

# One sensor in sample data
# lidar: type 1
lidar_top_channel = 'LIDAR_TOP'
lidar_token = sample_data[lidar_top_channel]
lidar_record = lyft_data.get('sample_data', lidar_token)
# print("lidar record keys: \n", lidar_record.keys())

# start = time.time()
# lyft_data.render_sample_data(lidar_token, out_path=cfg.data.image)
# print("lidar render time: {:.3f}".format(time.time() - start))

data_path, boxes, camera_intrinsic = lyft_data.get_sample_data(lidar_record['token'])
pc = LidarPointCloud.from_file(data_path)
cs_record = lyft_data.get("calibrated_sensor", lidar_record["calibrated_sensor_token"])

# Points live in the point sensor frame
# Transform the points to the ego vehicle frame
pc.rotate(Quaternion(cs_record["rotation"]).rotation_matrix)
pc.translate(np.array(cs_record["translation"]))

# cam: type 2
cam_front_channel = 'CAM_FRONT'
cam_token = sample_data[cam_front_channel]
cam_record = lyft_data.get('sample_data', cam_token)

# start = time.time()
# lyft_data.render_sample_data(cam_token, out_path=cfg.data.image)
# print("image render time: {:.3f}".format(time.time() - start))

cam_cs_record = lyft_data.get("calibrated_sensor", cam_record["calibrated_sensor_token"])
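
# Illustrative continuation (not part of the original snippet): project the ego-frame point cloud
# onto the front camera image. This ignores the small ego motion between the lidar and camera
# timestamps, which is usually acceptable for keyframes of the same sample, and assumes
# view_points from lyft_dataset_sdk.utils.geometry_utils is imported.
pc_in_cam = LidarPointCloud(pc.points.copy())
pc_in_cam.translate(-np.array(cam_cs_record["translation"]))               # ego frame -> camera frame (translation)
pc_in_cam.rotate(Quaternion(cam_cs_record["rotation"]).rotation_matrix.T)  # ego frame -> camera frame (rotation)

depths = pc_in_cam.points[2, :]
points_2d = view_points(pc_in_cam.points[:3, :], np.array(cam_cs_record["camera_intrinsic"]), normalize=True)
in_front = depths > 1.0  # keep only points at least 1 m in front of the camera
print("projected {} of {} lidar points onto {}".format(in_front.sum(), pc.points.shape[1], cam_front_channel))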
def convert(dir_output: Path, lidar_range: int, is_train: bool):
    dir_output.mkdir(exist_ok=True)
    dataset_type = 'train' if is_train else 'test'

    list_token = list()
    list_x = list()
    list_y = list()
    list_z = list()
    list_length = list()
    list_width = list()
    list_height = list()
    list_rotate = list()
    list_name = list()

    if is_train:
        dataset_df = pd.read_csv('../../dataset/train.csv')
    else:
        dataset_df = pd.read_csv('../../dataset/sample_submission.csv')
    tokens = dataset_df['Id'].tolist()
    # tokens = tokens[:200]

    list_jobs = list()
    num_core = cpu_count()
    random.shuffle(tokens)
    num_batch = -(-len(tokens) // num_core)
    img_size = 2048
    for c in range(num_core):
        p = Process(target=process_job,
                    args=(dataset_type, dir_output, tokens[num_batch * c:num_batch * (c + 1)],
                          lidar_range, img_size, c))
        list_jobs.append(p)

    for job in list_jobs:
        job.start()
    for job in list_jobs:
        job.join()

    if is_train:
        level5data = LyftDataset(data_path='../../dataset/{}'.format(dataset_type),
                                 json_path='../../dataset/{}/data/'.format(dataset_type),
                                 verbose=True)
        for token in tqdm(tokens):
            my_sample = level5data.get('sample', token)
            my_sample_data = level5data.get('sample_data', my_sample['data']['LIDAR_TOP'])
            try:
                _, boxes, _ = level5data.get_sample_data(my_sample_data['token'])
            except Exception:
                print('failed to load: {}'.format(token))
                continue
            for box in boxes:
                x = round((box.center[0] + lidar_range) * (img_size - 1) / (lidar_range * 2))
                y = round((box.center[1] + lidar_range) * (img_size - 1) / (lidar_range * 2))
                z = round((box.center[2] + lidar_range) * (img_size - 1) / (lidar_range * 2))
                rotate = -box.orientation.yaw_pitch_roll[0]
                if rotate > math.pi / 2:
                    rotate -= math.pi
                if rotate < -math.pi / 2:
                    rotate += math.pi
                width = box.wlh[0] * (img_size - 1) / (lidar_range * 2)
                length = box.wlh[1] * (img_size - 1) / (lidar_range * 2)
                height = box.wlh[2] * (img_size - 1) / (lidar_range * 2)

                list_token.append(token)
                list_x.append(x)
                list_y.append(y)
                list_z.append(z)
                list_length.append(length)
                list_width.append(width)
                list_height.append(height)
                list_rotate.append(rotate)
                list_name.append(box.name)

        result = pd.DataFrame()
        result['token'] = list_token
        result['x'] = list_x
        result['y'] = list_y
        result['z'] = list_z
        result['length'] = list_length
        result['width'] = list_width
        result['height'] = list_height
        result['rotate'] = list_rotate
        result['name'] = list_name
        result.to_csv(dir_output.parent / 'coordinates.csv', index=False, float_format='%.4f')
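

# Illustrative entry point (not from the original script): the output directory and the 100 m
# lidar range are assumptions; process_job is defined elsewhere in the original code.
if __name__ == '__main__':
    convert(Path('../../dataset/processed_train'), lidar_range=100, is_train=True)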