def pointcloud_color_from_image( nusc: NuScenes, pointsensor_token: str, camera_token: str) -> Tuple[np.array, np.array]: """ Given a point sensor (lidar/radar) token and camera sample_data token, load pointcloud and map it to the image plane, then retrieve the colors of the closest image pixels. :param nusc: NuScenes instance. :param pointsensor_token: Lidar/radar sample_data token. :param camera_token: Camera sample data token. :return (coloring <np.float: 3, n>, mask <np.bool: m>). Returns the colors for n points that reproject into the image out of m total points. The mask indicates which points are selected. """ cam = nusc.get('sample_data', camera_token) pointsensor = nusc.get('sample_data', pointsensor_token) pc = LidarPointCloud.from_file( osp.join(nusc.dataroot, pointsensor['filename'])) im = Image.open(osp.join(nusc.dataroot, cam['filename'])) # Points live in the point sensor frame. So they need to be transformed via global to the image plane. # First step: transform the pointcloud to the ego vehicle frame for the timestamp of the sweep. cs_record = nusc.get('calibrated_sensor', pointsensor['calibrated_sensor_token']) pc.rotate(Quaternion(cs_record['rotation']).rotation_matrix) pc.translate(np.array(cs_record['translation'])) # Second step: transform to the global frame. poserecord = nusc.get('ego_pose', pointsensor['ego_pose_token']) pc.rotate(Quaternion(poserecord['rotation']).rotation_matrix) pc.translate(np.array(poserecord['translation'])) # Third step: transform into the ego vehicle frame for the timestamp of the image. poserecord = nusc.get('ego_pose', cam['ego_pose_token']) pc.translate(-np.array(poserecord['translation'])) pc.rotate(Quaternion(poserecord['rotation']).rotation_matrix.T) # Fourth step: transform into the camera. cs_record = nusc.get('calibrated_sensor', cam['calibrated_sensor_token']) pc.translate(-np.array(cs_record['translation'])) pc.rotate(Quaternion(cs_record['rotation']).rotation_matrix.T) # Fifth step: actually take a "picture" of the point cloud. # Grab the depths (camera frame z axis points away from the camera). depths = pc.points[2, :] # Take the actual picture (matrix multiplication with camera-matrix + renormalization). points = view_points(pc.points[:3, :], np.array(cs_record['camera_intrinsic']), normalize=True) # Remove points that are either outside or behind the camera. Leave a margin of 1 pixel for aesthetic reasons. mask = np.ones(depths.shape[0], dtype=bool) mask = np.logical_and(mask, depths > 0) mask = np.logical_and(mask, points[0, :] > 1) mask = np.logical_and(mask, points[0, :] < im.size[0] - 1) mask = np.logical_and(mask, points[1, :] > 1) mask = np.logical_and(mask, points[1, :] < im.size[1] - 1) points = points[:, mask] # Pick the colors of the points im_data = np.array(im) coloring = np.zeros(points.shape) for i, p in enumerate(points.transpose()): point = p[:2].round().astype(np.int32) coloring[:, i] = im_data[point[1], point[0], :] return coloring, mask
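# --- Usage sketch (added for illustration; not from the original source). ---
# Assumes a local copy of the nuScenes mini split at a placeholder path. It colors
# the lidar points of the first sample using the front camera.
def demo_pointcloud_color(dataroot: str = '/data/sets/nuscenes') -> None:
    nusc = NuScenes(version='v1.0-mini', dataroot=dataroot, verbose=False)
    sample = nusc.sample[0]
    coloring, mask = pointcloud_color_from_image(nusc,
                                                 pointsensor_token=sample['data']['LIDAR_TOP'],
                                                 camera_token=sample['data']['CAM_FRONT'])
    # coloring holds one RGB triplet per point that projects into the image;
    # mask marks those points within the full cloud.
    print('Colored %d of %d lidar points.' % (mask.sum(), mask.shape[0]))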
def map_pointcloud_to_image(self,
                            pointsensor_token: str,
                            camera_token: str,
                            min_dist: float = 1.0) -> Tuple:
    """
    Given a point sensor (lidar/radar) token and camera sample_data token, load point-cloud and map it to
    the image plane. [Adapted from NuScenesExplorer so that the image itself does not need to be loaded.]
    :param pointsensor_token: Lidar/radar sample_data token.
    :param camera_token: Camera sample_data token.
    :param min_dist: Distance from the camera below which points are discarded.
    :return (pointcloud <np.float: 2, n>, coloring <np.float: n>).
    """
    cam = self.nusc.get('sample_data', camera_token)
    pointsensor = self.nusc.get('sample_data', pointsensor_token)
    pcl_path = os.path.join(self.nusc.dataroot, pointsensor['filename'])
    if pointsensor['sensor_modality'] == 'lidar':
        pc = LidarPointCloud.from_file(pcl_path)
    else:
        pc = RadarPointCloud.from_file(pcl_path)

    # Points live in the point sensor frame. So they need to be transformed via global to the image plane.
    # First step: transform the point-cloud to the ego vehicle frame for the timestamp of the sweep.
    cs_record = self.nusc.get('calibrated_sensor', pointsensor['calibrated_sensor_token'])
    pc.rotate(Quaternion(cs_record['rotation']).rotation_matrix)
    pc.translate(np.array(cs_record['translation']))

    # Second step: transform to the global frame.
    poserecord = self.nusc.get('ego_pose', pointsensor['ego_pose_token'])
    pc.rotate(Quaternion(poserecord['rotation']).rotation_matrix)
    pc.translate(np.array(poserecord['translation']))

    # Third step: transform into the ego vehicle frame for the timestamp of the image.
    poserecord = self.nusc.get('ego_pose', cam['ego_pose_token'])
    pc.translate(-np.array(poserecord['translation']))
    pc.rotate(Quaternion(poserecord['rotation']).rotation_matrix.T)

    # Fourth step: transform into the camera.
    cs_record = self.nusc.get('calibrated_sensor', cam['calibrated_sensor_token'])
    pc.translate(-np.array(cs_record['translation']))
    pc.rotate(Quaternion(cs_record['rotation']).rotation_matrix.T)

    # Fifth step: actually take a "picture" of the point cloud.
    # Grab the depths (camera frame z axis points away from the camera).
    depths = pc.points[2, :]

    # Retrieve the color from the depth.
    coloring = depths

    # Take the actual picture (matrix multiplication with camera-matrix + renormalization).
    points = view_points(pc.points[:3, :], np.array(cs_record['camera_intrinsic']), normalize=True)

    # Remove points that are either outside or behind the camera. Leave a margin of 1 pixel for aesthetic reasons.
    # Also make sure points are at least 1m in front of the camera to avoid seeing the lidar points on the camera
    # casing for non-keyframes which are slightly out of sync.
    mask = np.ones(depths.shape[0], dtype=bool)
    mask = np.logical_and(mask, depths > min_dist)
    mask = np.logical_and(mask, points[0, :] > 1)
    mask = np.logical_and(mask, points[0, :] < 1600 - 1)  # Hardcoded image width.
    mask = np.logical_and(mask, points[1, :] > 1)
    mask = np.logical_and(mask, points[1, :] < 900 - 1)  # Hardcoded image height.
    points = points[:, mask]
    coloring = coloring[mask]

    return points, coloring
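# A small companion sketch (added): how the (points, coloring) pair returned above
# is typically visualized. Hypothetical helper, assuming the hardcoded 1600 x 900
# image size used in the mask.
import matplotlib.pyplot as plt

def plot_projected_points(points, coloring, imsize=(1600, 900)) -> None:
    fig, ax = plt.subplots(figsize=(9, 5))
    ax.scatter(points[0, :], points[1, :], c=coloring, s=1)
    # Use image coordinates: origin at the top-left, y pointing down.
    ax.set_xlim(0, imsize[0])
    ax.set_ylim(imsize[1], 0)
    ax.set_aspect('equal')
    plt.show()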
def export_scene_pointcloud(nusc: NuScenes,
                            out_path: str,
                            scene_token: str,
                            channel: str = 'LIDAR_TOP',
                            min_dist: float = 3.0,
                            max_dist: float = 30.0,
                            verbose: bool = True) -> None:
    """
    Export fused point clouds of a scene to a Wavefront OBJ file.
    This point-cloud can be viewed in your favorite 3D rendering tool, e.g. Meshlab or Maya.
    :param nusc: NuScenes instance.
    :param out_path: Output path to write the point-cloud to.
    :param scene_token: Unique identifier of scene to render.
    :param channel: Channel to render.
    :param min_dist: Minimum distance to ego vehicle below which points are dropped.
    :param max_dist: Maximum distance to ego vehicle above which points are dropped.
    :param verbose: Whether to print messages to stdout.
    """
    # Check inputs.
    valid_channels = ['LIDAR_TOP', 'RADAR_FRONT', 'RADAR_FRONT_RIGHT', 'RADAR_FRONT_LEFT', 'RADAR_BACK_LEFT',
                      'RADAR_BACK_RIGHT']
    camera_channels = ['CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT', 'CAM_BACK_LEFT', 'CAM_BACK',
                       'CAM_BACK_RIGHT']
    assert channel in valid_channels, 'Input channel {} not valid.'.format(channel)

    # Get records from DB.
    scene_rec = nusc.get('scene', scene_token)
    start_sample_rec = nusc.get('sample', scene_rec['first_sample_token'])
    sd_rec = nusc.get('sample_data', start_sample_rec['data'][channel])

    # Make list of frames, starting with the first one.
    cur_sd_rec = sd_rec
    sd_tokens = [cur_sd_rec['token']]
    while cur_sd_rec['next'] != '':
        cur_sd_rec = nusc.get('sample_data', cur_sd_rec['next'])
        sd_tokens.append(cur_sd_rec['token'])

    # Write point-cloud.
    with open(out_path, 'w') as f:
        f.write("OBJ File:\n")

        for sd_token in tqdm(sd_tokens):
            lidar_rec = nusc.get('sample_data', sd_token)
            sample_rec = nusc.get('sample', lidar_rec['sample_token'])
            if verbose:
                print('Processing {}'.format(lidar_rec['filename']))
            pc = LidarPointCloud.from_file(osp.join(nusc.dataroot, lidar_rec['filename']))

            # Get point cloud colors.
            coloring = np.ones((3, pc.points.shape[1])) * -1
            for cam_channel in camera_channels:
                camera_token = sample_rec['data'][cam_channel]
                cam_coloring, cam_mask = pointcloud_color_from_image(nusc, sd_token, camera_token)
                coloring[:, cam_mask] = cam_coloring

            # Points live in their own reference frame. So they need to be transformed via global to the image plane.
            # First step: transform the point cloud to the ego vehicle frame for the timestamp of the sweep.
            cs_record = nusc.get('calibrated_sensor', lidar_rec['calibrated_sensor_token'])
            pc.rotate(Quaternion(cs_record['rotation']).rotation_matrix)
            pc.translate(np.array(cs_record['translation']))

            # Optional filter by distance to remove the ego vehicle.
            dists_origin = np.sqrt(np.sum(pc.points[:3, :] ** 2, axis=0))
            keep = np.logical_and(min_dist <= dists_origin, dists_origin <= max_dist)
            pc.points = pc.points[:, keep]
            coloring = coloring[:, keep]
            if verbose:
                print('Distance filter: Keeping %d of %d points...' % (keep.sum(), len(keep)))

            # Second step: transform to the global frame.
            poserecord = nusc.get('ego_pose', lidar_rec['ego_pose_token'])
            pc.rotate(Quaternion(poserecord['rotation']).rotation_matrix)
            pc.translate(np.array(poserecord['translation']))

            # Write points to file.
            for (v, c) in zip(pc.points.transpose(), coloring.transpose()):
                if (c == -1).any():
                    # Ignore points without a color.
                    pass
                else:
                    f.write("v {v[0]:.8f} {v[1]:.8f} {v[2]:.8f} {c[0]:.4f} {c[1]:.4f} {c[2]:.4f}\n"
                            .format(v=v, c=c / 255.0))
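# Example invocation (added; the output path is a placeholder): fuse and color the
# clouds of the first scene and write an OBJ file that Meshlab or Maya can open.
def demo_export_first_scene(nusc, out_path: str = '/tmp/scene0.obj') -> None:
    export_scene_pointcloud(nusc, out_path, nusc.scene[0]['token'],
                            channel='LIDAR_TOP', min_dist=3.0, max_dist=30.0, verbose=False)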
def nuscenes_gt_to_kitti(self) -> None: """ Converts nuScenes GT annotations to KITTI format. """ kitti_to_nu_lidar = Quaternion(axis=(0, 0, 1), angle=np.pi / 2) kitti_to_nu_lidar_inv = kitti_to_nu_lidar.inverse imsize = (1600, 900) token_idx = 0 # Start tokens from 0. # Get assignment of scenes to splits. split_logs = create_splits_logs(self.split, self.nusc) # Create output folders. label_folder = os.path.join(self.nusc_kitti_dir, self.split, 'label_2') calib_folder = os.path.join(self.nusc_kitti_dir, self.split, 'calib') image_folder = os.path.join(self.nusc_kitti_dir, self.split, 'image_2') lidar_folder = os.path.join(self.nusc_kitti_dir, self.split, 'velodyne') for folder in [label_folder, calib_folder, image_folder, lidar_folder]: if not os.path.isdir(folder): os.makedirs(folder) # Use only the samples from the current split. sample_tokens = self._split_to_samples(split_logs) sample_tokens = sample_tokens[:self.image_count] tokens = [] for sample_token in sample_tokens: # Get sample data. sample = self.nusc.get('sample', sample_token) sample_annotation_tokens = sample['anns'] cam_front_token = sample['data'][self.cam_name] lidar_token = sample['data'][self.lidar_name] # Retrieve sensor records. sd_record_cam = self.nusc.get('sample_data', cam_front_token) sd_record_lid = self.nusc.get('sample_data', lidar_token) cs_record_cam = self.nusc.get( 'calibrated_sensor', sd_record_cam['calibrated_sensor_token']) cs_record_lid = self.nusc.get( 'calibrated_sensor', sd_record_lid['calibrated_sensor_token']) # Combine transformations and convert to KITTI format. # Note: cam uses same conventions in KITTI and nuScenes. lid_to_ego = transform_matrix(cs_record_lid['translation'], Quaternion( cs_record_lid['rotation']), inverse=False) ego_to_cam = transform_matrix(cs_record_cam['translation'], Quaternion( cs_record_cam['rotation']), inverse=True) velo_to_cam = np.dot(ego_to_cam, lid_to_ego) # Convert from KITTI to nuScenes LIDAR coordinates, where we apply velo_to_cam. velo_to_cam_kitti = np.dot(velo_to_cam, kitti_to_nu_lidar.transformation_matrix) # Currently not used. imu_to_velo_kitti = np.zeros((3, 4)) # Dummy values. r0_rect = Quaternion(axis=[1, 0, 0], angle=0) # Dummy values. # Projection matrix. p_left_kitti = np.zeros((3, 4)) p_left_kitti[:3, :3] = cs_record_cam[ 'camera_intrinsic'] # Cameras are always rectified. # Create KITTI style transforms. velo_to_cam_rot = velo_to_cam_kitti[:3, :3] velo_to_cam_trans = velo_to_cam_kitti[:3, 3] # Check that the rotation has the same format as in KITTI. assert (velo_to_cam_rot.round(0) == np.array([[0, -1, 0], [0, 0, -1], [1, 0, 0]])).all() assert (velo_to_cam_trans[1:3] < 0).all() # Retrieve the token from the lidar. # Note that this may be confusing as the filename of the camera will include the timestamp of the lidar, # not the camera. filename_cam_full = sd_record_cam['filename'] filename_lid_full = sd_record_lid['filename'] # token = '%06d' % token_idx # Alternative to use KITTI names. token_idx += 1 # Convert image (jpg to png). src_im_path = os.path.join(self.nusc.dataroot, filename_cam_full) dst_im_path = os.path.join(image_folder, sample_token + '.png') if not os.path.exists(dst_im_path): im = Image.open(src_im_path) im.save(dst_im_path, "PNG") # Convert lidar. # Note that we are only using a single sweep, instead of the commonly used n sweeps. 
        src_lid_path = os.path.join(self.nusc.dataroot, filename_lid_full)
        dst_lid_path = os.path.join(lidar_folder, sample_token + '.bin')
        assert not dst_lid_path.endswith('.pcd.bin')
        pcl = LidarPointCloud.from_file(src_lid_path)
        pcl.rotate(kitti_to_nu_lidar_inv.rotation_matrix)  # In KITTI lidar frame.
        with open(dst_lid_path, "wb") as lid_file:  # Binary mode: tofile() writes raw bytes.
            pcl.points.T.tofile(lid_file)

        # Add to tokens.
        tokens.append(sample_token)

        # Create calibration file.
        kitti_transforms = dict()
        kitti_transforms['P0'] = np.zeros((3, 4))  # Dummy values.
        kitti_transforms['P1'] = np.zeros((3, 4))  # Dummy values.
        kitti_transforms['P2'] = p_left_kitti  # Left camera transform.
        kitti_transforms['P3'] = np.zeros((3, 4))  # Dummy values.
        kitti_transforms['R0_rect'] = r0_rect.rotation_matrix  # Cameras are already rectified.
        kitti_transforms['Tr_velo_to_cam'] = np.hstack((velo_to_cam_rot, velo_to_cam_trans.reshape(3, 1)))
        kitti_transforms['Tr_imu_to_velo'] = imu_to_velo_kitti
        calib_path = os.path.join(calib_folder, sample_token + '.txt')
        with open(calib_path, "w") as calib_file:
            for (key, val) in kitti_transforms.items():
                val = val.flatten()
                val_str = '%.12e' % val[0]
                for v in val[1:]:
                    val_str += ' %.12e' % v
                calib_file.write('%s: %s\n' % (key, val_str))

        # Write label file.
        label_path = os.path.join(label_folder, sample_token + '.txt')
        if os.path.exists(label_path):
            print('Skipping existing file: %s' % label_path)
            continue
        else:
            print('Writing file: %s' % label_path)
        with open(label_path, "w") as label_file:
            for sample_annotation_token in sample_annotation_tokens:
                sample_annotation = self.nusc.get('sample_annotation', sample_annotation_token)

                # Get box in LIDAR frame.
                _, box_lidar_nusc, _ = self.nusc.get_sample_data(lidar_token,
                                                                 box_vis_level=BoxVisibility.NONE,
                                                                 selected_anntokens=[sample_annotation_token])
                box_lidar_nusc = box_lidar_nusc[0]

                # Truncated: Set all objects to 0 which means untruncated.
                truncated = 0.0

                # Occluded: Set all objects to full visibility as this information is not available in nuScenes.
                occluded = 0

                # Convert nuScenes category to nuScenes detection challenge category.
                detection_name = category_to_detection_name(sample_annotation['category_name'])

                # Skip categories that are not part of the nuScenes detection challenge.
                if detection_name is None:
                    continue

                # Convert from nuScenes to KITTI box format.
                box_cam_kitti = KittiDB.box_nuscenes_to_kitti(
                    box_lidar_nusc, Quaternion(matrix=velo_to_cam_rot), velo_to_cam_trans, r0_rect)

                # Project 3d box to 2d box in image, ignore box if it does not fall inside.
                bbox_2d = KittiDB.project_kitti_box_to_image(box_cam_kitti, p_left_kitti, imsize=imsize)
                if bbox_2d is None:
                    continue

                # Set dummy score so we can use this file as result.
                box_cam_kitti.score = 0

                # Convert box to output string format.
                output = KittiDB.box_to_string(name=detection_name, box=box_cam_kitti, bbox_2d=bbox_2d,
                                               truncation=truncated, occlusion=occluded)

                # Write to disk.
                label_file.write(output + '\n')
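# Sanity-check sketch (added): a velodyne .bin written above can be read back as
# an (N, 4) float32 array of x, y, z, reflectance, matching what
# pcl.points.T.tofile() stores.
import numpy as np

def read_kitti_velodyne_bin(path: str) -> np.ndarray:
    return np.fromfile(path, dtype=np.float32).reshape(-1, 4)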
def parse_sample(sample_token, nusc):
    ret_dict = {}
    sample_data = nusc.get('sample', sample_token)
    timestamp = sample_data['timestamp']

    # First step: get the lidar points in the global frame.
    lidar_token = sample_data['data']['LIDAR_TOP']
    lidar_data = nusc.get('sample_data', lidar_token)
    pcl_path = osp.join(nusc.dataroot, lidar_data['filename'])
    pc = LidarPointCloud.from_file(pcl_path)

    # Lidar points are in the point sensor frame; map them to the ego frame.
    cs_record = nusc.get('calibrated_sensor', lidar_data['calibrated_sensor_token'])
    pc.rotate(Quaternion(cs_record['rotation']).rotation_matrix)
    pc.translate(np.array(cs_record['translation']))

    # Second step: transform from ego to the global frame.
    poserecord = nusc.get('ego_pose', lidar_data['ego_pose_token'])
    pc.rotate(Quaternion(poserecord['rotation']).rotation_matrix)
    pc.translate(np.array(poserecord['translation']))
    # First step finished! pc is in the global frame.

    for cam_name in CamNames:
        cam_token = sample_data['data'][cam_name]
        _, boxes, _ = nusc.get_sample_data(cam_token, box_vis_level=BoxVisibility.ANY)

        all_points = []
        all_sf = []
        for box in boxes:
            ann_token = box.token
            points, sf, _ = get_sf_ann(ann_token, timestamp, deepcopy(pc.points[:3, :]), nusc)
            if points is not None:
                all_points.append(points)
                all_sf.append(sf)

        if len(all_points) > 0:
            points = np.concatenate(all_points, axis=-1)
            sf = np.concatenate(all_sf, axis=-1)
        else:
            # No boxes with scene flow for this camera.
            ret_dict[cam_name] = None
            continue

        # Transform the points to the ego pose and rotate the scene-flow vectors accordingly.
        # Change from global to ego pose.
        points = points - np.array(poserecord['translation'])[:, np.newaxis]
        points = np.dot(Quaternion(poserecord['rotation']).rotation_matrix.T, points)
        sf = np.dot(Quaternion(poserecord['rotation']).rotation_matrix.T, sf)

        # Change from ego to camera.
        cam_data = nusc.get('sample_data', cam_token)
        cam_cs = nusc.get('calibrated_sensor', cam_data['calibrated_sensor_token'])

        # Transform points from ego pose to camera pose.
        cam_points = deepcopy(points) - np.array(cam_cs['translation'])[:, np.newaxis]
        cam_points = np.dot(Quaternion(cam_cs['rotation']).rotation_matrix.T, cam_points)
        cam_sf = np.dot(Quaternion(cam_cs['rotation']).rotation_matrix.T, sf)

        ret_points = np.concatenate([cam_points, cam_sf], axis=0)
        ret_dict[cam_name] = ret_points

    return ret_dict
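# parse_sample relies on two names defined elsewhere in its source: get_sf_ann,
# which returns per-annotation points and scene-flow vectors, and CamNames, the
# list of camera channels to process. A plausible definition of the latter
# (an assumption, using the six standard nuScenes camera channels):
CamNames = ['CAM_FRONT', 'CAM_FRONT_RIGHT', 'CAM_BACK_RIGHT',
            'CAM_BACK', 'CAM_BACK_LEFT', 'CAM_FRONT_LEFT']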
import matplotlib.pyplot as plt

nusc = NuScenes(version='v1.0-mini',
                dataroot='/media/aicore/Daten/aicore 2TB/nuScenes/v1.0-mini',
                verbose=True)
my_sample = nusc.get('sample', 'cd9964f8c3d34383b16e9c2997de1ed0')
# nusc.render_sample(my_sample['token'])
sample_record = nusc.get('sample', my_sample['token'])
# nusc.list_sample(my_sample['token'])

for sd_token in sample_record['data'].values():
    sd_record = nusc.get('sample_data', sd_token)
    if sd_record['sensor_modality'] == 'lidar':
        pcl_path = osp.join(nusc.dataroot, sd_record['filename'])
        pc = LidarPointCloud.from_file(pcl_path)
        view = np.eye(4)
        print(pc.points.shape)

        # Count points farther than 50 m from the sensor in the xy-plane.
        # No renormalization here, so the coordinates stay metric.
        count = 0
        points_in_horizon = np.transpose(view_points(pc.points[:3, :], view, normalize=False))
        for point in points_in_horizon:
            if np.sum(np.square(point[:2])) > 2500:  # 50 m, squared.
                count += 1
        print(count, len(points_in_horizon))

        # Print all points more than 10 m above the sensor.
        count = 0
        points = np.transpose(pc.points[:3, :])
        for point in points:
            if point[2] > 10:
                print(point)
def accumulate_class(curr_class_name):
    # curr_ev = TrackingEvaluation(self.tracks_gt, self.tracks_pred, curr_class_name, self.cfg.dist_fcn_callable,
    #                              self.cfg.dist_th_tp, self.cfg.min_recall,
    #                              num_thresholds=TrackingMetricData.nelem,
    #                              metric_worst=self.cfg.metric_worst,
    #                              verbose=self.verbose,
    #                              output_dir=self.output_dir,
    #                              render_classes=self.render_classes)
    # curr_md = curr_ev.accumulate()
    """
    Compute metrics for all recall thresholds of the current class.
    :return: TrackingMetricData instance which holds the metrics for each threshold.
    """
    # Init.
    if self.verbose:
        print('Computing metrics for class %s...\n' % curr_class_name)
    accumulators = []
    thresh_metrics = []
    init_outscen()
    # md = TrackingMetricData()

    # Skip missing classes.
    gt_box_count = 0
    gt_track_ids = set()
    for scene_tracks_gt in self.tracks_gt.values():
        for frame_gt in scene_tracks_gt.values():
            for box in frame_gt:
                if box.tracking_name == curr_class_name:
                    gt_box_count += 1
                    gt_track_ids.add(box.tracking_id)
    if gt_box_count == 0:
        print("gt_box_count == 0")
        # Do not add any metric. The average metrics will then be nan.
        # return md

    # Register mot metrics.
    # mh = create_motmetrics()

    # Get thresholds.
    # Note: The recall values are the hypothetical recall (10%, 20%, ..).
    # The actual recall may vary as there is no way to compute it without trying all thresholds.
    thresholds = np.array([0.1])  # Alternative: thresholds, recalls = self.compute_thresholds(gt_box_count)
    # md.confidence = thresholds
    # md.recall_hypo = recalls
    if self.verbose:
        print('Computed thresholds\n')

    for t, threshold in enumerate(thresholds):
        # If recall threshold is not achieved, we assign the worst possible value in AMOTA and AMOTP.
        if np.isnan(threshold):
            continue

        # Do not compute the same threshold twice.
        # This becomes relevant when a user submits many boxes with the exact same score.
        if threshold in thresholds[:t]:
            continue

        """
        Accumulate metrics for a particular recall threshold of the current class.
        The scores are only computed if threshold is set to None. This is used to infer the recall thresholds.
        :param threshold: score threshold used to determine positives and negatives.
        :return: (The MOTAccumulator that stores all the hits/misses/etc, Scores for each TP).
        """
        accs = []
        scores = []  # The scores of the TPs. These are used to determine the recall thresholds initially.

        # Go through all frames and associate ground truth and tracker results.
        # Ground truth and tracker contain, for every single frame, lists of detections.
        tracks_gt = self.tracks_gt
        scene_num_id = 0
        sum_fp = 0
        sum_fn = 0
        for scene_id in tqdm.tqdm(list(tracks_gt.keys()), disable=not self.verbose, leave=False):  # Per scene.

            # Initialize accumulator and frame_id for this scene.
            acc = MOTAccumulatorCustom()
            frame_id = 0  # Frame ids must be unique across all scenes.

            # Retrieve GT and preds.
            scene_tracks_gt = tracks_gt[scene_id]
            scene_tracks_pred = self.tracks_pred[scene_id]
            # if len(tracks_gt) == 151:
            #     tracks_gt.pop('0')

            # Visualize the boxes in this frame.
            if curr_class_name in self.render_classes and threshold is not None and scene_num_id < scene_id_thr:
                save_path = os.path.join(self.output_dir, 'render', str(scene_id), curr_class_name)
                os.makedirs(save_path, exist_ok=True)
                renderer = TrackingRenderer(save_path)
            else:
                renderer = None

            for timestamp in scene_tracks_gt.keys():  # Within each scene, go frame by frame.
                # Select only the current class.
                frame_gt = scene_tracks_gt[timestamp]
                frame_pred = scene_tracks_pred[timestamp]
                frame_gt = [f for f in frame_gt if f.tracking_name == curr_class_name]
                frame_pred = [f for f in frame_pred if f.tracking_name == curr_class_name]

                # Threshold boxes by score. Note that the scores were previously averaged over the whole track.
                if threshold is not None:
                    frame_pred = [f for f in frame_pred if f.tracking_score >= threshold]

                # Abort if there are neither GT nor pred boxes.
                gt_ids = [gg.tracking_id for gg in frame_gt]
                pred_ids = [tt.tracking_id for tt in frame_pred]
                if len(gt_ids) == 0 and len(pred_ids) == 0:
                    continue

                # Calculate distances.
                # Note that the distance function is hard-coded to achieve significant speedups via vectorization.
                assert self.cfg.dist_fcn_callable.__name__ == 'center_distance'
                if len(frame_gt) == 0 or len(frame_pred) == 0:
                    distances = np.ones((0, 0))
                else:
                    gt_boxes = np.array([b.translation[:2] for b in frame_gt])
                    pred_boxes = np.array([b.translation[:2] for b in frame_pred])
                    distances = sklearn.metrics.pairwise.euclidean_distances(gt_boxes, pred_boxes)

                # Distances that are larger than the threshold won't be associated.
                assert len(distances) == 0 or not np.all(np.isnan(distances))
                distances[distances >= self.cfg.dist_th_tp] = np.nan

                # Accumulate results.
                # Note that we cannot use timestamp as frameid as motmetrics assumes it's an integer.
                acc.update(gt_ids, pred_ids, distances, frameid=frame_id)

                # Store scores of matches, which are used to determine recall thresholds.
                if threshold is not None:
                    events = acc.events.loc[frame_id]
                    matches = events[events.Type == 'MATCH']
                    match_ids = matches.HId.values
                    match_scores = [tt.tracking_score for tt in frame_pred if tt.tracking_id in match_ids]
                    scores.extend(match_scores)
                else:
                    events = None

                # Render the boxes in this frame.
                if curr_class_name in self.render_classes and threshold is not None and scene_num_id < scene_id_thr:
                    # Load the lidar points frame by frame. See:
                    # https://github.com/nutonomy/nuscenes-devkit/blob/master/python-sdk/nuscenes/scripts/export_kitti.py
                    try:
                        frame0 = frame_pred[0]
                    except IndexError:
                        frame0 = scene_tracks_gt[timestamp][0]
                    sample = self.nusc.get('sample', frame0.sample_token)  # frame_pred holds all objects of this frame.
                    # sample_annotation_tokens = sample['anns']  # Annotations.
                    # cam_front_token = sample['data'][cam_name]  # Image of a given camera position.
                    lidar_token = sample['data'][lidar_name]

                    # Retrieve sensor records.
                    # sd_record_cam = self.nusc.get('sample_data', cam_front_token)
                    sd_record_lid = self.nusc.get('sample_data', lidar_token)
                    cs_record = self.nusc.get('calibrated_sensor', sd_record_lid["calibrated_sensor_token"])
                    pose_record = self.nusc.get('ego_pose', sd_record_lid["ego_pose_token"])
                    # cs_record_cam = self.nusc.get('calibrated_sensor', sd_record_cam['calibrated_sensor_token'])
                    # cs_record_lid = self.nusc.get('calibrated_sensor', sd_record_lid['calibrated_sensor_token'])

                    # Retrieve the token from the lidar.
                    # Note that this may be confusing as the filename of the camera will include the timestamp of
                    # the lidar, not the camera.
                    # filename_cam_full = sd_record_cam['filename']
                    filename_lid_full = sd_record_lid['filename']
                    src_lid_path = os.path.join(datset_path, filename_lid_full)
                    points = LidarPointCloud.from_file(src_lid_path)
                    # Select a specific scene; for this, the scene count above has to be set to 150.
                    if scene_id == "4efbf4c0b77f467385fc2e19da45c989":
                        # or lidar_token == "16be583c31a2403caa6c158bb55ae616":  # Select a specific frame.
                        renderer.render(events, timestamp, frame_gt, frame_pred, points, pose_record, cs_record,
                                        ifplotgt, threshold, ifpltsco, outscen_class, nusc=self.nusc,
                                        ifplthis=ifplthis)

                # Increment the frame_id, unless there are no boxes (equivalent to what motmetrics does).
                frame_id += 1
            scene_num_id += 1
            accs.append(acc)
        print("Visualization done!")
        if outscen_class and renderer is not None:
            print('trk_ratio:', TrackingRenderer.trk_ratio, '\n',
                  'fp_disrange:', TrackingRenderer.fp_disrange, '\n',
                  'fp_verange:', TrackingRenderer.fp_verange, '\n',
                  'fp_ptsnumrange:', TrackingRenderer.fp_ptsnumrange, '\n',
                  'fpscorrange:', TrackingRenderer.fpscorrange, '\n',
                  'gt_ratio:', TrackingRenderer.gt_ratio, '\n',
                  'vis_ratio:', TrackingRenderer.vis_ratio, '\n',
                  'fn_disrange:', TrackingRenderer.fn_disrange, '\n',
                  'fn_verange:', TrackingRenderer.fn_verange, '\n',
                  'fn_ptsnumrange:', TrackingRenderer.fn_ptsnumrange, '\n',
                  'ids_verange:', TrackingRenderer.ids_verange, '\n',
                  'ids_factratio:', TrackingRenderer.ids_factratio, '\n',
                  'gt_ptsnumrange:', TrackingRenderer.gt_ptsnumrange, '\n',
                  'at least fault_datas:', TrackingRenderer.fault_datas)
def process_token_to_kitti(self, sample_token: str) -> None:
    # Get sample data.
    sample = self.nusc.get('sample', sample_token)
    sample_annotation_tokens = sample['anns']

    lidar_token = sample['data'][self.lidar_name]
    sd_record_lid = self.nusc.get('sample_data', lidar_token)
    cs_record_lid = self.nusc.get('calibrated_sensor', sd_record_lid['calibrated_sensor_token'])
    for cam_name in self.cams_to_see:
        cam_front_token = sample['data'][cam_name]
        token_to_write = cam_front_token

        # Retrieve sensor records.
        sd_record_cam = self.nusc.get('sample_data', cam_front_token)
        cs_record_cam = self.nusc.get('calibrated_sensor', sd_record_cam['calibrated_sensor_token'])
        cam_height = sd_record_cam['height']
        cam_width = sd_record_cam['width']
        imsize = (cam_width, cam_height)

        # Combine transformations and convert to KITTI format.
        # Note: cam uses same conventions in KITTI and nuScenes.
        lid_to_ego = transform_matrix(cs_record_lid['translation'], Quaternion(cs_record_lid['rotation']),
                                      inverse=False)
        ego_to_cam = transform_matrix(cs_record_cam['translation'], Quaternion(cs_record_cam['rotation']),
                                      inverse=True)
        velo_to_cam = np.dot(ego_to_cam, lid_to_ego)

        # Convert from KITTI to nuScenes LIDAR coordinates, where we apply velo_to_cam.
        velo_to_cam_kitti = np.dot(velo_to_cam, self.kitti_to_nu_lidar.transformation_matrix)

        # Currently not used.
        imu_to_velo_kitti = np.zeros((3, 4))  # Dummy values.
        r0_rect = Quaternion(axis=[1, 0, 0], angle=0)  # Dummy values.

        # Projection matrix.
        p_left_kitti = np.zeros((3, 4))
        # Cameras are always rectified.
        p_left_kitti[:3, :3] = cs_record_cam['camera_intrinsic']

        # Create KITTI style transforms.
        velo_to_cam_rot = velo_to_cam_kitti[:3, :3]
        velo_to_cam_trans = velo_to_cam_kitti[:3, 3]

        # Check that the rotation has the same format as in KITTI.
        assert (velo_to_cam_trans[1:3] < 0).all()

        # Retrieve the token from the lidar.
        # Note that this may be confusing as the filename of the camera will include the timestamp of the lidar,
        # not the camera.
        filename_cam_full = sd_record_cam['filename']
        filename_lid_full = sd_record_lid['filename']

        # Convert image (jpg to png).
        src_im_path = os.path.join(self.nusc.dataroot, filename_cam_full)
        dst_im_path = os.path.join(self.image_folder, token_to_write + '.png')
        if not os.path.exists(dst_im_path):
            im = Image.open(src_im_path)
            im.save(dst_im_path, "PNG")

        # Convert lidar.
        # Note that we are only using a single sweep, instead of the commonly used n sweeps.
        src_lid_path = os.path.join(self.nusc.dataroot, filename_lid_full)
        dst_lid_path = os.path.join(self.lidar_folder, token_to_write + '.bin')
        pcl = LidarPointCloud.from_file(src_lid_path)
        # Rotate into the KITTI lidar frame.
        pcl.rotate(self.kitti_to_nu_lidar_inv.rotation_matrix)
        with open(dst_lid_path, "wb") as lid_file:  # Binary mode: tofile() writes raw bytes.
            pcl.points.T.tofile(lid_file)

        # Add to tokens.
        # tokens.append(token_to_write)

        # Create calibration file.
        kitti_transforms = dict()
        kitti_transforms['P0'] = np.zeros((3, 4))  # Dummy values.
        kitti_transforms['P1'] = np.zeros((3, 4))  # Dummy values.
        kitti_transforms['P2'] = p_left_kitti  # Left camera transform.
        kitti_transforms['P3'] = np.zeros((3, 4))  # Dummy values.
        # Cameras are already rectified.
kitti_transforms['R0_rect'] = r0_rect.rotation_matrix kitti_transforms['Tr_velo_to_cam'] = np.hstack( (velo_to_cam_rot, velo_to_cam_trans.reshape(3, 1))) kitti_transforms['Tr_imu_to_velo'] = imu_to_velo_kitti calib_path = os.path.join(self.calib_folder, token_to_write + '.txt') with open(calib_path, "w") as calib_file: for (key, val) in kitti_transforms.items(): val = val.flatten() val_str = '%.12e' % val[0] for v in val[1:]: val_str += ' %.12e' % v calib_file.write('%s: %s\n' % (key, val_str)) # Write label file. label_path = os.path.join(self.label_folder, token_to_write + '.txt') if os.path.exists(label_path): print('Skipping existing file: %s' % label_path) continue # else: # print('Writing file: %s' % label_path) with open(label_path, "w") as label_file: for sample_annotation_token in sample_annotation_tokens: sample_annotation = self.nusc.get('sample_annotation', sample_annotation_token) # Get box in LIDAR frame. _, box_lidar_nusc, _ = self.nusc.get_sample_data( lidar_token, box_vis_level=BoxVisibility.NONE, selected_anntokens=[sample_annotation_token]) box_lidar_nusc = box_lidar_nusc[0] # Truncated: Set all objects to 0 which means untruncated. truncated = 0.0 # Occluded: Set all objects to full visibility as this information is # not available in nuScenes. occluded = 0 detection_name = sample_annotation['category_name'] # category_to_detection_name(sample_annotation['category_name']) # Skip categories that are not part of the nuScenes detection challenge. - disabled if detection_name is None: continue # Convert from nuScenes to KITTI box format. box_cam_kitti = KittiDB.box_nuscenes_to_kitti( box_lidar_nusc, Quaternion(matrix=velo_to_cam_rot), velo_to_cam_trans, r0_rect) # Project 3d box to 2d box in image, ignore box if it does not fall inside. bbox_2d = KittiDB.project_kitti_box_to_image(box_cam_kitti, p_left_kitti, imsize=imsize) if bbox_2d is None and not self.get_all_detections: continue elif bbox_2d is None and self.get_all_detections: bbox_2d = (-1.0, -1.0, -1.0, -1.0 ) # default KITTI bbox # Set dummy score so we can use this file as result. box_cam_kitti.score = 0 # Convert box to output string format. output = KittiDB.box_to_string(name=detection_name, box=box_cam_kitti, bbox_2d=bbox_2d, truncation=truncated, occlusion=occluded) # Write to disk. label_file.write(output + '\n')
def load_point_cloud(nuscenes, sample_data): # Load point cloud lidar_path = os.path.join(nuscenes.dataroot, sample_data['filename']) pcl = LidarPointCloud.from_file(lidar_path) return pcl.points[:3, :].T
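# Convenience wrapper (added for illustration): fetch the keyframe lidar cloud of
# a sample as an (N, 3) array using load_point_cloud above.
def load_sample_point_cloud(nuscenes, sample_token: str):
    sample = nuscenes.get('sample', sample_token)
    sample_data = nuscenes.get('sample_data', sample['data']['LIDAR_TOP'])
    return load_point_cloud(nuscenes, sample_data)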
def get_pointcloud(nusc, bottom_left, top_right, box, pointsensor_token: str, camera_token: str,
                   min_dist: float = 1.0) -> Tuple:
    """
    Given a point sensor (lidar/radar) token and camera sample_data token, load point-cloud and map it to
    the image plane.
    :param nusc: NuScenes instance.
    :param bottom_left: Bottom-left corner of the 2D box in the image.
    :param top_right: Top-right corner of the 2D box in the image.
    :param box: 3D box of the annotation of interest.
    :param pointsensor_token: Lidar/radar sample_data token.
    :param camera_token: Camera sample_data token.
    :param min_dist: Distance from the camera below which points are discarded.
    :return (points <np.float: 3, n>, in-box mask <np.int: n>, corner offsets <np.float: n, 8, 3>,
             reverse rotation matrix <np.float: 3, 3>, normalized box corners <np.float: 8, 3>).
    """
    sample_data = nusc.get("sample_data", camera_token)
    explorer = NuScenesExplorer(nusc)
    cam = nusc.get('sample_data', camera_token)
    pointsensor = nusc.get('sample_data', pointsensor_token)
    im = Image.open(osp.join(nusc.dataroot, cam['filename']))
    sample_rec = explorer.nusc.get('sample', pointsensor['sample_token'])
    chan = pointsensor['channel']
    ref_chan = 'LIDAR_TOP'
    # pc, times = LidarPointCloud.from_file_multisweep(nusc, sample_rec, chan, ref_chan, nsweeps=10)
    file_name = os.path.join(nusc.dataroot, nusc.get('sample_data', sample_rec['data'][chan])['filename'])
    pc = LidarPointCloud.from_file(file_name)
    data_path, boxes, camera_intrinsic = nusc.get_sample_data(pointsensor_token, selected_anntokens=[box.token])
    pcl_box = boxes[0]

    original_points = pc.points.copy()

    # Points live in the point sensor frame. So they need to be transformed via global to the image plane.
    # First step: transform the point-cloud to the ego vehicle frame for the timestamp of the sweep.
    cs_record = nusc.get('calibrated_sensor', pointsensor['calibrated_sensor_token'])
    pc.rotate(Quaternion(cs_record['rotation']).rotation_matrix)
    pc.translate(np.array(cs_record['translation']))

    # Second step: transform to the global frame.
    poserecord = nusc.get('ego_pose', pointsensor['ego_pose_token'])
    pc.rotate(Quaternion(poserecord['rotation']).rotation_matrix)
    pc.translate(np.array(poserecord['translation']))

    # Third step: transform into the ego vehicle frame for the timestamp of the image.
    poserecord = nusc.get('ego_pose', cam['ego_pose_token'])
    pc.translate(-np.array(poserecord['translation']))
    pc.rotate(Quaternion(poserecord['rotation']).rotation_matrix.T)

    # Fourth step: transform into the camera.
    cs_record = nusc.get('calibrated_sensor', cam['calibrated_sensor_token'])
    pc.translate(-np.array(cs_record['translation']))
    pc.rotate(Quaternion(cs_record['rotation']).rotation_matrix.T)

    # Fifth step: actually take a "picture" of the point cloud.
    depths = pc.points[2, :]

    # Take the actual picture (matrix multiplication with camera-matrix + renormalization).
    points = view_points(pc.points[:3, :], np.array(cs_record['camera_intrinsic']), normalize=True)

    center = np.array([[box.center[0]], [box.center[1]], [box.center[2]]])
    box_center = view_points(center, np.array(cs_record['camera_intrinsic']), normalize=True)
    box_corners = box.corners()
    image_center = np.asarray([((bottom_left[0] + top_right[0]) / 2), ((bottom_left[1] + top_right[1]) / 2), 1])

    # Rotate to turn the ray passing through the image_center into the z axis.
    z_axis = np.linalg.lstsq(np.array(cs_record['camera_intrinsic']), image_center, rcond=None)[0]
    v = np.asarray([z_axis[0], z_axis[1], z_axis[2]])
    z = np.asarray([0., 0., 1.])
    normal = np.cross(v, z)
    theta = np.arccos(np.dot(v, z) / np.sqrt(v.dot(v)))
    new_pts = []
    new_corner = []
    old_pts = []
    points_3 = pc.points[:3, :]  # Camera-frame 3D points, to which the frustum rotation is applied.
    translate = np.dot(rotation_matrix(normal, theta), image_center)
    for point in points_3.T:
        new_pts = new_pts + [np.dot(rotation_matrix(normal, theta), point)]
    for corner in box_corners.T:
        new_corner = new_corner + [np.dot(rotation_matrix(normal, theta), corner)]
    points = np.asarray(new_pts)
    original_points = original_points[:3, :].T
    new_corners = np.asarray(new_corner)
    reverse_matrix = rotation_matrix(normal, -theta)

    # Sample 400 points.
    if np.shape(new_pts)[0] > 400:
        mask = np.random.choice(points.shape[0], 400, replace=False)
        points = points[mask, :]
        original_points = original_points[mask, :]

    shift = np.expand_dims(np.mean(points, axis=0), 0)
    points = points - shift  # Center.
    new_corners = new_corners - shift  # Center.
    dist = np.max(np.sqrt(np.sum(points ** 2, axis=1)), 0)
    points = points / dist  # Scale.
    new_corners = new_corners / dist  # Scale.

    # Compute the offset from each point to each of the 8 box corners.
    n = np.shape(points)[0]
    offset = np.zeros((n, 8, 3))
    for i in range(0, n):
        for j in range(0, 8):
            offset[i][j] = new_corners[j] - points[i]

    # Compute a mask of which points lie inside the box.
    m = []
    for point in original_points:
        if in_box(point, pcl_box.corners()):
            m = m + [1]
        else:
            m = m + [0]
    m = np.asarray(m)

    return points.T, m, offset, reverse_matrix, new_corners
def map_pointcloud_to_image(nusc,
                            pointsensor_token: str,
                            camera_token: str,
                            min_dist: float = 1.0,
                            render_intensity: bool = False,
                            show_lidarseg: bool = False):
    """
    Given a point sensor (lidar/radar) token and camera sample_data token, load pointcloud and map it to
    the image plane.
    :param pointsensor_token: Lidar/radar sample_data token.
    :param camera_token: Camera sample_data token.
    :param min_dist: Distance from the camera below which points are discarded.
    :param render_intensity: Whether to render lidar intensity instead of point depth.
    :param show_lidarseg: Whether to render lidar segmentation labels instead of point depth.
    :return (pointcloud <np.float: 2, n>, coloring <np.float: n>, image <Image>).
    """
    cam = nusc.get('sample_data', camera_token)
    pointsensor = nusc.get('sample_data', pointsensor_token)
    pcl_path = osp.join(nusc.dataroot, pointsensor['filename'])
    if pointsensor['sensor_modality'] == 'lidar':
        # Ensure that lidar pointcloud is from a keyframe.
        assert pointsensor['is_key_frame'], \
            'Error: Only pointclouds which are keyframes have lidar segmentation labels. Rendering aborted.'
        pc = LidarPointCloud.from_file(pcl_path)
    else:
        pc = RadarPointCloud.from_file(pcl_path)
    im = Image.open(osp.join(nusc.dataroot, cam['filename']))

    # Points live in the point sensor frame. So they need to be transformed via global to the image plane.
    # First step: transform the pointcloud to the ego vehicle frame for the timestamp of the sweep.
    cs_record = nusc.get('calibrated_sensor', pointsensor['calibrated_sensor_token'])
    pc.rotate(Quaternion(cs_record['rotation']).rotation_matrix)
    pc.translate(np.array(cs_record['translation']))

    # Second step: transform from ego to the global frame.
    poserecord = nusc.get('ego_pose', pointsensor['ego_pose_token'])
    pc.rotate(Quaternion(poserecord['rotation']).rotation_matrix)
    pc.translate(np.array(poserecord['translation']))

    # Third step: transform from global into the ego vehicle frame for the timestamp of the image.
    poserecord = nusc.get('ego_pose', cam['ego_pose_token'])
    pc.translate(-np.array(poserecord['translation']))
    pc.rotate(Quaternion(poserecord['rotation']).rotation_matrix.T)

    # Fourth step: transform from ego into the camera.
    cs_record = nusc.get('calibrated_sensor', cam['calibrated_sensor_token'])
    pc.translate(-np.array(cs_record['translation']))
    pc.rotate(Quaternion(cs_record['rotation']).rotation_matrix.T)

    # Fifth step: actually take a "picture" of the point cloud.
    # Grab the depths (camera frame z axis points away from the camera).
    depths = pc.points[2, :]

    if render_intensity:
        assert pointsensor['sensor_modality'] == 'lidar', 'Error: Can only render intensity for lidar, ' \
                                                          'not %s!' % pointsensor['sensor_modality']
        # Retrieve the color from the intensities.
        # Performs arbitrary scaling to achieve more visually pleasing results.
        intensities = pc.points[3, :]
        intensities = (intensities - np.min(intensities)) / (np.max(intensities) - np.min(intensities))
        intensities = intensities ** 0.1
        intensities = np.maximum(0, intensities - 0.5)
        coloring = intensities
    else:
        # Retrieve the color from the depth.
        coloring = depths

    # Take the actual picture (matrix multiplication with camera-matrix + renormalization).
points = view_points(pc.points[:3, :], np.array(cs_record['camera_intrinsic']), normalize=True) # Remove points that are either outside or behind the camera. Leave a margin of 1 pixel for aesthetic reasons. # Also make sure points are at least 1m in front of the camera to avoid seeing the lidar points on the camera # casing for non-keyframes which are slightly out of sync. mask = np.ones(depths.shape[0], dtype=bool) mask = np.logical_and(mask, depths > min_dist) mask = np.logical_and(mask, points[0, :] > 1) mask = np.logical_and(mask, points[0, :] < im.size[0] - 1) mask = np.logical_and(mask, points[1, :] > 1) mask = np.logical_and(mask, points[1, :] < im.size[1] - 1) points = points[:, mask] coloring = coloring[mask] return points, coloring, im
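# Rendering sketch (added for illustration): overlay the projected points on the
# image, mirroring what NuScenesExplorer.render_pointcloud_in_image does with
# these outputs.
import matplotlib.pyplot as plt

def render_projection(nusc, pointsensor_token: str, camera_token: str) -> None:
    points, coloring, im = map_pointcloud_to_image(nusc, pointsensor_token, camera_token)
    plt.figure(figsize=(9, 16))
    plt.imshow(im)
    plt.scatter(points[0, :], points[1, :], c=coloring, s=5)
    plt.axis('off')
    plt.show()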
def validate_submission(nusc: NuScenes,
                        results_folder: str,
                        eval_set: str,
                        verbose: bool = False) -> None:
    """
    Checks if a results folder is valid. The following checks are performed:
    - Check that the submission folder is according to that described in
      https://github.com/nutonomy/nuscenes-devkit/blob/master/python-sdk/nuscenes/eval/lidarseg/README.md
    - Check that the submission.json is of the following structure:
        {"meta": {"use_camera": false, "use_lidar": true, "use_radar": false, "use_map": false,
                  "use_external": false}}
    - Check that each lidar sample data in the evaluation set is present and valid.
    :param nusc: A NuScenes object.
    :param results_folder: Path to the folder.
    :param eval_set: The dataset split to evaluate on, e.g. train, val or test.
    :param verbose: Whether to print messages during the evaluation.
    """
    mapper = LidarsegClassMapper(nusc)
    num_classes = len(mapper.coarse_name_2_coarse_idx_mapping)

    if verbose:
        print('Checking if folder structure of {} is correct...'.format(results_folder))

    # Check that {results_folder}/{eval_set} exists.
    results_meta_folder = os.path.join(results_folder, eval_set)
    assert os.path.exists(results_meta_folder), \
        'Error: The folder containing the submission.json ({}) does not exist.'.format(results_meta_folder)

    # Check that {results_folder}/{eval_set}/submission.json exists.
    submission_json_path = os.path.join(results_meta_folder, 'submission.json')
    assert os.path.exists(submission_json_path), \
        'Error: submission.json ({}) does not exist.'.format(submission_json_path)

    # Check that {results_folder}/lidarseg/{eval_set} exists.
    results_bin_folder = os.path.join(results_folder, 'lidarseg', eval_set)
    assert os.path.exists(results_bin_folder), \
        'Error: The folder containing the .bin files ({}) does not exist.'.format(results_bin_folder)

    if verbose:
        print('\tPassed.')

    if verbose:
        print('Checking contents of {}...'.format(submission_json_path))

    with open(submission_json_path) as f:
        submission_meta = json.load(f)
    valid_meta = {"use_camera", "use_lidar", "use_radar", "use_map", "use_external"}
    assert valid_meta == set(submission_meta['meta'].keys()), \
        '{} must contain {}.'.format(submission_json_path, valid_meta)
    for meta_key in valid_meta:
        meta_key_type = type(submission_meta['meta'][meta_key])
        assert meta_key_type == bool, 'Error: Value for {} should be bool, not {}.'.format(meta_key, meta_key_type)

    if verbose:
        print('\tPassed.')

    if verbose:
        print('Checking if all .bin files for {} exist and are valid...'.format(eval_set))
    sample_tokens = get_samples_in_eval_set(nusc, eval_set)
    for sample_token in tqdm(sample_tokens, disable=not verbose):
        sample = nusc.get('sample', sample_token)

        # Get the sample data token of the point cloud.
        sd_token = sample['data']['LIDAR_TOP']

        # Load the predictions for the point cloud.
        lidarseg_pred_filename = os.path.join(results_bin_folder, sd_token + '_lidarseg.bin')
        assert os.path.exists(lidarseg_pred_filename), \
            'Error: The prediction .bin file {} does not exist.'.format(lidarseg_pred_filename)
        lidarseg_pred = np.fromfile(lidarseg_pred_filename, dtype=np.uint8)

        # Check number of predictions for the point cloud.
        if len(nusc.lidarseg) > 0:  # If ground truth exists, compare the no. of predictions with that of ground truth.
lidarseg_label_filename = os.path.join( nusc.dataroot, nusc.get('lidarseg', sd_token)['filename']) assert os.path.exists(lidarseg_label_filename), \ 'Error: The ground truth .bin file {} does not exist.'.format(lidarseg_label_filename) lidarseg_label = np.fromfile(lidarseg_label_filename, dtype=np.uint8) num_points = len(lidarseg_label) else: # If no ground truth is available, compare the no. of predictions with that of points in a point cloud. pointsensor = nusc.get('sample_data', sd_token) pcl_path = os.path.join(nusc.dataroot, pointsensor['filename']) pc = LidarPointCloud.from_file(pcl_path) points = pc.points num_points = points.shape[1] assert num_points == len(lidarseg_pred), \ 'Error: There are {} predictions for lidar sample data token {} ' \ 'but there are only {} points in the point cloud.'\ .format(len(lidarseg_pred), sd_token, num_points) assert all((lidarseg_pred > 0) & (lidarseg_pred < num_classes)), \ "Error: Array for predictions in {} must be between 1 and {} (inclusive)."\ .format(lidarseg_pred_filename, num_classes - 1) if verbose: print('\tPassed.') if verbose: print( 'Results folder {} successfully validated!'.format(results_folder))
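# Example invocation (added; the paths are placeholders). The folder layout
# checked above is:
#   {results_folder}/{eval_set}/submission.json
#   {results_folder}/lidarseg/{eval_set}/{lidar_sample_data_token}_lidarseg.bin
def demo_validate(nusc) -> None:
    validate_submission(nusc, results_folder='/data/submissions/my_method',
                        eval_set='val', verbose=True)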
def nuscenes_gt_to_kitti(self) -> None: """ Converts nuScenes GT annotations to KITTI format. """ kitti_to_nu_lidar = Quaternion(axis=(0, 0, 1), angle=np.pi / 2) kitti_to_nu_lidar_inv = kitti_to_nu_lidar.inverse imsize = (1600, 900) token_idx = 0 # Start tokens from 0. # Get assignment of scenes to splits. split_logs = create_splits_logs(self.split, self.nusc) scene_splits = create_splits_scenes(verbose=False) scene_to_log = { scene['name']: self.nusc.get('log', scene['log_token'])['logfile'] for scene in self.nusc.scene } logs = set() scenes = scene_splits[self.split] for scene in scenes: logs.add(scene_to_log[scene]) # print(len(scenes), len(logs)) split_mapping = {"train": "training", "val": "testing"} # Create output folders. label_folder = os.path.join(self.nusc_kitti_dir, split_mapping[self.split], 'label_2') calib_folder = os.path.join(self.nusc_kitti_dir, split_mapping[self.split], 'calib') image_folder = os.path.join(self.nusc_kitti_dir, split_mapping[self.split], 'image_2') lidar_folder = os.path.join(self.nusc_kitti_dir, split_mapping[self.split], 'velodyne') for folder in [label_folder, calib_folder, image_folder, lidar_folder]: if not os.path.isdir(folder): os.makedirs(folder) # Use only the samples from the current split. sample_tokens = self._split_to_samples(split_logs) # sample_tokens = sample_tokens[:self.image_count] # print(len(sample_tokens)) tokens = [] if self.split == "train": split_file = [ os.path.join(self.nusc_kitti_dir, "train.txt"), os.path.join(self.nusc_kitti_dir, "val.txt") ] elif self.split == 'val': split_file = os.path.join(self.nusc_kitti_dir, "test.txt") # if os.path.isfile(split_file): # os.remove(split_file) if self.split == "train": cnt = 0 with open(split_file[0], "w") as f: for seq in list(self.sequence_mapping.keys())[:-150]: for tk in self.sequence_mapping[seq]: f.write("%06d" % tk + "\n") cnt += 1 # print(cnt) cnt = 0 with open(split_file[1], "w") as f: for seq in list(self.sequence_mapping.keys())[-150:]: for tk in self.sequence_mapping[seq]: f.write("%06d" % tk + "\n") cnt += 1 # print(cnt) elif self.split == "val": with open(split_file, "w") as f: for seq in self.sequence_mapping.keys(): for tk in self.sequence_mapping[seq]: f.write("%06d" % tk + "\n") for idx, sample_token in enumerate(sample_tokens): # Get sample data. sample = self.nusc.get('sample', sample_token) sample_annotation_tokens = sample['anns'] cam_front_token = sample['data'][self.cam_name] lidar_token = sample['data'][self.lidar_name] sample_name = "%06d" % idx # Retrieve sensor records. sd_record_cam = self.nusc.get('sample_data', cam_front_token) sd_record_lid = self.nusc.get('sample_data', lidar_token) cs_record_cam = self.nusc.get( 'calibrated_sensor', sd_record_cam['calibrated_sensor_token']) cs_record_lid = self.nusc.get( 'calibrated_sensor', sd_record_lid['calibrated_sensor_token']) # Combine transformations and convert to KITTI format. # Note: cam uses same conventions in KITTI and nuScenes. lid_to_ego = transform_matrix(cs_record_lid['translation'], Quaternion( cs_record_lid['rotation']), inverse=False) ego_to_cam = transform_matrix(cs_record_cam['translation'], Quaternion( cs_record_cam['rotation']), inverse=True) velo_to_cam = np.dot(ego_to_cam, lid_to_ego) # Convert from KITTI to nuScenes LIDAR coordinates, where we apply velo_to_cam. velo_to_cam_kitti = np.dot(velo_to_cam, kitti_to_nu_lidar.transformation_matrix) # Currently not used. imu_to_velo_kitti = np.zeros((3, 4)) # Dummy values. r0_rect = Quaternion(axis=[1, 0, 0], angle=0) # Dummy values. 
        # Projection matrix.
        p_left_kitti = np.zeros((3, 4))
        # Cameras are always rectified.
        p_left_kitti[:3, :3] = cs_record_cam['camera_intrinsic']

        # Create KITTI style transforms.
        velo_to_cam_rot = velo_to_cam_kitti[:3, :3]
        velo_to_cam_trans = velo_to_cam_kitti[:3, 3]

        # Check that the rotation has the same format as in KITTI.
        assert (velo_to_cam_rot.round(0) == np.array([[0, -1, 0], [0, 0, -1], [1, 0, 0]])).all()
        assert (velo_to_cam_trans[1:3] < 0).all()

        # Retrieve the token from the lidar.
        # Note that this may be confusing as the filename of the camera will include the timestamp of the lidar,
        # not the camera.
        filename_cam_full = sd_record_cam['filename']
        filename_lid_full = sd_record_lid['filename']
        # token = '%06d' % token_idx  # Alternative to use KITTI names.
        token_idx += 1

        # Convert image (jpg to png).
        src_im_path = os.path.join(self.nusc.dataroot, filename_cam_full)
        dst_im_path = os.path.join(image_folder, sample_name + '.png')
        if not os.path.exists(dst_im_path):
            im = Image.open(src_im_path)
            im.save(dst_im_path, "PNG")

        # Convert lidar.
        # Note that we are only using a single sweep, instead of the commonly used n sweeps.
        src_lid_path = os.path.join(self.nusc.dataroot, filename_lid_full)
        dst_lid_path = os.path.join(lidar_folder, sample_name + '.bin')
        assert not dst_lid_path.endswith('.pcd.bin')
        pcl = LidarPointCloud.from_file(src_lid_path)
        # pcl, _ = LidarPointCloud.from_file_multisweep_future(self.nusc, sample, self.lidar_name,
        #                                                      self.lidar_name, nsweeps=5)
        pcl.rotate(kitti_to_nu_lidar_inv.rotation_matrix)  # In KITTI lidar frame.
        with open(dst_lid_path, "wb") as lid_file:  # Binary mode: tofile() writes raw bytes.
            pcl.points.T.tofile(lid_file)

        # Add to tokens.
        tokens.append(sample_token)

        # Create calibration file.
        kitti_transforms = dict()
        kitti_transforms['P0'] = np.zeros((3, 4))  # Dummy values.
        kitti_transforms['P1'] = np.zeros((3, 4))  # Dummy values.
        kitti_transforms['P2'] = p_left_kitti  # Left camera transform.
        kitti_transforms['P3'] = np.zeros((3, 4))  # Dummy values.
        kitti_transforms['R0_rect'] = r0_rect.rotation_matrix  # Cameras are already rectified.
        kitti_transforms['Tr_velo_to_cam'] = np.hstack((velo_to_cam_rot, velo_to_cam_trans.reshape(3, 1)))
        kitti_transforms['Tr_imu_to_velo'] = imu_to_velo_kitti
        calib_path = os.path.join(calib_folder, sample_name + '.txt')
        with open(calib_path, "w") as calib_file:
            for (key, val) in kitti_transforms.items():
                val = val.flatten()
                val_str = '%.12e' % val[0]
                for v in val[1:]:
                    val_str += ' %.12e' % v
                calib_file.write('%s: %s\n' % (key, val_str))

        # Write label file.
        label_path = os.path.join(label_folder, sample_name + '.txt')
        if os.path.exists(label_path):
            # print('Skipping existing file: %s' % label_path)
            continue
        # else:
        #     print('Writing file: %s' % label_path)

        objects = []
        for sample_annotation_token in sample_annotation_tokens:
            sample_annotation = self.nusc.get('sample_annotation', sample_annotation_token)

            # Get box in LIDAR frame.
            _, box_lidar_nusc, _ = self.nusc.get_sample_data(lidar_token,
                                                             box_vis_level=BoxVisibility.NONE,
                                                             selected_anntokens=[sample_annotation_token])
            box_lidar_nusc = box_lidar_nusc[0]

            # Truncated: Set all objects to 0 which means untruncated.
            truncated = 0.0

            # Occluded: Set all objects to full visibility as this information is not available in nuScenes.
            occluded = 0

            obj = dict()

            # Convert nuScenes category to nuScenes detection challenge category.
            obj["detection_name"] = category_to_detection_name(sample_annotation['category_name'])

            # Skip categories that are not part of the nuScenes detection challenge.
if obj["detection_name"] is None or obj[ "detection_name"] not in CLASS_MAP.keys(): continue obj["detection_name"] = CLASS_MAP[obj["detection_name"]] # Convert from nuScenes to KITTI box format. obj["box_cam_kitti"] = KittiDB.box_nuscenes_to_kitti( box_lidar_nusc, Quaternion(matrix=velo_to_cam_rot), velo_to_cam_trans, r0_rect) # Project 3d box to 2d box in image, ignore box if it does not fall inside. bbox_2d = project_to_2d(obj["box_cam_kitti"], p_left_kitti, imsize[1], imsize[0]) if bbox_2d is None: continue obj["bbox_2d"] = bbox_2d["bbox"] obj["truncated"] = bbox_2d["truncated"] # Set dummy score so we can use this file as result. obj["box_cam_kitti"].score = 0 v = np.dot(obj["box_cam_kitti"].rotation_matrix, np.array([1, 0, 0])) rot_y = -np.arctan2(v[2], v[0]) obj["alpha"] = -np.arctan2( obj["box_cam_kitti"].center[0], obj["box_cam_kitti"].center[2]) + rot_y obj["depth"] = np.linalg.norm( np.array(obj["box_cam_kitti"].center[:3])) objects.append(obj) objects = postprocessing(objects, imsize[1], imsize[0]) with open(label_path, "w") as label_file: for obj in objects: # Convert box to output string format. output = box_to_string(name=obj["detection_name"], box=obj["box_cam_kitti"], bbox_2d=obj["bbox_2d"], truncation=obj["truncated"], occlusion=obj["occluded"], alpha=obj["alpha"]) label_file.write(output + '\n')
def map_pointcloud_to_image_(nusc: NuScenes,
                             bbox,
                             pointsensor_token: str,
                             camera_token: str,
                             min_dist: float = 1.0,
                             dist_thresh: float = 0.1,
                             visualize: bool = False) -> Tuple:
    """
    Given a point sensor (lidar/radar) token and camera sample_data token, load point-cloud and map it to
    the image plane.
    :param nusc: NuScenes instance.
    :param bbox: object coordinates in the current image.
    :param pointsensor_token: Lidar/radar sample_data token.
    :param camera_token: Camera sample_data token.
    :param min_dist: Distance from the camera below which points are discarded.
    :param dist_thresh: Threshold to consider points within floor plane.
    :param visualize: Whether to scatter-plot the points on the image.
    :return (points_ann <np.float: 2, n>, coloring_ann <np.float: n>, ori_points_ann <np.float: 3, n>,
             image <Image>).
    """
    cam = nusc.get('sample_data', camera_token)  # Sample camera info.
    pointsensor = nusc.get('sample_data', pointsensor_token)  # Sample point cloud.
    # pcl_path is the path from root to the pointCloud file.
    pcl_path = osp.join(nusc.dataroot, pointsensor['filename'])

    # Open the pointCloud path using the Lidar or Radar class.
    if pointsensor['sensor_modality'] == 'lidar':
        # Read point cloud with LidarPointCloud (4 x samples) --> X, Y, Z and intensity.
        # The points can be accessed via pc.points.
        pc = LidarPointCloud.from_file(pcl_path)
    else:
        # Read point cloud with RadarPointCloud (18 x samples) -->
        # https://github.com/nutonomy/nuscenes-devkit/blob/master/python-sdk/nuscenes/utils/data_classes.py#L296
        pc = RadarPointCloud.from_file(pcl_path)

    # Open the image of the camera of interest.
    im = Image.open(osp.join(nusc.dataroot, cam['filename']))

    # Save the original points (X, Y and Z coordinates) in the LiDAR frame.
    ori_points = pc.points[:3, :].copy()

    # Points live in the point sensor frame. So they need to be transformed via global to the image plane.
    # First step: transform the point-cloud to the ego vehicle frame for the timestamp of the sweep.
    # Transformation record of the point sensor.
    cs_record = nusc.get('calibrated_sensor', pointsensor['calibrated_sensor_token'])
    # Turn the quaternion into a rotation matrix and use the rotate method of PointCloud to rotate.
    # This maps from the laser sensor frame to the ego frame.
    # The method is a dot product between cs_record['rotation'] (3 x 3) and points (3 x points).
    pc.rotate(Quaternion(cs_record['rotation']).rotation_matrix)
    # Add the translation vector between ego vehicle and sensor.
    # The translate method is an addition of cs_record['translation'] (3,) and points (3 x points).
    pc.translate(np.array(cs_record['translation']))

    # Second step: transform to the global frame.
    poserecord = nusc.get('ego_pose', pointsensor['ego_pose_token'])
    # Same step as before, map from ego_pose to world coordinate frame.
    pc.rotate(Quaternion(poserecord['rotation']).rotation_matrix)
    pc.translate(np.array(poserecord['translation']))

    # Third step: transform into the ego vehicle frame for the timestamp of the image.
    poserecord = nusc.get('ego_pose', cam['ego_pose_token'])
    # Same step as before, map from world coordinate frame to ego vehicle frame for the timestamp of the image.
    pc.translate(-np.array(poserecord['translation']))
    pc.rotate(Quaternion(poserecord['rotation']).rotation_matrix.T)

    # Fourth step: transform into the camera.
    cs_record = nusc.get('calibrated_sensor', cam['calibrated_sensor_token'])
    # Same step as before, map from ego at camera timestamp to camera.
    pc.translate(-np.array(cs_record['translation']))
    pc.rotate(Quaternion(cs_record['rotation']).rotation_matrix.T)

    # Fifth step: actually take a "picture" of the point cloud.
    # Grab the depths (camera frame z axis points away from the camera).
    depths = pc.points[2, :]

    # Retrieve the color from the depth.
    coloring = depths

    # Take the actual picture (matrix multiplication with camera-matrix + renormalization).
    # Normalization means dividing the X and Y coordinates by the depth.
    # The output dim is (3 x n_points), where the 3rd row is all ones.
    points = view_points(pc.points[:3, :], np.array(cs_record['camera_intrinsic']), normalize=True)

    # Bounding box coordinates.
    min_x, min_y, max_x, max_y = [int(points_b) for points_b in bbox]

    # Floor segmentation.
    points_img, coloring_img, ori_points_img = segment_floor(points, coloring, ori_points,
                                                             (im.size[0], im.size[1]), dist_thresh, 1.0)

    # Filter the points within the annotation.
    # Create a mask of bools the same size as the depth points.
    mask_ann = np.ones(coloring_img.shape[0], dtype=bool)
    # Keep points whose X coordinate is bigger than the bounding-box minimum.
    mask_ann = np.logical_and(mask_ann, points_img[0, :] > min_x + 1)
    # Keep points whose X coordinate is smaller than the bounding-box maximum.
    mask_ann = np.logical_and(mask_ann, points_img[0, :] < max_x - 1)
    # Keep points whose Y coordinate is bigger than the bounding-box minimum.
    mask_ann = np.logical_and(mask_ann, points_img[1, :] > min_y + 1)
    # Keep points whose Y coordinate is smaller than the bounding-box maximum.
    mask_ann = np.logical_and(mask_ann, points_img[1, :] < max_y - 1)
    # Keep only the points of interest.
    points_ann = points_img[:, mask_ann]
    coloring_ann = coloring_img[mask_ann]
    ori_points_ann = ori_points_img[:, mask_ann]

    if visualize:
        plt.figure(figsize=(9, 16))
        plt.imshow(im)
        plt.scatter(points_ann[0, :], points_ann[1, :], c=coloring_ann, s=5)
        plt.axis('off')

    return points_ann, coloring_ann, ori_points_ann, im
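# Usage sketch (added for illustration): the bbox is a pixel-space
# (x_min, y_min, x_max, y_max) tuple, e.g. from a 2D detector; the values below
# are placeholders.
def demo_bbox_projection(nusc, sample_token: str, bbox=(400, 450, 800, 900)):
    sample = nusc.get('sample', sample_token)
    return map_pointcloud_to_image_(nusc, bbox,
                                    pointsensor_token=sample['data']['LIDAR_TOP'],
                                    camera_token=sample['data']['CAM_FRONT'],
                                    visualize=True)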
def get_lidar_data(nusc, sample_rec, nsweeps, min_distance): """Similar to LidarPointCloud.from_file_multisweep but returns in the ego car frame.""" # Init. points = np.zeros((5, 0)) # Get reference pose and timestamp. ref_sd_token = sample_rec['data']['LIDAR_TOP'] ref_sd_rec = nusc.get('sample_data', ref_sd_token) ref_pose_rec = nusc.get('ego_pose', ref_sd_rec['ego_pose_token']) ref_cs_rec = nusc.get('calibrated_sensor', ref_sd_rec['calibrated_sensor_token']) ref_time = 1e-6 * ref_sd_rec['timestamp'] # Homogeneous transformation matrix from global to _current_ ego car frame. car_from_global = transform_matrix(ref_pose_rec['translation'], Quaternion(ref_pose_rec['rotation']), inverse=True) # Aggregate current and previous sweeps. sample_data_token = sample_rec['data']['LIDAR_TOP'] current_sd_rec = nusc.get('sample_data', sample_data_token) for _ in range(nsweeps): # Load up the pointcloud and remove points close to the sensor. current_pc = LidarPointCloud.from_file( os.path.join(nusc.dataroot, current_sd_rec['filename'])) current_pc.remove_close(min_distance) # Get past pose. current_pose_rec = nusc.get('ego_pose', current_sd_rec['ego_pose_token']) global_from_car = transform_matrix(current_pose_rec['translation'], Quaternion( current_pose_rec['rotation']), inverse=False) # Homogeneous transformation matrix from sensor coordinate frame to ego car frame. current_cs_rec = nusc.get('calibrated_sensor', current_sd_rec['calibrated_sensor_token']) car_from_current = transform_matrix(current_cs_rec['translation'], Quaternion( current_cs_rec['rotation']), inverse=False) # Fuse four transformation matrices into one and perform transform. trans_matrix = reduce( np.dot, [car_from_global, global_from_car, car_from_current]) current_pc.transform(trans_matrix) # Add time vector which can be used as a temporal feature. time_lag = 1e-6 * current_sd_rec['timestamp'] - ref_time times = np.full((1, current_pc.nbr_points()), time_lag) new_points = np.concatenate((current_pc.points, times), 0) points = np.concatenate((points, new_points), 1) # Abort if there are no previous sweeps. if current_sd_rec['prev'] == '': break else: current_sd_rec = nusc.get('sample_data', current_sd_rec['prev']) return points
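# Usage sketch (added for illustration): aggregate up to 3 sweeps around the
# first sample, dropping points within 2.2 m of the sensor. Rows are x, y, z,
# reflectance and the time lag to the reference sweep. Note that this variant
# stores past sweeps with a negative lag, while the variant below uses the
# opposite sign convention.
def demo_multisweep(nusc):
    sample_rec = nusc.get('sample', nusc.sample[0]['token'])
    return get_lidar_data(nusc, sample_rec, nsweeps=3, min_distance=2.2)  # Shape (5, N).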
def get_lidar_data(nusc, sample_rec, nsweeps, min_distance): """ Returns at most nsweeps of lidar in the ego frame. Returned tensor is 5(x, y, z, reflectance, dt) x N Adapted from https://github.com/nutonomy/nuscenes-devkit/blob/master/python-sdk/nuscenes/utils/data_classes.py#L56 """ points = np.zeros((5, 0)) # Get reference pose and timestamp. ref_sd_token = sample_rec['data']['LIDAR_TOP'] ref_sd_rec = nusc.get('sample_data', ref_sd_token) ref_pose_rec = nusc.get('ego_pose', ref_sd_rec['ego_pose_token']) ref_cs_rec = nusc.get('calibrated_sensor', ref_sd_rec['calibrated_sensor_token']) ref_time = 1e-6 * ref_sd_rec['timestamp'] # Homogeneous transformation matrix from global to _current_ ego car frame. car_from_global = transform_matrix(ref_pose_rec['translation'], Quaternion(ref_pose_rec['rotation']), inverse=True) # Aggregate current and previous sweeps. sample_data_token = sample_rec['data']['LIDAR_TOP'] current_sd_rec = nusc.get('sample_data', sample_data_token) for _ in range(nsweeps): # Load up the pointcloud and remove points close to the sensor. current_pc = LidarPointCloud.from_file( os.path.join(nusc.dataroot, current_sd_rec['filename'])) current_pc.remove_close(min_distance) # Get past pose. current_pose_rec = nusc.get('ego_pose', current_sd_rec['ego_pose_token']) global_from_car = transform_matrix(current_pose_rec['translation'], Quaternion( current_pose_rec['rotation']), inverse=False) # Homogeneous transformation matrix from sensor coordinate frame to ego car frame. current_cs_rec = nusc.get('calibrated_sensor', current_sd_rec['calibrated_sensor_token']) car_from_current = transform_matrix(current_cs_rec['translation'], Quaternion( current_cs_rec['rotation']), inverse=False) # Fuse four transformation matrices into one and perform transform. trans_matrix = reduce( np.dot, [car_from_global, global_from_car, car_from_current]) current_pc.transform(trans_matrix) # Add time vector which can be used as a temporal feature. time_lag = ref_time - 1e-6 * current_sd_rec['timestamp'] times = time_lag * np.ones((1, current_pc.nbr_points())) new_points = np.concatenate((current_pc.points, times), 0) points = np.concatenate((points, new_points), 1) # Abort if there are no previous sweeps. if current_sd_rec['prev'] == '': break else: current_sd_rec = nusc.get('sample_data', current_sd_rec['prev']) return points