def add_center_dist(nusc: NuScenes,
                    eval_boxes: EvalBoxes):
    """
    Adds the cylindrical (xy) center distance from ego vehicle to each box.
    :param nusc: The NuScenes instance.
    :param eval_boxes: A set of boxes, either GT or predictions.
    :return: eval_boxes augmented with center distances.
    """
    for sample_token in eval_boxes.sample_tokens:
        sample_rec = nusc.get('sample', sample_token)
        sd_record = nusc.get('sample_data', sample_rec['data']['LIDAR_TOP'])
        pose_record = nusc.get('ego_pose', sd_record['ego_pose_token'])

        for box in eval_boxes[sample_token]:
            # Both boxes and ego pose are given in global coord system, so distance can be calculated directly.
            # Note that the z component of the ego pose is 0.
            ego_translation = (box.translation[0] - pose_record['translation'][0],
                               box.translation[1] - pose_record['translation'][1],
                               box.translation[2] - pose_record['translation'][2])
            if isinstance(box, (DetectionBox, TrackingBox)):
                box.ego_translation = ego_translation
            else:
                raise NotImplementedError

    return eval_boxes
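# Hedged usage sketch (not part of the original devkit code): add_center_dist() is
# typically chained with load_gt() (defined below) before distance filtering. The
# dataroot path and the _demo_* helper name are placeholders for illustration.
def _demo_add_center_dist() -> None:
    nusc = NuScenes(version='v1.0-mini', dataroot='/data/sets/nuscenes', verbose=False)
    gt_boxes = load_gt(nusc, 'mini_val', DetectionBox)
    gt_boxes = add_center_dist(nusc, gt_boxes)

    # Every box now carries an ego_translation; ego_dist (its xy norm) is what
    # filter_eval_boxes() consumes.
    box = gt_boxes[gt_boxes.sample_tokens[0]][0]
    print('Ego distance of first box: %.2f m' % box.ego_dist)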
def export_videos(nusc: NuScenes, out_dir: str):
    """ Exports a video of the camera images for each scene. """

    # Get all scene tokens.
    scene_tokens = [s['token'] for s in nusc.scene]

    # Create output directory.
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    # Write videos to disk.
    for scene_token in scene_tokens:
        scene = nusc.get('scene', scene_token)
        print('Writing scene %s' % scene['name'])
        out_path = os.path.join(out_dir, scene['name']) + '.avi'
        if not os.path.exists(out_path):
            nusc.render_scene(scene['token'], out_path=out_path)
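# Hedged usage sketch: render every scene of the mini split to one .avi per scene.
# Assumes a local nuScenes mini install; both paths below are placeholders.
def _demo_export_videos() -> None:
    nusc = NuScenes(version='v1.0-mini', dataroot='/data/sets/nuscenes', verbose=False)
    export_videos(nusc, out_dir='./scene_videos')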
def load_gt(nusc: NuScenes, eval_split: str, box_cls, verbose: bool = False) -> EvalBoxes:
    """
    Loads ground truth boxes from DB.
    :param nusc: A NuScenes instance.
    :param eval_split: The evaluation split for which we load GT boxes.
    :param box_cls: Type of box to load, e.g. DetectionBox or TrackingBox.
    :param verbose: Whether to print messages to stdout.
    :return: The GT boxes.
    """
    # Init.
    if box_cls == DetectionBox:
        attribute_map = {a['token']: a['name'] for a in nusc.attribute}

    if verbose:
        print('Loading annotations for {} split from nuScenes version: {}'.format(eval_split, nusc.version))

    # Read out all sample_tokens in DB.
    sample_tokens_all = [s['token'] for s in nusc.sample]
    assert len(sample_tokens_all) > 0, "Error: Database has no samples!"

    # Only keep samples from this split.
    splits = create_splits_scenes()

    # Check compatibility of split with nusc_version.
    version = nusc.version
    if eval_split in {'train', 'val', 'train_detect', 'train_track'}:
        assert version.endswith('trainval'), \
            'Error: Requested split {} which is not compatible with NuScenes version {}'.format(eval_split, version)
    elif eval_split in {'mini_train', 'mini_val'}:
        assert version.endswith('mini'), \
            'Error: Requested split {} which is not compatible with NuScenes version {}'.format(eval_split, version)
    elif eval_split == 'test':
        assert version.endswith('test'), \
            'Error: Requested split {} which is not compatible with NuScenes version {}'.format(eval_split, version)
    else:
        raise ValueError('Error: Requested split {} which this function cannot map to the correct NuScenes version.'
                         .format(eval_split))

    if eval_split == 'test':
        # Check that you aren't trying to cheat :).
        assert len(nusc.sample_annotation) > 0, \
            'Error: You are trying to evaluate on the test set but you do not have the annotations!'

    sample_tokens = []
    for sample_token in sample_tokens_all:
        scene_token = nusc.get('sample', sample_token)['scene_token']
        scene_record = nusc.get('scene', scene_token)
        if scene_record['name'] in splits[eval_split]:
            sample_tokens.append(sample_token)

    all_annotations = EvalBoxes()

    # Load annotations and filter predictions and annotations.
    tracking_id_set = set()
    for sample_token in tqdm.tqdm(sample_tokens, leave=verbose):

        sample = nusc.get('sample', sample_token)
        sample_annotation_tokens = sample['anns']

        sample_boxes = []
        for sample_annotation_token in sample_annotation_tokens:

            sample_annotation = nusc.get('sample_annotation', sample_annotation_token)
            if box_cls == DetectionBox:
                # Get label name in detection task and filter unused labels.
                detection_name = category_to_detection_name(sample_annotation['category_name'])
                if detection_name is None:
                    continue

                # Get attribute_name.
                attr_tokens = sample_annotation['attribute_tokens']
                attr_count = len(attr_tokens)
                if attr_count == 0:
                    attribute_name = ''
                elif attr_count == 1:
                    attribute_name = attribute_map[attr_tokens[0]]
                else:
                    raise Exception('Error: GT annotations must not have more than one attribute!')

                sample_boxes.append(
                    box_cls(
                        sample_token=sample_token,
                        translation=sample_annotation['translation'],
                        size=sample_annotation['size'],
                        rotation=sample_annotation['rotation'],
                        velocity=nusc.box_velocity(sample_annotation['token'])[:2],
                        num_pts=sample_annotation['num_lidar_pts'] + sample_annotation['num_radar_pts'],
                        detection_name=detection_name,
                        detection_score=-1.0,  # GT samples do not have a score.
                        attribute_name=attribute_name
                    )
                )
            elif box_cls == TrackingBox:
                # Use nuScenes token as tracking id.
                tracking_id = sample_annotation['instance_token']
                tracking_id_set.add(tracking_id)

                # Get label name in tracking task and filter unused labels.
                tracking_name = category_to_tracking_name(sample_annotation['category_name'])
                if tracking_name is None:
                    continue

                sample_boxes.append(
                    box_cls(
                        sample_token=sample_token,
                        translation=sample_annotation['translation'],
                        size=sample_annotation['size'],
                        rotation=sample_annotation['rotation'],
                        velocity=nusc.box_velocity(sample_annotation['token'])[:2],
                        num_pts=sample_annotation['num_lidar_pts'] + sample_annotation['num_radar_pts'],
                        tracking_id=tracking_id,
                        tracking_name=tracking_name,
                        tracking_score=-1.0  # GT samples do not have a score.
                    )
                )
            else:
                raise NotImplementedError('Error: Invalid box_cls %s!' % box_cls)

        all_annotations.add_boxes(sample_token, sample_boxes)

    if verbose:
        print("Loaded ground truth annotations for {} samples.".format(len(all_annotations.sample_tokens)))

    return all_annotations
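# Hedged usage sketch: load GT once per task. The same function serves both the
# detection and the tracking challenge; only box_cls differs. Paths are placeholders.
def _demo_load_gt() -> None:
    nusc = NuScenes(version='v1.0-mini', dataroot='/data/sets/nuscenes', verbose=False)
    det_gt = load_gt(nusc, 'mini_val', DetectionBox, verbose=True)
    trk_gt = load_gt(nusc, 'mini_val', TrackingBox, verbose=True)

    # Both loads cover the same samples; only the box type and class filtering differ.
    assert len(det_gt.sample_tokens) == len(trk_gt.sample_tokens)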
def filter_eval_boxes(nusc: NuScenes,
                      eval_boxes: EvalBoxes,
                      max_dist: Dict[str, float],
                      verbose: bool = False) -> EvalBoxes:
    """
    Applies filtering to boxes. Distance, bike-racks and points per box.
    :param nusc: An instance of the NuScenes class.
    :param eval_boxes: An instance of the EvalBoxes class.
    :param max_dist: Maps the detection name to the eval distance threshold for that class.
    :param verbose: Whether to print to stdout.
    """
    # Retrieve box type for detection/tracking boxes.
    class_field = _get_box_class_field(eval_boxes)

    # Accumulators for number of filtered boxes.
    total, dist_filter, point_filter, bike_rack_filter = 0, 0, 0, 0
    for ind, sample_token in enumerate(eval_boxes.sample_tokens):

        # Filter on distance first.
        total += len(eval_boxes[sample_token])
        eval_boxes.boxes[sample_token] = [box for box in eval_boxes[sample_token]
                                          if box.ego_dist < max_dist[box.__getattribute__(class_field)]]
        dist_filter += len(eval_boxes[sample_token])

        # Then remove boxes with zero points in them. Eval boxes have -1 points by default.
        eval_boxes.boxes[sample_token] = [box for box in eval_boxes[sample_token] if not box.num_pts == 0]
        point_filter += len(eval_boxes[sample_token])

        # Perform bike-rack filtering: drop bicycles and motorcycles whose centers lie inside a bike-rack box.
        sample_anns = nusc.get('sample', sample_token)['anns']
        bikerack_recs = [nusc.get('sample_annotation', ann) for ann in sample_anns
                         if nusc.get('sample_annotation', ann)['category_name'] == 'static_object.bicycle_rack']
        bikerack_boxes = [Box(rec['translation'], rec['size'], Quaternion(rec['rotation']))
                          for rec in bikerack_recs]
        filtered_boxes = []
        for box in eval_boxes[sample_token]:
            if box.__getattribute__(class_field) in ['bicycle', 'motorcycle']:
                in_a_bikerack = False
                for bikerack_box in bikerack_boxes:
                    if np.sum(points_in_box(bikerack_box, np.expand_dims(np.array(box.translation), axis=1))) > 0:
                        in_a_bikerack = True
                if not in_a_bikerack:
                    filtered_boxes.append(box)
            else:
                filtered_boxes.append(box)
        eval_boxes.boxes[sample_token] = filtered_boxes
        bike_rack_filter += len(eval_boxes.boxes[sample_token])

    if verbose:
        print("=> Original number of boxes: %d" % total)
        print("=> After distance based filtering: %d" % dist_filter)
        print("=> After LIDAR points based filtering: %d" % point_filter)
        print("=> After bike rack filtering: %d" % bike_rack_filter)

    return eval_boxes
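# Hedged usage sketch: the per-class distance thresholds below are illustrative
# placeholders, not the official nuScenes eval config (the real thresholds ship
# with the devkit config files). DETECTION_NAMES is the class list also used by
# the detection fixture further below.
def _demo_filter_eval_boxes() -> None:
    nusc = NuScenes(version='v1.0-mini', dataroot='/data/sets/nuscenes', verbose=False)
    gt_boxes = add_center_dist(nusc, load_gt(nusc, 'mini_val', DetectionBox))
    max_dist = {name: 50.0 for name in DETECTION_NAMES}  # Placeholder: 50 m for every class.
    gt_boxes = filter_eval_boxes(nusc, gt_boxes, max_dist, verbose=True)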
def _mock_submission(nusc: NuScenes, split: str, add_errors: bool = False) -> Dict[str, dict]:
    """
    Creates a "reasonable" tracking submission (results and metadata) by looping through the given split and
    adding 1 GT prediction per sample. Predictions will be permuted randomly along all axes.
    :param nusc: NuScenes instance.
    :param split: Dataset split to use.
    :param add_errors: Whether to use GT or add errors to it.
    """

    def random_class(category_name: str, _add_errors: bool = False) -> Optional[str]:
        # Alter 10% of the valid labels.
        class_names = sorted(TRACKING_NAMES)
        tmp = category_to_tracking_name(category_name)

        if tmp is None:
            return None
        else:
            if not _add_errors or np.random.rand() < .9:
                return tmp
            else:
                return class_names[np.random.randint(0, len(class_names))]  # randint's upper bound is exclusive.

    def random_id(instance_token: str, _add_errors: bool = False) -> str:
        # Alter 10% of the valid ids to be a random string, which hopefully corresponds to a new track.
        if not _add_errors or np.random.rand() < .9:
            _tracking_id = instance_token + '_pred'
        else:
            _tracking_id = str(np.random.randint(0, sys.maxsize))

        return _tracking_id

    mock_meta = {
        'use_camera': False,
        'use_lidar': True,
        'use_radar': False,
        'use_map': False,
        'use_external': False,
    }
    mock_results = {}

    # Get all samples in the current evaluation split.
    splits = create_splits_scenes()
    val_samples = []
    for sample in nusc.sample:
        if nusc.get('scene', sample['scene_token'])['name'] in splits[split]:
            val_samples.append(sample)

    # Prepare results.
    instance_to_score = dict()
    for sample in tqdm(val_samples, leave=False):
        sample_res = []
        for ann_token in sample['anns']:
            ann = nusc.get('sample_annotation', ann_token)
            translation = np.array(ann['translation'])
            size = np.array(ann['size'])
            rotation = np.array(ann['rotation'])
            velocity = nusc.box_velocity(ann_token)[:2]
            tracking_id = random_id(ann['instance_token'], _add_errors=add_errors)
            tracking_name = random_class(ann['category_name'], _add_errors=add_errors)

            # Skip annotations for classes not part of the tracking challenge.
            if tracking_name is None:
                continue

            # Skip annotations with 0 lidar/radar points.
            num_pts = ann['num_lidar_pts'] + ann['num_radar_pts']
            if num_pts == 0:
                continue

            # If we randomly assign a score in [0, 1] to each box and later average over the boxes in the track,
            # the average score will be around 0.5 and we will have 0 predictions above that.
            # Therefore we assign the same scores to each box in a track.
            if ann['instance_token'] not in instance_to_score:
                instance_to_score[ann['instance_token']] = random.random()
            tracking_score = instance_to_score[ann['instance_token']]
            tracking_score = np.clip(tracking_score + random.random() * 0.3, 0, 1)

            if add_errors:
                translation += 4 * (np.random.rand(3) - 0.5)
                size *= (np.random.rand(3) + 0.5)
                rotation += (np.random.rand(4) - 0.5) * .1
                velocity *= np.random.rand(3)[:2] + 0.5

            sample_res.append({
                'sample_token': sample['token'],
                'translation': list(translation),
                'size': list(size),
                'rotation': list(rotation),
                'velocity': list(velocity),
                'tracking_id': tracking_id,
                'tracking_name': tracking_name,
                'tracking_score': tracking_score
            })
        mock_results[sample['token']] = sample_res

    mock_submission = {
        'meta': mock_meta,
        'results': mock_results
    }

    return mock_submission
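# Hedged usage sketch: serialize the mock tracking submission in the
# {'meta': ..., 'results': ...} layout that the tracking evaluation consumes.
# The output filename is a placeholder.
def _demo_mock_tracking_submission() -> None:
    import json

    nusc = NuScenes(version='v1.0-mini', dataroot='/data/sets/nuscenes', verbose=False)
    submission = _mock_submission(nusc, 'mini_val', add_errors=True)
    with open('mock_tracking_results.json', 'w') as f:
        json.dump(submission, f, indent=2)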
def visualize_sample(nusc: NuScenes,
                     sample_token: str,
                     gt_boxes: EvalBoxes,
                     pred_boxes: EvalBoxes,
                     nsweeps: int = 1,
                     conf_th: float = 0.15,
                     eval_range: float = 50,
                     verbose: bool = True,
                     savepath: str = None) -> None:
    """
    Visualizes a sample from BEV with annotations and detection results.
    :param nusc: NuScenes object.
    :param sample_token: The nuScenes sample token.
    :param gt_boxes: Ground truth boxes grouped by sample.
    :param pred_boxes: Prediction boxes grouped by sample.
    :param nsweeps: Number of sweeps used for lidar visualization.
    :param conf_th: The confidence threshold used to filter negatives.
    :param eval_range: Range in meters beyond which boxes are ignored.
    :param verbose: Whether to print to stdout.
    :param savepath: If given, saves the rendering here instead of displaying.
    """
    # Retrieve sensor & pose records.
    sample_rec = nusc.get('sample', sample_token)
    sd_record = nusc.get('sample_data', sample_rec['data']['LIDAR_TOP'])
    cs_record = nusc.get('calibrated_sensor', sd_record['calibrated_sensor_token'])
    pose_record = nusc.get('ego_pose', sd_record['ego_pose_token'])

    # Get boxes.
    boxes_gt_global = gt_boxes[sample_token]
    boxes_est_global = pred_boxes[sample_token]

    # Map GT boxes to lidar.
    boxes_gt = boxes_to_sensor(boxes_gt_global, pose_record, cs_record)

    # Map EST boxes to lidar.
    boxes_est = boxes_to_sensor(boxes_est_global, pose_record, cs_record)

    # Add scores to EST boxes.
    for box_est, box_est_global in zip(boxes_est, boxes_est_global):
        box_est.score = box_est_global.detection_score

    # Get point cloud in lidar frame.
    pc, _ = LidarPointCloud.from_file_multisweep(nusc, sample_rec, 'LIDAR_TOP', 'LIDAR_TOP', nsweeps=nsweeps)

    # Init axes.
    _, ax = plt.subplots(1, 1, figsize=(9, 9))

    # Show point cloud.
    points = view_points(pc.points[:3, :], np.eye(4), normalize=False)
    dists = np.sqrt(np.sum(pc.points[:2, :] ** 2, axis=0))
    colors = np.minimum(1, dists / eval_range)
    ax.scatter(points[0, :], points[1, :], c=colors, s=0.2)

    # Show ego vehicle.
    ax.plot(0, 0, 'x', color='black')

    # Show GT boxes.
    for box in boxes_gt:
        box.render(ax, view=np.eye(4), colors=('g', 'g', 'g'), linewidth=2)

    # Show EST boxes.
    for box in boxes_est:
        # Show only predictions with a high score.
        assert not np.isnan(box.score), 'Error: Box score cannot be NaN!'
        if box.score >= conf_th:
            box.render(ax, view=np.eye(4), colors=('b', 'b', 'b'), linewidth=1)

    # Limit visible range.
    axes_limit = eval_range + 3  # Slightly bigger to include boxes that extend beyond the range.
    ax.set_xlim(-axes_limit, axes_limit)
    ax.set_ylim(-axes_limit, axes_limit)

    # Show / save plot.
    if verbose:
        print('Rendering sample token %s' % sample_token)
    plt.title(sample_token)
    if savepath is not None:
        plt.savefig(savepath)
        plt.close()
    else:
        plt.show()
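# Hedged usage sketch: render one BEV frame. GT boxes are reused as 'predictions'
# here purely to exercise the call signature; their placeholder score of -1.0 falls
# below conf_th, so only the green GT boxes are drawn. savepath is a placeholder.
def _demo_visualize_sample() -> None:
    nusc = NuScenes(version='v1.0-mini', dataroot='/data/sets/nuscenes', verbose=False)
    gt_boxes = load_gt(nusc, 'mini_val', DetectionBox)
    visualize_sample(nusc, gt_boxes.sample_tokens[0], gt_boxes, gt_boxes, savepath='sample_bev.png')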
def export_scene_pointcloud(nusc: NuScenes,
                            out_path: str,
                            scene_token: str,
                            channel: str = 'LIDAR_TOP',
                            min_dist: float = 3.0,
                            max_dist: float = 30.0,
                            verbose: bool = True) -> None:
    """
    Export fused point clouds of a scene to a Wavefront OBJ file.
    This point-cloud can be viewed in your favorite 3D rendering tool, e.g. Meshlab or Maya.
    :param nusc: NuScenes instance.
    :param out_path: Output path to write the point-cloud to.
    :param scene_token: Unique identifier of scene to render.
    :param channel: Channel to render.
    :param min_dist: Minimum distance to ego vehicle below which points are dropped.
    :param max_dist: Maximum distance to ego vehicle above which points are dropped.
    :param verbose: Whether to print messages to stdout.
    """
    # Check inputs.
    valid_channels = ['LIDAR_TOP', 'RADAR_FRONT', 'RADAR_FRONT_RIGHT', 'RADAR_FRONT_LEFT', 'RADAR_BACK_LEFT',
                      'RADAR_BACK_RIGHT']
    camera_channels = ['CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT', 'CAM_BACK_LEFT', 'CAM_BACK',
                       'CAM_BACK_RIGHT']
    assert channel in valid_channels, 'Input channel {} not valid.'.format(channel)

    # Get records from DB.
    scene_rec = nusc.get('scene', scene_token)
    start_sample_rec = nusc.get('sample', scene_rec['first_sample_token'])
    sd_rec = nusc.get('sample_data', start_sample_rec['data'][channel])

    # Make list of frames.
    cur_sd_rec = sd_rec
    sd_tokens = []
    while cur_sd_rec['next'] != '':
        cur_sd_rec = nusc.get('sample_data', cur_sd_rec['next'])
        sd_tokens.append(cur_sd_rec['token'])

    # Write point-cloud.
    with open(out_path, 'w') as f:
        f.write("OBJ File:\n")

        for sd_token in tqdm(sd_tokens):
            # Look up the record for the current token directly, so the lidar sweep
            # and the camera images used for coloring belong to the same frame.
            lidar_rec = nusc.get('sample_data', sd_token)
            if verbose:
                print('Processing {}'.format(lidar_rec['filename']))
            sample_rec = nusc.get('sample', lidar_rec['sample_token'])
            pc = LidarPointCloud.from_file(osp.join(nusc.dataroot, lidar_rec['filename']))

            # Get point cloud colors. Points no camera sees keep the sentinel color -1.
            coloring = np.ones((3, pc.points.shape[1])) * -1
            for cam_channel in camera_channels:  # Named cam_channel to avoid shadowing the `channel` argument.
                camera_token = sample_rec['data'][cam_channel]
                cam_coloring, cam_mask = pointcloud_color_from_image(nusc, sd_token, camera_token)
                coloring[:, cam_mask] = cam_coloring

            # Points live in their own reference frame. So they need to be transformed via global to the image plane.
            # First step: transform the point cloud to the ego vehicle frame for the timestamp of the sweep.
            cs_record = nusc.get('calibrated_sensor', lidar_rec['calibrated_sensor_token'])
            pc.rotate(Quaternion(cs_record['rotation']).rotation_matrix)
            pc.translate(np.array(cs_record['translation']))

            # Optionally filter by distance to remove the ego vehicle.
            dists_origin = np.sqrt(np.sum(pc.points[:3, :] ** 2, axis=0))
            keep = np.logical_and(min_dist <= dists_origin, dists_origin <= max_dist)
            pc.points = pc.points[:, keep]
            coloring = coloring[:, keep]
            if verbose:
                print('Distance filter: Keeping %d of %d points...' % (keep.sum(), len(keep)))

            # Second step: transform to the global frame.
            poserecord = nusc.get('ego_pose', lidar_rec['ego_pose_token'])
            pc.rotate(Quaternion(poserecord['rotation']).rotation_matrix)
            pc.translate(np.array(poserecord['translation']))

            # Write points to file.
            for (v, c) in zip(pc.points.transpose(), coloring.transpose()):
                if (c == -1).any():
                    # Ignore points without a color.
                    pass
                else:
                    f.write("v {v[0]:.8f} {v[1]:.8f} {v[2]:.8f} {c[0]:.4f} {c[1]:.4f} {c[2]:.4f}\n"
                            .format(v=v, c=c / 255.0))
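# Hedged usage sketch: fuse and color the first scene's lidar sweeps into an OBJ
# file that e.g. MeshLab can open. The output path is a placeholder.
def _demo_export_scene_pointcloud() -> None:
    nusc = NuScenes(version='v1.0-mini', dataroot='/data/sets/nuscenes', verbose=False)
    scene_token = nusc.scene[0]['token']
    export_scene_pointcloud(nusc, 'scene_0.obj', scene_token, channel='LIDAR_TOP',
                            min_dist=3.0, max_dist=30.0)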
def pointcloud_color_from_image(nusc: NuScenes,
                                pointsensor_token: str,
                                camera_token: str) -> Tuple[np.ndarray, np.ndarray]:
    """
    Given a point sensor (lidar/radar) token and camera sample_data token, load point-cloud and map it to the image
    plane, then retrieve the colors of the closest image pixels.
    :param nusc: NuScenes instance.
    :param pointsensor_token: Lidar/radar sample_data token.
    :param camera_token: Camera sample_data token.
    :return (coloring <np.float: 3, n>, mask <np.bool: m>). Returns the colors for n points that reproject into the
        image out of m total points. The mask indicates which points are selected.
    """
    cam = nusc.get('sample_data', camera_token)
    pointsensor = nusc.get('sample_data', pointsensor_token)

    pc = LidarPointCloud.from_file(osp.join(nusc.dataroot, pointsensor['filename']))
    im = Image.open(osp.join(nusc.dataroot, cam['filename']))

    # Points live in the point sensor frame. So they need to be transformed via global to the image plane.
    # First step: transform the point-cloud to the ego vehicle frame for the timestamp of the sweep.
    cs_record = nusc.get('calibrated_sensor', pointsensor['calibrated_sensor_token'])
    pc.rotate(Quaternion(cs_record['rotation']).rotation_matrix)
    pc.translate(np.array(cs_record['translation']))

    # Second step: transform to the global frame.
    poserecord = nusc.get('ego_pose', pointsensor['ego_pose_token'])
    pc.rotate(Quaternion(poserecord['rotation']).rotation_matrix)
    pc.translate(np.array(poserecord['translation']))

    # Third step: transform into the ego vehicle frame for the timestamp of the image.
    poserecord = nusc.get('ego_pose', cam['ego_pose_token'])
    pc.translate(-np.array(poserecord['translation']))
    pc.rotate(Quaternion(poserecord['rotation']).rotation_matrix.T)

    # Fourth step: transform into the camera.
    cs_record = nusc.get('calibrated_sensor', cam['calibrated_sensor_token'])
    pc.translate(-np.array(cs_record['translation']))
    pc.rotate(Quaternion(cs_record['rotation']).rotation_matrix.T)

    # Fifth step: actually take a "picture" of the point cloud.
    # Grab the depths (camera frame z axis points away from the camera).
    depths = pc.points[2, :]

    # Take the actual picture (matrix multiplication with camera-matrix + renormalization).
    points = view_points(pc.points[:3, :], np.array(cs_record['camera_intrinsic']), normalize=True)

    # Remove points that are either outside or behind the camera. Leave a margin of 1 pixel for aesthetic reasons.
    mask = np.ones(depths.shape[0], dtype=bool)
    mask = np.logical_and(mask, depths > 0)
    mask = np.logical_and(mask, points[0, :] > 1)
    mask = np.logical_and(mask, points[0, :] < im.size[0] - 1)
    mask = np.logical_and(mask, points[1, :] > 1)
    mask = np.logical_and(mask, points[1, :] < im.size[1] - 1)
    points = points[:, mask]

    # Pick the colors of the points.
    im_data = np.array(im)
    coloring = np.zeros(points.shape)
    for i, p in enumerate(points.transpose()):
        point = p[:2].round().astype(np.int32)
        coloring[:, i] = im_data[point[1], point[0], :]

    return coloring, mask
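# Hedged usage sketch: project one keyframe lidar sweep into CAM_FRONT and report
# how many points receive a color. Only keyframe tokens from sample['data'] are used.
def _demo_pointcloud_color() -> None:
    nusc = NuScenes(version='v1.0-mini', dataroot='/data/sets/nuscenes', verbose=False)
    sample = nusc.sample[0]
    coloring, mask = pointcloud_color_from_image(
        nusc, sample['data']['LIDAR_TOP'], sample['data']['CAM_FRONT'])
    print('%d of %d points project into CAM_FRONT.' % (coloring.shape[1], mask.shape[0]))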
def _mock_submission(nusc: NuScenes, split: str) -> Dict[str, dict]:
    """
    Creates a "reasonable" detection submission (results and metadata) by looping through the given split and
    adding 1 GT prediction per sample. Predictions will be permuted randomly along all axes.
    """

    def random_class(category_name: str) -> str:
        # Alter 10% of the valid labels.
        class_names = sorted(DETECTION_NAMES)
        tmp = category_to_detection_name(category_name)
        if tmp is not None and np.random.rand() < .9:
            return tmp
        else:
            return class_names[np.random.randint(0, len(class_names))]  # randint's upper bound is exclusive.

    def random_attr(name: str) -> str:
        """ This is the most straight-forward way to generate a random attribute for a given class. """
        # Get relevant attributes.
        rel_attributes = detection_name_to_rel_attributes(name)

        if len(rel_attributes) == 0:
            # Empty string for classes without attributes.
            return ''
        else:
            # Pick a random attribute otherwise.
            return rel_attributes[np.random.randint(0, len(rel_attributes))]

    mock_meta = {
        'use_camera': False,
        'use_lidar': True,
        'use_radar': False,
        'use_map': False,
        'use_external': False,
    }
    mock_results = {}
    splits = create_splits_scenes()
    val_samples = []
    for sample in nusc.sample:
        if nusc.get('scene', sample['scene_token'])['name'] in splits[split]:
            val_samples.append(sample)

    for sample in tqdm(val_samples):
        sample_res = []
        for ann_token in sample['anns']:
            ann = nusc.get('sample_annotation', ann_token)
            detection_name = random_class(ann['category_name'])
            sample_res.append({
                'sample_token': sample['token'],
                'translation': list(np.array(ann['translation']) + 5 * (np.random.rand(3) - 0.5)),
                'size': list(np.array(ann['size']) * 2 * (np.random.rand(3) + 0.5)),
                'rotation': list(np.array(ann['rotation']) + ((np.random.rand(4) - 0.5) * .1)),
                'velocity': list(nusc.box_velocity(ann_token)[:2] * (np.random.rand(3)[:2] + 0.5)),
                'detection_name': detection_name,
                'detection_score': random.random(),
                'attribute_name': random_attr(detection_name)
            })
        mock_results[sample['token']] = sample_res
    mock_submission = {
        'meta': mock_meta,
        'results': mock_results
    }
    return mock_submission
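# Hedged usage sketch: the detection counterpart of the tracking fixture above.
# Both fixtures are named _mock_submission because they come from separate test
# modules; in a single file one of them would need renaming. Filename is a placeholder.
def _demo_mock_detection_submission() -> None:
    import json

    nusc = NuScenes(version='v1.0-mini', dataroot='/data/sets/nuscenes', verbose=False)
    submission = _mock_submission(nusc, 'mini_val')
    with open('mock_detection_results.json', 'w') as f:
        json.dump(submission, f, indent=2)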