def add_center_dist(nusc: NuScenes, eval_boxes: EvalBoxes):
    """
    Adds the cylindrical (xy) center distance from ego vehicle to each box.
    :param nusc: The NuScenes instance.
    :param eval_boxes: A set of boxes, either GT or predictions.
    :return: eval_boxes augmented with center distances.
    """
    for sample_token in eval_boxes.sample_tokens:
        sample_rec = nusc.get('sample', sample_token)
        sd_record = nusc.get('sample_data', sample_rec['data']['LIDAR_TOP'])
        pose_record = nusc.get('ego_pose', sd_record['ego_pose_token'])

        for box in eval_boxes[sample_token]:
            # Both boxes and ego pose are given in global coord system, so distance can be calculated directly.
            # Note that the z component of the ego pose is 0.
            ego_translation = (box.translation[0] - pose_record['translation'][0],
                               box.translation[1] - pose_record['translation'][1],
                               box.translation[2] - pose_record['translation'][2])
            if isinstance(box, DetectionBox) or isinstance(box, TrackingBox):
                box.ego_translation = ego_translation
            else:
                raise NotImplementedError

    return eval_boxes
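# Usage sketch (added for illustration, not part of the original snippet): add_center_dist above mirrors the
# nuScenes detection eval flow, where boxes are loaded first and then augmented. The dataroot below is a
# placeholder assumption.
import os

from nuscenes.nuscenes import NuScenes
from nuscenes.eval.common.loaders import load_gt
from nuscenes.eval.detection.data_classes import DetectionBox

if __name__ == '__main__':
    nusc = NuScenes(version='v1.0-mini', dataroot=os.environ.get('NUSCENES', '/data/sets/nuscenes'), verbose=False)
    gt_boxes = load_gt(nusc, 'mini_val', DetectionBox, verbose=True)
    gt_boxes = add_center_dist(nusc, gt_boxes)
    # Each box now carries ego_translation; the eval code derives ego_dist (xy norm) from it.
    first_token = gt_boxes.sample_tokens[0]
    print(gt_boxes[first_token][0].ego_translation)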
def test_load_pointclouds(self): """ Loads up lidar and radar pointclouds. """ assert 'NUSCENES' in os.environ, 'Set NUSCENES env. variable to enable tests.' dataroot = os.environ['NUSCENES'] nusc = NuScenes(version='v1.0-mini', dataroot=dataroot, verbose=False) sample_rec = nusc.sample[0] lidar_name = nusc.get('sample_data', sample_rec['data']['LIDAR_TOP'])['filename'] radar_name = nusc.get('sample_data', sample_rec['data']['RADAR_FRONT'])['filename'] lidar_path = os.path.join(dataroot, lidar_name) radar_path = os.path.join(dataroot, radar_name) pc1 = LidarPointCloud.from_file(lidar_path) pc2 = RadarPointCloud.from_file(radar_path) pc3, _ = LidarPointCloud.from_file_multisweep(nusc, sample_rec, 'LIDAR_TOP', 'LIDAR_TOP', nsweeps=2) pc4, _ = RadarPointCloud.from_file_multisweep(nusc, sample_rec, 'RADAR_FRONT', 'RADAR_FRONT', nsweeps=2) # Check for valid dimensions. assert pc1.points.shape[0] == pc3.points.shape[ 0] == 4, 'Error: Invalid dimension for lidar pointcloud!' assert pc2.points.shape[0] == pc4.points.shape[ 0] == 18, 'Error: Invalid dimension for radar pointcloud!' assert pc1.points.dtype == pc3.points.dtype, 'Error: Invalid dtype for lidar pointcloud!' assert pc2.points.dtype == pc4.points.dtype, 'Error: Invalid dtype for radar pointcloud!'
def get_pcl():
    # v1.0-trainval
    nusc = NuScenes(version='v1.0-trainval', dataroot='/home/fengjia/data/sets/nuscenes', verbose=True)

    f = open(r'annotations_list.txt', 'w')
    count = 0
    for scene in nusc.scene:
        sample_token = scene['first_sample_token']
        while sample_token != '':
            my_sample = nusc.get('sample', sample_token)
            for i in range(len(my_sample['anns'])):
                my_annotation_token = my_sample['anns'][i]
                my_annotation_metadata = nusc.get('sample_annotation', my_annotation_token)
                my_sample_token = my_annotation_metadata['sample_token']
                my_sample_temp = nusc.get('sample', my_sample_token)
                sample_data_cam = nusc.get('sample_data', my_sample_temp['data']['CAM_FRONT'])

                # Write one line per annotation: <camera sample_data token>_<annotation token>.
                s = sample_data_cam['token']
                s += '_'
                s += my_annotation_metadata['token']
                s += '\n'
                f.write(s)
                count += 1
            sample_token = my_sample['next']
    f.close()
    print(count)
def parse_frame( data: NuScenes, scene_name: str, frame_index: int, cam_token: str, boxes: Optional[List[Box]] = None, ) -> Tuple[Frame, Optional[str]]: """Parse a single camera frame.""" cam_data = data.get("sample_data", cam_token) ego_pose_cam = data.get("ego_pose", cam_data["ego_pose_token"]) cam_filepath = cam_data["filename"] img_wh = (cam_data["width"], cam_data["height"]) calibration_cam = data.get("calibrated_sensor", cam_data["calibrated_sensor_token"]) labels: Optional[List[Label]] = None if boxes is not None: labels = parse_labels(data, boxes, ego_pose_cam, calibration_cam, img_wh) frame = Frame( name=os.path.basename(cam_filepath), videoName=scene_name, frameIndex=frame_index, url=cam_filepath, timestamp=cam_data["timestamp"], extrinsics=get_extrinsics(ego_pose_cam, calibration_cam), intrinsics=calibration_to_intrinsics(calibration_cam), size=ImageSize(width=img_wh[0], height=img_wh[1]), labels=labels, ) next_token: Optional[str] = None if (cam_data["next"] != "" and not data.get("sample_data", cam_data["next"])["is_key_frame"]): next_token = cam_data["next"] return frame, next_token
def load_data(filepath: str, version: str) -> Tuple[NuScenes, pd.DataFrame]: """Load nuscenes data and extract meta-information into dataframe.""" data = NuScenes(version=version, dataroot=filepath, verbose=True) records = [(data.get("sample", record["first_sample_token"])["timestamp"], record) for record in data.scene] entries = [] for start_time, record in sorted(records): start_time = ( data.get("sample", record["first_sample_token"])["timestamp"] / 1000000) token = record["token"] name = record["name"] date = datetime.utcfromtimestamp(start_time) host = "-".join(record["name"].split("-")[:2]) first_sample_token = record["first_sample_token"] entries.append((host, name, date, token, first_sample_token)) dataframe = pd.DataFrame( entries, columns=[ "host", "scene_name", "date", "scene_token", "first_sample_token", ], ) return data, dataframe
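# Minimal usage sketch for load_data above, assuming a local v1.0-mini copy; the dataroot is a placeholder.
if __name__ == '__main__':
    nusc, scene_df = load_data('/data/sets/nuscenes', 'v1.0-mini')
    # One row per scene: host, scene_name, date, scene_token, first_sample_token.
    print(scene_df.head())
    # Example query: all scenes recorded by the first vehicle (host), ordered by start time.
    print(scene_df[scene_df['host'] == scene_df['host'].iloc[0]].sort_values('date'))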
def pred_to_world(nusc: NuScenes,
                  pointsensor_token: str,
                  bbox_3d,
                  pointsensor_channel: str = 'LIDAR_TOP'):
    """
    Given a predicted 3D box in the point sensor (LiDAR) frame and the corresponding pointsensor
    sample_data token, transform the box from the sensor frame to the world (global) coordinate frame.
    :param nusc: NuScenes instance.
    :param pointsensor_token: Lidar/radar sample_data token.
    :param bbox_3d: Box object with the predicted 3D bbox info.
    :param pointsensor_channel: Laser channel name, e.g. 'LIDAR_TOP'.
    :return: box mapped in the world coordinate frame.
    """
    # Point sensor sample_data record.
    point_data = nusc.get('sample_data', pointsensor_token)

    # Transformation metadata from sensor to ego vehicle.
    cs_rec = nusc.get('calibrated_sensor', point_data['calibrated_sensor_token'])
    # Transformation metadata from ego vehicle to world coordinate frame.
    pose_rec = nusc.get('ego_pose', point_data['ego_pose_token'])

    # Map to ego-vehicle coordinate frame.
    bbox_3d.rotate(Quaternion(cs_rec['rotation']))
    bbox_3d.translate(np.array(cs_rec['translation']))

    # Map from ego-vehicle to world coordinate frame.
    bbox_3d.rotate(Quaternion(pose_rec['rotation']))
    bbox_3d.translate(np.array(pose_rec['translation']))

    return bbox_3d
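# Hedged example for pred_to_world above: build a Box in the LIDAR_TOP frame and map it to the global
# frame. The box center/size/yaw are dummy values chosen purely for illustration; dataroot is a placeholder.
from pyquaternion import Quaternion

from nuscenes.nuscenes import NuScenes
from nuscenes.utils.data_classes import Box

if __name__ == '__main__':
    nusc = NuScenes(version='v1.0-mini', dataroot='/data/sets/nuscenes', verbose=False)
    lidar_token = nusc.sample[0]['data']['LIDAR_TOP']

    # A dummy prediction: 10 m ahead of the sensor, car-sized, zero yaw.
    pred_box = Box(center=[10.0, 0.0, -1.0],
                   size=[1.9, 4.5, 1.6],
                   orientation=Quaternion(axis=[0, 0, 1], angle=0.0))
    pred_box_global = pred_to_world(nusc, lidar_token, pred_box)
    print(pred_box_global.center)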
def get_camera_data(nusc: NuScenes,
                    annotation_token: str,
                    box_vis_level: BoxVisibility = BoxVisibility.ANY):
    """
    Given an annotation token (3d detection in world coordinate frame), this method returns the
    camera channel in which the annotation is visible. If the box is split between two cameras,
    the first camera found is returned.
    :param nusc: NuScenes instance.
    :param annotation_token: Annotation token.
    :param box_vis_level: If sample_data is an image, this sets required visibility for boxes.
    :return: camera channel.
    """
    # Get sample annotation.
    ann_record = nusc.get('sample_annotation', annotation_token)
    sample_record = nusc.get('sample', ann_record['sample_token'])

    boxes, cam = [], []

    # All camera channels in this sample.
    cams = [key for key in sample_record['data'].keys() if 'CAM' in key]

    # Try every camera until one contains the annotation.
    for cam in cams:
        _, boxes, _ = nusc.get_sample_data(sample_record['data'][cam],
                                           box_vis_level=box_vis_level,
                                           selected_anntokens=[annotation_token])
        if len(boxes) > 0:
            break  # Stop at the first camera that contains the annotation.

    assert len(boxes) < 2, "Found multiple annotations. Something is wrong!"

    return cam
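# Small hedged sketch for get_camera_data above: look up which camera sees the first annotation of the
# first sample. Dataroot and version are placeholder assumptions.
from nuscenes.nuscenes import NuScenes
from nuscenes.utils.geometry_utils import BoxVisibility

if __name__ == '__main__':
    nusc = NuScenes(version='v1.0-mini', dataroot='/data/sets/nuscenes', verbose=False)
    ann_token = nusc.sample[0]['anns'][0]
    channel = get_camera_data(nusc, ann_token, box_vis_level=BoxVisibility.ANY)
    print('Annotation %s is visible in %s' % (ann_token, channel))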
def filter_eval_boxes(nusc: NuScenes, eval_boxes: EvalBoxes, max_dist: Dict[str, float], verbose: bool = False) -> EvalBoxes: """ Applies filtering to boxes. Distance, bike-racks and points per box. :param nusc: An instance of the NuScenes class. :param eval_boxes: An instance of the EvalBoxes class. :param max_dist: Maps the detection name to the eval distance threshold for that class. :param verbose: Whether to print to stdout. """ # Accumulators for number of filtered boxes. total, dist_filter, point_filter, bike_rack_filter = 0, 0, 0, 0 for ind, sample_token in enumerate(eval_boxes.sample_tokens): # Filter on distance first total += len(eval_boxes[sample_token]) eval_boxes.boxes[sample_token] = [box for box in eval_boxes[sample_token] if box.ego_dist < max_dist[box.detection_name]] dist_filter += len(eval_boxes[sample_token]) # Then remove boxes with zero points in them. Eval boxes have -1 points by default. eval_boxes.boxes[sample_token] = [box for box in eval_boxes[sample_token] if not box.num_pts == 0] point_filter += len(eval_boxes[sample_token]) # Perform bike-rack filtering sample_anns = nusc.get('sample', sample_token)['anns'] bikerack_recs = [nusc.get('sample_annotation', ann) for ann in sample_anns if nusc.get('sample_annotation', ann)['category_name'] == 'static_object.bicycle_rack'] bikerack_boxes = [Box(rec['translation'], rec['size'], Quaternion(rec['rotation'])) for rec in bikerack_recs] filtered_boxes = [] for box in eval_boxes[sample_token]: if box.detection_name in ['bicycle', 'motorcycle']: in_a_bikerack = False for bikerack_box in bikerack_boxes: if np.sum(points_in_box(bikerack_box, np.expand_dims(np.array(box.translation), axis=1))) > 0: in_a_bikerack = True if not in_a_bikerack: filtered_boxes.append(box) else: filtered_boxes.append(box) eval_boxes.boxes[sample_token] = filtered_boxes bike_rack_filter += len(eval_boxes.boxes[sample_token]) if verbose: print("=> Original number of boxes: %d" % total) print("=> After distance based filtering: %d" % dist_filter) print("=> After LIDAR points based filtering: %d" % point_filter) print("=> After bike rack filtering: %d" % bike_rack_filter) return eval_boxes
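# Hedged usage sketch tying filter_eval_boxes to add_center_dist above. The max_dist mapping imitates the
# detection config's class_range; the flat 50 m value here is illustrative, not the official per-class setting.
from nuscenes.nuscenes import NuScenes
from nuscenes.eval.common.loaders import load_gt
from nuscenes.eval.detection.data_classes import DetectionBox

if __name__ == '__main__':
    nusc = NuScenes(version='v1.0-mini', dataroot='/data/sets/nuscenes', verbose=False)
    gt_boxes = add_center_dist(nusc, load_gt(nusc, 'mini_val', DetectionBox))
    max_dist = {name: 50.0 for name in ['car', 'truck', 'bus', 'trailer', 'construction_vehicle',
                                        'pedestrian', 'motorcycle', 'bicycle', 'traffic_cone', 'barrier']}
    gt_boxes = filter_eval_boxes(nusc, gt_boxes, max_dist, verbose=True)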
def get_sample_ground_plane(root_path, version): nusc = NuScenes(version=version, dataroot=root_path, verbose=True) rets = {} for sample in tqdm(nusc.sample): chan = "LIDAR_TOP" sd_token = sample["data"][chan] sd_rec = nusc.get("sample_data", sd_token) lidar_path, _, _ = get_sample_data(nusc, sd_token) points = read_file(lidar_path) points = np.concatenate((points[:, :3], np.ones((points.shape[0], 1))), axis=1) plane, inliers, outliers = fit_plane_LSE_RANSAC( points, return_outlier_list=True) xx = points[:, 0] yy = points[:, 1] zz = (-plane[0] * xx - plane[1] * yy - plane[3]) / plane[2] rets.update({sd_token: { "plane": plane, "height": zz, }}) with open(nusc.root_path / "infos_trainval_ground_plane.pkl", "wb") as f: pickle.dump(rets, f)
def seg_concat():
    nusc = NuScenes(
        version='v1.0-trainval',
        dataroot='/mrtstorage/users/kpeng/nuscene_pcdet/data/nuscenes/v1.0-trainval/',
        verbose=True)
    for my_sample in nusc.sample:
        sample_data_token = my_sample['data']['LIDAR_TOP']
        ori_filename = nusc.get('sample_data', sample_data_token)['filename']
        # print(ori_filename)

        # Per-point lidarseg labels (uint8), one label per lidar point.
        anno_data = torch.from_numpy(
            np.float32(
                np.fromfile(
                    "/mrtstorage/users/kpeng/nu_lidar_seg/processed_anno/" +
                    sample_data_token + "_lidarseg.bin",
                    dtype=np.uint8,
                    count=-1)))

        # Original lidar sweep: (x, y, z, intensity, ring index) as float32.
        # Note: `file_path` and `save_path` are expected to be defined at module level.
        ori_data = np.fromfile(file_path + ori_filename, dtype=np.float32, count=-1)
        ori_data = torch.from_numpy(ori_data.reshape(-1, 5))

        # Append the segmentation label as a sixth channel and write the result back to disk.
        des_data = torch.cat([ori_data, anno_data.unsqueeze(1)], dim=-1).flatten()
        des_data = des_data.numpy().tobytes()
        binfile = open(save_path + ori_filename, 'wb+')
        binfile.write(des_data)
        binfile.close()
def parse_labels( data: NuScenes, boxes: List[Box], ego_pose: DictStrAny, calib_sensor: DictStrAny, img_size: Optional[Tuple[int, int]] = None, ) -> Optional[List[Label]]: """Parse NuScenes labels into sensor frame.""" if len(boxes): labels = [] # transform into the sensor coord system transform_boxes(boxes, ego_pose, calib_sensor) intrinsic_matrix: NDArrayF64 = np.array( calib_sensor["camera_intrinsic"], dtype=np.float64) for box in boxes: box_class = category_to_detection_name(box.name) in_image = True if img_size is not None: in_image = box_in_image(box, intrinsic_matrix, img_size) if in_image and box_class is not None: xyz = tuple(box.center.tolist()) w, l, h = box.wlh roty = quaternion_to_yaw(box.orientation) box2d = None if img_size is not None: # Project 3d box to 2d. corners = box.corners() corner_coords = (view_points(corners, intrinsic_matrix, True).T[:, :2].tolist()) # Keep only corners that fall within the image, transform box2d = xyxy_to_box2d(*post_process_coords(corner_coords)) instance_data = data.get("sample_annotation", box.token) # Attributes can be retrieved via instance_data and also the # category is more fine-grained than box_class. # This information could be stored in attributes if needed in # the future label = Label( id=instance_data["instance_token"], category=box_class, box2d=box2d, box3d=Box3D( location=xyz, dimension=(h, w, l), orientation=(0, roty, 0), alpha=rotation_y_to_alpha(roty, xyz), # type: ignore ), ) labels.append(label) return labels return None
def what_are_the_objects_types_used_in_prediction_tasks( DATAROOT='./data/sets/nuscenes', dataset_version='v1.0-mini'): nuscenes = NuScenes(dataset_version, DATAROOT) train_agents = get_prediction_challenge_split("train", dataroot=DATAROOT) val_agents = get_prediction_challenge_split("val", dataroot=DATAROOT) # # agents = get_prediction_challenge_split("train", dataroot=DATAROOT) agents = np.concatenate((train_agents, val_agents), axis=0) categories = {} token_to_name = {} for current_sample in agents: instance_token, sample_token = current_sample.split("_") category_token = nuscenes.get("instance", instance_token)["category_token"] category_name = nuscenes.get("category", category_token)["name"] categories[category_name] = category_token token_to_name[category_token] = category_name print(categories.items()) print(token_to_name.items())
def target_to_cam(nusc: NuScenes,
                  pointsensor_token: str,
                  annotation_token: str,
                  pointsensor_channel: str = 'LIDAR_TOP'):
    """
    Given an annotation token (3d detection in world coordinate frame) and a pointsensor sample_data
    token, transform the label from the world coordinate frame to the point sensor (LiDAR) frame.
    :param nusc: NuScenes instance.
    :param pointsensor_token: Lidar/radar sample_data token.
    :param annotation_token: Sample_annotation token.
    :param pointsensor_channel: Laser channel name, e.g. 'LIDAR_TOP'.
    :return: box with the labels for the 3d detection task in the LiDAR channel frame.
    """
    # Point sensor sample_data record.
    point_data = nusc.get('sample_data', pointsensor_token)

    # Transformation metadata from sensor to ego vehicle.
    cs_rec = nusc.get('calibrated_sensor', point_data['calibrated_sensor_token'])
    # Transformation metadata from ego vehicle to world coordinate frame.
    pose_rec = nusc.get('ego_pose', point_data['ego_pose_token'])

    # Obtain the annotation from the token.
    annotation_metadata = nusc.get('sample_annotation', annotation_token)

    # Obtain box parameters.
    box = nusc.get_box(annotation_metadata['token'])

    # Move them to the ego-pose frame.
    box.translate(-np.array(pose_rec['translation']))
    box.rotate(Quaternion(pose_rec['rotation']).inverse)

    # Move them to the calibrated sensor frame.
    box.translate(-np.array(cs_rec['translation']))
    box.rotate(Quaternion(cs_rec['rotation']).inverse)

    return box
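# Hedged example for target_to_cam above: map one ground-truth annotation of the first sample into the
# LIDAR_TOP frame. Dataroot and version are placeholder assumptions.
from nuscenes.nuscenes import NuScenes

if __name__ == '__main__':
    nusc = NuScenes(version='v1.0-mini', dataroot='/data/sets/nuscenes', verbose=False)
    sample = nusc.sample[0]
    box_in_lidar = target_to_cam(nusc, sample['data']['LIDAR_TOP'], sample['anns'][0])
    # The box center is now expressed relative to the lidar sensor.
    print(box_in_lidar)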
def get_samples_in_eval_set(nusc: NuScenes, eval_set: str) -> List[str]: """ Gets all the sample tokens from the split that are relevant to the eval set. :param nusc: A NuScenes object. :param eval_set: The dataset split to evaluate on, e.g. train, val or test. :return: A list of sample tokens. """ # Create a dict to map from scene name to scene token for quick lookup later on. scene_name2tok = dict() for rec in nusc.scene: scene_name2tok[rec['name']] = rec['token'] # Get scenes splits from nuScenes. scenes_splits = create_splits_scenes(verbose=False) # Collect sample tokens for each scene. samples = [] for scene in scenes_splits[eval_set]: scene_record = nusc.get('scene', scene_name2tok[scene]) total_num_samples = scene_record['nbr_samples'] first_sample_token = scene_record['first_sample_token'] last_sample_token = scene_record['last_sample_token'] sample_token = first_sample_token i = 0 while sample_token != '': sample_record = nusc.get('sample', sample_token) samples.append(sample_record['token']) if sample_token == last_sample_token: sample_token = '' else: sample_token = sample_record['next'] i += 1 assert total_num_samples == i, 'Error: There were supposed to be {} keyframes, ' \ 'but only {} keyframes were processed'.format(total_num_samples, i) return samples
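# Hedged usage sketch for get_samples_in_eval_set above, run on the mini split; the dataroot is a placeholder.
from nuscenes.nuscenes import NuScenes

if __name__ == '__main__':
    nusc = NuScenes(version='v1.0-mini', dataroot='/data/sets/nuscenes', verbose=False)
    sample_tokens = get_samples_in_eval_set(nusc, 'mini_val')
    print('%d keyframes in mini_val' % len(sample_tokens))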
def get_pcl(): nusc = NuScenes(version='v1.0-mini', dataroot='/data/sets/nuscenes', verbose=True) explorer = NuScenesExplorer(nusc) imglist = [] count = 0 for scene in nusc.scene: token = scene['first_sample_token'] while token != '': count += 1 sample = nusc.get('sample',token) sample_data_cam = nusc.get('sample_data', sample['data']['CAM_FRONT']) sample_data_pcl = nusc.get('sample_data', sample['data']['LIDAR_TOP']) # get CAM_FRONT datapath data_path, boxes, camera_intrinsic = explorer.nusc.get_sample_data(sample_data_cam['token'], box_vis_level = BoxVisibility.ANY) imglist += [data_path] # get LiDAR point cloud sample_rec = explorer.nusc.get('sample', sample_data_pcl['sample_token']) chan = sample_data_pcl['channel'] ref_chan = 'LIDAR_TOP' pc, times = LidarPointCloud.from_file_multisweep(nusc, sample_rec, chan, ref_chan) radius = np.sqrt((pc.points[0])**2 + (pc.points[1])**2 + (pc.points[2])**2) pc = pc.points.transpose() pc = pc[np.where(radius<20)] print(pc) display(pc.transpose(), count) token = sample['next'] if count == 2: break if count == 2: break plt.show() print(len(imglist))
def add_center_dist(nusc: NuScenes, eval_boxes: EvalBoxes):
    """
    Adds the cylindrical (xy) center distance from ego vehicle to each box.
    :param nusc: The NuScenes instance.
    :param eval_boxes: A set of boxes, either GT or predictions.
    :return: eval_boxes augmented with center distances.
    """
    for sample_token in eval_boxes.sample_tokens:
        sample_rec = nusc.get('sample', sample_token)
        sd_record = nusc.get('sample_data', sample_rec['data']['LIDAR_TOP'])
        pose_record = nusc.get('ego_pose', sd_record['ego_pose_token'])
        cs_record = nusc.get('calibrated_sensor', sd_record['calibrated_sensor_token'])
        lidar2ego_rotation = cs_record['rotation']
        lidar2ego_translation = cs_record['translation']
        ego2global_rotation = pose_record['rotation']
        ego2global_translation = pose_record['translation']

        for box in eval_boxes[sample_token]:
            # Both boxes and ego pose are given in global coord system, so distance can be calculated directly.
            # Note that the z component of the ego pose is 0.
            center_ego = np.array(box.translation) - np.array(ego2global_translation)
            center_ego_tmp = np.dot(Quaternion(ego2global_rotation).inverse.rotation_matrix, center_ego)
            center_lidar_tmp = center_ego_tmp - np.array(lidar2ego_translation)
            center_lidar = np.dot(Quaternion(lidar2ego_rotation).inverse.rotation_matrix, center_lidar_tmp)

            if isinstance(box, DetectionBox) or isinstance(box, TrackingBox):
                box.ego_translation = tuple(center_ego)
                box.lidar_translation = tuple(center_lidar)
            else:
                raise NotImplementedError

    return eval_boxes
def export_videos(nusc: NuScenes, out_dir: str):
    """ Exports one video per scene, rendered from the camera images. """

    # Collect all scene tokens.
    scene_tokens = [s['token'] for s in nusc.scene]

    # Create output directory.
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    # Write videos to disk.
    for scene_token in scene_tokens:
        scene = nusc.get('scene', scene_token)
        print('Writing scene %s' % scene['name'])
        out_path = os.path.join(out_dir, scene['name']) + '.avi'
        if not os.path.exists(out_path):
            nusc.render_scene(scene['token'], out_path=out_path)
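# Minimal driver sketch for export_videos above; the output directory is an arbitrary assumption.
from nuscenes.nuscenes import NuScenes

if __name__ == '__main__':
    nusc = NuScenes(version='v1.0-mini', dataroot='/data/sets/nuscenes', verbose=True)
    export_videos(nusc, out_dir='./nuscenes_videos')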
def _mock_submission(nusc: NuScenes, split: str, add_errors: bool = False) -> Dict[str, dict]: """ Creates "reasonable" submission (results and metadata) by looping through the mini-val set, adding 1 GT prediction per sample. Predictions will be permuted randomly along all axes. :param nusc: NuScenes instance. :param split: Dataset split to use. :param add_errors: Whether to use GT or add errors to it. """ def random_class(category_name: str, _add_errors: bool = False) -> Optional[str]: # Alter 10% of the valid labels. class_names = sorted(TRACKING_NAMES) tmp = category_to_tracking_name(category_name) if tmp is None: return None else: if not _add_errors or np.random.rand() < .9: return tmp else: return class_names[np.random.randint(0, len(class_names) - 1)] def random_id(instance_token: str, _add_errors: bool = False) -> str: # Alter 10% of the valid ids to be a random string, which hopefully corresponds to a new track. if not _add_errors or np.random.rand() < .9: _tracking_id = instance_token + '_pred' else: _tracking_id = str(np.random.randint(0, sys.maxsize)) return _tracking_id mock_meta = { 'use_camera': False, 'use_lidar': True, 'use_radar': False, 'use_map': False, 'use_external': False, } mock_results = {} # Get all samples in the current evaluation split. splits = create_splits_scenes() val_samples = [] for sample in nusc.sample: if nusc.get('scene', sample['scene_token'])['name'] in splits[split]: val_samples.append(sample) # Prepare results. instance_to_score = dict() for sample in tqdm(val_samples, leave=False): sample_res = [] for ann_token in sample['anns']: ann = nusc.get('sample_annotation', ann_token) translation = np.array(ann['translation']) size = np.array(ann['size']) rotation = np.array(ann['rotation']) velocity = nusc.box_velocity(ann_token)[:2] tracking_id = random_id(ann['instance_token'], _add_errors=add_errors) tracking_name = random_class(ann['category_name'], _add_errors=add_errors) # Skip annotations for classes not part of the detection challenge. if tracking_name is None: continue # Skip annotations with 0 lidar/radar points. num_pts = ann['num_lidar_pts'] + ann['num_radar_pts'] if num_pts == 0: continue # If we randomly assign a score in [0, 1] to each box and later average over the boxes in the track, # the average score will be around 0.5 and we will have 0 predictions above that. # Therefore we assign the same scores to each box in a track. if ann['instance_token'] not in instance_to_score: instance_to_score[ann['instance_token']] = random.random() tracking_score = instance_to_score[ann['instance_token']] tracking_score = np.clip(tracking_score + random.random() * 0.3, 0, 1) if add_errors: translation += 4 * (np.random.rand(3) - 0.5) size *= (np.random.rand(3) + 0.5) rotation += (np.random.rand(4) - 0.5) * .1 velocity *= np.random.rand(3)[:2] + 0.5 sample_res.append({ 'sample_token': sample['token'], 'translation': list(translation), 'size': list(size), 'rotation': list(rotation), 'velocity': list(velocity), 'tracking_id': tracking_id, 'tracking_name': tracking_name, 'tracking_score': tracking_score }) mock_results[sample['token']] = sample_res mock_submission = { 'meta': mock_meta, 'results': mock_results } return mock_submission
class NusLoaderQ10(Dataset):
    def __init__(self, root='/datasets/nuscene/v1.0-mini', sampling_time=3, layer_names=None, colors=None):
        if layer_names is None:
            layer_names = ['drivable_area', 'road_segment', 'road_block',
                           'lane', 'ped_crossing', 'walkway', 'stop_line',
                           'carpark_area', 'road_divider', 'lane_divider']
        if colors is None:
            colors = [(255, 255, 255), (255, 255, 255), (255, 255, 255),
                      (255, 255, 255), (255, 255, 255), (255, 255, 255),
                      (255, 255, 255), (255, 255, 255), (255, 255, 255),
                      (255, 255, 255)]
        self.root = root
        self.nus = NuScenes('v1.0-mini', dataroot=self.root)
        self.scenes = self.nus.scene
        self.samples = self.nus.sample
        self.layer_names = layer_names
        self.colors = colors

        self.helper = PredictHelper(self.nus)
        self.seconds = sampling_time

    def __len__(self):
        return len(self.scenes)

    def __getitem__(self, idx):
        # 1. past_agents_traj : (Num obv agents in batch_i X 20 X 2)
        # 2. past_agents_traj_len : (Num obv agents in batch_i, )
        # 3. future_agents_traj : (Num pred agents in batch_i X 20 X 2)
        # 4. future_agents_traj_len : (Num pred agents in batch_i, )
        # 5. future_agent_masks : (Num obv agents in batch_i)
        # 6. decode_rel_pos: (Num pred agents in batch_i X 2)
        # 7. decode_start_pos: (Num pred agents in batch_i X 2)
        # 8. map_image : (3 X 224 X 224)
        # 9. scene ID: (string)
        sample = self.samples[idx]
        sample_token = sample['token']

        # 1. Calculate ego pose.
        sample_data_lidar = self.nus.get('sample_data', sample['data']['LIDAR_TOP'])
        ego_pose = self.nus.get('ego_pose', sample_data_lidar['ego_pose_token'])
        ego_pose_xy = ego_pose['translation']
        ego_pose_rotation = ego_pose['rotation']

        # Timestamp.
        timestamp = ego_pose['timestamp']

        # 2. Generate map & agent masks.
        scene = self.nus.get('scene', sample['scene_token'])
        log = self.nus.get('log', scene['log_token'])
        location = log['location']
        nus_map = NuScenesMap(dataroot=self.root, map_name=location)

        static_layer = StaticLayerRasterizer(self.helper, layer_names=self.layer_names, colors=self.colors)
        agent_layer = AgentBoxesWithFadedHistory(self.helper, seconds_of_history=self.seconds)

        map_masks, lanes, map_img = static_layer.generate_mask(ego_pose_xy, ego_pose_rotation, sample_token)
        agent_mask = agent_layer.generate_mask(ego_pose_xy, ego_pose_rotation, sample_token)

        # 3. Generate agent trajectories.
        annotation_tokens = sample['anns']
        num_agent = len(annotation_tokens)
        agents = []
        for ans_token in annotation_tokens:
            agent = self.nus.get('sample_annotation', ans_token)
            instance_token = agent['instance_token']

            # Agent trajectory in global coordinates.
            xy_global = agent['translation']
            past_xy_global = self.helper.get_past_for_agent(
                instance_token, sample_token, seconds=self.seconds, in_agent_frame=False)
            future_xy_global = self.helper.get_future_for_agent(
                instance_token, sample_token, seconds=self.seconds, in_agent_frame=False)

            # Trajectory in local (ego) coordinates.
            xy_local = convert_global_coords_to_local(np.array([xy_global]), ego_pose_xy, ego_pose_rotation)
            past_xy_local = convert_global_coords_to_local(past_xy_global, ego_pose_xy, ego_pose_rotation)
            future_xy_local = convert_global_coords_to_local(future_xy_global, ego_pose_xy, ego_pose_rotation)

            # Agent driving state.
            rot = agent['rotation']
            vel = self.helper.get_velocity_for_agent(instance_token, sample_token)
            accel = self.helper.get_acceleration_for_agent(instance_token, sample_token)
            yaw_rate = self.helper.get_heading_change_rate_for_agent(instance_token, sample_token)

            agent_states = {'present_pos': xy_global, 'past_pos': past_xy_global, 'future_pos': future_xy_global,
                            'rot': rot, 'vel': vel, 'accel': accel, 'yaw_rate': yaw_rate,
                            'present_local_xy': xy_local, 'past_local_xy': past_xy_local,
                            'future_local_xy': future_xy_local}

            agents.append(agent_states)

        return map_masks, agents
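# Hedged usage sketch for NusLoaderQ10 above: fetch one keyframe and count the surrounding agents.
# The dataroot is a placeholder and the custom rasterizer classes are assumed to be importable.
if __name__ == '__main__':
    dataset = NusLoaderQ10(root='/datasets/nuscene/v1.0-mini', sampling_time=3)
    map_masks, agents = dataset[0]
    print('%d map layers, %d agents' % (len(map_masks), len(agents)))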
def _mock_submission(nusc: NuScenes, split: str) -> Dict[str, dict]: """ Creates "reasonable" submission (results and metadata) by looping through the mini-val set, adding 1 GT prediction per sample. Predictions will be permuted randomly along all axes. """ def random_class(category_name: str) -> str: # Alter 10% of the valid labels. class_names = sorted(DETECTION_NAMES) tmp = category_to_detection_name(category_name) if tmp is not None and np.random.rand() < .9: return tmp else: return class_names[np.random.randint(0, len(class_names) - 1)] def random_attr(name: str) -> str: """ This is the most straight-forward way to generate a random attribute. Not currently used b/c we want the test fixture to be back-wards compatible. """ # Get relevant attributes. rel_attributes = detection_name_to_rel_attributes(name) if len(rel_attributes) == 0: # Empty string for classes without attributes. return '' else: # Pick a random attribute otherwise. return rel_attributes[np.random.randint( 0, len(rel_attributes))] mock_meta = { 'use_camera': False, 'use_lidar': True, 'use_radar': False, 'use_map': False, 'use_external': False, } mock_results = {} splits = create_splits_scenes() val_samples = [] for sample in nusc.sample: if nusc.get('scene', sample['scene_token'])['name'] in splits[split]: val_samples.append(sample) for sample in tqdm(val_samples, leave=False): sample_res = [] for ann_token in sample['anns']: ann = nusc.get('sample_annotation', ann_token) detection_name = random_class(ann['category_name']) sample_res.append({ 'sample_token': sample['token'], 'translation': list( np.array(ann['translation']) + 5 * (np.random.rand(3) - 0.5)), 'size': list( np.array(ann['size']) * 2 * (np.random.rand(3) + 0.5)), 'rotation': list( np.array(ann['rotation']) + ((np.random.rand(4) - 0.5) * .1)), 'velocity': list( nusc.box_velocity(ann_token)[:2] * (np.random.rand(3)[:2] + 0.5)), 'detection_name': detection_name, 'detection_score': random.random(), 'attribute_name': random_attr(detection_name) }) mock_results[sample['token']] = sample_res mock_submission = {'meta': mock_meta, 'results': mock_results} return mock_submission
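# Hedged sketch: serialize the mock detection submission above to disk in the nuScenes results format.
# The output filename is an arbitrary assumption.
import json

from nuscenes.nuscenes import NuScenes

if __name__ == '__main__':
    nusc = NuScenes(version='v1.0-mini', dataroot='/data/sets/nuscenes', verbose=False)
    submission = _mock_submission(nusc, 'mini_val')
    with open('mock_results.json', 'w') as f:
        json.dump(submission, f, indent=2)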
def __init__(self, batch_size, num_classes, training=True, normalize=None): self._num_classes = num_classes # we make the height of image consistent to trim_height, trim_width self.trim_height = cfg.TRAIN.TRIM_HEIGHT self.trim_width = cfg.TRAIN.TRIM_WIDTH self.max_num_box = cfg.MAX_NUM_GT_BOXES self.training = training self.normalize = normalize self.batch_size = batch_size self.classes = ('__background__', 'pedestrian', 'barrier', 'trafficcone', 'bicycle', 'bus', 'car', 'construction', 'motorcycle', 'trailer', 'truck') self.max_num_box = 50 # Checks if pickle file of dataset already exists. If it doesn't exist, creates the file if os.path.exists('lib/roi_data_layer/roidb_CAMFRONT.pkl'): print("Reading roidb..") pickle_in = open("lib/roi_data_layer/roidb_CAMFRONT.pkl", "rb") self.roidb = pickle.load(pickle_in) trainsize = 20000 if training == True: self.roidb = self.roidb[:trainsize] print("roidb size: " + str(len(self.roidb))) else: self.roidb = self.roidb[trainsize:] else: nusc_path = '/data/sets/nuscenes' nusc = NuScenes(version='v1.0-trainval', dataroot=nusc_path, verbose=True) file_dir = os.path.dirname(os.path.abspath(__file__)) roots = file_dir.split('/')[:-2] root_dir = "" for folder in roots: if folder != "": root_dir = root_dir + "/" + folder PATH = root_dir + '/data/CAMFRONT.txt' with open(PATH) as f: image_token = [x.strip() for x in f.readlines()] roidb = [] # Loads information on images and ground truth boxes and saves it as pickle file for faster loading print("Loading roidb...") for i in range(len(image_token)): im_token = image_token[i] sample_data = nusc.get('sample_data', im_token) image_name = sample_data['filename'] image_path = nusc_path + '/' + image_name data_path, boxes, camera_intrinsic = nusc.get_sample_data( im_token, box_vis_level=BoxVisibility.ALL) # Only accepts boxes with above level 3 or 4 visibility and classes with more than 1000 instances gt_boxes = [] gt_cls = [] for box in boxes: visibility_token = nusc.get('sample_annotation', box.token)['visibility_token'] vis_level = int( nusc.get('visibility', visibility_token)['token']) if (vis_level == 3) or (vis_level == 4): visible = True else: visible = False if visible == True: if box.name.split('.')[0] == 'vehicle': if box.name.split('.')[1] != 'emergency': name = box.name.split('.')[1] else: name = '' elif box.name.split('.')[0] == 'human': name = 'pedestrian' elif box.name.split('.')[0] == 'movable_object': if box.name.split( '.')[1] != 'debris' and box.name.split( '.')[1] != 'pushable_pullable': name = box.name.split('.')[1] else: name = '' else: name = '' if name != '': corners = view_points(box.corners(), view=camera_intrinsic, normalize=True)[:2, :] box = np.zeros(4) box[0] = np.min(corners[0]) box[1] = np.min(corners[1]) box[2] = np.max(corners[0]) box[3] = np.max(corners[1]) gt_boxes = gt_boxes + [box] gt_cls = gt_cls + [self.classes.index(name)] # Only accepts images with at least one object if len(gt_boxes) >= 2: image = {} image['image'] = image_path image['width'] = 1600 image['height'] = 900 image['boxes'] = np.asarray(gt_boxes) image['gt_classes'] = np.asarray(gt_cls) roidb = roidb + [image] print(len(roidb)) print("Saving roidb") pickle_out = open("lib/roi_data_layer/roidb_CAMFRONT.pkl", "wb") pickle.dump(roidb, pickle_out) pickle_out.close() self.roidb = roidb trainsize = int(len(self.roidb) * 0.8) if training == True: self.roidb = self.roidb[:trainsize] else: self.roidb = self.roidb[trainsize:]
nusc = NuScenes(version='v1.0-trainval', dataroot=data_path, verbose=True) explorer = NuScenesExplorer(nusc) PATH = data_path + '/CAMFRONT.txt' with open(PATH) as f: image_token = [x.strip() for x in f.readlines()] annotations = [] counter = 0 max_v = 0 min_v = 999999 # pdb.set_trace() for im_token in image_token: sample_data = nusc.get('sample_data', im_token) image_name = sample_data['filename'] img = cv2.imread('/home/fengjia/data/sets/nuscenes/' + image_name) im = np.array(img) sample = nusc.get('sample', sample_data['sample_token']) lidar_token = sample['data']['LIDAR_TOP'] # get ground truth boxes _, boxes, img_camera_intrinsic = nusc.get_sample_data(im_token, box_vis_level=BoxVisibility.ALL) for box in boxes: visibility_token = nusc.get('sample_annotation', box.token)['visibility_token'] vis_level = int(nusc.get('visibility', visibility_token)['token']) if (vis_level != 3) and (vis_level != 4):
def load_merge_from_pkl(nusc: NuScenes, pkl_path: str, box_cls, verbose: bool = False) -> EvalBoxes: # Init. if box_cls == DetectionBox: attribute_map = {a['token']: a['name'] for a in nusc.attribute} if verbose: print('Loading annotations for {} split from nuScenes version: {}'. format(pkl_path, nusc.version)) import mmcv infos = mmcv.load(pkl_path)['infos'] samples = [] for info in infos: samples.append(nusc.get('sample', info['token'])) all_annotations = EvalBoxes() # Load annotations and filter predictions and annotations. merge_map = dict(car='vehicle', truck='vehicle', bus='vehicle', trailer='vehicle', construction_vehicle='vehicle', pedestrian='pedestrian', motorcycle='bike', bicycle='bike', traffic_cone='traffic_boundary', barrier='traffic_boundary') for sample in tqdm.tqdm(samples, leave=verbose): sample_token = sample['token'] cam_token = sample['data']['CAM_FRONT'] _, boxes_cam, _ = nusc.get_sample_data(cam_token) sample_annotation_tokens = [box.token for box in boxes_cam] # sample = nusc.get('sample', sample_token) # sample_annotation_tokens = sample['anns'] sample_boxes = [] for sample_annotation_token in sample_annotation_tokens: sample_annotation = nusc.get('sample_annotation', sample_annotation_token) if box_cls == DetectionBox: # Get label name in detection task and filter unused labels. detection_name = category_to_detection_name( sample_annotation['category_name']) if detection_name is None: continue detection_name = merge_map[detection_name] # Get attribute_name. attr_tokens = sample_annotation['attribute_tokens'] attr_count = len(attr_tokens) if attr_count == 0: attribute_name = '' elif attr_count == 1: attribute_name = attribute_map[attr_tokens[0]] else: raise Exception( 'Error: GT annotations must not have more than one attribute!' ) sample_boxes.append( box_cls( sample_token=sample_token, translation=sample_annotation['translation'], size=sample_annotation['size'], rotation=sample_annotation['rotation'], velocity=nusc.box_velocity( sample_annotation['token'])[:2], num_pts=sample_annotation['num_lidar_pts'] + sample_annotation['num_radar_pts'], detection_name=detection_name, detection_score=-1.0, # GT samples do not have a score. attribute_name=attribute_name)) else: raise NotImplementedError('Error: Invalid box_cls %s!' % box_cls) all_annotations.add_boxes(sample_token, sample_boxes) if verbose: print("Loaded ground truth annotations for {} samples.".format( len(all_annotations.sample_tokens))) return all_annotations
class NusLoaderQ10(Dataset): def __init__(self, root='/datasets/nuscene/v1.0-mini', sampling_time=3, agent_time=0, layer_names=None, colors=None, resolution: float = 0.1, # meters / pixel meters_ahead: float = 25, meters_behind: float = 25, meters_left: float = 25, meters_right: float = 25, version='v1.0-mini'): if layer_names is None: layer_names = ['drivable_area', 'road_segment', 'road_block', 'lane', 'ped_crossing', 'walkway', 'stop_line', 'carpark_area', 'road_divider', 'lane_divider'] if colors is None: colors = [(255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), ] self.root = root self.nus = NuScenes(version, dataroot=self.root) self.scenes = self.nus.scene self.samples = self.nus.sample self.layer_names = layer_names self.colors = colors self.helper = PredictHelper(self.nus) self.seconds = sampling_time self.agent_seconds = agent_time self.static_layer = StaticLayerRasterizer(self.helper, layer_names=self.layer_names, colors=self.colors, resolution=resolution, meters_ahead=meters_ahead, meters_behind=meters_behind, meters_left=meters_left, meters_right=meters_right) self.agent_layer = AgentBoxesWithFadedHistory(self.helper, seconds_of_history=self.agent_seconds, resolution=resolution, meters_ahead=meters_ahead, meters_behind=meters_behind, meters_left=meters_left, meters_right=meters_right) def __len__(self): return len(self.samples) def __getitem__(self, idx): scene_id = idx sample = self.samples[idx] sample_token = sample['token'] # 1. calculate ego pose sample_data_lidar = self.nus.get('sample_data', sample['data']['LIDAR_TOP']) ego_pose = self.nus.get('ego_pose', sample_data_lidar['ego_pose_token']) ego_pose_xy = ego_pose['translation'] ego_pose_rotation = ego_pose['rotation'] # 2. Generate map mask map_masks, lanes, map_img = self.static_layer.generate_mask(ego_pose_xy, ego_pose_rotation, sample_token) # 3. Generate Agent Trajectory agent_mask, xy_global = self.agent_layer.generate_mask( ego_pose_xy, ego_pose_rotation, sample_token, self.seconds, show_agent=False) xy_local = [] # past, future trajectory for path_global in xy_global[:2]: pose_xy = [] for path_global_i in path_global: if len(path_global_i) == 0: pose_xy.append(path_global_i) else: pose_xy.append(convert_global_coords_to_local(path_global_i, ego_pose_xy, ego_pose_rotation)) xy_local.append(pose_xy) # current pose if len(xy_global[2]) == 0: xy_local.append(xy_global[2]) else: xy_local.append(convert_global_coords_to_local(xy_global[2], ego_pose_xy, ego_pose_rotation)) # 4. 
Generate Virtual Agent Trajectory lane_tokens = list(lanes.keys()) lanes_disc = [np.array(lanes[token])[:, :2] for token in lane_tokens] virtual_mask, virtual_xy = self.agent_layer.generate_virtual_mask( ego_pose_xy, ego_pose_rotation, lanes_disc, sample_token, show_agent=False, past_trj_len=4, future_trj_len=6, min_dist=6) virtual_xy_local = [] # past, future trajectory for path_global in virtual_xy[:2]: pose_xy = [] for path_global_i in path_global: if len(path_global_i) == 0: pose_xy.append(path_global_i) else: pose_xy.append(convert_global_coords_to_local(path_global_i, ego_pose_xy, ego_pose_rotation)) virtual_xy_local.append(pose_xy) # current pose if len(virtual_xy[2]) == 0: virtual_xy_local.append(virtual_xy[2]) else: virtual_xy_local.append(convert_global_coords_to_local(virtual_xy[2], ego_pose_xy, ego_pose_rotation)) return map_masks, map_img, agent_mask, xy_local, virtual_mask, virtual_xy_local, scene_id def render_sample(self, idx): sample = self.samples[idx] sample_token = sample['token'] self.nus.render_sample(sample_token) def render_scene(self, idx): sample = self.samples[idx] sample_token = sample['token'] scene = self.nus.get('scene', sample['scene_token']) log = self.nus.get('log', scene['log_token']) location = log['location'] nus_map = NuScenesMap(dataroot=self.root, map_name=location) layer_names = ['road_segment', 'lane', 'ped_crossing', 'walkway', 'stop_line', 'carpark_area'] camera_channel = 'CAM_FRONT' nus_map.render_map_in_image(self.nus, sample_token, layer_names=layer_names, camera_channel=camera_channel) def render_map(self, idx, combined=True): sample = self.samples[idx] sample_token = sample['token'] sample_data_lidar = self.nus.get('sample_data', sample['data']['LIDAR_TOP']) ego_pose = self.nus.get('ego_pose', sample_data_lidar['ego_pose_token']) ego_pose_xy = ego_pose['translation'] ego_pose_rotation = ego_pose['rotation'] timestamp = ego_pose['timestamp'] # 2. Generate Map & Agent Masks scene = self.nus.get('scene', sample['scene_token']) log = self.nus.get('log', scene['log_token']) location = log['location'] nus_map = NuScenesMap(dataroot=self.root, map_name=location) static_layer = StaticLayerRasterizer(self.helper, layer_names=self.layer_names, colors=self.colors) agent_layer = AgentBoxesWithFadedHistory(self.helper, seconds_of_history=self.seconds) map_masks, lanes, map_img = static_layer.generate_mask(ego_pose_xy, ego_pose_rotation, sample_token) agent_mask = agent_layer.generate_mask(ego_pose_xy, ego_pose_rotation, sample_token) if combined: plt.subplot(1, 2, 1) plt.title("combined map") plt.imshow(map_img) plt.subplot(1, 2, 2) plt.title("agent") plt.imshow(agent_mask) plt.show() else: num_labels = len(self.layer_names) num_rows = num_labels // 3 fig, ax = plt.subplots(num_rows, 3, figsize=(10, 3 * num_rows)) for row in range(num_rows): for col in range(3): num = 3 * row + col if num == num_labels - 1: break ax[row][col].set_title(self.layer_names[num]) ax[row][col].imshow(map_masks[num]) plt.show()
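# Hedged usage sketch for this NusLoaderQ10 variant: pull one sample and inspect what it returns.
# The dataroot is a placeholder and the modified rasterizers are assumed to be available.
if __name__ == '__main__':
    dataset = NusLoaderQ10(root='/datasets/nuscene/v1.0-mini', version='v1.0-mini')
    map_masks, map_img, agent_mask, xy_local, virtual_mask, virtual_xy_local, scene_id = dataset[0]
    print('scene id %d, %d map layers' % (scene_id, len(map_masks)))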
coloring = coloring[mask] pdb.set_trace() return points, coloring, im PATH = '/data/sets/nuscenes/train_mini.txt' with open(PATH) as f: image_token = [x.strip() for x in f.readlines()] #model = PointNetCls(k=16) #model.cuda() #model.load_state_dict(torch.load('/home/julia/pointnet.pytorch/utils/cls/cls_model_22.pth')) #model.eval() im_token = image_token[0] sample_data = nusc.get('sample_data', im_token) sample_token = sample_data['sample_token'] sample =nusc.get('sample', sample_token) image_name = sample_data['filename'] img = imread('/data/sets/nuscenes/' + image_name) # load in the LiDAR data for the sample sample_data_pcl = nusc.get('sample_data', sample['data']['LIDAR_TOP']) sample_rec = explorer.nusc.get('sample', sample_data_pcl['sample_token']) # get LiDAR point cloud chan = sample_data_pcl['channel'] ref_chan = 'LIDAR_TOP' pc, times = LidarPointCloud.from_file_multisweep(nusc, sample_rec, chan, ref_chan) points= pc.points[:3,:]
def track_nuscenes(data_root: str, detection_file: str, save_dir: str, eval_set: str = 'val', covariance_id: int = 0, match_distance: str = 'iou', match_threshold: float = 0.1, match_algorithm: str = 'h', use_angular_velocity: bool = False): ''' submission { "meta": { "use_camera": <bool> -- Whether this submission uses camera data as an input. "use_lidar": <bool> -- Whether this submission uses lidar data as an input. "use_radar": <bool> -- Whether this submission uses radar data as an input. "use_map": <bool> -- Whether this submission uses map data as an input. "use_external": <bool> -- Whether this submission uses external data as an input. }, "results": { sample_token <str>: List[sample_result] -- Maps each sample_token to a list of sample_results. } } ''' if 'train' in eval_set: version = 'v1.0-trainval' elif 'val' in eval_set: version = 'v1.0-trainval' elif 'mini' in eval_set: version = 'v1.0-mini' elif 'test' in eval_set: version = 'v1.0-test' else: version = eval_set print("WARNING: Unknown subset version: '{}'".format(version)) nusc = NuScenes(version=version, dataroot=data_root, verbose=True) mkdir_if_missing(save_dir) output_path = os.path.join(save_dir, 'probabilistic_tracking_results.json') results = {} total_time = 0.0 total_frames = 0 with open(detection_file) as f: data = json.load(f) assert 'results' in data, 'Error: No field `results` in result file. Please note that the result format changed.' \ 'See https://www.nuscenes.org/object-detection for more information.' all_results = EvalBoxes.deserialize(data['results'], DetectionBox) meta = data['meta'] print('meta: ', meta) print("Loaded results from {}. Found detections for {} samples.".format( detection_file, len(all_results.sample_tokens))) processed_scene_tokens = set() for sample_token_idx in tqdm(range(len(all_results.sample_tokens))): sample_token = all_results.sample_tokens[sample_token_idx] scene_token = nusc.get('sample', sample_token)['scene_token'] if scene_token in processed_scene_tokens: continue first_sample_token = nusc.get('scene', scene_token)['first_sample_token'] current_sample_token = first_sample_token mot_trackers = { tracking_name: AB3DMOT(covariance_id, tracking_name=tracking_name, use_angular_velocity=use_angular_velocity, tracking_nuscenes=True) for tracking_name in NUSCENES_TRACKING_NAMES } while current_sample_token != '': results[current_sample_token] = [] dets = { tracking_name: [] for tracking_name in NUSCENES_TRACKING_NAMES } info = { tracking_name: [] for tracking_name in NUSCENES_TRACKING_NAMES } for box in all_results.boxes[current_sample_token]: if box.detection_name not in NUSCENES_TRACKING_NAMES: continue q = Quaternion(box.rotation) angle = q.angle if q.axis[2] > 0 else -q.angle #print('box.rotation, angle, axis: ', box.rotation, q.angle, q.axis) #print('box.rotation, angle, axis: ', q.angle, q.axis) #[h, w, l, x, y, z, rot_y] detection = np.array([ box.size[2], box.size[0], box.size[1], box.translation[0], box.translation[1], box.translation[2], angle ]) #print('detection: ', detection) information = np.array([box.detection_score]) dets[box.detection_name].append(detection) info[box.detection_name].append(information) dets_all = { tracking_name: { 'dets': np.array(dets[tracking_name]), 'info': np.array(info[tracking_name]) } for tracking_name in NUSCENES_TRACKING_NAMES } total_frames += 1 start_time = time.time() for tracking_name in NUSCENES_TRACKING_NAMES: if dets_all[tracking_name]['dets'].shape[0] > 0: trackers = mot_trackers[tracking_name].update( dets_all[tracking_name], 
match_distance, match_threshold, match_algorithm, scene_token) # (N, 9) # (h, w, l, x, y, z, rot_y), tracking_id, tracking_score # print('trackers: ', trackers) for i in range(trackers.shape[0]): sample_result = format_sample_result( current_sample_token, tracking_name, trackers[i]) results[current_sample_token].append(sample_result) cycle_time = time.time() - start_time total_time += cycle_time # get next frame and continue the while loop current_sample_token = nusc.get('sample', current_sample_token)['next'] # left while loop and mark this scene as processed processed_scene_tokens.add(scene_token) # finished tracking all scenes, write output data output_data = {'meta': meta, 'results': results} with open(output_path, 'w') as outfile: json.dump(output_data, outfile) print("Total Tracking took: %.3f for %d frames or %.1f FPS" % (total_time, total_frames, total_frames / total_time))
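# Hedged driver sketch for track_nuscenes above. All paths are placeholders; detection_file is expected to
# be a nuScenes detection-format results JSON produced by some 3D detector.
if __name__ == '__main__':
    track_nuscenes(data_root='/data/sets/nuscenes',
                   detection_file='detections/val_detections.json',
                   save_dir='tracking_output',
                   eval_set='val',
                   covariance_id=0,
                   match_distance='iou',
                   match_threshold=0.1,
                   match_algorithm='h',
                   use_angular_velocity=False)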
def load_gt(nusc: NuScenes, eval_split: str, box_cls, verbose: bool = False) -> EvalBoxes: """ Loads ground truth boxes from DB. :param nusc: A NuScenes instance. :param eval_split: The evaluation split for which we load GT boxes. :param box_cls: Type of box to load, e.g. DetectionBox or TrackingBox. :param verbose: Whether to print messages to stdout. :return: The GT boxes. """ # Init. if box_cls == DetectionBox: attribute_map = {a['token']: a['name'] for a in nusc.attribute} if verbose: print('Loading annotations for {} split from nuScenes version: {}'. format(eval_split, nusc.version)) # Read out all sample_tokens in DB. sample_tokens_all = [s['token'] for s in nusc.sample] assert len(sample_tokens_all) > 0, "Error: Database has no samples!" # Only keep samples from this split. splits = create_splits_scenes() # Check compatibility of split with nusc_version. version = nusc.version if eval_split in {'train', 'val', 'train_detect', 'train_track'}: assert version.endswith('trainval'), \ 'Error: Requested split {} which is not compatible with NuScenes version {}'.format(eval_split, version) elif eval_split in {'mini_train', 'mini_val'}: assert version.endswith('mini'), \ 'Error: Requested split {} which is not compatible with NuScenes version {}'.format(eval_split, version) elif eval_split == 'test': assert version.endswith('test'), \ 'Error: Requested split {} which is not compatible with NuScenes version {}'.format(eval_split, version) else: raise ValueError( 'Error: Requested split {} which this function cannot map to the correct NuScenes version.' .format(eval_split)) if eval_split == 'test': # Check that you aren't trying to cheat :). assert len(nusc.sample_annotation) > 0, \ 'Error: You are trying to evaluate on the test set but you do not have the annotations!' sample_tokens = [] for sample_token in sample_tokens_all: scene_token = nusc.get('sample', sample_token)['scene_token'] scene_record = nusc.get('scene', scene_token) if scene_record['name'] in splits[eval_split]: sample_tokens.append(sample_token) all_annotations = EvalBoxes() # Load annotations and filter predictions and annotations. tracking_id_set = set() for sample_token in tqdm.tqdm(sample_tokens, leave=verbose): sample = nusc.get('sample', sample_token) sample_annotation_tokens = sample['anns'] sample_boxes = [] for sample_annotation_token in sample_annotation_tokens: sample_annotation = nusc.get('sample_annotation', sample_annotation_token) if box_cls == DetectionBox: # Get label name in detection task and filter unused labels. detection_name = category_to_detection_name( sample_annotation['category_name']) if detection_name is None: continue # Get attribute_name. attr_tokens = sample_annotation['attribute_tokens'] attr_count = len(attr_tokens) if attr_count == 0: attribute_name = '' elif attr_count == 1: attribute_name = attribute_map[attr_tokens[0]] else: raise Exception( 'Error: GT annotations must not have more than one attribute!' ) sample_boxes.append( box_cls( sample_token=sample_token, translation=sample_annotation['translation'], size=sample_annotation['size'], rotation=sample_annotation['rotation'], velocity=nusc.box_velocity( sample_annotation['token'])[:2], num_pts=sample_annotation['num_lidar_pts'] + sample_annotation['num_radar_pts'], detection_name=detection_name, detection_score=-1.0, # GT samples do not have a score. attribute_name=attribute_name)) elif box_cls == TrackingBox: # Use nuScenes token as tracking id. 
tracking_id = sample_annotation['instance_token'] tracking_id_set.add(tracking_id) # Get label name in detection task and filter unused labels. tracking_name = category_to_tracking_name( sample_annotation['category_name']) if tracking_name is None: continue sample_boxes.append( box_cls( sample_token=sample_token, translation=sample_annotation['translation'], size=sample_annotation['size'], rotation=sample_annotation['rotation'], velocity=nusc.box_velocity( sample_annotation['token'])[:2], num_pts=sample_annotation['num_lidar_pts'] + sample_annotation['num_radar_pts'], tracking_id=tracking_id, tracking_name=tracking_name, tracking_score=-1.0 # GT samples do not have a score. )) else: raise NotImplementedError('Error: Invalid box_cls %s!' % box_cls) all_annotations.add_boxes(sample_token, sample_boxes) if verbose: print("Loaded ground truth annotations for {} samples.".format( len(all_annotations.sample_tokens))) return all_annotations
def pointcloud_color_from_image( nusc: NuScenes, pointsensor_token: str, camera_token: str) -> Tuple[np.array, np.array]: """ Given a point sensor (lidar/radar) token and camera sample_data token, load pointcloud and map it to the image plane, then retrieve the colors of the closest image pixels. :param nusc: NuScenes instance. :param pointsensor_token: Lidar/radar sample_data token. :param camera_token: Camera sample data token. :return (coloring <np.float: 3, n>, mask <np.bool: m>). Returns the colors for n points that reproject into the image out of m total points. The mask indicates which points are selected. """ cam = nusc.get('sample_data', camera_token) pointsensor = nusc.get('sample_data', pointsensor_token) pc = LidarPointCloud.from_file( osp.join(nusc.dataroot, pointsensor['filename'])) im = Image.open(osp.join(nusc.dataroot, cam['filename'])) # Points live in the point sensor frame. So they need to be transformed via global to the image plane. # First step: transform the pointcloud to the ego vehicle frame for the timestamp of the sweep. cs_record = nusc.get('calibrated_sensor', pointsensor['calibrated_sensor_token']) pc.rotate(Quaternion(cs_record['rotation']).rotation_matrix) pc.translate(np.array(cs_record['translation'])) # Second step: transform to the global frame. poserecord = nusc.get('ego_pose', pointsensor['ego_pose_token']) pc.rotate(Quaternion(poserecord['rotation']).rotation_matrix) pc.translate(np.array(poserecord['translation'])) # Third step: transform into the ego vehicle frame for the timestamp of the image. poserecord = nusc.get('ego_pose', cam['ego_pose_token']) pc.translate(-np.array(poserecord['translation'])) pc.rotate(Quaternion(poserecord['rotation']).rotation_matrix.T) # Fourth step: transform into the camera. cs_record = nusc.get('calibrated_sensor', cam['calibrated_sensor_token']) pc.translate(-np.array(cs_record['translation'])) pc.rotate(Quaternion(cs_record['rotation']).rotation_matrix.T) # Fifth step: actually take a "picture" of the point cloud. # Grab the depths (camera frame z axis points away from the camera). depths = pc.points[2, :] # Take the actual picture (matrix multiplication with camera-matrix + renormalization). points = view_points(pc.points[:3, :], np.array(cs_record['camera_intrinsic']), normalize=True) # Remove points that are either outside or behind the camera. Leave a margin of 1 pixel for aesthetic reasons. mask = np.ones(depths.shape[0], dtype=bool) mask = np.logical_and(mask, depths > 0) mask = np.logical_and(mask, points[0, :] > 1) mask = np.logical_and(mask, points[0, :] < im.size[0] - 1) mask = np.logical_and(mask, points[1, :] > 1) mask = np.logical_and(mask, points[1, :] < im.size[1] - 1) points = points[:, mask] # Pick the colors of the points im_data = np.array(im) coloring = np.zeros(points.shape) for i, p in enumerate(points.transpose()): point = p[:2].round().astype(np.int32) coloring[:, i] = im_data[point[1], point[0], :] return coloring, mask
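# Hedged usage sketch for pointcloud_color_from_image above: color the lidar points of one sample with the
# CAM_FRONT pixels they project onto. Dataroot and version are placeholder assumptions.
from nuscenes.nuscenes import NuScenes

if __name__ == '__main__':
    nusc = NuScenes(version='v1.0-mini', dataroot='/data/sets/nuscenes', verbose=False)
    sample = nusc.sample[0]
    coloring, mask = pointcloud_color_from_image(nusc,
                                                 pointsensor_token=sample['data']['LIDAR_TOP'],
                                                 camera_token=sample['data']['CAM_FRONT'])
    print('%d of %d lidar points project into CAM_FRONT' % (coloring.shape[1], mask.shape[0]))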
class NusTrajectoryExtractor:
    def __init__(self,
                 root='/datasets/nuscene/v1.0-mini',
                 sampling_time=3,
                 agent_time=0,
                 layer_names=None,
                 colors=None,
                 resolution: float = 0.1,  # meters / pixel
                 meters_ahead: float = 32,
                 meters_behind: float = 32,
                 meters_left: float = 32,
                 meters_right: float = 32,
                 version='v1.0-mini'):
        self.layer_names = [
            'drivable_area', 'road_segment', 'road_block', 'lane', 'ped_crossing',
            'walkway', 'stop_line', 'carpark_area', 'road_divider', 'lane_divider'
        ] if layer_names is None else layer_names
        self.colors = [
            (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255),
            (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255), (255, 255, 255),
        ] if colors is None else colors
        self.root = root
        self.nus = NuScenes(version, dataroot=self.root)
        self.helper = PredictHelper(self.nus)

        self.sampling_time = sampling_time
        self.agent_seconds = agent_time
        self.scene_size = (64, 64)
        self.past_len = 4
        self.future_len = 6

        self.static_layer = StaticLayerRasterizer(self.helper,
                                                  layer_names=self.layer_names,
                                                  colors=self.colors,
                                                  resolution=resolution,
                                                  meters_ahead=meters_ahead,
                                                  meters_behind=meters_behind,
                                                  meters_left=meters_left,
                                                  meters_right=meters_right)
        self.agent_layer = AgentBoxesWithFadedHistory(self.helper,
                                                      seconds_of_history=self.agent_seconds,
                                                      resolution=resolution,
                                                      meters_ahead=meters_ahead,
                                                      meters_behind=meters_behind,
                                                      meters_left=meters_left,
                                                      meters_right=meters_right)
        self.combinator = Rasterizer()
        self.show_agent = True
        self.resized_ratio = 0.5  # resize ratio of image for visualizing

        self.drivable_area_idx = self.layer_names.index('drivable_area')
        self.road_divider_idx = self.layer_names.index('road_divider')
        self.lane_divider_idx = self.layer_names.index('lane_divider')

    def __len__(self):
        return len(self.nus.sample)

    def get_annotation(self, instance_token, sample_token):
        annotation = self.helper.get_sample_annotation(instance_token, sample_token)  # agent annotation

        # ego pose
        ego_pose_xy = annotation['translation']
        ego_pose_rotation = annotation['rotation']

        # agent attributes
        agent_attributes = {
            'category_name': annotation['category_name'],
            'translation': annotation['translation'],
            'size': annotation['size'],
            'rotation': annotation['rotation'],
            'visibility': int(annotation['visibility_token'])  # 1 ~ 4 (0~40%, 40~60%, 60~80%, 80~100%)
        }

        # agent trajectory annotation
        agent_mask, total_agents_annotation, agent_annotation = \
            self.agent_layer.generate_mask_with_path(
                instance_token, sample_token, seconds=self.sampling_time, vehicle_only=True)
        # total_agents_annotation keys:
        # 'instance_tokens', 'category_names', 'translations', 'pasts', 'futures',
        # 'velocities', 'accelerations', 'yaw_rates'
        agent_trajectories = {
            'agent_mask': agent_mask,
            'total_agents_annotation': total_agents_annotation,
            'agent_annotation': agent_annotation
        }

        # map attributes
        map_masks, lanes, map_img, map_img_with_lanes = \
            self.static_layer.generate_mask(ego_pose_xy, ego_pose_rotation, sample_token)
        map_attributes = {
            'map_masks': map_masks,
            'lanes': lanes,
            'map_img': map_img,
            'map_img_with_lanes': map_img_with_lanes,
            'map_img_with_agents': self.combinator.combine([map_img, agent_mask])
        }

        scene_data = {
            'instance_token': instance_token,
            'sample_token': sample_token,
            'annotation': annotation,
            'ego_pose_xy': ego_pose_xy,
            'ego_pose_rotation': ego_pose_rotation,
            'agent_attributes': agent_attributes,
            'agent_trajectories': agent_trajectories,
            'map_attributes': map_attributes,
        }
        return scene_data

    def get_cmu_annotation(self, instance_tokens, sample_token):
        scene_data = self.get_annotation(instance_tokens[0], sample_token)
        ego_pose_xy = scene_data['ego_pose_xy']
        ego_pose_rotation = scene_data['ego_pose_rotation']

        agents_annotation = scene_data['agent_trajectories']['total_agents_annotation']

        agents_past = [
            convert_global_coords_to_local(path_global_i, ego_pose_xy, ego_pose_rotation).tolist()
            if len(path_global_i) != 0 else []
            for path_global_i in agents_annotation['pasts']
        ]
        agents_future = [
            convert_global_coords_to_local(path_global_i, ego_pose_xy, ego_pose_rotation).tolist()
            if len(path_global_i) != 0 else []
            for path_global_i in agents_annotation['futures']
        ]
        agents_translation = \
            convert_global_coords_to_local(agents_annotation['translations'], ego_pose_xy, ego_pose_rotation).tolist() \
            if len(agents_annotation['translations']) != 0 else []

        drivable_area = scene_data['map_attributes']['map_masks'][self.drivable_area_idx]
        road_divider = scene_data['map_attributes']['map_masks'][self.road_divider_idx]
        lane_divider = scene_data['map_attributes']['map_masks'][self.lane_divider_idx]
        drivable_area = cv2.cvtColor(cv2.resize(drivable_area, self.scene_size), cv2.COLOR_BGR2GRAY)
        road_divider = cv2.cvtColor(cv2.resize(road_divider, self.scene_size), cv2.COLOR_BGR2GRAY)
        lane_divider = cv2.cvtColor(cv2.resize(lane_divider, self.scene_size), cv2.COLOR_BGR2GRAY)
        # distance_map, prior = self.generateDistanceMaskFromColorMap(
        #     drivable_area, image_size=self.scene_size, prior_size=self.scene_size)
        map_image_show = cv2.resize(scene_data['map_attributes']['map_img_with_agents'],
                                    dsize=(0, 0),
                                    fx=self.resized_ratio,
                                    fy=self.resized_ratio,
                                    interpolation=cv2.INTER_LINEAR)

        num_agents = len(agents_past)
        agents_mask = [tk in instance_tokens for tk in agents_annotation['instance_tokens']]  # agents to decode

        future_agent_masks = []
        past_agents_traj = []
        future_agents_traj = []
        past_agents_traj_len = []
        future_agents_traj_len = []
        decode_start_vel = []
        decode_start_pos = []
        for idx in range(num_agents):
            past = agents_past[idx][-self.past_len + 1:] + [agents_translation[idx]]
            future = agents_future[idx][:self.future_len]

            if len(past) != self.past_len:
                continue
            elif not (abs(past[-1][0]) <= self.scene_size[1] // 2) or not (abs(past[-1][1]) <= self.scene_size[0] // 2):
                continue
            if len(future) != self.future_len:
                agents_mask[idx] = False

            future_agent_masks.append(agents_mask[idx])
            past_agents_traj.append(past)
            past_agents_traj_len.append(len(past))
            future_agents_traj.append(future)
            future_agents_traj_len.append(len(future))
            decode_start_pos.append(past[-1])
            decode_start_vel.append([(past[-1][0] - past[-2][0]) / self.sampling_time,
                                     (past[-1][1] - past[-2][1]) / self.sampling_time])

        episode = [
            past_agents_traj, past_agents_traj_len, future_agents_traj, future_agents_traj_len,
            future_agent_masks, decode_start_vel, decode_start_pos
        ]
        episode_img = [drivable_area, road_divider, lane_divider]

        return {
            'episode': episode,
            'episode_img': episode_img,
            'img_show': map_image_show
        }

    def save_cmu_dataset(self, save_dir, partition='all'):
        from nuscenes.eval.prediction.splits import get_prediction_challenge_split

        split_types = ['mini_train', 'mini_val', 'train', 'train_val', 'val']
        if partition == 'mini':
            split_types = ['mini_train', 'mini_val']

        if not os.path.isdir(save_dir):
            os.mkdir(save_dir)

        for split in tqdm(split_types, desc='split dataset'):
            partition_tokens = get_prediction_challenge_split(split, dataroot=self.root)
            tokens_dict = {}
            for token in partition_tokens:
                instance_token, sample_token = token.split('_')
                try:
                    tokens_dict[sample_token].append(instance_token)
                except KeyError:
                    tokens_dict[sample_token] = [instance_token]
            with open('{}/{}.tokens'.format(save_dir, split), 'wb') as f:
                pickle.dump(tokens_dict, f, pickle.HIGHEST_PROTOCOL)

            for sample_tk, instance_tks in tqdm(tokens_dict.items(), desc=split, total=len(tokens_dict)):
                sample_dir = os.path.join(save_dir, sample_tk)
                if not os.path.isdir(sample_dir):
                    os.mkdir(sample_dir)

                scene_data = self.get_cmu_annotation(instance_tks, sample_tk)
                with open('{}/map.bin'.format(sample_dir), 'wb') as f:
                    pickle.dump(scene_data['episode_img'], f, pickle.HIGHEST_PROTOCOL)
                with open('{}/viz.bin'.format(sample_dir), 'wb') as f:
                    pickle.dump(scene_data['img_show'], f, pickle.HIGHEST_PROTOCOL)
                with open('{}/episode.bin'.format(sample_dir), 'wb') as f:
                    pickle.dump(scene_data['episode'], f, pickle.HIGHEST_PROTOCOL)
                with open('{}/instance_tks.bin'.format(sample_dir), 'wb') as f:
                    pickle.dump(instance_tks, f, pickle.HIGHEST_PROTOCOL)

    def render_sample(self, sample_token):
        self.nus.render_sample(sample_token)

    def render_scene(self, sample_token):
        sample = self.nus.get('sample', sample_token)
        scene = self.nus.get('scene', sample['scene_token'])
        log = self.nus.get('log', scene['log_token'])
        location = log['location']
        nus_map = NuScenesMap(dataroot=self.root, map_name=location)
        layer_names = ['road_segment', 'lane', 'ped_crossing', 'walkway', 'stop_line', 'carpark_area']
        camera_channel = 'CAM_FRONT'
        nus_map.render_map_in_image(self.nus,
                                    sample_token,
                                    layer_names=layer_names,
                                    camera_channel=camera_channel)
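# Usage sketch (assumption, not part of the original class): dump the mini prediction splits with
# NusTrajectoryExtractor.save_cmu_dataset and read one preprocessed episode back. The dataroot and
# output directory are placeholders, and this presumes the custom rasterizers the class relies on
# (StaticLayerRasterizer, AgentBoxesWithFadedHistory, Rasterizer) are importable.
import os
import pickle

extractor = NusTrajectoryExtractor(root='/datasets/nuscene/v1.0-mini', version='v1.0-mini')
extractor.save_cmu_dataset('./cmu_mini', partition='mini')

# The tokens file maps each sample token to the instance tokens selected for that sample.
with open('./cmu_mini/mini_val.tokens', 'rb') as f:
    tokens_dict = pickle.load(f)
sample_tk = next(iter(tokens_dict))

# episode.bin stores the seven lists assembled in get_cmu_annotation, in this order.
with open(os.path.join('./cmu_mini', sample_tk, 'episode.bin'), 'rb') as f:
    (past_trajs, past_lens, future_trajs, future_lens,
     decode_masks, start_vels, start_positions) = pickle.load(f)
print('{} agents kept for sample {}'.format(len(past_trajs), sample_tk))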
def export_scene_pointcloud(nusc: NuScenes,
                            out_path: str,
                            scene_token: str,
                            channel: str = 'LIDAR_TOP',
                            min_dist: float = 3.0,
                            max_dist: float = 30.0,
                            verbose: bool = True) -> None:
    """
    Export fused point clouds of a scene to a Wavefront OBJ file.
    This pointcloud can be viewed in your favorite 3D rendering tool, e.g. Meshlab or Maya.
    :param nusc: NuScenes instance.
    :param out_path: Output path to write the pointcloud to.
    :param scene_token: Unique identifier of scene to render.
    :param channel: Channel to render.
    :param min_dist: Minimum distance to ego vehicle below which points are dropped.
    :param max_dist: Maximum distance to ego vehicle above which points are dropped.
    :param verbose: Whether to print messages to stdout.
    """
    # Check inputs.
    valid_channels = ['LIDAR_TOP', 'RADAR_FRONT', 'RADAR_FRONT_RIGHT', 'RADAR_FRONT_LEFT',
                      'RADAR_BACK_LEFT', 'RADAR_BACK_RIGHT']
    camera_channels = ['CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT',
                       'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT']
    assert channel in valid_channels, 'Input channel {} not valid.'.format(channel)

    # Get records from DB.
    scene_rec = nusc.get('scene', scene_token)
    start_sample_rec = nusc.get('sample', scene_rec['first_sample_token'])
    sd_rec = nusc.get('sample_data', start_sample_rec['data'][channel])

    # Make list of frames.
    cur_sd_rec = sd_rec
    sd_tokens = []
    while cur_sd_rec['next'] != '':
        cur_sd_rec = nusc.get('sample_data', cur_sd_rec['next'])
        sd_tokens.append(cur_sd_rec['token'])

    # Write pointcloud.
    with open(out_path, 'w') as f:
        f.write("OBJ File:\n")

        for sd_token in tqdm(sd_tokens):
            # Note: sd_rec is advanced in lock-step at the bottom of this loop and selects the lidar sweep,
            # while sd_token / sc_rec provide the sample record used to look up the camera keyframes.
            if verbose:
                print('Processing {}'.format(sd_rec['filename']))
            sc_rec = nusc.get('sample_data', sd_token)
            sample_rec = nusc.get('sample', sc_rec['sample_token'])
            lidar_token = sd_rec['token']
            lidar_rec = nusc.get('sample_data', lidar_token)
            pc = LidarPointCloud.from_file(osp.join(nusc.dataroot, lidar_rec['filename']))

            # Get point cloud colors.
            coloring = np.ones((3, pc.points.shape[1])) * -1
            for cam_channel in camera_channels:
                camera_token = sample_rec['data'][cam_channel]
                cam_coloring, cam_mask = pointcloud_color_from_image(nusc, lidar_token, camera_token)
                coloring[:, cam_mask] = cam_coloring

            # Points live in their own reference frame, so they need to be transformed to the global frame.
            # First step: transform the point cloud to the ego vehicle frame for the timestamp of the sweep.
            cs_record = nusc.get('calibrated_sensor', lidar_rec['calibrated_sensor_token'])
            pc.rotate(Quaternion(cs_record['rotation']).rotation_matrix)
            pc.translate(np.array(cs_record['translation']))

            # Optionally filter by distance to remove the ego vehicle.
            dists_origin = np.sqrt(np.sum(pc.points[:3, :] ** 2, axis=0))
            keep = np.logical_and(min_dist <= dists_origin, dists_origin <= max_dist)
            pc.points = pc.points[:, keep]
            coloring = coloring[:, keep]
            if verbose:
                print('Distance filter: Keeping %d of %d points...' % (keep.sum(), len(keep)))

            # Second step: transform to the global frame.
            poserecord = nusc.get('ego_pose', lidar_rec['ego_pose_token'])
            pc.rotate(Quaternion(poserecord['rotation']).rotation_matrix)
            pc.translate(np.array(poserecord['translation']))

            # Write points to file.
            for (v, c) in zip(pc.points.transpose(), coloring.transpose()):
                if (c == -1).any():
                    # Ignore points without a color.
                    pass
                else:
                    f.write("v {v[0]:.8f} {v[1]:.8f} {v[2]:.8f} {c[0]:.4f} {c[1]:.4f} {c[2]:.4f}\n".format(
                        v=v, c=c / 255.0))

            if not sd_rec['next'] == "":
                sd_rec = nusc.get('sample_data', sd_rec['next'])
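# Usage sketch (assumption, not part of the original code): fuse the LIDAR_TOP sweeps of the first
# v1.0-mini scene into a colored OBJ file. The dataroot and output filename are placeholders.
from nuscenes.nuscenes import NuScenes

nusc_export = NuScenes(version='v1.0-mini', dataroot='/data/sets/nuscenes', verbose=False)
export_scene = nusc_export.scene[0]

obj_path = '{}.obj'.format(export_scene['name'])
export_scene_pointcloud(nusc_export, obj_path, export_scene['token'],
                        channel='LIDAR_TOP', min_dist=3.0, max_dist=30.0, verbose=False)
print('Wrote fused pointcloud for {} to {}'.format(export_scene['name'], obj_path))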