def __init__(self, set_name="mini_train"):
    # Assert that the requested split name is valid.
    set_paths = ['train', 'val', 'test', 'mini_train', 'mini_val', 'train_detect', 'train_track']
    assert set_name in set_paths, "Incorrect set_name"

    # Initialize data and prediction helper classes.
    self.data_path = DATA_PATH
    self.nusc = NuScenes(version=DATA_VERSION, dataroot=self.data_path, verbose=True)
    self.helper = PredictHelper(self.nusc)

    # Get all the scene splits.
    self.scenes = create_splits_scenes()

    # Get all the scenes in the requested split.
    self.set_name = set_name
    self.trainset = self.scenes[self.set_name]  # List of scenes that are part of the training set.

    # Dictionary containing the list of instance and sample tokens for each scene.
    self.prediction_scenes = json.load(open(self.data_path + "maps/prediction_scenes.json", "r"))

    print("Number of scenes in train set: %d" % len(self.trainset))
def get_prediction_challenge_split(split: str, dataroot: str = '/data/sets/nuscenes') -> List[str]:
    """
    Gets a list of {instance_token}_{sample_token} strings for each split.
    :param split: One of 'mini_train', 'mini_val', 'train', 'train_val', 'val'.
    :param dataroot: Path to the nuScenes dataset.
    :return: List of tokens belonging to the split. Format {instance_token}_{sample_token}.
    """
    if split not in {'mini_train', 'mini_val', 'train', 'train_val', 'val'}:
        raise ValueError("split must be one of (mini_train, mini_val, train, train_val, val)")

    if split == 'train_val':
        split_name = 'train'
    else:
        split_name = split

    path_to_file = os.path.join(dataroot, "maps", "prediction", "prediction_scenes.json")
    prediction_scenes = json.load(open(path_to_file, "r"))
    scenes = create_splits_scenes()
    scenes_for_split = scenes[split_name]

    if split == 'train':
        scenes_for_split = scenes_for_split[NUM_IN_TRAIN_VAL:]
    if split == 'train_val':
        scenes_for_split = scenes_for_split[:NUM_IN_TRAIN_VAL]

    token_list_for_scenes = map(lambda scene: prediction_scenes.get(scene, []), scenes_for_split)
    return list(chain.from_iterable(token_list_for_scenes))
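# A minimal usage sketch for get_prediction_challenge_split; the dataroot below
# is an assumed local install path, not part of the original snippet:
#
#   tokens = get_prediction_challenge_split('mini_train', dataroot='/data/sets/nuscenes')
#   instance_token, sample_token = tokens[0].split('_')  # entries are '{instance_token}_{sample_token}'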
def factory(dataset, dir_nuscenes):
    """Define dataset type and split training and validation."""
    assert dataset in ['nuscenes', 'nuscenes_mini', 'nuscenes_teaser']
    if dataset == 'nuscenes_mini':
        version = 'v1.0-mini'
    else:
        version = 'v1.0-trainval'

    nusc = NuScenes(version=version, dataroot=dir_nuscenes, verbose=True)
    scenes = nusc.scene

    if dataset == 'nuscenes_teaser':
        with open("splits/nuscenes_teaser_scenes.txt", "r") as file:
            teaser_scenes = file.read().splitlines()
        scenes = [scene for scene in scenes if scene['token'] in teaser_scenes]
        with open("splits/split_nuscenes_teaser.json", "r") as file:
            dic_split = json.load(file)
        split_train = [scene['name'] for scene in scenes if scene['token'] in dic_split['train']]
        split_val = [scene['name'] for scene in scenes if scene['token'] in dic_split['val']]
    else:
        split_scenes = splits.create_splits_scenes()
        split_train, split_val = split_scenes['train'], split_scenes['val']
    return nusc, scenes, split_train, split_val
def gen_scene_splits(dataroot: str):
    """
    Returns all nuScenes scene splits by scene token, as specified in
    https://github.com/nutonomy/nuscenes-devkit/blob/master/python-sdk/nuscenes/utils/splits.py.

    Arguments:
        dataroot: Directory path of the nuScenes datasets, <str>.
    """
    # Imports
    from nuscenes.nuscenes import NuScenes
    from nuscenes.utils.splits import create_splits_scenes

    # Define
    scene_splits = create_splits_scenes()

    # Load trainval
    version = 'v1.0-trainval'
    nusc = NuScenes(version=version, dataroot=dataroot + version, verbose=False)
    for split, scene_names in scene_splits.items():
        if split == 'test':
            continue
        scene_splits[split] = [nusc.field2token('scene', 'name', scene_name)[0]
                               for scene_name in scene_names]

    # Load test
    version = 'v1.0-test'
    nusc = NuScenes(version=version, dataroot=dataroot + version, verbose=False)
    scene_splits['test'] = [nusc.field2token('scene', 'name', scene_name)[0]
                            for scene_name in scene_splits['test']]

    return scene_splits
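# Usage sketch for gen_scene_splits. The function concatenates dataroot and the
# version string, so the layout below is an assumption implied by that code:
#
#   # /data/sets/nuscenes/v1.0-trainval/ and /data/sets/nuscenes/v1.0-test/ must both exist.
#   scene_splits = gen_scene_splits('/data/sets/nuscenes/')
#   print(len(scene_splits['train']), len(scene_splits['val']), len(scene_splits['test']))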
def get_scenes(self):
    # Filter by scene split.
    split = {
        'v1.0-trainval': {True: 'train', False: 'val'},
        'v1.0-mini': {True: 'mini_train', False: 'mini_val'},
    }[self.nusc.version][self.is_train]
    scenes = create_splits_scenes()[split]
    return scenes
def split_scenes(scenes, split):
    """
    Get the list of scenes in a split.
    :param scenes (list): list of all scenes from nuScenes
    :param split (str): split name
    :return scenes_list (list): list of scene tokens in the split
    """
    scene_split_names = splits.create_splits_scenes()[split]
    scenes_list = []
    for scene in scenes:
        # NOTE: mini_train and mini_val are subsets of train and val.
        if scene['name'] in scene_split_names:
            scenes_list.append(scene['token'])
    return scenes_list
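# Usage sketch for split_scenes, assuming a v1.0-mini install at a hypothetical dataroot:
#
#   nusc = NuScenes(version='v1.0-mini', dataroot='/data/sets/nuscenes', verbose=False)
#   mini_train_tokens = split_scenes(nusc.scene, 'mini_train')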
def _split_scenes(self) -> list:
    """Split scenes into train, val and test scenes and return the tokens for the current split."""
    scene_split_names = splits.create_splits_scenes()
    scenes_list = []
    for scene in self.nusc.scene:
        # NOTE: mini_train and mini_val are subsets of train and val.
        if scene['name'] in scene_split_names[self.split]:
            scenes_list.append(scene['token'])
    self.logger.debug('{}: {} scenes'.format(self.nusc_version, str(len(scenes_list))))
    return scenes_list
def _get_scenes(self):
    # Filter by scene split.
    split = {
        'v1.0-trainval': {True: 'train', False: 'val'},
        'v1.0-mini': {True: 'mini_train', False: 'mini_val'},
        'lyft': {True: 'lyft_train', False: 'lyft_val'},
    }[self.nusc.version][self.train]
    return create_splits_scenes()[split]
def __init__(self, root_path, split='train', init_nusc=True):
    super().__init__()
    self.root_path = root_path
    self.split = split
    if init_nusc:
        self.nusc = NuScenes(version='v1.0-trainval', dataroot=root_path, verbose=True)
        splits = create_splits_scenes()
        split_scenes = splits[split]
        all_scene_names = [scene['name'] for scene in self.nusc.scene]
        split_scene_tokens = [
            self.nusc.scene[all_scene_names.index(scene_name)]['token']
            for scene_name in split_scenes
        ]
        self.sample_id_list = self.get_sample_tokens_from_scenes(split_scene_tokens)
def get_samples_in_eval_set(nusc: NuScenes, eval_set: str) -> List[str]:
    """
    Gets all the sample tokens from the split that are relevant to the eval set.
    :param nusc: A NuScenes object.
    :param eval_set: The dataset split to evaluate on, e.g. train, val or test.
    :return: A list of sample tokens.
    """
    # Create a dict to map from scene name to scene token for quick lookup later on.
    scene_name2tok = dict()
    for rec in nusc.scene:
        scene_name2tok[rec['name']] = rec['token']

    # Get scene splits from nuScenes.
    scenes_splits = create_splits_scenes(verbose=False)

    # Collect sample tokens for each scene.
    samples = []
    for scene in scenes_splits[eval_set]:
        scene_record = nusc.get('scene', scene_name2tok[scene])
        total_num_samples = scene_record['nbr_samples']
        first_sample_token = scene_record['first_sample_token']
        last_sample_token = scene_record['last_sample_token']

        # Walk the linked list of samples from the first to the last token.
        sample_token = first_sample_token
        i = 0
        while sample_token != '':
            sample_record = nusc.get('sample', sample_token)
            samples.append(sample_record['token'])
            if sample_token == last_sample_token:
                sample_token = ''
            else:
                sample_token = sample_record['next']
            i += 1

        assert total_num_samples == i, 'Error: There were supposed to be {} keyframes, ' \
                                       'but only {} keyframes were processed'.format(total_num_samples, i)

    return samples
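# Usage sketch for get_samples_in_eval_set (the dataroot is an assumption):
#
#   nusc = NuScenes(version='v1.0-mini', dataroot='/data/sets/nuscenes', verbose=False)
#   sample_tokens = get_samples_in_eval_set(nusc, eval_set='mini_val')
#   print('%d samples in eval set' % len(sample_tokens))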
def process_data(data_path, version, val_split):
    nusc = NuScenes(version=version, dataroot=data_path, verbose=True)
    splits = create_splits_scenes()
    train_scenes, val_scenes = train_test_split(
        splits['train' if 'mini' not in version else 'mini_train'], test_size=val_split)
    train_scene_names = splits['train' if 'mini' not in version else 'mini_train']
    val_scene_names = splits['val' if 'mini' not in version else 'mini_val']

    ns_scene_names = dict()
    ns_scene_names['train'] = train_scene_names
    ns_scene_names['val'] = val_scene_names

    scenes = []
    for data_class in ['train', 'val']:
        for ns_scene_name in tqdm(ns_scene_names[data_class]):
            ns_scene = nusc.get('scene', nusc.field2token('scene', 'name', ns_scene_name)[0])
            scene_id = int(ns_scene['name'].replace('scene-', ''))
            if scene_id in scene_blacklist:  # Some scenes have bad localization.
                continue
            scene = process_scene(ns_scene, nusc)
            if scene is not None:
                scenes.append(scene)

    print(f'Processed {len(scenes)} scenes')
def get_scenes(self):
    if self.is_lyft:
        scenes = [row['name'] for row in self.nusc.scene]
        # Split in train/val.
        indices = TRAIN_LYFT_INDICES if self.is_train else VAL_LYFT_INDICES
        scenes = [scenes[i] for i in indices]
    else:
        # Filter by scene split.
        split = {
            'v1.0-trainval': {True: 'train', False: 'val'},
            'v1.0-mini': {True: 'mini_train', False: 'mini_val'},
        }[self.nusc.version][self.is_train]
        scenes = create_splits_scenes()[split]
    return scenes
def from_nuscenes(
    data_path: str,
    version: str,
    split: str,
    nproc: int = NPROC,
    add_nonkey_frames: bool = False,
) -> Dataset:
    """Convert NuScenes dataset to Scalabel format."""
    data, df = load_data(data_path, version)
    scene_names_per_split = create_splits_scenes()

    first_sample_tokens = []
    for token, name in zip(df.first_sample_token.values, df.scene_name.values):
        if name in scene_names_per_split[split]:
            first_sample_tokens.append(token)

    func = partial(parse_sequence, data, add_nonkey_frames)
    if nproc > 1:
        partial_results = pmap(
            func,
            zip(first_sample_tokens, scene_names_per_split[split]),
            nprocs=nproc,
        )
    else:
        partial_results = map(  # type: ignore
            func,
            zip(first_sample_tokens, scene_names_per_split[split]),
        )
    frames, groups = [], []
    for f, g in partial_results:
        frames.extend(f)
        groups.extend(g)

    cfg = Config(categories=[Category(name=n) for n in DETECTION_NAMES])
    dataset = Dataset(frames=frames, groups=groups, config=cfg)
    return dataset
def load_gt(nusc, eval_split: str, verbose: bool = False) -> EvalBoxes:
    """ Loads ground truth boxes from DB. """
    # Init.
    attribute_map = {a['token']: a['name'] for a in nusc.attribute}

    if verbose:
        print('Loading annotations for {} split from nuScenes version: {}'.format(eval_split, nusc.version))

    # Read out all sample_tokens in DB.
    sample_tokens_all = [s['token'] for s in nusc.sample]
    assert len(sample_tokens_all) > 0, "Error: Database has no samples!"

    # Only keep samples from this split.
    splits = create_splits_scenes()

    # Check compatibility of split with nusc_version.
    version = nusc.version
    if eval_split in {'train', 'val', 'train_detect', 'train_track'}:
        assert version.endswith('trainval'), \
            'Error: Requested split {} which is not compatible with NuScenes version {}'.format(eval_split, version)
    elif eval_split in {'mini_train', 'mini_val'}:
        assert version.endswith('mini'), \
            'Error: Requested split {} which is not compatible with NuScenes version {}'.format(eval_split, version)
    elif eval_split == 'test':
        assert version.endswith('test'), \
            'Error: Requested split {} which is not compatible with NuScenes version {}'.format(eval_split, version)
    else:
        raise ValueError('Error: Requested split {} which this function cannot map to the correct NuScenes version.'
                         .format(eval_split))

    if eval_split == 'test':
        # Check that you aren't trying to cheat :).
        assert len(nusc.sample_annotation) > 0, \
            'Error: You are trying to evaluate on the test set but you do not have the annotations!'

    sample_tokens = []
    for sample_token in sample_tokens_all:
        scene_token = nusc.get('sample', sample_token)['scene_token']
        scene_record = nusc.get('scene', scene_token)
        if scene_record['name'] in splits[eval_split]:
            sample_tokens.append(sample_token)

    all_annotations = EvalBoxes()

    # Load annotations and filter predictions and annotations.
    for sample_token in tqdm.tqdm(sample_tokens):
        sample = nusc.get('sample', sample_token)
        sample_annotation_tokens = sample['anns']
        sample_boxes = []
        for sample_annotation_token in sample_annotation_tokens:
            # Get label name in detection task and filter unused labels.
            sample_annotation = nusc.get('sample_annotation', sample_annotation_token)
            detection_name = category_to_detection_name(sample_annotation['category_name'])
            if detection_name is None:
                continue

            # Get attribute_name.
            attr_tokens = sample_annotation['attribute_tokens']
            attr_count = len(attr_tokens)
            if attr_count == 0:
                attribute_name = ''
            elif attr_count == 1:
                attribute_name = attribute_map[attr_tokens[0]]
            else:
                raise Exception('Error: GT annotations must not have more than one attribute!')

            sample_boxes.append(
                EvalBox(
                    sample_token=sample_token,
                    translation=sample_annotation['translation'],
                    size=sample_annotation['size'],
                    rotation=sample_annotation['rotation'],
                    velocity=nusc.box_velocity(sample_annotation['token'])[:2],
                    detection_name=detection_name,
                    detection_score=-1.0,  # GT samples do not have a score.
                    attribute_name=attribute_name,
                    num_pts=sample_annotation['num_lidar_pts'] + sample_annotation['num_radar_pts']))

        all_annotations.add_boxes(sample_token, sample_boxes)

    if verbose:
        print("Loaded ground truth annotations for {} samples.".format(len(all_annotations.sample_tokens)))

    return all_annotations
def _mock_submission(nusc, split) -> Dict[str, dict]:
    """
    Creates "reasonable" submission (results and metadata) by looping through the full val-set, and adding 1
    prediction per GT. Predictions will be permuted randomly along all axes.
    """

    def random_class(category_name):
        class_names = ['barrier', 'bicycle', 'bus', 'car', 'construction_vehicle', 'motorcycle',
                       'pedestrian', 'traffic_cone', 'trailer', 'truck']
        tmp = category_to_detection_name(category_name)
        if tmp is not None and np.random.rand() < .9:
            return tmp
        else:
            # randint's upper bound is exclusive; use len(class_names) so every class can be drawn.
            return class_names[np.random.randint(0, len(class_names))]

    def random_attr(name):
        """
        This is the most straight-forward way to generate a random attribute.
        Not currently used b/c we want the test fixture to be backwards compatible.
        """
        # Get relevant attributes.
        rel_attributes = detection_name_to_rel_attributes(name)
        if len(rel_attributes) == 0:
            # Empty string for classes without attributes.
            return ''
        else:
            # Pick a random attribute otherwise.
            return rel_attributes[np.random.randint(0, len(rel_attributes))]

    mock_meta = {
        'use_camera': False,
        'use_lidar': True,
        'use_radar': False,
        'use_map': False,
        'use_external': False,
    }
    mock_results = {}
    splits = create_splits_scenes()
    val_samples = []
    for sample in nusc.sample:
        if nusc.get('scene', sample['scene_token'])['name'] in splits[split]:
            val_samples.append(sample)

    for sample in tqdm(val_samples):
        sample_res = []
        for ann_token in sample['anns']:
            ann = nusc.get('sample_annotation', ann_token)
            detection_name = random_class(ann['category_name'])
            sample_res.append({
                'sample_token': sample['token'],
                'translation': list(np.array(ann['translation']) + 5 * (np.random.rand(3) - 0.5)),
                'size': list(np.array(ann['size']) * 2 * (np.random.rand(3) + 0.5)),
                'rotation': list(np.array(ann['rotation']) + ((np.random.rand(4) - 0.5) * .1)),
                'velocity': list(nusc.box_velocity(ann_token)[:2] * (np.random.rand(3)[:2] + 0.5)),
                'detection_name': detection_name,
                'detection_score': random.random(),
                'attribute_name': random_attr(detection_name)
            })
        mock_results[sample['token']] = sample_res
    mock_submission = {'meta': mock_meta, 'results': mock_results}
    return mock_submission
def load_gt(nusc: NuScenes, eval_split: str, box_cls, verbose: bool = False) -> EvalBoxes:
    """
    Loads ground truth boxes from DB.
    :param nusc: A NuScenes instance.
    :param eval_split: The evaluation split for which we load GT boxes.
    :param box_cls: Type of box to load, e.g. DetectionBox or TrackingBox.
    :param verbose: Whether to print messages to stdout.
    :return: The GT boxes.
    """
    # Init.
    if box_cls == DetectionBox:
        attribute_map = {a['token']: a['name'] for a in nusc.attribute}

    if verbose:
        print('Loading annotations for {} split from nuScenes version: {}'.format(eval_split, nusc.version))

    # Read out all sample_tokens in DB.
    sample_tokens_all = [s['token'] for s in nusc.sample]
    assert len(sample_tokens_all) > 0, "Error: Database has no samples!"

    # Only keep samples from this split.
    splits = create_splits_scenes()

    # Check compatibility of split with nusc_version.
    version = nusc.version
    if eval_split in {'train', 'val', 'train_detect', 'train_track'}:
        assert version.endswith('trainval'), \
            'Error: Requested split {} which is not compatible with NuScenes version {}'.format(eval_split, version)
    elif eval_split in {'mini_train', 'mini_val'}:
        assert version.endswith('mini'), \
            'Error: Requested split {} which is not compatible with NuScenes version {}'.format(eval_split, version)
    elif eval_split == 'test':
        assert version.endswith('test'), \
            'Error: Requested split {} which is not compatible with NuScenes version {}'.format(eval_split, version)
    else:
        raise ValueError('Error: Requested split {} which this function cannot map to the correct NuScenes version.'
                         .format(eval_split))

    if eval_split == 'test':
        # Check that you aren't trying to cheat :).
        assert len(nusc.sample_annotation) > 0, \
            'Error: You are trying to evaluate on the test set but you do not have the annotations!'

    sample_tokens = []
    for sample_token in sample_tokens_all:
        scene_token = nusc.get('sample', sample_token)['scene_token']
        scene_record = nusc.get('scene', scene_token)
        if scene_record['name'] in splits[eval_split]:
            sample_tokens.append(sample_token)

    all_annotations = EvalBoxes()

    # Load annotations and filter predictions and annotations.
    tracking_id_set = set()
    for sample_token in tqdm.tqdm(sample_tokens, leave=verbose):
        sample = nusc.get('sample', sample_token)
        sample_annotation_tokens = sample['anns']
        sample_boxes = []
        for sample_annotation_token in sample_annotation_tokens:
            sample_annotation = nusc.get('sample_annotation', sample_annotation_token)
            if box_cls == DetectionBox:
                # Get label name in detection task and filter unused labels.
                detection_name = category_to_detection_name(sample_annotation['category_name'])
                if detection_name is None:
                    continue

                # Get attribute_name.
                attr_tokens = sample_annotation['attribute_tokens']
                attr_count = len(attr_tokens)
                if attr_count == 0:
                    attribute_name = ''
                elif attr_count == 1:
                    attribute_name = attribute_map[attr_tokens[0]]
                else:
                    raise Exception('Error: GT annotations must not have more than one attribute!')

                sample_boxes.append(
                    box_cls(
                        sample_token=sample_token,
                        translation=sample_annotation['translation'],
                        size=sample_annotation['size'],
                        rotation=sample_annotation['rotation'],
                        velocity=nusc.box_velocity(sample_annotation['token'])[:2],
                        num_pts=sample_annotation['num_lidar_pts'] + sample_annotation['num_radar_pts'],
                        detection_name=detection_name,
                        detection_score=-1.0,  # GT samples do not have a score.
                        attribute_name=attribute_name))
            elif box_cls == TrackingBox:
                # Use nuScenes token as tracking id.
                tracking_id = sample_annotation['instance_token']
                tracking_id_set.add(tracking_id)

                # Get label name in detection task and filter unused labels.
                tracking_name = category_to_tracking_name(sample_annotation['category_name'])
                if tracking_name is None:
                    continue

                sample_boxes.append(
                    box_cls(
                        sample_token=sample_token,
                        translation=sample_annotation['translation'],
                        size=sample_annotation['size'],
                        rotation=sample_annotation['rotation'],
                        velocity=nusc.box_velocity(sample_annotation['token'])[:2],
                        num_pts=sample_annotation['num_lidar_pts'] + sample_annotation['num_radar_pts'],
                        tracking_id=tracking_id,
                        tracking_name=tracking_name,
                        tracking_score=-1.0))  # GT samples do not have a score.
            else:
                raise NotImplementedError('Error: Invalid box_cls %s!' % box_cls)

        all_annotations.add_boxes(sample_token, sample_boxes)

    if verbose:
        print("Loaded ground truth annotations for {} samples.".format(len(all_annotations.sample_tokens)))

    return all_annotations
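# Usage sketch for the generic loader above. DetectionBox and TrackingBox live in
# the nuScenes devkit eval data classes; the dataroot is an assumption:
#
#   nusc = NuScenes(version='v1.0-mini', dataroot='/data/sets/nuscenes')
#   gt_boxes = load_gt(nusc, 'mini_val', DetectionBox, verbose=True)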
def evaluate(split):
    params = read_params(FLAGS.param)
    nusc = NuScenes(version='v1.0-trainval', dataroot=FLAGS.nuscenes, verbose=True)
    sensor = 'LIDAR_TOP'
    kitti_to_nu_lidar = Quaternion(axis=(0, 0, 1), angle=np.pi / 2)
    meta = {
        'use_camera': False,
        'use_lidar': True,
        'use_radar': False,
        'use_map': False,
        'use_external': False,
    }
    results = {}
    results_0_3 = {}

    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(FLAGS.graph, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            # Pin non-maximum suppression to the CPU.
            for node in od_graph_def.node:
                if 'BatchMultiClassNonMaxSuppression' in node.name:
                    node.device = '/device:CPU:0'
            tf.import_graph_def(od_graph_def, name='')

    with detection_graph.as_default():
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = False
        with tf.Session(graph=detection_graph, config=config) as sess:
            image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
            detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
            detection_boxes_inclined = detection_graph.get_tensor_by_name('detection_boxes_3d:0')
            detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
            detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
            num_detections = detection_graph.get_tensor_by_name('num_detections:0')

            scene_splits = create_splits_scenes()
            inf_time_list = []
            for scene in nusc.scene:
                if scene['name'] not in scene_splits[split]:
                    continue
                current_sample_token = scene['first_sample_token']
                last_sample_token = scene['last_sample_token']
                sample_in_scene = True
                while sample_in_scene:
                    if current_sample_token == last_sample_token:
                        sample_in_scene = False
                    sample = nusc.get('sample', current_sample_token)
                    lidar_top_data = nusc.get('sample_data', sample['data'][sensor])

                    # Get global pose and calibration data.
                    ego_pose = nusc.get('ego_pose', lidar_top_data['ego_pose_token'])
                    calib_sensor = nusc.get('calibrated_sensor', lidar_top_data['calibrated_sensor_token'])
                    ego_to_global = transform_matrix(ego_pose['translation'], Quaternion(ego_pose['rotation']))
                    lidar_to_ego = transform_matrix(calib_sensor['translation'],
                                                    Quaternion(calib_sensor['rotation']))

                    # Read input data.
                    filename_prefix = os.path.splitext(os.path.splitext(lidar_top_data['filename'])[0])[0]
                    image_stacked, det_mask, image_ground, image_zmax = read_images(FLAGS.data, filename_prefix)

                    # Inference.
                    start_time = time.time()
                    (boxes_aligned, boxes_inclined, scores, classes, num) = sess.run(
                        [detection_boxes, detection_boxes_inclined, detection_scores, detection_classes,
                         num_detections],
                        feed_dict={image_tensor: image_stacked})
                    inf_time = time.time() - start_time
                    print('Inference time:', inf_time)
                    inf_time_list.append(inf_time)

                    # Evaluate object detection.
                    label_map = label_map_util.load_labelmap(FLAGS.label_map)
                    categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=10,
                                                                                use_display_name=True)
                    category_index = label_map_util.create_category_index(categories)

                    boxes = []
                    boxes_0_3 = []
                    scores = np.squeeze(scores)
                    for i in range(scores.shape[0]):
                        if scores[i] > .230:
                            object_class = category_index[int(np.squeeze(classes)[i])]['name']
                            box = calculate_object_box(tuple(np.squeeze(boxes_aligned)[i]),
                                                       tuple(np.squeeze(boxes_inclined)[i]),
                                                       image_ground, image_zmax, object_class, scores[i], params)
                            # Transform from box coordinate system to nuScenes lidar coordinate system.
                            box.rotate(kitti_to_nu_lidar)
                            # Transform from nuScenes lidar coordinate system to ego vehicle frame.
                            box.rotate(Quaternion(matrix=lidar_to_ego[:3, :3]))
                            box.translate(lidar_to_ego[:3, 3])
                            # Transform from ego vehicle frame to global frame.
                            box.rotate(Quaternion(matrix=ego_to_global[:3, :3]))
                            box.translate(ego_to_global[:3, 3])
                            boxes.append(box)
                    for i in range(scores.shape[0]):
                        if scores[i] > .225:
                            object_class = category_index[int(np.squeeze(classes)[i])]['name']
                            box = calculate_object_box(tuple(np.squeeze(boxes_aligned)[i]),
                                                       tuple(np.squeeze(boxes_inclined)[i]),
                                                       image_ground, image_zmax, object_class, scores[i], params)
                            # Transform from box coordinate system to nuScenes lidar coordinate system.
                            box.rotate(kitti_to_nu_lidar)
                            # Transform from nuScenes lidar coordinate system to ego vehicle frame.
                            box.rotate(Quaternion(matrix=lidar_to_ego[:3, :3]))
                            box.translate(lidar_to_ego[:3, 3])
                            # Transform from ego vehicle frame to global frame.
                            box.rotate(Quaternion(matrix=ego_to_global[:3, :3]))
                            box.translate(ego_to_global[:3, 3])
                            boxes_0_3.append(box)

                    # Convert boxes to nuScenes detection challenge result format.
                    sample_results = [box_to_sample_result(current_sample_token, box) for box in boxes]
                    results[current_sample_token] = sample_results
                    sample_results_0_3 = [box_to_sample_result(current_sample_token, box) for box in boxes_0_3]
                    results_0_3[current_sample_token] = sample_results_0_3
                    current_sample_token = sample['next']

    f_info = open(os.path.join(FLAGS.output, 'inference_time.txt'), 'w+')
    average_inf_time = sum(inf_time_list) / float(len(inf_time_list))
    f_info.write('average time:{}\n'.format(average_inf_time))
    f_info.write(str(inf_time_list))

    submission = {
        'meta': meta,
        'results': results
    }
    submission_path = os.path.join(FLAGS.output, 'submission_0_3nn30.json')
    with open(submission_path, 'w') as f:
        json.dump(submission, f, indent=2)

    submission_0_3 = {
        'meta': meta,
        'results': results_0_3
    }
    submission_path_0_3 = os.path.join(FLAGS.output, 'submission_0_3nn25.json')
    with open(submission_path_0_3, 'w') as f:
        json.dump(submission_0_3, f, indent=2)
def __init__(self, mode='test', limiter=0):
    name = 'nuscenes'
    db.__init__(self, name)
    self._train_scenes = []
    self._val_scenes = []
    self._test_scenes = []
    self._train_index = []
    self._val_index = []
    self._test_index = []
    self._devkit_path = self._get_default_path()
    self._mode = mode
    self._nusc = None
    self._scene_sel = True
    # For now one large cache file is OK, but ideally just take the subset of actually
    # needed data and cache that. No need to load nusc every time.
    self._classes = (
        'dontcare',  # always index 0
        'vehicle.car',
        'human.pedestrian',
        'vehicle.bicycle')
    self.config = {'cleanup': True, 'matlab_eval': False, 'rpn_file': None}
    self._class_to_ind = dict(list(zip(self.classes, list(range(self.num_classes)))))
    self._val_scenes = create_splits_scenes()['val']
    self._train_scenes = create_splits_scenes()['train']
    self._test_scenes = create_splits_scenes()['test']
    # TODO: create custom scene list

    for rec in self.nusc.sample_data:
        if rec['channel'] == 'CAM_FRONT' and rec['is_key_frame'] is True:
            rec_tmp = deepcopy(rec)
            # Reverse lookup: get the overall sample from the picture's sample token,
            # to retrieve the scene information.
            scene_name = self.nusc.get(
                'scene', self.nusc.get('sample', rec['sample_token'])['scene_token'])['name']
            desc = self.nusc.get(
                'scene', self.nusc.get('sample', rec['sample_token'])['scene_token'])['description'].lower()
            if (self._scene_sel and 'night' not in desc and 'rain' not in desc and 'cones' not in desc):
                sample = self.nusc.get('sample', rec['sample_token'])
                rec_tmp['anns'] = sample['anns']
                rec_tmp['lidar_token'] = sample['data']['LIDAR_TOP']
                if scene_name in self._train_scenes:
                    self._train_index.append(rec_tmp)
                elif scene_name in self._val_scenes:
                    self._val_index.append(rec_tmp)
                elif scene_name in self._test_scenes:
                    self._test_index.append(rec_tmp)

    rand = SystemRandom()
    # Get global image info.
    if mode == 'train':
        img_index = self._train_index
        rand.shuffle(self._val_index)
    elif mode == 'val':
        img_index = self._val_index
    elif mode == 'test':
        img_index = self._test_index
    self._imwidth = img_index[0]['width']
    self._imheight = img_index[0]['height']
    self._imtype = img_index[0]['fileformat']
    rand = SystemRandom()
    rand.shuffle(img_index)
    if limiter != 0:
        img_index = img_index[:limiter]
    if mode == 'train':
        self._train_index = img_index
    elif mode == 'val':
        self._val_index = img_index
    elif mode == 'test':
        self._test_index = img_index
    assert os.path.exists(self._devkit_path), \
        'nuscenes dataset path does not exist: {}'.format(self._devkit_path)
def _mock_submission(nusc: NuScenes, split: str, add_errors: bool = False) -> Dict[str, dict]:
    """
    Creates "reasonable" submission (results and metadata) by looping through the mini-val set, adding 1 GT
    prediction per sample. Predictions will be permuted randomly along all axes.
    :param nusc: NuScenes instance.
    :param split: Dataset split to use.
    :param add_errors: Whether to use GT or add errors to it.
    """

    def random_class(category_name: str, _add_errors: bool = False) -> Optional[str]:
        # Alter 10% of the valid labels.
        class_names = sorted(TRACKING_NAMES)
        tmp = category_to_tracking_name(category_name)
        if tmp is None:
            return None
        else:
            if not _add_errors or np.random.rand() < .9:
                return tmp
            else:
                # randint's upper bound is exclusive; use len(class_names) so every class can be drawn.
                return class_names[np.random.randint(0, len(class_names))]

    def random_id(instance_token: str, _add_errors: bool = False) -> str:
        # Alter 10% of the valid ids to be a random string, which hopefully corresponds to a new track.
        if not _add_errors or np.random.rand() < .9:
            _tracking_id = instance_token + '_pred'
        else:
            _tracking_id = str(np.random.randint(0, sys.maxsize))
        return _tracking_id

    mock_meta = {
        'use_camera': False,
        'use_lidar': True,
        'use_radar': False,
        'use_map': False,
        'use_external': False,
    }
    mock_results = {}

    # Get all samples in the current evaluation split.
    splits = create_splits_scenes()
    val_samples = []
    for sample in nusc.sample:
        if nusc.get('scene', sample['scene_token'])['name'] in splits[split]:
            val_samples.append(sample)

    # Prepare results.
    instance_to_score = dict()
    for sample in tqdm(val_samples, leave=False):
        sample_res = []
        for ann_token in sample['anns']:
            ann = nusc.get('sample_annotation', ann_token)
            translation = np.array(ann['translation'])
            size = np.array(ann['size'])
            rotation = np.array(ann['rotation'])
            velocity = nusc.box_velocity(ann_token)[:2]
            tracking_id = random_id(ann['instance_token'], _add_errors=add_errors)
            tracking_name = random_class(ann['category_name'], _add_errors=add_errors)

            # Skip annotations for classes not part of the detection challenge.
            if tracking_name is None:
                continue

            # Skip annotations with 0 lidar/radar points.
            num_pts = ann['num_lidar_pts'] + ann['num_radar_pts']
            if num_pts == 0:
                continue

            # If we randomly assign a score in [0, 1] to each box and later average over the boxes in the track,
            # the average score will be around 0.5 and we will have 0 predictions above that.
            # Therefore we assign the same scores to each box in a track.
            if ann['instance_token'] not in instance_to_score:
                instance_to_score[ann['instance_token']] = random.random()
            tracking_score = instance_to_score[ann['instance_token']]
            tracking_score = np.clip(tracking_score + random.random() * 0.3, 0, 1)

            if add_errors:
                translation += 4 * (np.random.rand(3) - 0.5)
                size *= (np.random.rand(3) + 0.5)
                rotation += (np.random.rand(4) - 0.5) * .1
                velocity *= np.random.rand(3)[:2] + 0.5

            sample_res.append({
                'sample_token': sample['token'],
                'translation': list(translation),
                'size': list(size),
                'rotation': list(rotation),
                'velocity': list(velocity),
                'tracking_id': tracking_id,
                'tracking_name': tracking_name,
                'tracking_score': tracking_score
            })
        mock_results[sample['token']] = sample_res
    mock_submission = {
        'meta': mock_meta,
        'results': mock_results
    }
    return mock_submission
def visualize(split):
    nusc = NuScenes(version='v1.0-trainval', dataroot=FLAGS.nuscenes, verbose=True)
    sensor = 'LIDAR_TOP'

    # pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
    # with tf.gfile.GFile(FLAGS.pipeline_config_path, 'r') as f:
    #     text_format.Merge(f.read(), pipeline_config)
    # if not pipeline_config.model.HasField('ssd_augmentation'):
    #     raise ValueError('Model with ssd_augmentation estimation is required.')

    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(FLAGS.graph, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            # Pin non-maximum suppression to the CPU.
            for node in od_graph_def.node:
                if 'BatchMultiClassNonMaxSuppression' in node.name:
                    node.device = '/device:CPU:0'
            tf.import_graph_def(od_graph_def, name='')

    with detection_graph.as_default():
        with tf.Session(graph=detection_graph) as sess:
            image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
            detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
            detection_boxes_inclined = detection_graph.get_tensor_by_name('detection_boxes_3d:0')
            detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
            detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
            num_detections = detection_graph.get_tensor_by_name('num_detections:0')

            # NOTE: scenes are filtered by the module-level vis_set below, not by the split argument.
            scene_splits = create_splits_scenes()
            for scene in nusc.scene:
                if scene['name'] not in vis_set:
                    continue
                scene_dir = os.path.join(FLAGS.output, scene['name'])
                os.system('mkdir {}'.format(scene_dir))
                folder_inverse = os.path.join(scene_dir, 'inverse')
                os.system('mkdir {}'.format(folder_inverse))
                current_sample_token = scene['first_sample_token']
                last_sample_token = scene['last_sample_token']
                sample_in_scene = True
                first_inference = True
                while sample_in_scene:
                    if current_sample_token == last_sample_token:
                        sample_in_scene = False
                    sample = nusc.get('sample', current_sample_token)
                    lidar_top_data = nusc.get('sample_data', sample['data'][sensor])
                    if first_inference:
                        # current_token = lidar_top_data['next']
                        # if use_10hz_capture_frequency:
                        #     if current_token:
                        #         lidar_top_data_next = nusc.get('sample_data', current_token)
                        #         current_token = lidar_top_data_next['next']
                        current_sample_token = sample['next']
                        first_inference = False
                        continue

                    # Read input data.
                    filename_prefix = os.path.splitext(os.path.splitext(lidar_top_data['filename'])[0])[0]
                    image_stacked, det_mask, observation_mask, z_mask = read_images(
                        FLAGS.data, FLAGS.data_beliefs, filename_prefix)

                    # Inference.
                    start_time = time.time()
                    (boxes_aligned, boxes_inclined, scores, classes, num) = sess.run(
                        [detection_boxes, detection_boxes_inclined, detection_scores, detection_classes,
                         num_detections],
                        feed_dict={image_tensor: image_stacked})
                    print('Inference time:', time.time() - start_time)

                    # Visualize object detection and scene flow.
                    label_map = label_map_util.load_labelmap(FLAGS.label_map)
                    categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=10,
                                                                                use_display_name=True)
                    category_index = label_map_util.create_category_index(categories)

                    # Create grid maps to visualize.
                    image_vis = np.zeros((image_stacked.shape[1], image_stacked.shape[2], 3), dtype=np.uint8)
                    image_vis_inv = np.zeros((image_stacked.shape[1], image_stacked.shape[2], 3), dtype=np.uint8)
                    image_vis_color = np.zeros((image_stacked.shape[1], image_stacked.shape[2], 3), dtype=np.uint8) * 255  # todo
                    image_vis_color_inv = np.zeros((image_stacked.shape[1], image_stacked.shape[2], 3), dtype=np.uint8) * 255  # todo

                    for v, u in itertools.product(range(image_stacked.shape[1]), range(image_stacked.shape[2])):
                        image_vis_color[v, u, 0] = observation_mask[v, u] * 5
                        image_vis_color[v, u, 1] = det_mask[v, u] * 10
                        image_vis_color[v, u, 2] = z_mask[v, u]
                    image_vis_color_inv = cv2.bitwise_not(image_vis_color)

                    for (v, u), val in np.ndenumerate(det_mask):
                        if val:
                            image_vis[v, u] = 255
                            image_vis_inv[v, u] = 0

                    image_vis = np.zeros((image_stacked.shape[1], image_stacked.shape[2], 3), dtype=np.uint8)
                    for (v, u), val in np.ndenumerate(observation_mask):
                        if val:
                            image_vis[v, u, :] = 50
                    image_vis_inv = cv2.bitwise_not(image_vis)
                    for (v, u), val in np.ndenumerate(det_mask):
                        if val:
                            image_vis[v, u] = 255
                            image_vis_inv[v, u] = 0

                    # Draw inclined detection boxes.
                    vis_util.visualize_boxes_and_labels_on_image_array(
                        image_vis_color,
                        np.squeeze(boxes_aligned),
                        np.squeeze(classes).astype(np.int32),
                        np.squeeze(scores),
                        category_index,
                        boxes_3d=np.squeeze(boxes_inclined),
                        min_score_thresh=0.3,
                        use_normalized_coordinates=True,
                        line_thickness=3)
                    vis_util.visualize_boxes_and_labels_on_image_array(
                        image_vis_color_inv,
                        np.squeeze(boxes_aligned),
                        np.squeeze(classes).astype(np.int32),
                        np.squeeze(scores),
                        category_index,
                        boxes_3d=np.squeeze(boxes_inclined),
                        min_score_thresh=0.3,
                        use_normalized_coordinates=True,
                        line_thickness=3)
                    vis_util.visualize_boxes_and_labels_on_image_array(
                        image_vis,
                        np.squeeze(boxes_aligned),
                        np.squeeze(classes).astype(np.int32),
                        np.squeeze(scores),
                        category_index,
                        boxes_3d=np.squeeze(boxes_inclined),
                        min_score_thresh=0.3,
                        use_normalized_coordinates=True,
                        line_thickness=3)
                    vis_util.visualize_boxes_and_labels_on_image_array(
                        image_vis_inv,
                        np.squeeze(boxes_aligned),
                        np.squeeze(classes).astype(np.int32),
                        np.squeeze(scores),
                        category_index,
                        boxes_3d=np.squeeze(boxes_inclined),
                        min_score_thresh=0.3,
                        use_normalized_coordinates=True,
                        line_thickness=3)

                    # Save images.
                    print(filename_prefix.split('/')[-1])
                    output_path = os.path.join(scene_dir, filename_prefix.split('/')[-1] + '.png')
                    cv2.imwrite(output_path, image_vis)
                    output_path_inv = os.path.join(folder_inverse, filename_prefix.split('/')[-1] + '.png')
                    cv2.imwrite(output_path_inv, image_vis_inv)

                    current_sample_token = sample['next']
def __init__(self, nusc, nusc_split, kwargs, seed=0):
    super(nuScenesDataset, self).__init__()

    # Set seed for the split.
    np.random.seed(seed)

    self.nusc = nusc
    self.nusc_root = self.nusc.dataroot
    self.nusc_can = NuScenesCanBus(dataroot=self.nusc_root)
    self.nusc_split = nusc_split

    # Number of input samples.
    self.n_input = kwargs["n_input"]
    # Number of sampled trajectories.
    self.n_samples = kwargs["n_samples"]
    # Number of output samples.
    self.n_output = kwargs["n_output"]
    assert self.n_output == 7

    # Whether to train on all sweeps rather than only key frames; read below when collecting tokens.
    self.train_on_all_sweeps = kwargs["train_on_all_sweeps"]

    # scene-0419 does not have vehicle monitor data.
    blacklist = [419] + self.nusc_can.can_blacklist

    # NOTE: use the official split (minus the ones in the blacklist).
    if "scene_token" in kwargs and kwargs["scene_token"] != "":
        scene = self.nusc.get("scene", kwargs["scene_token"])
        scenes = [scene]
    else:
        scene_splits = create_splits_scenes(verbose=False)
        scene_names = scene_splits[self.nusc_split]
        scenes = []
        for scene in self.nusc.scene:
            scene_name = scene["name"]
            scene_no = int(scene_name[-4:])
            if (scene_name in scene_names) and (scene_no not in blacklist):
                scenes.append(scene)

    # List all sample data.
    self.valid_index = []
    self.flip_flags = []
    self.scene_tokens = []
    self.sample_data_tokens = []
    for scene in scenes:
        scene_token = scene["token"]
        # Location.
        log = self.nusc.get("log", scene["log_token"])
        # Flip x axis if in left-hand traffic (Singapore).
        flip_flag = True if log["location"].startswith("singapore") else False
        # Record the token of every key frame.
        start_index = len(self.sample_data_tokens)
        first_sample = self.nusc.get("sample", scene["first_sample_token"])
        sample_data_token = first_sample["data"]["LIDAR_TOP"]
        while sample_data_token != "":
            sample_data = self.nusc.get("sample_data", sample_data_token)
            if (self.nusc_split == "train" and self.train_on_all_sweeps) or (sample_data["is_key_frame"]):
                self.flip_flags.append(flip_flag)
                self.scene_tokens.append(scene_token)
                self.sample_data_tokens.append(sample_data_token)
            sample_data_token = sample_data["next"]
        end_index = len(self.sample_data_tokens)

        # NOTE: make sure we have enough sweeps for input and output.
        if self.nusc_split == "train" and self.train_on_all_sweeps:
            valid_start_index = start_index + self.n_input - 1
            valid_end_index = end_index - (self.n_output - 1) * self.N_SWEEPS_PER_SAMPLE
        else:
            # NEW: acknowledge the fact and skip the first sample.
            n_input_samples = self.n_input // self.N_SWEEPS_PER_SAMPLE
            valid_start_index = start_index + n_input_samples
            valid_end_index = end_index - self.n_output + 1
        self.valid_index += list(range(valid_start_index, valid_end_index))

    self._n_examples = len(self.valid_index)
    print(f"{self.nusc_split}: {self._n_examples} valid samples over {len(scenes)} scenes")
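# Hypothetical construction sketch; the kwargs keys mirror the reads in __init__
# above, and the concrete values are illustrative only:
#
#   nusc = NuScenes(version='v1.0-trainval', dataroot='/data/sets/nuscenes')
#   dataset = nuScenesDataset(nusc, 'train', {"n_input": 20, "n_samples": 1,
#                                             "n_output": 7, "train_on_all_sweeps": False})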
def nuscenes_gt_to_kitti(self) -> None:
    """
    Converts nuScenes GT annotations to KITTI format.
    """
    kitti_to_nu_lidar = Quaternion(axis=(0, 0, 1), angle=np.pi / 2)
    kitti_to_nu_lidar_inv = kitti_to_nu_lidar.inverse
    imsize = (1600, 900)
    token_idx = 0  # Start tokens from 0.

    # Get assignment of scenes to splits.
    split_logs = create_splits_logs(self.split, self.nusc)
    scene_splits = create_splits_scenes(verbose=False)
    scene_to_log = {
        scene['name']: self.nusc.get('log', scene['log_token'])['logfile']
        for scene in self.nusc.scene
    }
    logs = set()
    scenes = scene_splits[self.split]
    for scene in scenes:
        logs.add(scene_to_log[scene])

    split_mapping = {"train": "training", "val": "testing"}

    # Create output folders.
    label_folder = os.path.join(self.nusc_kitti_dir, split_mapping[self.split], 'label_2')
    calib_folder = os.path.join(self.nusc_kitti_dir, split_mapping[self.split], 'calib')
    image_folder = os.path.join(self.nusc_kitti_dir, split_mapping[self.split], 'image_2')
    lidar_folder = os.path.join(self.nusc_kitti_dir, split_mapping[self.split], 'velodyne')
    for folder in [label_folder, calib_folder, image_folder, lidar_folder]:
        if not os.path.isdir(folder):
            os.makedirs(folder)

    # Use only the samples from the current split.
    sample_tokens = self._split_to_samples(split_logs)

    tokens = []
    if self.split == "train":
        split_file = [
            os.path.join(self.nusc_kitti_dir, "train.txt"),
            os.path.join(self.nusc_kitti_dir, "val.txt")
        ]
    elif self.split == 'val':
        split_file = os.path.join(self.nusc_kitti_dir, "test.txt")

    if self.split == "train":
        cnt = 0
        with open(split_file[0], "w") as f:
            for seq in list(self.sequence_mapping.keys())[:-150]:
                for tk in self.sequence_mapping[seq]:
                    f.write("%06d" % tk + "\n")
                    cnt += 1
        cnt = 0
        with open(split_file[1], "w") as f:
            for seq in list(self.sequence_mapping.keys())[-150:]:
                for tk in self.sequence_mapping[seq]:
                    f.write("%06d" % tk + "\n")
                    cnt += 1
    elif self.split == "val":
        with open(split_file, "w") as f:
            for seq in self.sequence_mapping.keys():
                for tk in self.sequence_mapping[seq]:
                    f.write("%06d" % tk + "\n")

    for idx, sample_token in enumerate(sample_tokens):
        # Get sample data.
        sample = self.nusc.get('sample', sample_token)
        sample_annotation_tokens = sample['anns']
        cam_front_token = sample['data'][self.cam_name]
        lidar_token = sample['data'][self.lidar_name]
        sample_name = "%06d" % idx

        # Retrieve sensor records.
        sd_record_cam = self.nusc.get('sample_data', cam_front_token)
        sd_record_lid = self.nusc.get('sample_data', lidar_token)
        cs_record_cam = self.nusc.get('calibrated_sensor', sd_record_cam['calibrated_sensor_token'])
        cs_record_lid = self.nusc.get('calibrated_sensor', sd_record_lid['calibrated_sensor_token'])

        # Combine transformations and convert to KITTI format.
        # Note: cam uses same conventions in KITTI and nuScenes.
        lid_to_ego = transform_matrix(cs_record_lid['translation'], Quaternion(cs_record_lid['rotation']),
                                      inverse=False)
        ego_to_cam = transform_matrix(cs_record_cam['translation'], Quaternion(cs_record_cam['rotation']),
                                      inverse=True)
        velo_to_cam = np.dot(ego_to_cam, lid_to_ego)

        # Convert from KITTI to nuScenes LIDAR coordinates, where we apply velo_to_cam.
        velo_to_cam_kitti = np.dot(velo_to_cam, kitti_to_nu_lidar.transformation_matrix)

        # Currently not used.
        imu_to_velo_kitti = np.zeros((3, 4))  # Dummy values.
        r0_rect = Quaternion(axis=[1, 0, 0], angle=0)  # Dummy values.

        # Projection matrix.
        p_left_kitti = np.zeros((3, 4))
        p_left_kitti[:3, :3] = cs_record_cam['camera_intrinsic']  # Cameras are always rectified.

        # Create KITTI style transforms.
        velo_to_cam_rot = velo_to_cam_kitti[:3, :3]
        velo_to_cam_trans = velo_to_cam_kitti[:3, 3]

        # Check that the rotation has the same format as in KITTI.
        assert (velo_to_cam_rot.round(0) == np.array([[0, -1, 0], [0, 0, -1], [1, 0, 0]])).all()
        assert (velo_to_cam_trans[1:3] < 0).all()

        # Retrieve the token from the lidar.
        # Note that this may be confusing as the filename of the camera will include the timestamp of the lidar,
        # not the camera.
        filename_cam_full = sd_record_cam['filename']
        filename_lid_full = sd_record_lid['filename']
        # token = '%06d' % token_idx  # Alternative to use KITTI names.
        token_idx += 1

        # Convert image (jpg to png).
        src_im_path = os.path.join(self.nusc.dataroot, filename_cam_full)
        dst_im_path = os.path.join(image_folder, sample_name + '.png')
        if not os.path.exists(dst_im_path):
            im = Image.open(src_im_path)
            im.save(dst_im_path, "PNG")

        # Convert lidar.
        # Note that we are only using a single sweep, instead of the commonly used n sweeps.
        src_lid_path = os.path.join(self.nusc.dataroot, filename_lid_full)
        dst_lid_path = os.path.join(lidar_folder, sample_name + '.bin')
        assert not dst_lid_path.endswith('.pcd.bin')
        pcl = LidarPointCloud.from_file(src_lid_path)
        # pcl, _ = LidarPointCloud.from_file_multisweep_future(self.nusc, sample, self.lidar_name,
        #                                                      self.lidar_name, nsweeps=5)
        pcl.rotate(kitti_to_nu_lidar_inv.rotation_matrix)  # In KITTI lidar frame.
        with open(dst_lid_path, "w") as lid_file:
            pcl.points.T.tofile(lid_file)

        # Add to tokens.
        tokens.append(sample_token)

        # Create calibration file.
        kitti_transforms = dict()
        kitti_transforms['P0'] = np.zeros((3, 4))  # Dummy values.
        kitti_transforms['P1'] = np.zeros((3, 4))  # Dummy values.
        kitti_transforms['P2'] = p_left_kitti  # Left camera transform.
        kitti_transforms['P3'] = np.zeros((3, 4))  # Dummy values.
        kitti_transforms['R0_rect'] = r0_rect.rotation_matrix  # Cameras are already rectified.
        kitti_transforms['Tr_velo_to_cam'] = np.hstack((velo_to_cam_rot, velo_to_cam_trans.reshape(3, 1)))
        kitti_transforms['Tr_imu_to_velo'] = imu_to_velo_kitti
        calib_path = os.path.join(calib_folder, sample_name + '.txt')
        with open(calib_path, "w") as calib_file:
            for (key, val) in kitti_transforms.items():
                val = val.flatten()
                val_str = '%.12e' % val[0]
                for v in val[1:]:
                    val_str += ' %.12e' % v
                calib_file.write('%s: %s\n' % (key, val_str))

        # Write label file.
        label_path = os.path.join(label_folder, sample_name + '.txt')
        if os.path.exists(label_path):
            continue  # Skip existing files.

        objects = []
        for sample_annotation_token in sample_annotation_tokens:
            sample_annotation = self.nusc.get('sample_annotation', sample_annotation_token)

            # Get box in LIDAR frame.
            _, box_lidar_nusc, _ = self.nusc.get_sample_data(
                lidar_token, box_vis_level=BoxVisibility.NONE, selected_anntokens=[sample_annotation_token])
            box_lidar_nusc = box_lidar_nusc[0]

            # Truncated: set all objects to 0, which means untruncated.
            truncated = 0.0

            # Occluded: set all objects to full visibility as this information is not available in nuScenes.
            occluded = 0

            obj = dict()

            # Convert nuScenes category to nuScenes detection challenge category.
            obj["detection_name"] = category_to_detection_name(sample_annotation['category_name'])

            # Skip categories that are not part of the nuScenes detection challenge.
            if obj["detection_name"] is None or obj["detection_name"] not in CLASS_MAP.keys():
                continue
            obj["detection_name"] = CLASS_MAP[obj["detection_name"]]

            # Convert from nuScenes to KITTI box format.
            obj["box_cam_kitti"] = KittiDB.box_nuscenes_to_kitti(
                box_lidar_nusc, Quaternion(matrix=velo_to_cam_rot), velo_to_cam_trans, r0_rect)

            # Project 3d box to 2d box in image, ignore box if it does not fall inside.
            bbox_2d = project_to_2d(obj["box_cam_kitti"], p_left_kitti, imsize[1], imsize[0])
            if bbox_2d is None:
                continue
            obj["bbox_2d"] = bbox_2d["bbox"]
            obj["truncated"] = bbox_2d["truncated"]
            # Store the occlusion value so box_to_string can read it below.
            obj["occluded"] = occluded

            # Set dummy score so we can use this file as result.
            obj["box_cam_kitti"].score = 0

            v = np.dot(obj["box_cam_kitti"].rotation_matrix, np.array([1, 0, 0]))
            rot_y = -np.arctan2(v[2], v[0])
            obj["alpha"] = -np.arctan2(obj["box_cam_kitti"].center[0], obj["box_cam_kitti"].center[2]) + rot_y
            obj["depth"] = np.linalg.norm(np.array(obj["box_cam_kitti"].center[:3]))
            objects.append(obj)

        objects = postprocessing(objects, imsize[1], imsize[0])

        with open(label_path, "w") as label_file:
            for obj in objects:
                # Convert box to output string format.
                output = box_to_string(name=obj["detection_name"], box=obj["box_cam_kitti"],
                                       bbox_2d=obj["bbox_2d"], truncation=obj["truncated"],
                                       occlusion=obj["occluded"], alpha=obj["alpha"])
                label_file.write(output + '\n')
def process_data(data_path, version, output_path, val_split):
    nusc = NuScenes(version=version, dataroot=data_path, verbose=True)
    splits = create_splits_scenes()
    train_scenes, val_scenes = train_test_split(
        splits['train' if 'mini' not in version else 'mini_train'], test_size=val_split)
    train_scene_names = splits['train' if 'mini' not in version else 'mini_train']
    val_scene_names = []  # val_scenes
    test_scene_names = splits['val' if 'mini' not in version else 'mini_val']

    ns_scene_names = dict()
    ns_scene_names['train'] = train_scene_names
    ns_scene_names['val'] = val_scene_names
    ns_scene_names['test'] = test_scene_names

    for data_class in ['train', 'val', 'test']:
        env = Environment(node_type_list=['VEHICLE', 'PEDESTRIAN'], standardization=standardization)
        attention_radius = dict()
        attention_radius[(env.NodeType.PEDESTRIAN, env.NodeType.PEDESTRIAN)] = 10.0
        attention_radius[(env.NodeType.PEDESTRIAN, env.NodeType.VEHICLE)] = 20.0
        attention_radius[(env.NodeType.VEHICLE, env.NodeType.PEDESTRIAN)] = 20.0
        attention_radius[(env.NodeType.VEHICLE, env.NodeType.VEHICLE)] = 30.0
        env.attention_radius = attention_radius
        env.robot_type = env.NodeType.VEHICLE

        scenes = []
        for ns_scene_name in tqdm(ns_scene_names[data_class]):
            ns_scene = nusc.get('scene', nusc.field2token('scene', 'name', ns_scene_name)[0])
            scene_id = int(ns_scene['name'].replace('scene-', ''))
            if scene_id in scene_blacklist:  # Some scenes have bad localization.
                continue

            scene = process_scene(ns_scene, env, nusc, data_path)
            if scene is not None:
                if data_class == 'train':
                    # Augment training scenes by rotating them in 15 degree increments.
                    scene.augmented = list()
                    angles = np.arange(0, 360, 15)
                    for angle in angles:
                        scene.augmented.append(augment_scene(scene, angle))
                scenes.append(scene)

        print(f'Processed {len(scenes)} scenes')

        env.scenes = scenes

        if len(scenes) > 0:
            mini_string = ''
            if 'mini' in version:
                mini_string = '_mini'
            data_dict_path = os.path.join(output_path, 'nuScenes_' + data_class + mini_string + '_full.pkl')
            with open(data_dict_path, 'wb') as f:
                dill.dump(env, f, protocol=dill.HIGHEST_PROTOCOL)
            print('Saved Environment!')

        global total
        global curv_0_2
        global curv_0_1
        print(f"Total Nodes: {total}")
        print(f"Curvature > 0.1 Nodes: {curv_0_1}")
        print(f"Curvature > 0.2 Nodes: {curv_0_2}")
        total = 0
        curv_0_1 = 0
        curv_0_2 = 0
def main():
    input_root, output_root, do_trajectories, do_maps = get_command_line_args()
    print("Preprocessing Script for nuScenes Dataset.")
    print("Trajectories: {}, Maps: {}".format("Y" if do_trajectories else "N", "Y" if do_maps else "N"))

    if do_trajectories:
        nusc = NuScenes(version='v1.0-trainval', dataroot=input_root)

        name2ind = {}  # Maps "scene-name" to nusc.scene (list) index.
        for ind, member in enumerate(nusc.scene):
            name2ind[member['name']] = ind

        token2attr = {}  # Maps attribute_token to attribute string.
        for attribute in nusc.attribute:
            token2attr[attribute['token']] = attribute['name']

        splits = create_splits_scenes()

    if do_maps:
        from nuscenes.map_expansion.map_api import NuScenesMap

        city_list = ['singapore-onenorth', 'singapore-hollandvillage', 'singapore-queenstown', 'boston-seaport']
        for city_name in city_list:
            scale_h = scale_w = 2
            print("Generating maps for {:s}.".format(city_name))

            mask_path = P(output_root).joinpath('raw_map', '{:s}_mask.pkl'.format(city_name))
            dt_path = P(output_root).joinpath('raw_map', '{:s}_dt.pkl'.format(city_name))
            mask_vis_path = P(output_root).joinpath('raw_map_visualization', '{:s}_mask_vis.png'.format(city_name))
            dt_vis_path = P(output_root).joinpath('raw_map_visualization', '{:s}_dt_vis.png'.format(city_name))
            mask_vis_path.parent.mkdir(parents=True, exist_ok=True)
            mask_path.parent.mkdir(parents=True, exist_ok=True)

            nusc_map = NuScenesMap(input_root, city_name)

            print("Calculating a map mask with scale_h: {:d}, scale_w: {:d}... ".format(scale_h, scale_w),
                  end="", flush=True)
            map_mask = get_drivable_area_mask(nusc_map, scale_h=2, scale_w=2)
            print("Done.")

            print("Calculating Signed Distance Transform... ", end="", flush=True)
            image = map_mask.astype(np.int32)
            invert_image = 1 - image
            # Signed distance: negative inside non-drivable regions, positive inside drivable regions.
            dt = np.where(invert_image, -distance_transform_edt(invert_image), distance_transform_edt(image))
            print("Done.")

            print("Saving Results... ", end="", flush=True)
            dump({'map': map_mask, 'scale_h': 2, 'scale_w': 2}, mask_path)
            dump({'map': dt, 'scale_h': 2, 'scale_w': 2}, dt_path)
            mask_vis = (map_mask * 255).astype(np.uint8)
            dt_max = dt.max()
            dt_min = dt.min()
            dt_vis = ((dt - dt_min) / (dt_max - dt_min) * 255).astype(np.uint8)
            cv2.imwrite(str(mask_vis_path), mask_vis)
            cv2.imwrite(str(dt_vis_path), dt_vis)
            print("Done. Saved {:s}, {:s}, {:s}, and {:s}.".format(
                str(mask_path), str(mask_vis_path), str(dt_path), str(dt_vis_path)))

    if do_trajectories:
        for partition in ['train', 'train_val', 'val']:
            print("Generating Trajectories for {:s} set.".format(partition))
            if 'train' in partition:
                scene_list = splits['train']
                if partition == "train":
                    scene_list = scene_list[NUM_IN_TRAIN_VAL:]
                if partition == "train_val":
                    scene_list = scene_list[:NUM_IN_TRAIN_VAL]
            else:
                scene_list = splits['val']

            pool = Pool(cpu_count())
            callback = Counting_Callback(task_name="Trajectory Imputation & Smoothing", num_data=len(scene_list))
            for name in scene_list:
                """
                Generate a raw DataFrame object for each scene_name.
                Filter object categories other than "human" and "vehicle".
                Perform Kalman Smoothing and/or rule-based Imputation.
                """
                ind = name2ind[name]

                scene = nusc.scene[ind]
                log = nusc.get('log', scene['log_token'])
                location = log['location']

                data_dict = annotation_dict()

                sample_token = scene['first_sample_token']
                last_sample_token = scene['last_sample_token']
                frame = 0
                passed_last = False
                while not passed_last:
                    sample_data = nusc.get("sample", sample_token)
                    timestamp = sample_data["timestamp"]

                    # Gather pose token from LiDAR since it has timestamp synced with sample_data.
                    lidar_data = nusc.get("sample_data", sample_data["data"]["LIDAR_TOP"])
                    ego_pose_token = lidar_data['ego_pose_token']
                    ego_pose_data = nusc.get("ego_pose", ego_pose_token)
                    ego_translation = ego_pose_data["translation"]
                    ego_rotation = ego_pose_data["rotation"]

                    # Append ego motion data.
                    data_dict.append(location, sample_token, '00000000000000000000000000000000',
                                     'vehicle.ego', 'ego', timestamp, frame, ego_translation, ego_rotation)

                    for anns_token in sample_data['anns']:
                        anns_data = nusc.get("sample_annotation", anns_token)
                        instance_token = anns_data['instance_token']
                        instance_category = anns_data['category_name']
                        instance_attributes = [token2attr[token] for token in anns_data['attribute_tokens']]
                        instance_attributes = ", ".join(instance_attributes)
                        instance_translation = anns_data["translation"]
                        instance_rotation = anns_data["rotation"]

                        # Append instance motion data.
                        data_dict.append(location, sample_token, instance_token, instance_category,
                                         instance_attributes, timestamp, frame, instance_translation,
                                         instance_rotation)

                    # Go to the next sample.
                    if sample_token == last_sample_token or len(sample_data['next']) == 0:
                        passed_last = True
                    else:
                        sample_token = sample_data['next']
                        frame += 1

                df = pd.DataFrame.from_dict(data_dict)  # Generate a DataFrame.
                pool.apply_async(kalman_smoother, (df.copy(), name), callback=callback)  # Perform Kalman smoothing.
            pool.close()
            pool.join()

            # Get Kalman-smoothed results and sort w.r.t. scene_name.
            smoothed_trajectories = callback.get_results()
            smoothed_trajectories.sort(key=lambda x: x[1])

            pool = Pool(cpu_count())
            callback = Counting_Callback(task_name="Trajectory Chopping & Sample Generation",
                                         num_data=float('inf'))
            num_data = 0
            for df, scene_name in smoothed_trajectories:
                """
                Chop a smoothed DataFrame into multiple samples (~33 samples per scene)
                such that each sample spans 5 seconds where the reference frame is set
                at the 2 second's frame. Then, split the sample into obsv (0~2 seconds)
                and pred (2~5 seconds) files.
                """
                scn_code = int(scene_name.split('-')[-1])
                frames = df.FRAME.to_list()
                initial_frame = frames[0]
                last_frame = frames[-1]
                for ref_frame in range(initial_frame + 3, last_frame - 5):
                    obsv_path = P(output_root).joinpath(partition, 'observation',
                                                        '{:04d}-{:03d}.pkl'.format(scn_code, ref_frame))
                    obsv_path.parent.mkdir(parents=True, exist_ok=True)
                    pred_path = P(output_root).joinpath(partition, 'prediction',
                                                        '{:04d}-{:03d}.pkl'.format(scn_code, ref_frame))
                    pred_path.parent.mkdir(parents=True, exist_ok=True)
                    pool.apply_async(generate_trajectories, (df.copy(), ref_frame, obsv_path, pred_path),
                                     callback=callback)
                    num_data += 1
            callback.set_num_data(num_data)
            pool.close()
            pool.join()
            print("Saved {:d} {:s} samples at {:s}.".format(num_data, partition,
                                                            str(P(output_root).joinpath(partition))))

        # Create train_all set using symbolic links.
        print("Making symlinks to form train_all split... ", end="", flush=True)
        trainall_dirname = 'train_all'
        trainall_obsv_path = P(output_root).joinpath('{:s}/observation'.format(trainall_dirname))
        trainall_obsv_path.mkdir(parents=True, exist_ok=True)
        trainall_pred_path = P(output_root).joinpath('{:s}/prediction'.format(trainall_dirname))
        trainall_pred_path.mkdir(parents=True, exist_ok=True)

        train_path = P(output_root).joinpath('train')
        train_obsv_pkl = list(train_path.glob('observation/*.pkl'))
        train_pred_pkl = list(train_path.glob('prediction/*.pkl'))
        trainval_path = P(output_root).joinpath('train_val')
        trainval_obsv_pkl = list(trainval_path.glob('observation/*.pkl'))
        trainval_pred_pkl = list(trainval_path.glob('prediction/*.pkl'))

        obsv_pkl_list = train_obsv_pkl + trainval_obsv_pkl
        pred_pkl_list = train_pred_pkl + trainval_pred_pkl
        for obsv_pkl, pred_pkl in zip(obsv_pkl_list, pred_pkl_list):
            obsv_filename, obsv_split = obsv_pkl.name, obsv_pkl.parent.parent.stem
            pred_filename, pred_split = pred_pkl.name, pred_pkl.parent.parent.stem

            obsv_relpath = P('../../{:s}/observation/'.format(obsv_split)).joinpath(obsv_filename)
            obsv_link = trainall_obsv_path.joinpath(obsv_filename)
            obsv_link.symlink_to(obsv_relpath)

            pred_relpath = P('../../{:s}/prediction/'.format(pred_split)).joinpath(pred_filename)
            pred_link = trainall_pred_path.joinpath(pred_filename)
            pred_link.symlink_to(pred_relpath)
        print(" Done.")
def create_tracks(all_boxes: EvalBoxes, nusc: NuScenes, eval_split: str, gt: bool) \
        -> Dict[str, Dict[int, List[TrackingBox]]]:
    """
    Returns all tracks for all scenes. Samples within a track are sorted in chronological order.
    This can be applied either to GT or predictions.
    :param all_boxes: Holds all GT or predicted boxes.
    :param nusc: The NuScenes instance to load the sample information from.
    :param eval_split: The evaluation split for which we create tracks.
    :param gt: Whether we are creating tracks for GT or predictions.
    :return: The tracks.
    """
    # Only keep samples from this split.
    splits = create_splits_scenes()
    scene_tokens = set()
    for sample_token in all_boxes.sample_tokens:
        scene_token = nusc.get('sample', sample_token)['scene_token']
        scene = nusc.get('scene', scene_token)
        if scene['name'] in splits[eval_split]:
            scene_tokens.add(scene_token)

    # Tracks are stored as dict {scene_token: {timestamp: List[TrackingBox]}}.
    tracks = defaultdict(lambda: defaultdict(list))

    # Init all scenes and timestamps to guarantee completeness.
    for scene_token in scene_tokens:
        # Init all timestamps in this scene.
        scene = nusc.get('scene', scene_token)
        cur_sample_token = scene['first_sample_token']
        while True:
            # Initialize array for current timestamp.
            cur_sample = nusc.get('sample', cur_sample_token)
            tracks[scene_token][cur_sample['timestamp']] = []

            # Abort after the last sample.
            if cur_sample_token == scene['last_sample_token']:
                break

            # Move to next sample.
            cur_sample_token = cur_sample['next']

    # Group annotations wrt scene and timestamp.
    for sample_token in all_boxes.sample_tokens:
        sample_record = nusc.get('sample', sample_token)
        scene_token = sample_record['scene_token']
        tracks[scene_token][sample_record['timestamp']] = all_boxes.boxes[sample_token]

    # Replace box scores with track score (average box score). This only affects the
    # compute_thresholds method and should be done before interpolation to avoid
    # diluting the original scores with interpolated boxes.
    if not gt:
        for scene_id, scene_tracks in tracks.items():
            # For each track_id, collect the scores.
            track_id_scores = defaultdict(list)
            for timestamp, boxes in scene_tracks.items():
                for box in boxes:
                    track_id_scores[box.tracking_id].append(box.tracking_score)

            # Compute average scores for each track.
            track_id_avg_scores = {}
            for tracking_id, scores in track_id_scores.items():
                track_id_avg_scores[tracking_id] = np.mean(scores)

            # Apply average score to each box.
            for timestamp, boxes in scene_tracks.items():
                for box in boxes:
                    box.tracking_score = track_id_avg_scores[box.tracking_id]

    # Interpolate GT and predicted tracks.
    for scene_token in tracks.keys():
        tracks[scene_token] = interpolate_tracks(tracks[scene_token])

        if not gt:
            # Make sure predictions are sorted in time. (Always true for GT.)
            tracks[scene_token] = defaultdict(list, sorted(tracks[scene_token].items(), key=lambda kv: kv[0]))

    return tracks
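# A usage sketch for create_tracks(), assuming the devkit loaders are available;
# the dataroot below is a placeholder. load_gt returns an EvalBoxes holding all
# ground-truth TrackingBox instances for the chosen split.
from nuscenes import NuScenes
from nuscenes.eval.common.loaders import load_gt
from nuscenes.eval.tracking.data_classes import TrackingBox

nusc = NuScenes(version='v1.0-trainval', dataroot='/data/sets/nuscenes', verbose=False)
gt_boxes = load_gt(nusc, 'val', TrackingBox, verbose=True)
gt_tracks = create_tracks(gt_boxes, nusc, 'val', gt=True)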
imgs = []            # Stores image paths
img_detections = []  # Stores detections for each image index

file1 = open('videos_tokens.txt')
tokens_list = file1.readlines()
file1.close()

valid_channels = [
    'CAM_FRONT', 'CAM_FRONT_LEFT', 'CAM_FRONT_RIGHT',
    'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT',
]

lll = []
splits = create_splits_scenes()
for camera_channel in valid_channels:
    for token in tokens_list:
        first_time = True
        Dict = {}
        current_recs = {}
        num_objects = 0
        scene_token = token[:-1]  # Strip the trailing newline left by readlines().
        scene_record = nusc.get('scene', scene_token)
        scene_rec = scene_record
        scene = nusc.get('scene', scene_token)
        scene_name = scene_record['name']
        scene_id = int(scene_name.replace('scene-', ''))
        if scene_name in splits['val']:
            spl = '/test/'
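# For reference, create_splits_scenes() maps split names to lists of scene-name
# strings, so the membership test above is a plain list lookup. A quick,
# illustrative sanity check (assuming the devkit import used above):
splits = create_splits_scenes()
assert set(splits['train']).isdisjoint(splits['val']), "train/val scenes overlap"
print('train scenes: {:d}, val scenes: {:d}'.format(len(splits['train']), len(splits['val'])))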
def create_nuscenes_dataset(version, output_path, mode='train'):
    """
    For each video, we store a sequence of data with the following information:
    - image (H, W, 3) jpg
    - instance segmentation (H, W) np.array<np.uint8> with values from [0, MAX_INSTANCES-1].
      Note that the instances are not aligned with position/velocity, i.e. id 1 in instance
      segmentation corresponds to element 0 in position/velocity.
      When including more classes, we can store a 4D tensor (N_CLASSES, H, W).
    - position (MAX_INSTANCES, 3) np.array
    - velocity (MAX_INSTANCES, 3) np.array
    """
    # Yaw angle is different on camera: https://github.com/nutonomy/nuscenes-devkit/issues/21

    # Load Mask R-CNN.
    # Update the config options with the config file.
    cfg.merge_from_file(MASK_RCNN_CONFIG_FILE)
    # Manually override some options.
    # cfg.merge_from_list(['MODEL.DEVICE', 'cpu'])
    mask_rcnn = COCODemo(
        cfg,
        confidence_threshold=0.8,
    )

    # Load NuScenes.
    nusc = NuScenes(version=version, dataroot=NUSCENES_ROOT, verbose=True)
    scene_splits = create_splits_scenes()

    print('Begin iterating over Nuscenes')
    print('-' * 30)
    # Loop over dataset.
    for scene in nusc.scene:
        # Ensure the scene belongs to the split.
        if scene['name'] not in scene_splits[mode]:
            continue

        scene_path = os.path.join(output_path, mode, scene['name'])
        print('scene_path: {}'.format(scene_path))
        os.makedirs(scene_path, exist_ok=True)

        t = 0
        sample_token = scene['first_sample_token']
        while sample_token:
            print('Image {}'.format(t))
            sample = nusc.get('sample', sample_token)
            data = match_instance_seg_and_bbox(nusc, mask_rcnn, sample)

            if data is not None:
                data['image'].save(os.path.join(scene_path, '{:04d}_image_tmp.jpg'.format(t)))
                np.save(os.path.join(scene_path, '{:04d}_instance_seg_tmp.npy'.format(t)), data['instance_seg'])
                np.save(os.path.join(scene_path, '{:04d}_position_tmp.npy'.format(t)), data['position'])
                np.save(os.path.join(scene_path, '{:04d}_velocity_tmp.npy'.format(t)), data['velocity'])
                np.save(os.path.join(scene_path, '{:04d}_orientation_tmp.npy'.format(t)), data['orientation'])
                np.save(os.path.join(scene_path, '{:04d}_size_tmp.npy'.format(t)), data['size'])
                np.save(os.path.join(scene_path, '{:04d}_token_tmp.npy'.format(t)), data['token'])
                np.save(os.path.join(scene_path, '{:04d}_intrinsics_tmp.npy'.format(t)), data['intrinsics'])
                np.save(os.path.join(scene_path, '{:04d}_sample_token_tmp.npy'.format(t)), np.array([sample_token]))

            sample_token = sample['next']
            t += 1

        link_instance_ids(nusc, scene_path)
        print('------------------\n')

    print('Computing depth maps')
    print('-' * 30)
    # Compute depth map here.
    generate_depth(output_path, mode)
    print('Dataset saved.')
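# A hedged invocation sketch; MASK_RCNN_CONFIG_FILE and NUSCENES_ROOT are module
# constants defined elsewhere in the original script, and the output path below
# is a placeholder.
if __name__ == '__main__':
    create_nuscenes_dataset('v1.0-trainval', '/data/nuscenes_processed', mode='train')
    create_nuscenes_dataset('v1.0-trainval', '/data/nuscenes_processed', mode='val')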