def main():
    SCENE_SPLITS['mini-val'] = SCENE_SPLITS['val']
    if not os.path.exists(OUT_PATH):
        os.mkdir(OUT_PATH)
    for split in SPLITS:
        data_path = DATA_PATH + '{}/'.format(SPLITS[split])
        nusc = NuScenes(
            version=SPLITS[split], dataroot=data_path, verbose=True)
        out_path = OUT_PATH + '{}.json'.format(split)
        categories_info = [{'name': CATS[i], 'id': i + 1}
                           for i in range(len(CATS))]
        ret = {'images': [], 'annotations': [], 'categories': categories_info,
               'videos': [], 'attributes': ATTRIBUTE_TO_ID}
        num_images = 0
        num_anns = 0
        num_videos = 0

        # A "sample" in nuScenes refers to a timestamp with 6 cameras and 1 LIDAR.
        for sample in nusc.sample:
            scene_name = nusc.get('scene', sample['scene_token'])['name']
            if not (split in ['mini', 'test']) and \
               not (scene_name in SCENE_SPLITS[split]):
                continue
            if sample['prev'] == '':
                print('scene_name', scene_name)
                num_videos += 1
                ret['videos'].append({'id': num_videos, 'file_name': scene_name})
                frame_ids = {k: 0 for k in sample['data']}
                track_ids = {}
            # We decompose a sample into 6 images in our case.
            for sensor_name in sample['data']:
                if sensor_name in USED_SENSOR:
                    image_token = sample['data'][sensor_name]
                    image_data = nusc.get('sample_data', image_token)
                    num_images += 1

                    # Complex coordinate transform. This will take time to understand.
                    sd_record = nusc.get('sample_data', image_token)
                    cs_record = nusc.get(
                        'calibrated_sensor', sd_record['calibrated_sensor_token'])
                    pose_record = nusc.get('ego_pose', sd_record['ego_pose_token'])
                    global_from_car = transform_matrix(
                        pose_record['translation'],
                        Quaternion(pose_record['rotation']), inverse=False)
                    car_from_sensor = transform_matrix(
                        cs_record['translation'], Quaternion(cs_record['rotation']),
                        inverse=False)
                    trans_matrix = np.dot(global_from_car, car_from_sensor)
                    _, boxes, camera_intrinsic = nusc.get_sample_data(
                        image_token, box_vis_level=BoxVisibility.ANY)
                    calib = np.eye(4, dtype=np.float32)
                    calib[:3, :3] = camera_intrinsic
                    calib = calib[:3]
                    frame_ids[sensor_name] += 1

                    # image information in COCO format
                    image_info = {'id': num_images,
                                  'file_name': image_data['filename'],
                                  'calib': calib.tolist(),
                                  'video_id': num_videos,
                                  'frame_id': frame_ids[sensor_name],
                                  'sensor_id': SENSOR_ID[sensor_name],
                                  'sample_token': sample['token'],
                                  'trans_matrix': trans_matrix.tolist(),
                                  'width': sd_record['width'],
                                  'height': sd_record['height'],
                                  'pose_record_trans': pose_record['translation'],
                                  'pose_record_rot': pose_record['rotation'],
                                  'cs_record_trans': cs_record['translation'],
                                  'cs_record_rot': cs_record['rotation']}
                    ret['images'].append(image_info)
                    anns = []
                    for box in boxes:
                        det_name = category_to_detection_name(box.name)
                        if det_name is None:
                            continue
                        num_anns += 1
                        v = np.dot(box.rotation_matrix, np.array([1, 0, 0]))
                        yaw = -np.arctan2(v[2], v[0])
                        box.translate(np.array([0, box.wlh[2] / 2, 0]))
                        category_id = CAT_IDS[det_name]

                        amodel_center = project_to_image(
                            np.array([box.center[0],
                                      box.center[1] - box.wlh[2] / 2,
                                      box.center[2]],
                                     np.float32).reshape(1, 3), calib)[0].tolist()
                        sample_ann = nusc.get('sample_annotation', box.token)
                        instance_token = sample_ann['instance_token']
                        if not (instance_token in track_ids):
                            track_ids[instance_token] = len(track_ids) + 1
                        attribute_tokens = sample_ann['attribute_tokens']
                        attributes = [nusc.get('attribute', att_token)['name']
                                      for att_token in attribute_tokens]
                        att = '' if len(attributes) == 0 else attributes[0]
                        if len(attributes) > 1:
                            print(attributes)
                            import pdb; pdb.set_trace()
                        track_id = track_ids[instance_token]
                        vel = nusc.box_velocity(box.token)  # global frame
                        vel = np.dot(np.linalg.inv(trans_matrix),
                                     np.array([vel[0], vel[1], vel[2], 0],
                                              np.float32)).tolist()

                        # instance information in COCO format
                        ann = {
                            'id': num_anns,
                            'image_id': num_images,
                            'category_id': category_id,
                            'dim': [box.wlh[2], box.wlh[0], box.wlh[1]],
                            'location': [box.center[0], box.center[1], box.center[2]],
                            'depth': box.center[2],
                            'occluded': 0,
                            'truncated': 0,
                            'rotation_y': yaw,
                            'amodel_center': amodel_center,
                            'iscrowd': 0,
                            'track_id': track_id,
                            'attributes': ATTRIBUTE_TO_ID[att],
                            'velocity': vel
                        }

                        bbox = KittiDB.project_kitti_box_to_image(
                            copy.deepcopy(box), camera_intrinsic, imsize=(1600, 900))
                        alpha = _rot_y2alpha(yaw, (bbox[0] + bbox[2]) / 2,
                                             camera_intrinsic[0, 2],
                                             camera_intrinsic[0, 0])
                        ann['bbox'] = [bbox[0], bbox[1],
                                       bbox[2] - bbox[0], bbox[3] - bbox[1]]
                        ann['area'] = (bbox[2] - bbox[0]) * (bbox[3] - bbox[1])
                        ann['alpha'] = alpha
                        anns.append(ann)

                    # Filter out bounding boxes outside the image
                    visable_anns = []
                    for i in range(len(anns)):
                        vis = True
                        for j in range(len(anns)):
                            if anns[i]['depth'] - min(anns[i]['dim']) / 2 > \
                               anns[j]['depth'] + max(anns[j]['dim']) / 2 and \
                               _bbox_inside(anns[i]['bbox'], anns[j]['bbox']):
                                vis = False
                                break
                        if vis:
                            visable_anns.append(anns[i])
                        else:
                            pass

                    for ann in visable_anns:
                        ret['annotations'].append(ann)
                    if DEBUG:
                        img_path = data_path + image_info['file_name']
                        img = cv2.imread(img_path)
                        img_3d = img.copy()
                        for ann in visable_anns:
                            bbox = ann['bbox']
                            cv2.rectangle(img, (int(bbox[0]), int(bbox[1])),
                                          (int(bbox[2] + bbox[0]), int(bbox[3] + bbox[1])),
                                          (0, 0, 255), 3, lineType=cv2.LINE_AA)
                            box_3d = compute_box_3d(ann['dim'], ann['location'],
                                                    ann['rotation_y'])
                            box_2d = project_to_image(box_3d, calib)
                            img_3d = draw_box_3d(img_3d, box_2d)

                            pt_3d = unproject_2d_to_3d(ann['amodel_center'],
                                                       ann['depth'], calib)
                            pt_3d[1] += ann['dim'][0] / 2
                            print('location', ann['location'])
                            print('loc model', pt_3d)
                            pt_2d = np.array([(bbox[0] + bbox[2]) / 2,
                                              (bbox[1] + bbox[3]) / 2],
                                             dtype=np.float32)
                            pt_3d = unproject_2d_to_3d(pt_2d, ann['depth'], calib)
                            pt_3d[1] += ann['dim'][0] / 2
                            print('loc ', pt_3d)
                        cv2.imshow('img', img)
                        cv2.imshow('img_3d', img_3d)
                        cv2.waitKey()
                        nusc.render_sample_data(image_token)
                        plt.show()
        print('reordering images')
        images = ret['images']
        video_sensor_to_images = {}
        for image_info in images:
            tmp_seq_id = image_info['video_id'] * 20 + image_info['sensor_id']
            if tmp_seq_id in video_sensor_to_images:
                video_sensor_to_images[tmp_seq_id].append(image_info)
            else:
                video_sensor_to_images[tmp_seq_id] = [image_info]
        ret['images'] = []
        for tmp_seq_id in sorted(video_sensor_to_images):
            ret['images'] = ret['images'] + video_sensor_to_images[tmp_seq_id]

        print('{} {} images {} boxes'.format(
            split, len(ret['images']), len(ret['annotations'])))
        print('out_path', out_path)
        json.dump(ret, open(out_path, 'w'))
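# --- Illustrative sketch (not part of the converter above) -------------------
# The densest step in main() is the pose handling: trans_matrix is the product
# global_from_car @ car_from_sensor, so a homogeneous point in sensor
# coordinates maps into the global frame as p_global = trans_matrix @ p_sensor,
# and the global-frame box velocity is pulled back into the sensor frame with
# inv(trans_matrix) and a 0 in the homogeneous slot (the translation drops out).
# The helper and the pose values below are made-up stand-ins for illustration.
import numpy as np


def _toy_transform_matrix(translation, rotation_matrix):
    """Build a 4x4 homogeneous transform from a 3x3 rotation and a translation."""
    tm = np.eye(4)
    tm[:3, :3] = rotation_matrix
    tm[:3, 3] = translation
    return tm


def _pose_transform_demo():
    yaw = np.pi / 2  # hypothetical ego heading in the global frame
    rot_ego = np.array([[np.cos(yaw), -np.sin(yaw), 0.],
                        [np.sin(yaw), np.cos(yaw), 0.],
                        [0., 0., 1.]])
    global_from_car = _toy_transform_matrix([100., 50., 0.], rot_ego)
    car_from_sensor = _toy_transform_matrix([0., 0., 1.5], np.eye(3))
    trans_matrix = global_from_car @ car_from_sensor

    p_sensor = np.array([1., 0., 0., 1.])   # homogeneous point, sensor frame
    p_global = trans_matrix @ p_sensor      # -> global frame
    v_global = np.array([2., 0., 0., 0.])   # direction-like quantity (velocity)
    v_sensor = np.linalg.inv(trans_matrix) @ v_global
    return p_global[:3], v_sensor[:3]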
class NuScenesDataset(Dataset):
    """
    NuScenes dataset loader and producer
    """
    def __init__(self, mode, split='training', img_list='trainval',
                 is_training=True, workers_num=1):
        """
        mode: 'loading', 'preprocessing'
        """
        self.mode = mode
        self.dataset_dir = os.path.join(cfg.ROOT_DIR,
                                        cfg.DATASET.KITTI.BASE_DIR_PATH)
        self.max_sweeps = cfg.DATASET.NUSCENES.NSWEEPS
        self.is_training = is_training
        self.img_list = img_list
        self.workers_num = workers_num

        # cast labels from NuScenes name to useful name
        self.useful_cls_dict = {
            'animal': 'ignore',
            'human.pedestrian.personal_mobility': 'ignore',
            'human.pedestrian.stroller': 'ignore',
            'human.pedestrian.wheelchair': 'ignore',
            'movable_object.debris': 'ignore',
            'movable_object.pushable_pullable': 'ignore',
            'static_object.bicycle_rack': 'ignore',
            'vehicle.emergency.ambulance': 'ignore',
            'vehicle.emergency.police': 'ignore',
            'movable_object.barrier': 'barrier',
            'vehicle.bicycle': 'bicycle',
            'vehicle.bus.bendy': 'bus',
            'vehicle.bus.rigid': 'bus',
            'vehicle.car': 'car',
            'vehicle.construction': 'construction_vehicle',
            'vehicle.motorcycle': 'motorcycle',
            'human.pedestrian.adult': 'pedestrian',
            'human.pedestrian.child': 'pedestrian',
            'human.pedestrian.construction_worker': 'pedestrian',
            'human.pedestrian.police_officer': 'pedestrian',
            'movable_object.trafficcone': 'traffic_cone',
            'vehicle.trailer': 'trailer',
            'vehicle.truck': 'truck'
        }
        # cast attribute to index
        self.attribute_idx_list = {
            'vehicle.moving': 0,
            'vehicle.stopped': 1,
            'vehicle.parked': 2,
            'cycle.with_rider': 3,
            'cycle.without_rider': 4,
            'pedestrian.sitting_lying_down': 5,
            'pedestrian.standing': 6,
            'pedestrian.moving': 7,
            'default': -1,
        }
        self.idx_attribute_list = dict([
            (v, k) for k, v in self.attribute_idx_list.items()
        ])
        self.AttributeIdxLabelMapping = {
            "car": ['vehicle.moving', 'vehicle.stopped', 'vehicle.parked'],
            "truck": ['vehicle.moving', 'vehicle.stopped', 'vehicle.parked'],
            "bus": ['vehicle.moving', 'vehicle.stopped', 'vehicle.parked'],
            "trailer": ['vehicle.moving', 'vehicle.stopped', 'vehicle.parked'],
            "construction_vehicle": ['vehicle.moving', 'vehicle.stopped', 'vehicle.parked'],
            "pedestrian": [
                'pedestrian.sitting_lying_down', 'pedestrian.standing',
                'pedestrian.moving'
            ],
            "motorcycle": ['cycle.with_rider', 'cycle.without_rider', ''],
            "bicycle": ['cycle.with_rider', 'cycle.without_rider', ''],
            "traffic_cone": ['', '', ''],
            "barrier": ['', '', ''],
        }

        self.DefaultAttribute = {
            "car": "vehicle.parked",
            "pedestrian": "pedestrian.moving",
            "trailer": "vehicle.parked",
            "truck": "vehicle.parked",
            "bus": "vehicle.parked",
            "motorcycle": "cycle.without_rider",
            "construction_vehicle": "vehicle.parked",
            "bicycle": "cycle.without_rider",
            "barrier": "",
            "traffic_cone": "",
        }

        self.cls_list = cfg.DATASET.KITTI.CLS_LIST
        self.idx2cls_dict = dict([(idx + 1, cls)
                                  for idx, cls in enumerate(self.cls_list)])
        self.cls2idx_dict = dict([(cls, idx + 1)
                                  for idx, cls in enumerate(self.cls_list)])

        self.sv_npy_path = os.path.join(
            cfg.ROOT_DIR, cfg.DATASET.KITTI.SAVE_NUMPY_PATH, 'NuScenes',
            '{}_{}'.format(img_list, self.max_sweeps))
        self.train_list = os.path.join(self.sv_npy_path, 'infos.pkl')

        self.voxel_generator = VoxelGenerator()

        self.test_mode = cfg.TEST.TEST_MODE
        if self.test_mode == 'mAP':
            self.evaluation = self.evaluate_map
            self.logger_and_select_best = self.logger_and_select_best_map
        elif self.test_mode == 'Recall':
            self.evaluation = self.evaluate_recall
            self.logger_and_select_best = self.logger_and_select_best_recall
        else:
            raise Exception('No other evaluation mode.')

        if mode == 'loading':
            # data loader
            with open(self.train_list, 'rb') as f:
                self.train_npy_list = pickle.load(f)
            self.sample_num = len(self.train_npy_list)
            if self.is_training:
                self.data_augmentor = DataAugmentor(
                    'NuScenes', workers_num=self.workers_num)

        elif mode == 'preprocessing':
            # preprocess raw data
            if img_list == 'train':
                self.nusc = NuScenes(dataroot=self.dataset_dir,
                                     version='v1.0-trainval')
                self.scenes = [
                    scene for scene in self.nusc.scene
                    if scene['name'] in train_scene
                ]
            elif img_list == 'val':
                self.nusc = NuScenes(dataroot=self.dataset_dir,
                                     version='v1.0-trainval')
                self.scenes = [
                    scene for scene in self.nusc.scene
                    if scene['name'] in val_scene
                ]
            else:  # test
                self.nusc = NuScenes(dataroot=self.dataset_dir,
                                     version='v1.0-test')
                self.scenes = self.nusc.scene

            self.sample_data_token_list = OrderedDict()
            sample_num = 0
            for scene in self.scenes:
                # count the samples and save all sample_data tokens
                self.sample_data_token_list[scene['token']] = []
                all_sample = self.nusc.field2token('sample', 'scene_token',
                                                   scene['token'])
                sample_num += len(all_sample)
                for sample in all_sample:  # all sample token
                    sample = self.nusc.get('sample', sample)
                    cur_token = sample['token']
                    cur_data_token = sample['data']['LIDAR_TOP']
                    self.sample_data_token_list[scene['token']].append(
                        cur_data_token)
            self.sample_num = sample_num

            self.extents = cfg.DATASET.POINT_CLOUD_RANGE
            self.extents = np.reshape(self.extents, [3, 2])
            if not os.path.exists(self.sv_npy_path):
                os.makedirs(self.sv_npy_path)

            # also calculate the mean size here
            self.cls_size_dict = dict([
                (cls, np.array([0, 0, 0], dtype=np.float32))
                for cls in self.cls_list
            ])
            self.cls_num_dict = dict([(cls, 0) for cls in self.cls_list])

            # the save path for MixupDB
            if self.img_list in [
                    'train', 'val', 'trainval'
            ] and cfg.TEST.WITH_GT and cfg.TRAIN.AUGMENTATIONS.MIXUP.OPEN:
                self.mixup_db_cls_path = dict()
                self.mixup_db_trainlist_path = dict()
                self.mixup_db_class = cfg.TRAIN.AUGMENTATIONS.MIXUP.CLASS
                for cls in self.mixup_db_class:
                    mixup_db_cls_path = os.path.join(
                        cfg.ROOT_DIR, cfg.DATASET.KITTI.SAVE_NUMPY_PATH,
                        cfg.TRAIN.AUGMENTATIONS.MIXUP.SAVE_NUMPY_PATH,
                        cfg.TRAIN.AUGMENTATIONS.MIXUP.PC_LIST, '{}'.format(cls))
                    mixup_db_trainlist_path = os.path.join(
                        mixup_db_cls_path, 'train_list.txt')
                    if not os.path.exists(mixup_db_cls_path):
                        os.makedirs(mixup_db_cls_path)
                    self.mixup_db_cls_path[cls] = mixup_db_cls_path
                    self.mixup_db_trainlist_path[cls] = mixup_db_trainlist_path

    def __len__(self):
        return self.sample_num

    def load_samples(self, sample_idx, pipename):
        """ load data per thread """
        pipename = int(pipename)
        biggest_label_num = 0
        sample_dict = self.train_npy_list[sample_idx]

        points_path = sample_dict[maps_dict.KEY_POINT_CLOUD]
        sweeps = sample_dict[maps_dict.KEY_SWEEPS]
        sample_name = sample_dict[maps_dict.KEY_SAMPLE_NAME]
        cur_transformation_matrix = sample_dict[
            maps_dict.KEY_TRANSFORMRATION_MATRIX]
        ts = sample_dict[maps_dict.KEY_TIMESTAMPS] / 1e6

        # then first read points and stack points from multiple frame
        points = np.fromfile(points_path, dtype=np.float32)
        points = points.reshape((-1, 5))
        points = cast_points_to_kitti(points)
        points[:, 3] /= 255
        points[:, 4] = 0
        sweep_points_list = [points]
        original_cur_sweep_points = points
        cur_sweep_points_num = points.shape[0]
        for sweep in sweeps:
            points_sweep = np.fromfile(sweep['lidar_path'], dtype=np.float32)
            points_sweep = points_sweep.reshape((-1, 5))
            sweep_ts = sweep['timestamp'] / 1e6
            points_sweep[:, 3] /= 255
            points_sweep[:, :3] = points_sweep[:, :3] @ sweep[
                'sweep2lidar_rotation'].T
            points_sweep[:, :3] += sweep['sweep2lidar_translation']
            points_sweep[:, 4] = ts - sweep_ts
            points_sweep = cast_points_to_kitti(points_sweep)
            sweep_points_list.append(points_sweep)

        if cfg.DATASET.NUSCENES.INPUT_FEATURE_CHANNEL == 4:
            points = np.concatenate(sweep_points_list, axis=0)[:, [0, 1, 2, 4]]
        else:
            points = np.concatenate(sweep_points_list, axis=0)

        # then read groundtruth file if have
        if self.is_training or cfg.TEST.WITH_GT:
            label_boxes_3d = sample_dict[maps_dict.KEY_LABEL_BOXES_3D]
            label_boxes_3d = cast_box_3d_to_kitti_format(label_boxes_3d)

            label_classes_name = sample_dict[maps_dict.KEY_LABEL_CLASSES]
            label_classes = np.array([
                self.cls2idx_dict[label_class]
                for label_class in label_classes_name
            ])

            label_attributes = sample_dict[maps_dict.KEY_LABEL_ATTRIBUTES]
            label_velocity = sample_dict[
                maps_dict.KEY_LABEL_VELOCITY]  # [-1, 2]

            ry_cls_label, residual_angle = encode_angle2class_np(
                label_boxes_3d[:, -1], cfg.MODEL.ANGLE_CLS_NUM)
        else:  # not is_training and no_gt
            label_boxes_3d = np.zeros([1, 7], np.float32)
            label_classes = np.zeros([1], np.int32)
            label_attributes = np.zeros([1], np.int32)
            label_velocity = np.zeros([1, 2], np.float32)
            ry_cls_label = np.zeros([1], np.int32)
            residual_angle = np.zeros([1], np.float32)

        if self.is_training:  # data augmentation
            points, label_boxes_3d, label_classes, label_attributes, \
                label_velocity, cur_sweep_points_num = \
                self.data_augmentor.nuscenes_forward(
                    points, label_boxes_3d, label_classes, pipename,
                    label_attributes, label_velocity, cur_sweep_points_num)
            ry_cls_label, residual_angle = encode_angle2class_np(
                label_boxes_3d[:, -1], cfg.MODEL.ANGLE_CLS_NUM)
        cur_label_num = len(label_boxes_3d)

        # then randomly choose some points
        cur_sweep_points = points[:cur_sweep_points_num, :]  # [-1, 4]
        other_sweep_points = points[cur_sweep_points_num:, :]  # [-1, 4]
        if len(other_sweep_points) == 0:
            other_sweep_points = cur_sweep_points.copy()
        np.random.shuffle(cur_sweep_points)
        np.random.shuffle(other_sweep_points)

        input_sample_points, num_points_per_voxel = \
            self.voxel_generator.generate_nusc(
                cur_sweep_points, other_sweep_points,
                cfg.DATASET.NUSCENE.MAX_CUR_SAMPLE_POINTS_NUM
            )  # points, [num_voxels, num_points, 5], sem_labels, [num_voxels, num_points]

        cur_sample_points = input_sample_points[
            :cfg.DATASET.NUSCENE.MAX_CUR_SAMPLE_POINTS_NUM]
        other_sample_points = input_sample_points[
            cfg.DATASET.NUSCENE.MAX_CUR_SAMPLE_POINTS_NUM:]
        biggest_label_num = max(biggest_label_num, cur_label_num)
        return biggest_label_num, input_sample_points, cur_sample_points, \
            other_sample_points, label_boxes_3d, ry_cls_label, residual_angle, \
            label_classes, label_attributes, label_velocity, sample_name, \
            cur_transformation_matrix, sweeps, original_cur_sweep_points

    def load_batch(self, batch_size):
        perm = np.arange(self.sample_num).tolist()  # a list indicates each data
        dp = DataFromList(perm, is_train=self.is_training,
                          shuffle=self.is_training)
        dp = MultiProcessMapData(dp, self.load_samples, self.workers_num)

        use_list = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1]
        use_concat = [0, 0, 0, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0]

        dp = BatchDataNuscenes(dp, batch_size, use_concat=use_concat,
                               use_list=use_list)
        dp.reset_state()
        dp = dp.get_data()
        return dp

    # Preprocess data
    def preprocess_samples(self, cur_scene_key, sample_data_token):
        sample_dicts = []
        biggest_label_num = 0

        cur_sample_data = self.nusc.get('sample_data', sample_data_token)
        cur_sample_token = cur_sample_data['sample_token']
        cur_sample = self.nusc.get('sample', cur_sample_token)

        ego_pose = self.nusc.get('ego_pose', cur_sample_data['ego_pose_token'])
        calibrated_sensor = self.nusc.get(
            'calibrated_sensor', cur_sample_data['calibrated_sensor_token'])

        l2e_r = calibrated_sensor['rotation']
        l2e_t = calibrated_sensor['translation']
        e2g_r = ego_pose['rotation']
        e2g_t = ego_pose['translation']
        l2e_r_mat = Quaternion(l2e_r).rotation_matrix
        e2g_r_mat = Quaternion(e2g_r).rotation_matrix

        cur_timestamp = cur_sample['timestamp']
        cur_transformation_matrix = {
            'lidar2ego_translation': l2e_t,
            'lidar2ego_rotation': l2e_r,
            'ego2global_translation': e2g_t,
            'ego2global_rotation': e2g_r,
        }

        # get point cloud in former 0.5 second
        sweeps = []
        while len(sweeps) < self.max_sweeps:
            if not cur_sample_data['prev'] == '':  # has previous frame
                cur_sample_data = self.nusc.get('sample_data',
                                                cur_sample_data['prev'])
                cur_ego_pose = self.nusc.get('ego_pose',
                                             cur_sample_data['ego_pose_token'])
                cur_calibrated_sensor = self.nusc.get(
                    'calibrated_sensor',
                    cur_sample_data['calibrated_sensor_token'])
                cur_lidar_path, cur_sweep_boxes, _ = self.nusc.get_sample_data(
                    cur_sample_data['token'])
                sweep = {
                    "lidar_path": cur_lidar_path,
                    "sample_data_token": cur_sample_data['token'],
                    "lidar2ego_translation": cur_calibrated_sensor['translation'],
                    "lidar2ego_rotation": cur_calibrated_sensor['rotation'],
                    "ego2global_translation": cur_ego_pose['translation'],
                    "ego2global_rotation": cur_ego_pose['rotation'],
                    "timestamp": cur_sample_data["timestamp"]
                }
                l2e_r_s = sweep["lidar2ego_rotation"]
                l2e_t_s = sweep["lidar2ego_translation"]
                e2g_r_s = sweep["ego2global_rotation"]
                e2g_t_s = sweep["ego2global_translation"]
                # sweep->ego->global->ego'->lidar
                l2e_r_s_mat = Quaternion(l2e_r_s).rotation_matrix
                e2g_r_s_mat = Quaternion(e2g_r_s).rotation_matrix

                R = (l2e_r_s_mat.T @ e2g_r_s_mat.T) @ (
                    np.linalg.inv(e2g_r_mat).T @ np.linalg.inv(l2e_r_mat).T)
                T = (l2e_t_s @ e2g_r_s_mat.T + e2g_t_s) @ (
                    np.linalg.inv(e2g_r_mat).T @ np.linalg.inv(l2e_r_mat).T)
                T -= e2g_t @ (np.linalg.inv(e2g_r_mat).T @ np.linalg.inv(
                    l2e_r_mat).T) + l2e_t @ np.linalg.inv(l2e_r_mat).T
                sweep["sweep2lidar_rotation"] = R.T  # points @ R.T + T
                sweep["sweep2lidar_translation"] = T
                sweeps.append(sweep)
            else:  # prev is none
                break

        # then load gt_boxes_3d
        if self.img_list in ['train', 'val'] and cfg.TEST.WITH_GT:
            cur_data_path, all_boxes, _ = self.nusc.get_sample_data(
                sample_data_token)

            # then first parse boxes labels
            locs = np.array([box.center for box in all_boxes]).reshape(-1, 3)
            sizes = np.array([box.wlh for box in all_boxes]).reshape(-1, 3)
            rots = np.array([
                box.orientation.yaw_pitch_roll[0] for box in all_boxes
            ]).reshape(-1, 1)
            all_boxes_3d = np.concatenate([locs, sizes, -rots], axis=-1)

            annos_tokens = cur_sample['anns']
            all_velocity = np.array([
                self.nusc.box_velocity(ann_token)[:2]
                for ann_token in annos_tokens
            ])  # [-1, 2]
            for i in range(len(all_boxes)):
                velo = np.array([*all_velocity[i], 0.0])
                velo = velo @ np.linalg.inv(e2g_r_mat).T @ np.linalg.inv(
                    l2e_r_mat).T
                all_velocity[i] = velo[:2]  # [-1, 2]

            attribute_tokens = [
                self.nusc.get('sample_annotation', ann_token)['attribute_tokens']
                for ann_token in annos_tokens
            ]
            all_attribute = []
            for attribute_token in attribute_tokens:
                if len(attribute_token) == 0:
                    all_attribute.append([])
                else:
                    all_attribute.append(
                        self.nusc.get('attribute', attribute_token[0])['name'])

            # then filter these ignore labels
            categories = np.array([box.name for box in all_boxes])
            if self.img_list == 'train':
                useful_idx = [
                    index for index, category in enumerate(categories)
                    if self.useful_cls_dict[category] != 'ignore'
                ]
            else:
                useful_idx = [
                    index for index, category in enumerate(categories)
                ]
            if len(useful_idx) == 0:
                if self.img_list == 'train':
                    return None, biggest_label_num
                else:
                    all_boxes_3d = np.ones([1, 7], dtype=np.float32)
                    all_boxes_classes = np.array(['ignore'])
                    all_attribute = np.array([-1])
                    all_velocity = np.array([[0, 0]], dtype=np.float32)
            else:
                all_boxes_3d = all_boxes_3d[useful_idx]

                categories = categories[useful_idx]
                all_boxes_classes = np.array(
                    [self.useful_cls_dict[cate] for cate in categories])
                # now calculate the mean size of each box
                for tmp_idx, all_boxes_class in enumerate(all_boxes_classes):
                    cur_mean_size = self.cls_size_dict[
                        all_boxes_class] * self.cls_num_dict[all_boxes_class]
                    cur_cls_num = self.cls_num_dict[all_boxes_class] + 1
                    cur_total_size = cur_mean_size + all_boxes_3d[
                        tmp_idx, [4, 5, 3]]  # [l, w, h]
                    cur_mean_size = cur_total_size / cur_cls_num
                    self.cls_size_dict[all_boxes_class] = cur_mean_size
                    self.cls_num_dict[all_boxes_class] = cur_cls_num

                all_attribute = [
                    all_attribute[tmp_idx] for tmp_idx in useful_idx
                ]
                tmp_attribute = []
                for attr in all_attribute:
                    if attr == []:
                        tmp_attribute.append(-1)
                    else:
                        tmp_attribute.append(self.attribute_idx_list[attr])
                all_attribute = tmp_attribute
                all_attribute = np.array(all_attribute, dtype=np.int32)
                all_velocity = [
                    all_velocity[tmp_idx] for tmp_idx in useful_idx
                ]
                all_velocity = np.array(all_velocity, dtype=np.float32)
        else:
            cur_data_path = self.nusc.get_sample_data_path(sample_data_token)

        # then generate the bev_maps
        if self.img_list in ['train', 'val', 'trainval'] and cfg.TEST.WITH_GT:
            sample_dict = {
                maps_dict.KEY_LABEL_BOXES_3D: all_boxes_3d,
                maps_dict.KEY_LABEL_CLASSES: all_boxes_classes,
                maps_dict.KEY_LABEL_ATTRIBUTES: all_attribute,
                maps_dict.KEY_LABEL_VELOCITY: all_velocity,
                maps_dict.KEY_LABEL_NUM: len(all_boxes_3d),

                maps_dict.KEY_POINT_CLOUD: cur_data_path,
                maps_dict.KEY_TRANSFORMRATION_MATRIX: cur_transformation_matrix,
                maps_dict.KEY_SAMPLE_NAME: '{}/{}/{}'.format(
                    cur_scene_key, cur_sample_token, sample_data_token),
                maps_dict.KEY_SWEEPS: sweeps,
                maps_dict.KEY_TIMESTAMPS: cur_timestamp,
            }
            biggest_label_num = max(len(all_boxes_3d), biggest_label_num)
        else:
            # img_list is test
            sample_dict = {
                maps_dict.KEY_POINT_CLOUD: cur_data_path,
                maps_dict.KEY_SAMPLE_NAME: '{}/{}/{}'.format(
                    cur_scene_key, cur_sample_token, sample_data_token),
                maps_dict.KEY_TRANSFORMRATION_MATRIX: cur_transformation_matrix,
                maps_dict.KEY_SWEEPS: sweeps,
                maps_dict.KEY_TIMESTAMPS: cur_timestamp,
            }
        return sample_dict, biggest_label_num

    def preprocess_batch(self):
        # if create_gt_dataset, then also create a boxes_numpy, saving all points
        if cfg.TRAIN.AUGMENTATIONS.MIXUP.OPEN:  # also save mixup database
            mixup_label_dict = dict([(cls, []) for cls in self.mixup_db_class])

        sample_dicts_list = []
        for scene_key, v in tqdm.tqdm(self.sample_data_token_list.items()):
            for sample_data_token in v:
                sample_dict, tmp_biggest_label_num = self.preprocess_samples(
                    scene_key, sample_data_token)
                if sample_dict is None:
                    continue
                # else save the result
                sample_dicts_list.append(sample_dict)

                # create_gt_dataset
                if self.img_list in [
                        'train', 'val', 'trainval'
                ] and cfg.TEST.WITH_GT and cfg.TRAIN.AUGMENTATIONS.MIXUP.OPEN:
                    mixup_sample_dicts = self.generate_mixup_sample(sample_dict)
                    if mixup_sample_dicts is None:
                        continue
                    for mixup_sample_dict in mixup_sample_dicts:
                        cur_cls = mixup_sample_dict[
                            maps_dict.KEY_SAMPLED_GT_CLSES]
                        mixup_label_dict[cur_cls].append(mixup_sample_dict)

        # save preprocessed data
        with open(self.train_list, 'wb') as f:
            pickle.dump(sample_dicts_list, f)
        for k, v in self.cls_num_dict.items():
            print('class name: %s / class num: %d / mean size: (%f, %f, %f)' %
                  (k, v, self.cls_size_dict[k][0], self.cls_size_dict[k][1],
                   self.cls_size_dict[k][2]))  # [l, w, h]

        if self.img_list in [
                'train', 'val', 'trainval'
        ] and cfg.TEST.WITH_GT and cfg.TRAIN.AUGMENTATIONS.MIXUP.OPEN:
            print('**** Generating groundtruth database ****')
            for cur_cls_name, mixup_sample_dict in mixup_label_dict.items():
                cur_mixup_db_cls_path = self.mixup_db_cls_path[cur_cls_name]
                cur_mixup_db_trainlist_path = self.mixup_db_trainlist_path[
                    cur_cls_name]
                print('**** Class %s ****' % cur_cls_name)
                with open(cur_mixup_db_trainlist_path, 'w') as f:
                    for tmp_idx, tmp_cur_mixup_sample_dict in tqdm.tqdm(
                            enumerate(mixup_sample_dict)):
                        f.write('%06d.npy\n' % tmp_idx)
                        np.save(
                            os.path.join(cur_mixup_db_cls_path,
                                         '%06d.npy' % tmp_idx),
                            tmp_cur_mixup_sample_dict)
        print('Ending of the preprocess !!!')

    def generate_mixup_sample(self, sample_dict):
        """ This function is bound for generating mixup dataset """
        all_boxes_3d = sample_dict[maps_dict.KEY_LABEL_BOXES_3D]
        all_boxes_classes = sample_dict[maps_dict.KEY_LABEL_CLASSES]
        point_cloud_path = sample_dict[maps_dict.KEY_POINT_CLOUD]

        # then we first cast all_boxes_3d to kitti format
        all_boxes_3d = cast_box_3d_to_kitti_format(all_boxes_3d)

        # load points
        points = np.fromfile(point_cloud_path, dtype=np.float32).reshape(
            (-1, 5))
        points = cast_points_to_kitti(points)
        points[:, 3] /= 255
        points[:, 4] = 0  # timestamp is zero

        points_mask = check_inside_points(points,
                                          all_boxes_3d)  # [pts_num, gt_num]
        points_masks_num = np.sum(points_mask, axis=0)  # [gt_num]
        valid_box_idx = np.where(
            points_masks_num >= cfg.DATASET.MIN_POINTS_NUM)[0]

        if len(valid_box_idx) == 0:
            return None

        valid_label_boxes_3d = all_boxes_3d[valid_box_idx]
        valid_label_classes = all_boxes_classes[valid_box_idx]

        sample_dicts = []
        for index, i in enumerate(valid_box_idx):
            cur_points_mask = points_mask[:, i]
            cur_points_idx = np.where(cur_points_mask)[0]
            cur_inside_points = points[cur_points_idx, :]
            sample_dict = {
                # 0 timestamp and /255 reflectance
                maps_dict.KEY_SAMPLED_GT_POINTS: cur_inside_points,  # kitti format points
                maps_dict.KEY_SAMPLED_GT_LABELS_3D: valid_label_boxes_3d[index],
                maps_dict.KEY_SAMPLED_GT_CLSES: valid_label_classes[index],
            }
            sample_dicts.append(sample_dict)
        return sample_dicts

    # Evaluation
    def set_evaluation_tensor(self, model):
        # get prediction results, bs = 1
        pred_bbox_3d = tf.squeeze(model.output[maps_dict.PRED_3D_BBOX][-1],
                                  axis=0)
        pred_cls_score = tf.squeeze(model.output[maps_dict.PRED_3D_SCORE][-1],
                                    axis=0)
        pred_cls_category = tf.squeeze(
            model.output[maps_dict.PRED_3D_CLS_CATEGORY][-1], axis=0)
        pred_list = [pred_bbox_3d, pred_cls_score, pred_cls_category]

        if len(model.output[maps_dict.PRED_3D_ATTRIBUTE]) > 0:
            pred_attribute = tf.squeeze(
                model.output[maps_dict.PRED_3D_ATTRIBUTE][-1], axis=0)
            pred_velocity = tf.squeeze(
                model.output[maps_dict.PRED_3D_VELOCITY][-1], axis=0)
            pred_list.extend([pred_attribute, pred_velocity])
        return pred_list

    def evaluate_map(self, sess, feeddict_producer, pred_list, val_size,
                     cls_thresh, log_dir, placeholders=None):
        submissions = {}
        submissions['meta'] = dict()
        submissions['meta']['use_camera'] = False
        submissions['meta']['use_lidar'] = True
        submissions['meta']['use_radar'] = False
        submissions['meta']['use_map'] = False
        submissions['meta']['use_external'] = False

        submissions_results = dict()
        pred_attr_velo = (len(pred_list) == 5)

        for i in tqdm.tqdm(range(val_size)):
            feed_dict = feeddict_producer.create_feed_dict()

            if pred_attr_velo:
                pred_bbox_3d_op, pred_cls_score_op, pred_cls_category_op, \
                    pred_attr_op, pred_velo_op = sess.run(
                        pred_list, feed_dict=feed_dict)
            else:
                pred_bbox_3d_op, pred_cls_score_op, pred_cls_category_op = \
                    sess.run(pred_list, feed_dict=feed_dict)
            pred_cls_category_op += 1  # label from 1 to n

            sample_name, cur_transformation_matrix, sweeps = \
                feeddict_producer.info
            sample_name = sample_name[0]
            cur_transformation_matrix = cur_transformation_matrix[0]
            sweeps = sweeps[0]
            cur_scene_key, cur_sample_token, cur_sample_data_token = \
                sample_name.split('/')

            select_idx = np.where(pred_cls_score_op >= cls_thresh)[0]
            pred_cls_score_op = pred_cls_score_op[select_idx]
            pred_cls_category_op = pred_cls_category_op[select_idx]
            pred_bbox_3d_op = pred_bbox_3d_op[select_idx]
            if pred_attr_velo:
                pred_attr_op = pred_attr_op[select_idx]
                pred_velo_op = pred_velo_op[select_idx]
            else:
                pred_attr_op, pred_velo_op = None, None

            if len(pred_bbox_3d_op) > 500:
                arg_sort_idx = np.argsort(pred_cls_score_op)[::-1]
                arg_sort_idx = arg_sort_idx[:500]
                pred_cls_score_op = pred_cls_score_op[arg_sort_idx]
                pred_cls_category_op = pred_cls_category_op[arg_sort_idx]
                pred_bbox_3d_op = pred_bbox_3d_op[arg_sort_idx]
                if pred_attr_velo:
                    pred_attr_op = pred_attr_op[arg_sort_idx]
                    pred_velo_op = pred_velo_op[arg_sort_idx]

            # then transform pred_bbox_op to nuscenes_box
            boxes = cast_kitti_format_to_nusc_box_3d(
                pred_bbox_3d_op, pred_cls_score_op, pred_cls_category_op,
                cur_attribute=pred_attr_op, cur_velocity=pred_velo_op,
                classes=self.idx2cls_dict)
            for box in boxes:
                velocity = box.velocity[:2].tolist()
                if len(sweeps) == 0:
                    velocity = (np.nan, np.nan)
                box.velocity = np.array([*velocity, 0.0])
            # then cast the box from ego to global
            boxes = _lidar_nusc_box_to_global(cur_transformation_matrix, boxes,
                                              self.idx2cls_dict,
                                              eval_version='cvpr_2019')

            annos = []
            for box in boxes:
                name = self.idx2cls_dict[box.label]
                if box.name == -1:
                    attr = self.DefaultAttribute[name]
                else:
                    attr = self.AttributeIdxLabelMapping[name][box.name]
                velocity = box.velocity[:2].tolist()
                nusc_anno = {
                    "sample_token": cur_sample_token,
                    "translation": box.center.tolist(),
                    "size": box.wlh.tolist(),
                    "rotation": box.orientation.elements.tolist(),
                    "velocity": velocity,
                    "detection_name": name,
                    "detection_score": box.score,
                    "attribute_name": attr,
                }
                annos.append(nusc_anno)
            submissions_results[cur_sample_token] = annos

        submissions['results'] = submissions_results

        res_path = os.path.join(log_dir, "results_nusc_1.json")
        with open(res_path, "w") as f:
            json.dump(submissions, f)
        eval_main_file = os.path.join(cfg.ROOT_DIR, 'lib/core/nusc_eval.py')
        root_path = self.dataset_dir
        cmd = f"python3 {str(eval_main_file)} --root_path=\"{str(root_path)}\""
        cmd += f" --version={'v1.0-trainval'} --eval_version={'cvpr_2019'}"
        cmd += f" --res_path=\"{str(res_path)}\" --eval_set={'val'}"
        cmd += f" --output_dir=\"{log_dir}\""
        # using subprocess releases all nusc memory after evaluation
        subprocess.check_output(cmd, shell=True)
        os.system('rm \"%s\"' % res_path)  # remove former result file
        with open(os.path.join(log_dir, "metrics_summary.json"), "r") as f:
            metrics = json.load(f)
        return metrics

    def evaluate_recall(self, sess, feeddict_producer, pred_list, val_size,
                        cls_thresh, log_dir, placeholders=None):
        pass

    def logger_and_select_best_map(self, metrics, log_string):
        detail = {}
        result = f"Nusc v1.0-trainval Evaluation\n"
        final_score = []
        for name in self.cls_list:
            detail[name] = {}
            for k, v in metrics["label_aps"][name].items():
                detail[name][f"dist@{k}"] = v
            tp_errs = []
            tp_names = []
            for k, v in metrics["label_tp_errors"][name].items():
                detail[name][k] = v
                tp_errs.append(f"{v:.4f}")
                tp_names.append(k)
            threshs = ', '.join(list(metrics["label_aps"][name].keys()))
            scores = list(metrics["label_aps"][name].values())
            final_score.append(np.mean(scores))
            scores = ', '.join([f"{s * 100:.2f}" for s in scores])
            result += f"{name} Nusc dist AP@{threshs} and TP errors\n"
            result += scores
            result += "\n"
            result += "mAP: %0.2f\n" % (
                np.mean(list(metrics["label_aps"][name].values())) * 100)
            result += ', '.join(tp_names) + ": " + ', '.join(tp_errs)
            result += "\n"
        result += 'NDS score: %0.2f\n' % (metrics['nd_score'] * 100)
        log_string(result)

        cur_result = metrics['nd_score']
        return cur_result

    def logger_and_select_best_recall(self, metrics, log_string):
        pass

    # save prediction results
    def save_predictions(self, sess, feeddict_producer, pred_list, val_size,
                         cls_thresh, log_dir, placeholders=None):
        pass
def main(): SCENE_SPLITS["mini-val"] = SCENE_SPLITS["val"] if not os.path.exists(DATA_PATH): os.mkdir(DATA_PATH) if not os.path.exists(OUT_PATH): os.mkdir(OUT_PATH) for split in SPLITS: data_path = DATA_PATH # + '{}/'.format(SPLITS[split]) nusc = NuScenes(version=SPLITS[split], dataroot=data_path, verbose=True) out_path = OUT_PATH + "{}.json".format(split) categories_info = [{ "name": CATS[i], "id": i + 1 } for i in range(len(CATS))] ret = { "images": [], "annotations": [], "categories": categories_info, "videos": [], "attributes": ATTRIBUTE_TO_ID, } num_images = 0 num_anns = 0 num_videos = 0 # A "sample" in nuScenes refers to a timestamp with 6 cameras and 1 LIDAR. for sample in nusc.sample: scene_name = nusc.get("scene", sample["scene_token"])["name"] if not (split in ["mini", "test" ]) and not (scene_name in SCENE_SPLITS[split]): continue if sample["prev"] == "": print("scene_name", scene_name) num_videos += 1 ret["videos"].append({ "id": num_videos, "file_name": scene_name }) frame_ids = {k: 0 for k in sample["data"]} track_ids = {} # We decompose a sample into 6 images in our case. for sensor_name in sample["data"]: if sensor_name in USED_SENSOR: image_token = sample["data"][sensor_name] image_data = nusc.get("sample_data", image_token) num_images += 1 # Complex coordinate transform. This will take time to understand. sd_record = nusc.get("sample_data", image_token) cs_record = nusc.get("calibrated_sensor", sd_record["calibrated_sensor_token"]) pose_record = nusc.get("ego_pose", sd_record["ego_pose_token"]) global_from_car = transform_matrix( pose_record["translation"], Quaternion(pose_record["rotation"]), inverse=False, ) car_from_sensor = transform_matrix( cs_record["translation"], Quaternion(cs_record["rotation"]), inverse=False, ) trans_matrix = np.dot(global_from_car, car_from_sensor) _, boxes, camera_intrinsic = nusc.get_sample_data( image_token, box_vis_level=BoxVisibility.ANY) calib = np.eye(4, dtype=np.float32) calib[:3, :3] = camera_intrinsic calib = calib[:3] frame_ids[sensor_name] += 1 # image information in COCO format image_info = { "id": num_images, "file_name": image_data["filename"], "calib": calib.tolist(), "video_id": num_videos, "frame_id": frame_ids[sensor_name], "sensor_id": SENSOR_ID[sensor_name], "sample_token": sample["token"], "trans_matrix": trans_matrix.tolist(), "width": sd_record["width"], "height": sd_record["height"], "pose_record_trans": pose_record["translation"], "pose_record_rot": pose_record["rotation"], "cs_record_trans": cs_record["translation"], "cs_record_rot": cs_record["rotation"], } ret["images"].append(image_info) anns = [] for box in boxes: det_name = category_to_detection_name(box.name) if det_name is None: continue num_anns += 1 v = np.dot(box.rotation_matrix, np.array([1, 0, 0])) yaw = -np.arctan2(v[2], v[0]) box.translate(np.array([0, box.wlh[2] / 2, 0])) category_id = CAT_IDS[det_name] amodel_center = project_to_image( np.array( [ box.center[0], box.center[1] - box.wlh[2] / 2, box.center[2], ], np.float32, ).reshape(1, 3), calib, )[0].tolist() sample_ann = nusc.get("sample_annotation", box.token) instance_token = sample_ann["instance_token"] if not (instance_token in track_ids): track_ids[instance_token] = len(track_ids) + 1 attribute_tokens = sample_ann["attribute_tokens"] attributes = [ nusc.get("attribute", att_token)["name"] for att_token in attribute_tokens ] att = "" if len(attributes) == 0 else attributes[0] if len(attributes) > 1: print(attributes) import pdb pdb.set_trace() track_id = track_ids[instance_token] vel = 
nusc.box_velocity(box.token) # global frame vel = np.dot( np.linalg.inv(trans_matrix), np.array([vel[0], vel[1], vel[2], 0], np.float32), ).tolist() # instance information in COCO format ann = { "id": num_anns, "image_id": num_images, "category_id": category_id, "dim": [box.wlh[2], box.wlh[0], box.wlh[1]], "location": [box.center[0], box.center[1], box.center[2]], "depth": box.center[2], "occluded": 0, "truncated": 0, "rotation_y": yaw, "amodel_center": amodel_center, "iscrowd": 0, "track_id": track_id, "attributes": ATTRIBUTE_TO_ID[att], "velocity": vel, } bbox = KittiDB.project_kitti_box_to_image( copy.deepcopy(box), camera_intrinsic, imsize=(1600, 900)) alpha = _rot_y2alpha( yaw, (bbox[0] + bbox[2]) / 2, camera_intrinsic[0, 2], camera_intrinsic[0, 0], ) ann["bbox"] = [ bbox[0], bbox[1], bbox[2] - bbox[0], bbox[3] - bbox[1], ] ann["area"] = (bbox[2] - bbox[0]) * (bbox[3] - bbox[1]) ann["alpha"] = alpha anns.append(ann) # Filter out bounding boxes outside the image visable_anns = [] for i in range(len(anns)): vis = True for j in range(len(anns)): if anns[i]["depth"] - min(anns[i][ "dim"]) / 2 > anns[j]["depth"] + max( anns[j]["dim"]) / 2 and _bbox_inside( anns[i]["bbox"], anns[j]["bbox"]): vis = False break if vis: visable_anns.append(anns[i]) else: pass for ann in visable_anns: ret["annotations"].append(ann) if DEBUG: img_path = data_path + image_info["file_name"] img = cv2.imread(img_path) img_3d = img.copy() for ann in visable_anns: bbox = ann["bbox"] cv2.rectangle( img, (int(bbox[0]), int(bbox[1])), (int(bbox[2] + bbox[0]), int(bbox[3] + bbox[1])), (0, 0, 255), 3, lineType=cv2.LINE_AA, ) box_3d = compute_box_3d(ann["dim"], ann["location"], ann["rotation_y"]) box_2d = project_to_image(box_3d, calib) img_3d = draw_box_3d(img_3d, box_2d) pt_3d = unproject_2d_to_3d(ann["amodel_center"], ann["depth"], calib) pt_3d[1] += ann["dim"][0] / 2 print("location", ann["location"]) print("loc model", pt_3d) pt_2d = np.array( [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2], dtype=np.float32, ) pt_3d = unproject_2d_to_3d(pt_2d, ann["depth"], calib) pt_3d[1] += ann["dim"][0] / 2 print("loc ", pt_3d) cv2.imshow("img", img) cv2.imshow("img_3d", img_3d) cv2.waitKey() nusc.render_sample_data(image_token) plt.show() print("reordering images") images = ret["images"] video_sensor_to_images = {} for image_info in images: tmp_seq_id = image_info["video_id"] * 20 + image_info["sensor_id"] if tmp_seq_id in video_sensor_to_images: video_sensor_to_images[tmp_seq_id].append(image_info) else: video_sensor_to_images[tmp_seq_id] = [image_info] ret["images"] = [] for tmp_seq_id in sorted(video_sensor_to_images): ret["images"] = ret["images"] + video_sensor_to_images[tmp_seq_id] print("{} {} images {} boxes".format(split, len(ret["images"]), len(ret["annotations"]))) print("out_path", out_path) json.dump(ret, open(out_path, "w"))
class NuScenesPrepare():
    NameMapping = {
        'movable_object.barrier': 'barrier',
        'movable_object.trafficcone': 'barrier',
        'vehicle.bicycle': 'cyclist',
        'vehicle.motorcycle': 'cyclist',
        'vehicle.bus.bendy': 'vehicle',
        'vehicle.bus.rigid': 'vehicle',
        'vehicle.car': 'vehicle',
        'vehicle.trailer': 'vehicle',
        'vehicle.truck': 'vehicle',
        'vehicle.construction': 'vehicle',
        'human.pedestrian.adult': 'pedestrian',
        'human.pedestrian.child': 'pedestrian',
        'human.pedestrian.construction_worker': 'pedestrian',
        'human.pedestrian.police_officer': 'pedestrian',
    }
    labelmapping = LabelEncoder()
    labelmapping.fit(['barrier', 'cyclist', 'pedestrian', 'vehicle'])
    DefaultAttribute = {
        "car": "vehicle.parked",
        "pedestrian": "pedestrian.moving",
        "trailer": "vehicle.parked",
        "truck": "vehicle.parked",
        "bus": "vehicle.parked",
        "motorcycle": "cycle.without_rider",
        "construction_vehicle": "vehicle.parked",
        "bicycle": "cycle.without_rider",
        "barrier": "",
        "traffic_cone": "",
    }

    def __init__(self, args):
        self.args_dp = args['DataPrepare']
        self.args_vg = args['VoxelGenerator']

        self.cache_path = self.args_dp.data_root + '/' + self.args_dp.cache_name
        if os.path.exists(self.cache_path):
            self._Data_frags = pickle.load(open(self.cache_path, 'rb'))
        else:
            self.nusc = NuScenes(version=self.args_dp.version,
                                 dataroot=self.args_dp.data_root,
                                 verbose=self.args_dp.verbose)
            self._Data_frags = self.getFragAnnotations()
            pickle.dump(self._Data_frags, open(self.cache_path, 'wb'))
        if True:
            self._Data_frags = [
                item for scene_data in self._Data_frags for item in scene_data
            ]

    def __len__(self):
        return len(self._Data_frags)

    def __getitem__(self, idx):
        c_frag = self._Data_frags[idx]
        res = {
            "lidar": {
                "type": "lidar",
                "points": None,
            },
            "gt_boxes": {
                "obs": None,
                "pred": None,
            },
            "metadata": {
                # token of last observed frame
                "token": c_frag['Data_frags'][self.args_dp.obs_length - 1]
            },
        }
        sweep_voxels, sweep_coords, sweep_num_voxels, sweep_num_points = [], [], [], []
        bev_imgs, cam_imgs = [], []
        gt_boxes = []
        # ts = c_frag['Data_frags'][self.args.obs_length-1]["timestamp"] / 1e6

        # get BEV sweeps
        for fi, c_frame in enumerate(c_frag['Data_frags']):
            if fi < self.args_dp.obs_length:
                # get Annotations
                gt_boxes.append(c_frame['boxes'])
                # load lidar points
                lidar_path = c_frame['lidar_path']
                points = np.fromfile(
                    str(Path(self.args_dp.data_root) / lidar_path),
                    dtype=np.float32, count=-1).reshape([-1, 5])
                points[:, 3] /= 255
                points[:, :3] = points[:, :3] @ c_frame["R_cl2rl"].T
                points[:, :3] += c_frame["T_cl2rl"]
                # generate voxel bev
                voxel_res = self.getVoxel(points)
                if 'bev_img' in self.args_vg.bev_data:
                    bev_img_size = [
                        int(np.ceil((self.args_vg.point_cloud_range[3 + i] -
                                     self.args_vg.point_cloud_range[i]) /
                                    self.args_vg.voxel_size[i]))
                        for i in range(3)
                    ]
                    bev_img = np.zeros(bev_img_size)
                    bev_img[voxel_res['coordinates'][:, 2],
                            voxel_res['coordinates'][:, 1],
                            voxel_res['coordinates'][:, 0]] = \
                        voxel_res["num_points_per_voxel"]
                    bev_imgs.append(bev_img)
                if 'bev_index' in self.args_vg.bev_data:
                    sweep_voxels.append(voxel_res['voxels'])
                    sweep_coords.append(voxel_res['coordinates'])
                    sweep_num_voxels.append(
                        np.array([voxel_res['voxels'].shape[0]], dtype=np.int64))
                    sweep_num_points.append(voxel_res["num_points_per_voxel"])

            # Load image
            if self.args_dp.use_image == 'last_image':
                # only use image of the last observed frame
                load_image = fi == self.args_dp.obs_length - 1
            elif self.args_dp.use_image == 'key_images':
                # use image of all key frames
                load_image = 'cam_path' in c_frame.keys()
            if load_image:
                if Path(c_frame['cam_path']).exists():
                    with open(str(c_frame['cam_path']), 'rb') as f:
                        image_str = f.read()
                else:
                    image_str = None
                cam_imgs.append(image_str)

        res["lidar"]["voxel_sweeps"] = np.concatenate(sweep_voxels, axis=0)
        res["lidar"]["bev_imgs"] = np.stack(bev_imgs, axis=0)
        res["lidar"]["coordinates"] = np.concatenate(sweep_coords, axis=0)
        res["lidar"]["num_voxels"] = np.concatenate(sweep_num_voxels, axis=0)
        res["lidar"]["num_points"] = np.concatenate(sweep_num_points, axis=0)
        res["cam"] = {
            "type": "camera",
            "data": image_str,
            "datatype": Path(c_frag['Data_frags'][
                self.args_dp.obs_length - 1]['cam_path']).suffix[1:],
        }
        gt_boxes = np.stack(gt_boxes, axis=0)
        res["gt_boxes"]["obs"] = gt_boxes[:self.args_dp.obs_length]
        res["gt_boxes"]["pred"] = gt_boxes[self.args_dp.obs_length:]
        res['cls_label'] = self.labelmapping.fit_transform(c_frag['names'])
        return res
        # Ground Truth (by instance)
        # res["GT"]["obs"]=
        # image of last

    def getVoxel(self, points):
        max_voxels = 100000
        voxel_generator = VoxelGeneratorV2(
            voxel_size=list(self.args_vg.voxel_size),
            point_cloud_range=list(self.args_vg.point_cloud_range),
            max_num_points=self.args_vg.max_number_of_points_per_voxel,
            max_voxels=max_voxels,
            full_mean=self.args_vg.full_empty_part_with_mean,
            block_filtering=self.args_vg.block_filtering,
            block_factor=self.args_vg.block_factor,
            block_size=self.args_vg.block_size,
            height_threshold=self.args_vg.height_threshold)
        res = voxel_generator.generate(points, max_voxels)
        return res

    def getSamplebyFrame(self):
        # collect interpolated sample_data frames (10 Hz)
        scene_all = []
        for si, scene in enumerate(self.nusc.scene):
            sample_interp_all = []
            first_sample = self.nusc.get('sample', scene['first_sample_token'])
            sd_rec = self.nusc.get('sample_data',
                                   first_sample['data']["LIDAR_TOP"])
            sample_interp_all.append(sd_rec)
            while sd_rec['next'] != '':
                sd_rec = self.nusc.get('sample_data', sd_rec['next'])
                sample_interp_all.append(sd_rec)
            scene_all.append(sample_interp_all)
        return scene_all

    def getFragAnnotations(self):
        scene_frames = self.getSamplebyFrame()
        Data_frags = []
        # find key frames in this time window
        key_slide_window = int(self.args_dp.obs_length * 2)
        si_start = 0
        if os.path.exists(self.cache_path):
            Data_frags = pickle.load(open(self.cache_path, 'rb'))
            si_start = len(Data_frags)
        print('-------------Preparing fragments--------------')
        for si in range(si_start, len(scene_frames)):
            scene_data = scene_frames[si]
            start = time.time()
            scene_frags = []
            for di, sample_data in enumerate(scene_data):
                frag_info = {}
                if sample_data['is_key_frame']:
                    if di <= self.args_dp.obs_length or \
                       di >= len(scene_data) - self.args_dp.pred_length * self.args_dp.interval:
                        continue
                    # the fragment index
                    cur_frag_index = [
                        i + 1 for i in range(di - self.args_dp.obs_length,
                                             di + self.args_dp.pred_length)
                    ]
                    if di != cur_frag_index[self.args_dp.obs_length - 1]:
                        print('error')
                    start_key = max(0, min(di - self.args_dp.obs_length,
                                           di - key_slide_window))
                    end_key = min(len(scene_data) - 1,
                                  max(di + self.args_dp.pred_length,
                                      di + key_slide_window))
                    # find key frames in this index range
                    cur_key_index = [i + 1 for i in range(start_key, end_key)]

                    ## Get reference coordinates
                    refer_frame = sample_data
                    refer_cs_rec = self.nusc.get(
                        'calibrated_sensor', refer_frame['calibrated_sensor_token'])
                    refer_pos_rec = self.nusc.get('ego_pose',
                                                  refer_frame['ego_pose_token'])
                    R_rl2re, T_rl2re = refer_cs_rec['rotation'], refer_cs_rec['translation']
                    R_re2g, T_re2g = refer_pos_rec['rotation'], refer_pos_rec['translation']
                    R_rl2re_mat = Quaternion(R_rl2re).rotation_matrix
                    R_re2g_mat = Quaternion(R_re2g).rotation_matrix

                    # get key frame location
                    key_frame_flag = np.zeros((len(cur_key_index),), dtype='bool')
                    key_frame_index = []
                    for i, d in enumerate(cur_key_index):
                        try:
                            if scene_data[d]['is_key_frame'] == True:
                                key_frame_index.append(i)
                                key_frame_flag[i] = True
                        except:
                            print('error')
                    key_frames = np.array(
                        scene_data[cur_key_index[0]:(cur_key_index[-1] + 1)])[key_frame_flag]

                    # only key frames have annotations, so firstly get key frame infos
                    key_sample, key_sample_token, key_instances, key_annotations, key_velocity = [], [], [], [], []
                    for k, key_frame in enumerate(key_frames):
                        sample_token = key_frame['sample_token']
                        sample = self.nusc.get('sample', sample_token)
                        annotations = [
                            self.nusc.get('sample_annotation', token)
                            for token in sample['anns']
                        ]
                        velocity = np.array([
                            self.nusc.box_velocity(token)[:2]
                            for token in sample['anns']
                        ])
                        key_sample_token.append(sample_token)
                        key_instances.append(
                            [anno['instance_token'] for anno in annotations])
                        key_sample.append(sample)
                        key_annotations.append(annotations)
                        key_velocity.append(velocity)

                    # get fully presented instance tokens in the candidate fragments
                    instances_intersect = list(
                        set.intersection(*[set(i) for i in key_instances]))
                    # instances_union = list(set.union(*[set(i) for i in key_instances]))

                    # fully presented instance flags
                    valid_inst_flags = [
                        np.zeros(len(kinst), dtype='bool') for kinst in key_instances
                    ]
                    for kinst, key_inst in enumerate(key_instances):
                        for vkinst, valid_inst in enumerate(key_inst):
                            if valid_inst in instances_intersect:
                                valid_inst_flags[kinst][vkinst] = True

                    ##########################################
                    ## Prepare fragments Database
                    ##########################################
                    cur_key_frame_index = []
                    for i, d in enumerate(cur_frag_index):
                        if scene_data[d]['is_key_frame'] == True:
                            cur_key_frame_index.append(i)
                    frag_info['Data_frags'] = []
                    frag_info['instance_token'] = instances_intersect
                    frag_info['key_frame_index'] = cur_key_frame_index
                    frag_info['last_obs_frame'] = di
                    frag_info['scene_No'] = si
                    for i, d in enumerate(cur_frag_index):
                        frag_data = {}
                        sample_data = scene_data[d]
                        sample_token = sample_data['sample_token']
                        sample = self.nusc.get('sample', sample_token)
                        # find the key sample this frame data belongs to
                        try:
                            key_sample_ind = key_sample_token.index(sample_token)
                        except:
                            print('can not find corresponding key frame at '
                                  'scene {} frame {}'.format(si, d))
                        valid_inst_flag = valid_inst_flags[key_sample_ind]

                        ## Pose matrix: lidar2ego and ego2global
                        s_cs_rec = self.nusc.get(
                            'calibrated_sensor', sample_data['calibrated_sensor_token'])
                        s_pos_rec = self.nusc.get('ego_pose',
                                                  sample_data['ego_pose_token'])
                        R_cl2ce, T_cl2ce = s_cs_rec['rotation'], s_cs_rec['translation']
                        R_ce2g, T_ce2g = s_pos_rec['rotation'], s_pos_rec['translation']
                        R_cl2ce_mat = Quaternion(R_cl2ce).rotation_matrix
                        R_ce2g_mat = Quaternion(R_ce2g).rotation_matrix
                        # Data_frag['Info_frags']['T_l2e'], Data_frag['Info_frags']['R_l2e'] = cs_record['translation'], cs_record['rotation']
                        # Data_frag['Info_frags']['T_e2g'], Data_frag['Info_frags']['R_e2g'] = pose_record['translation'], pose_record['rotation']

                        ## Get Relative Pose: R_cl2rl, T_cl2rl, based on R/T_rl2re, R/T_re2g, R/T_cl2ce, R/T_ce2g
                        # r: reference, c: current, l: lidar, e: ego, g: global
                        # Attention: R_b2a = inv(R_a2b), T_b2a = - T_a2b * inv(R_a2b)
                        # R_cl2rl = R_cl2ce * R_ce2g * [R_g2rl]
                        # R_g2rl = R_g2re * R_re2rl = inv(R_re2g) * inv(R_rl2re)
                        R_cl2rl = (R_cl2ce_mat.T @ R_ce2g_mat.T) @ (
                            np.linalg.inv(R_re2g_mat).T @ np.linalg.inv(R_rl2re_mat).T)
                        # T_cl2rl = (T_cl2ce * R_ce2g + T_ce2g) * [R_g2rl] + [T_g2rl]
                        # T_g2rl = (T_g2re * R_re2rl + T_re2rl) = - T_re2g * inv(R_re2g) - T_rl2re * inv(R_rl2re)
                        T_cl2rl = (T_cl2ce @ R_ce2g_mat.T + T_ce2g) @ (
                            np.linalg.inv(R_re2g_mat).T @ np.linalg.inv(R_rl2re_mat).T) \
                            - T_re2g @ (np.linalg.inv(R_re2g_mat).T @ np.linalg.inv(
                                R_rl2re_mat).T) - T_rl2re @ np.linalg.inv(R_rl2re_mat).T
                        frag_data['R_cl2rl'], frag_data['T_cl2rl'] = R_cl2rl, T_cl2rl

                        ## Get valid boxes. Then transform to the reference coordinates
                        boxes = self.nusc.get_boxes(sample_data['token'])  # at global coordinates
                        for box in boxes:
                            # Move box to referred coord system
                            box.translate(-np.array(refer_pos_rec['translation']))
                            box.rotate(Quaternion(refer_pos_rec['rotation']).inverse)
                            box.translate(-np.array(refer_cs_rec['translation']))
                            box.rotate(Quaternion(refer_cs_rec['rotation']).inverse)
                        boxes = np.array(boxes)  # at reference coordinates
                        try:
                            valid_boxes = boxes[valid_inst_flag]
                        except:
                            print('can not find valid box at scene {} frame {}'.format(si, d))

                        ## Transform boxes to [location, dimension, rotation]
                        locs = np.array([b.center for b in valid_boxes]).reshape(-1, 3)
                        dims = np.array([b.wlh for b in valid_boxes]).reshape(-1, 3)
                        rots = np.array([
                            b.orientation.yaw_pitch_roll[0] for b in valid_boxes
                        ]).reshape(-1, 1)
                        gt_boxes = np.concatenate([locs, dims, -rots - np.pi / 2], axis=1)
                        frag_data['boxes'] = gt_boxes

                        ## Datapath
                        if i < self.args_dp.obs_length:
                            frag_data['lidar_path'] = sample_data['filename']
                            if sample_data['is_key_frame']:
                                cam_front_token = sample["data"]["CAM_FRONT"]
                                cam_path, _, _ = self.nusc.get_sample_data(cam_front_token)
                                frag_data['cam_path'] = cam_path

                        ## Object name
                        if 'names' not in frag_info.keys():
                            names = [b.name for b in valid_boxes]
                            for i in range(len(names)):
                                if names[i] in self.NameMapping:
                                    names[i] = self.NameMapping[names[i]]
                            names = np.array(names)
                            frag_info['names'] = names

                        ## Velocity (without interpolation)
                        valid_velo = key_velocity[key_sample_ind][valid_inst_flag]
                        # convert velo from global to current lidar
                        for i in range(len(valid_boxes)):
                            velo = np.array([*valid_velo[i], 0.0])
                            velo = velo @ np.linalg.inv(R_ce2g_mat).T @ np.linalg.inv(R_cl2ce_mat).T
                            valid_velo[i] = velo[:2]
                        frag_data['Velocity'] = valid_velo

                        frag_data['FrameNo.'] = d
                        frag_data['Token'] = sample_data['token']
                        frag_data['timestamp'] = sample_data['timestamp']
                        frag_info['Data_frags'].append(frag_data)
                    scene_frags.append(frag_info)
            Data_frags.append(scene_frags)
            end = time.time()
            print('scene {}/{}: total frags: {} time: {} '.format(
                si, len(scene_frames), len(scene_frags), end - start))
            if si % 200 == 0 and si > 0:
                pickle.dump(Data_frags, open(self.cache_path, 'wb'))
        return Data_frags
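# --- Illustrative usage (not part of the original class) ---------------------
# NuScenesPrepare expects a dict with two namespace-like configs whose fields
# are read by attribute access ('DataPrepare' and 'VoxelGenerator'). The field
# names below are the ones the class actually reads; every value is a made-up
# placeholder, not a setting taken from the source.
def _nuscenes_prepare_demo():
    from types import SimpleNamespace

    args = {
        'DataPrepare': SimpleNamespace(
            data_root='/data/nuscenes', version='v1.0-trainval', verbose=True,
            cache_name='nusc_frags.pkl', obs_length=4, pred_length=6,
            interval=1, use_image='last_image'),
        'VoxelGenerator': SimpleNamespace(
            voxel_size=[0.2, 0.2, 8.0],
            point_cloud_range=[-50., -50., -5., 50., 50., 3.],
            max_number_of_points_per_voxel=20,
            full_empty_part_with_mean=False, block_filtering=False,
            block_factor=1, block_size=8, height_threshold=0.2,
            bev_data=['bev_img', 'bev_index']),
    }
    dataset = NuScenesPrepare(args)  # builds the fragment cache on first run
    sample = dataset[0]              # dict with 'lidar', 'cam', 'gt_boxes', 'cls_label'
    return len(dataset), sample['gt_boxes']['obs'].shape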