def main(args): args.path = "/home/jo/training_data/nuscenes/nuscenes-v1.0" args.resize = [640, 256, 0] client = MongoClient(args.conn) collection = client[args.db][args.collection] nusc = NuScenes(version="v1.0-mini", dataroot=args.path, verbose=True) nusc.list_scenes() for scene in tqdm(nusc.scene): next_sample_token = scene["first_sample_token"] while True: sample = nusc.get('sample', next_sample_token) next_sample_token = sample["next"] sample_data_token = sample["data"]["CAM_FRONT"] sample_data = nusc.get_sample_data(sample_data_token) cam_front_data = nusc.get('sample_data', sample_data_token) # Create image data img_path = sample_data[0] if os.path.exists(img_path): img = cv2.imread(img_path) nusc.render_pointcloud_in_image depth_map = map_pointcloud_to_image(nusc, sample['token'], pointsensor_channel='LIDAR_TOP', camera_channel='CAM_FRONT') # Not sure about radar data, seems to not allign with lidar data very often, leaving it out for now # depth_map = map_pointcloud_to_image(nusc, sample['token'], pointsensor_channel='RADAR_FRONT', camera_channel='CAM_FRONT', depth_map=depth_map) roi = Roi() if args.resize: img, roi = resize_img(img, args.resize[0], args.resize[1], args.resize[2]) depth_map, _ = resize_img(depth_map, args.resize[0], args.resize[1], args.resize[2], cv2.INTER_NEAREST) img_bytes = cv2.imencode('.jpeg', img)[1].tobytes() depth_bytes = cv2.imencode(".png", depth_map)[1].tobytes() content_type = "image/jpeg" else: print("WARNING: file not found: " + img_path + ", continue with next image") continue # Get sensor extrinsics, Not sure why roll and yaw seem to be PI/2 off compared to nuImage calibarted sensor sensor = nusc.get('calibrated_sensor', cam_front_data['calibrated_sensor_token']) q = MyQuaternion(sensor["rotation"][0], sensor["rotation"][1], sensor["rotation"][2], sensor["rotation"][3]) roll = math.atan2(2.0 * (q.z * q.y + q.w * q.x) , 1.0 - 2.0 * (q.x * q.x + q.y * q.y)) + math.pi * 0.5 pitch = math.asin(2.0 * (q.y * q.w - q.z * q.x)) yaw = math.atan2(2.0 * (q.z * q.w + q.x * q.y) , - 1.0 + 2.0 * (q.w * q.w + q.x * q.x)) + math.pi * 0.5 # print(sensor["translation"]) # print(f"Pitch: {pitch*57.2} Yaw: {yaw*57.2} Roll: {roll*57.2}") # Sensor calibration is static, pose would be dynamic. 
TODO: Somehow also add some sort of cam to cam motion to be learned # ego_pose = nusc.get("ego_pose", cam_front_data["ego_pose_token"]) # q = MyQuaternion(ego_pose["rotation"][0], ego_pose["rotation"][1], ego_pose["rotation"][2], ego_pose["rotation"][3]) # roll = math.atan2(2.0 * (q.z * q.y + q.w * q.x) , 1.0 - 2.0 * (q.x * q.x + q.y * q.y)) + math.pi * 0.5 # pitch = math.asin(2.0 * (q.y * q.w - q.z * q.x)) # yaw = math.atan2(2.0 * (q.z * q.w + q.x * q.y) , - 1.0 + 2.0 * (q.w * q.w + q.x * q.x)) + math.pi * 0.5 # print(ego_pose["translation"]) # print(f"Pitch: {pitch*57.2} Yaw: {yaw*57.2} Roll: {roll*57.2}") entry = Entry( img=img_bytes, content_type=content_type, depth=depth_bytes, org_source="nuscenes", org_id=img_path, objects=[], ignore=[], has_3D_info=True, has_track_info=True, sensor_valid=True, yaw=wrap_angle(yaw), roll=wrap_angle(roll), pitch=wrap_angle(pitch), translation=sensor["translation"], scene_token=sample["scene_token"], timestamp=sample["timestamp"] ) labels = sample_data[1] idx_counter = 0 for box in labels: if box.name in CLASS_MAP.keys(): box.translate(np.array([0, box.wlh[2] / 2, 0])) # translate center center to bottom center pos_3d = np.array([*box.center, 1.0]) cam_mat = np.hstack((sample_data[2], np.zeros((3, 1)))) cam_mat[0][2] += roi.offset_left cam_mat[1][2] += roi.offset_top cam_mat *= roi.scale cam_mat[2][2] = 1.0 # create rotation matrix, somehow using the rotation matrix directly from nuscenes does not work # instead, we calc the rotation as it is in kitti and use the same code v = np.dot(box.rotation_matrix, np.array([1, 0, 0])) yaw = -np.arctan2(v[2], v[0]) rot_angle = wrap_angle(float(yaw) + math.pi * 0.5) # because parallel to optical view of camera = 90 deg rot_mat = np.array([ [math.cos(rot_angle), 0.0, math.sin(rot_angle), 0.0], [0.0, 1.0, 0.0, 0.0], [-math.sin(rot_angle), 0.0, math.cos(rot_angle), 0.0], [0.0, 0.0, 0.0, 1.0], ]) pos_2d = np.matmul(cam_mat, pos_3d) pos_2d /= pos_2d[2] box3d = calc_cuboid_from_3d(pos_3d, cam_mat, rot_mat, box.wlh[0], box.wlh[2], box.wlh[1]) box2d = bbox_from_cuboid(box3d) annotation = nusc.get("sample_annotation", box.token) instance_token = annotation["instance_token"] entry.objects.append(Object( obj_class=CLASS_MAP[box.name], box2d=box2d, box3d=box3d, box3d_valid=True, instance_token=instance_token, truncated=None, occluded=None, width=box.wlh[0], length=box.wlh[1], height=box.wlh[2], orientation=rot_angle, # converted to autosar coordinate system x=pos_3d[2], y=-pos_3d[0], z=-pos_3d[1] )) # For debugging show the data in the image box2d = list(map(int, box2d)) cv2.circle(img, (int(pos_2d[0]), int(pos_2d[1])), 3, (255, 0, 0)) cv2.rectangle(img, (box2d[0], box2d[1]), (box2d[0] + box2d[2], box2d[1] + box2d[3]), (255, 255, 0), 1) cv2.putText(img,f"{idx_counter}", (box2d[0], box2d[1] + int(box2d[3])), 0, 0.4, (255, 255, 255)) idx_counter += 1 print(f"{idx_counter}: {math.degrees(rot_angle)}") f, (ax1, ax2) = plt.subplots(2, 1) ax1.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB)) max_val = np.amax(depth_map) ax2.imshow(depth_map, cmap="gray", vmin=1, vmax=10000) plt.show() # collection.insert_one(entry.get_dict()) if next_sample_token == "": break
def create_dataset(dataroot, save_dir, width=672, height=672, grid_range=70., nusc_version='v1.0-mini', use_constant_feature=True, use_intensity_feature=True, end_id=None): os.makedirs(os.path.join(save_dir, 'in_feature'), exist_ok=True) os.makedirs(os.path.join(save_dir, 'out_feature'), exist_ok=True) nusc = NuScenes(version=nusc_version, dataroot=dataroot, verbose=True) ref_chan = 'LIDAR_TOP' if width == height: size = width else: raise Exception( 'Currently only supported if width and height are equal') grid_length = 2. * grid_range / size label_half_length = 10 data_id = 0 grid_ticks = np.arange(-grid_range, grid_range + grid_length, grid_length) grid_centers \ = (grid_ticks + grid_length / 2)[:len(grid_ticks) - 1] for my_scene in nusc.scene: first_sample_token = my_scene['first_sample_token'] token = first_sample_token while (token != ''): print('--- {} '.format(data_id) + token + ' ---') my_sample = nusc.get('sample', token) sd_record = nusc.get('sample_data', my_sample['data'][ref_chan]) sample_rec = nusc.get('sample', sd_record['sample_token']) chan = sd_record['channel'] pc, times = LidarPointCloud.from_file_multisweep(nusc, sample_rec, chan, ref_chan, nsweeps=10) _, boxes, _ = nusc.get_sample_data(sd_record['token'], box_vis_level=0) out_feature = np.zeros((size, size, 7), dtype=np.float32) for box_idx, box in enumerate(boxes): label = 0 if box.name.split('.')[0] == 'vehicle': if box.name.split('.')[1] == 'car': label = 1 elif (box.name.split('.')[1] in ['bus', 'construction', 'trailer', 'truck']): label = 2 elif box.name.split('.')[1] == 'emergency': if box.name.split('.')[2] == 'ambulance': label = 2 # police else: label = 1 elif box.name.split('.')[1] in ['bicycle', 'motorcycle']: label = 3 elif box.name.split('.')[0] == 'human': label = 4 elif (box.name.split('.')[0] in ['animal', 'movable_object', 'static_object']): label = 5 else: continue height_pt = np.linalg.norm(box.corners().T[0] - box.corners().T[3]) corners2d = box.corners()[:2, :] box2d = corners2d.T[[2, 3, 7, 6]] box2d_center = box2d.mean(axis=0) generate_out_feature(width, height, size, grid_centers, box2d, box2d_center, height_pt, label, label_half_length, out_feature) out_feature = out_feature.astype(np.float16) feature_generator = fg.Feature_generator(grid_range, width, height, use_constant_feature, use_intensity_feature) feature_generator.generate(pc.points.T) in_feature = feature_generator.feature if use_constant_feature and use_intensity_feature: in_feature = in_feature.reshape(size, size, 8) elif use_constant_feature or use_intensity_feature: in_feature = in_feature.reshape(size, size, 6) else: in_feature = in_feature.reshape(size, size, 4) # instance_pt is flipped due to flip out_feature = np.flip(np.flip(out_feature, axis=0), axis=1) out_feature[:, :, 1:3] *= -1 np.save(os.path.join(save_dir, 'in_feature/{:05}'.format(data_id)), in_feature) np.save( os.path.join(save_dir, 'out_feature/{:05}'.format(data_id)), out_feature) token = my_sample['next'] data_id += 1 if data_id == end_id: return
def train(): net.train() current_lr = config['learning_rate'] print('Loading Dataset...') dataset = TrainDataset( args.nuscenes_data_root, SSJAugmentation( config['image_size'], means, ), ) print('length = ', len(dataset)) epoch_size = len(dataset) // args.batch_size step_index = 0 batch_iterator = None batch_size = 1 data_loader = data.DataLoader( dataset, batch_size, num_workers=args.num_workers, shuffle=True, collate_fn=collate_fn, pin_memory=False, ) if args.Joint: nusc = NuScenes( version='v1.0-trainval', verbose=True, dataroot=args.nuscenes_root, ) for iteration in range(0, 380000): print(iteration) if (not batch_iterator) or (iteration % epoch_size == 0): # create batch iterator batch_iterator = iter(data_loader) all_epoch_loss = [] print('pass 0') if iteration in stepvalues: step_index += 1 current_lr = current_lr * 0.5 for param_group in optimizer.param_groups: param_group['lr'] = current_lr # load train data img_pre, img_next, boxes_pre, boxes_next, labels, valid_pre, valid_next, current_poses, next_poses, pre_bbox, next_bbox, img_org_pre, img_org_next, current_tokens, next_tokens =\ next(batch_iterator) if args.Joint: current_token = current_tokens[0] next_token = next_tokens[0] first_token = current_tokens[1] current_cam_record = nusc.get('sample_data', current_token) current_cam_path = nusc.get_sample_data_path(current_token) current_cam_path, boxes, current_camera_intrinsic = nusc.get_sample_data( current_cam_record['token'], ) current_cs_record = nusc.get( 'calibrated_sensor', current_cam_record['calibrated_sensor_token'], ) current_poserecord = nusc.get( 'ego_pose', current_cam_record['ego_pose_token'], ) next_cam_record = nusc.get('sample_data', next_token) next_cam_path = nusc.get_sample_data_path(next_token) next_cam_path, boxes, next_camera_intrinsic = nusc.get_sample_data( next_cam_record['token'], ) next_cs_record = nusc.get( 'calibrated_sensor', next_cam_record['calibrated_sensor_token'], ) next_poserecord = nusc.get( 'ego_pose', next_cam_record['ego_pose_token'], ) first_cam_record = nusc.get('sample_data', first_token) first_cam_path = nusc.get_sample_data_path(first_token) first_cam_path, boxes, first_camera_intrinsic = nusc.get_sample_data( first_cam_record['token'], ) first_cs_record = nusc.get( 'calibrated_sensor', first_cam_record['calibrated_sensor_token'], ) first_poserecord = nusc.get( 'ego_pose', first_cam_record['ego_pose_token'], ) if args.cuda: img_pre = (img_pre.cuda()) img_next = (img_next.cuda()) boxes_pre = (boxes_pre.cuda()) boxes_next = (boxes_next.cuda()) valid_pre = (valid_pre.cuda()) valid_next = (valid_next.cuda()) labels = (labels.cuda()) current_poses = (current_poses.cuda()) next_poses = (next_poses.cuda()) pre_bbox = (pre_bbox.cuda()) next_bbox = (next_bbox.cuda()) img_org_pre = (img_org_pre.cuda()) img_org_next = (img_org_next.cuda()) # forward if args.Joint: out, pose_loss = net( args.Joint, img_pre, img_next, boxes_pre, boxes_next, valid_pre, valid_next, current_poses, next_poses, pre_bbox, next_bbox, img_org_pre, img_org_next, current_cs_record, current_poserecord, next_cs_record, next_poserecord, first_cs_record, first_poserecord, first_camera_intrinsic, ) else: out = net( img_pre, img_next, boxes_pre, boxes_next, valid_pre, valid_next, ) optimizer.zero_grad() loss_pre, loss_next, loss_similarity, loss, accuracy_pre, accuracy_next, accuracy, predict_indexes = criterion( out, labels, valid_pre, valid_next, ) total_loss = loss + 0.1 * pose_loss total_loss.backward() optimizer.step() optimizer.zero_grad() all_epoch_loss += 
[loss.data.cpu()] print( 'iter ' + repr(iteration) + ', ' + repr(epoch_size) + ' || epoch: %.4f ' % (iteration / (float)(epoch_size)) + ' || Loss: %.4f ||' % (loss.data.item()), end=' ', ) if iteration % 1000 == 0: result_image = show_batch_circle_image( img_pre, img_next, boxes_pre, boxes_next, valid_pre, valid_next, predict_indexes, iteration, ) if iteration % 1000 == 0: print('Saving state, iter:', iteration) torch.save( net.state_dict(), os.path.join( args.save_folder, 'model_' + repr(iteration) + '.pth', ), )
def get_nuscenes_dicts(path="./", version='v1.0-mini', categories=None): """ This is a helper fuction that create dicts from nuscenes to detectron2 format. Nuscenes annotation use 3d bounding box, but for detectron we need 2d bounding box. The simplest solution is get max x, min x, max y and min y coordinates from 3d bb and create 2d box. So we lost accuracy, but this is not critical. :param path: <string>. Path to Nuscenes dataset. :param version: <string>. Nuscenes dataset version name. :param categories <list<string>>. List of selected categories for detection. Get from https://www.nuscenes.org/data-annotation Categories names: ['human.pedestrian.adult', 'human.pedestrian.child', 'human.pedestrian.wheelchair', 'human.pedestrian.stroller', 'human.pedestrian.personal_mobility', 'human.pedestrian.police_officer', 'human.pedestrian.construction_worker', 'animal', 'vehicle.car', 'vehicle.motorcycle', 'vehicle.bicycle', 'vehicle.bus.bendy', 'vehicle.bus.rigid', 'vehicle.truck', 'vehicle.construction', 'vehicle.emergency.ambulance', 'vehicle.emergency.police', 'vehicle.trailer', 'movable_object.barrier', 'movable_object.trafficcone', 'movable_object.pushable_pullable', 'movable_object.debris', 'static_object.bicycle_rack'] :return: <dict>. Return dict with data annotation in detectron2 format. """ logging.info('your nuscenes root: {}'.format(path)) # assert(path[-1] == "/"), "Insert '/' in the end of path" path = os.path.abspath(path) + '/' nusc = NuScenes(version=version, dataroot=path, verbose=False) logging.info('nuScenes object loaded done.') # Select all catecategories if not set if categories == None: categories = [data["name"] for data in nusc.category] assert (isinstance(categories, list)), "Categories type must be list" dataset_dicts = [] cache_f = os.path.join(path, 'ct_nuscenes.cache') if os.path.exists(cache_f): dataset_dicts = pickle.load(open(cache_f, 'rb')) logging.info('resumed nuscenes cache file: {}'.format(cache_f)) return dataset_dicts idx = 0 logging.info('start parsing nuscenes data to train dict format...') for i in tqdm(range(0, len(nusc.scene))): scene = nusc.scene[i] scene_rec = nusc.get('scene', scene['token']) sample_rec_cur = nusc.get('sample', scene_rec['first_sample_token']) # Go through all frame in current scene while True: # we maybe add CAM_LEFT as well data = nusc.get('sample_data', sample_rec_cur['data']["CAM_FRONT"]) record = {} record["file_name"] = path + data["filename"] if os.path.exists(record['file_name']): record["image_id"] = idx record["height"] = data["height"] record["width"] = data["width"] idx += 1 # Get boxes from front camera _, boxes, camera_intrinsic = nusc.get_sample_data( sample_rec_cur['data']["CAM_FRONT"], BoxVisibility.ANY) # Get only necessary boxes boxes = [box for box in boxes if box.name in categories] # Go through all bounding boxes objs = [] for box in boxes: corners = view_points(box.corners(), camera_intrinsic, normalize=True)[:2, :] max_x = int(max(corners[:][0])) min_x = int(min(corners[:][0])) max_y = int(max(corners[:][1])) min_y = int(min(corners[:][1])) obj = { "bbox": [min_x, min_y, max_x, max_y], "bbox_mode": BoxMode.XYXY_ABS, "category_id": categories.index(box.name), "iscrowd": 0 } objs.append(obj) record["annotations"] = objs dataset_dicts.append(record) # Get next frame sample_rec_cur = nusc.get('sample', sample_rec_cur['next']) # End of scene condition if (sample_rec_cur["next"] == ""): break else: logging.info('pass record: {} since image not found!'.format( record['file_name'])) sample_rec_cur = 
nusc.get('sample', sample_rec_cur['next']) if (sample_rec_cur["next"] == ""): break continue logging.info('data convert done! all samples: {}'.format( len(dataset_dicts))) if not os.path.exists(cache_f): os.makedirs(os.path.dirname(cache_f), exist_ok=True) pickle.dump(dataset_dicts, open(cache_f, 'wb')) logging.info('nuscenes cache saved into: {}'.format(cache_f)) return dataset_dicts
Quaternion(calib_sensor_prev['rotation'])) lidar_to_global = np.dot(ego_to_global, lidar_to_ego) lidar_to_global_prev = np.dot(ego_to_global_prev, lidar_to_ego_prev) delta = inv(lidar_to_global_prev).dot(lidar_to_global) y_translation.append(delta[1, 3]) delta_angles = rotationMatrixToEulerAngles(delta[0:3, 0:3]) * 180 / np.pi yaw.append(delta_angles[2]) for annotation_token in sample['anns']: annotation_metadata = nusc.get('sample_annotation', annotation_token) if annotation_metadata['num_lidar_pts'] == 0: continue detection_name = category_to_detection_name(annotation_metadata['category_name']) if detection_name is None: continue _, box_lidar, _ = nusc.get_sample_data(sample['data'][sensor], box_vis_level=BoxVisibility.NONE, selected_anntokens=[annotation_token]) box_lidar = box_lidar[0] box_lidar.rotate(nu_to_kitti_lidar) if x_positions[detection_name] is None: x_positions[detection_name] = [box_lidar.center[0]] y_positions[detection_name] = [box_lidar.center[1]] length[detection_name] = [box_lidar.wlh[1]] width[detection_name] = [box_lidar.wlh[0]] num_lidar_points[detection_name] = [annotation_metadata['num_lidar_pts']] else: x_positions[detection_name].append(box_lidar.center[0]) y_positions[detection_name].append(box_lidar.center[1]) length[detection_name].append(box_lidar.wlh[1]) width[detection_name].append(box_lidar.wlh[0]) num_lidar_points[detection_name].append(annotation_metadata['num_lidar_pts']) current_sample_token = sample['next']
num_samples = len(nusc.sample) for ind, sample in enumerate(nusc.sample): sample_json = dict() category = [] box_3d = [] rotations = [] my_sample = nusc.get('sample', sample['token']) sample_data = nusc.get('sample_data', my_sample['data'][SENSOR]) filename = os.path.basename(sample_data['filename']) my_annotation_token_list = my_sample['anns'] sensor_modality = sample_data['sensor_modality'] assert sensor_modality == 'camera' data_path, boxes, camera_intrinsic = nusc.get_sample_data( my_sample['data'][SENSOR]) # for anno_token in my_annotation_token_list: # my_annotation_metadata = nusc.get('sample_annotation', anno_token) # # class # cat_name = my_annotation_metadata['category_name'] for box in boxes: cat_name = box.name if classes_map.get(cat_name) is None: continue else: category.append(classes_map[cat_name]) # xyz (forward, left, up) trans = box.center # wlh (yxz) (left, forward, up) dim = box.wlh
'rotation'] c2e_t_mat = np.array(cam2ego_translation).reshape(3, 1) e2g_t_mat = np.array(ego2global_translation).reshape(3, 1) c2e_r_mat = Quaternion(cam2ego_rotation).rotation_matrix # (3, 3) e2g_r_mat = Quaternion(ego2global_rotation).rotation_matrix # (3, 3) calib[present_sensor+'_'+'cam2ego_translation'] = c2e_t_mat calib[present_sensor+'_'+'cam2ego_rotation'] = c2e_r_mat calib[present_sensor+'_'+'ego2global_translation'] = e2g_t_mat calib[present_sensor+'_'+'ego2global_rotation'] = e2g_r_mat sensor_img_output_dir = os.path.join(img_output_root, 'image_' + present_sensor) sensor_label_output_dir = os.path.join(label_output_root, 'label_' + present_sensor) # each sensor_data corresponds to one specific image in the dataset sensor_data = nusc.get('sample_data', img_token) data_path, box_list, cam_intrinsic = nusc.get_sample_data(img_token, getattr(BoxVisibility,truncation_level)) """ get_sample_data: Returns the data path as well as all annotations related to that sample_data. Note that the boxes are transformed into the current sensor's coordinate frame. """ calib[present_sensor] = cam_intrinsic img_file = data_root + sensor_data['filename'] seqname = str(frame_counter).zfill(6) output_label_file = sensor_label_output_dir +'/' + seqname + '.txt' with open(output_label_file, 'a') as output_f: for box in box_list: # obtaining visibility level of each 3D box present_visibility_token = nusc.get('sample_annotation', box.token)['visibility_token'] if present_visibility_token > min_visibility_level: if not (category_reflection[box.name] == 'DontCare' and delete_dontcare_objects):
root_dir = '../v1.0-trainval01' nusc = NuScenes(version='v1.0-trainval', dataroot=root_dir, verbose=False) all_category_db = list(NameMapping.keys()) all_category = list(NameMapping.values()) my_scene = nusc.scene[15] sample_token = my_scene['first_sample_token'] end_token = my_scene['last_sample_token'] my_sample = nusc.get('sample', sample_token) i = 0 while sample_token != end_token: my_sample = nusc.get('sample', my_sample['next']) img = get_image(my_sample, root_dir) sample_data = nusc.get_sample_data(my_sample['data']['CAM_FRONT']) labels = sample_data[1] # this label is according to camera, what we have is lidar # converts top lidar intrinsic = sample_data[2] for label in labels: c2d = project_cam_coords_to_pixel([label.center], intrinsic)[0] cv2.circle(img, (int(c2d[0]), int(c2d[1])), 3, (0, 255, 255), -1) # convert center and wlh to 3d box box = Box(center=label.center, size=label.wlh, orientation=label.orientation) corners3d = box.corners() corners3d_2d = project_cam_coords_to_pixel(corners3d, intrinsic) idx = all_category_db.index(label.name)
class NuScenesDataset(Dataset): """ NuScenes dataset loader and producer """ def __init__(self, mode, split='training', img_list='trainval', is_training=True, workers_num=1): """ mode: 'loading', 'preprocessing' """ self.mode = mode self.dataset_dir = os.path.join(cfg.ROOT_DIR, cfg.DATASET.KITTI.BASE_DIR_PATH) self.max_sweeps = cfg.DATASET.NUSCENES.NSWEEPS self.is_training = is_training self.img_list = img_list self.workers_num = workers_num # cast labels from NuScenes name to useful name self.useful_cls_dict = { 'animal': 'ignore', 'human.pedestrian.personal_mobility': 'ignore', 'human.pedestrian.stroller': 'ignore', 'human.pedestrian.wheelchair': 'ignore', 'movable_object.debris': 'ignore', 'movable_object.pushable_pullable': 'ignore', 'static_object.bicycle_rack': 'ignore', 'vehicle.emergency.ambulance': 'ignore', 'vehicle.emergency.police': 'ignore', 'movable_object.barrier': 'barrier', 'vehicle.bicycle': 'bicycle', 'vehicle.bus.bendy': 'bus', 'vehicle.bus.rigid': 'bus', 'vehicle.car': 'car', 'vehicle.construction': 'construction_vehicle', 'vehicle.motorcycle': 'motorcycle', 'human.pedestrian.adult': 'pedestrian', 'human.pedestrian.child': 'pedestrian', 'human.pedestrian.construction_worker': 'pedestrian', 'human.pedestrian.police_officer': 'pedestrian', 'movable_object.trafficcone': 'traffic_cone', 'vehicle.trailer': 'trailer', 'vehicle.truck': 'truck' } # cast attribute to index self.attribute_idx_list = { 'vehicle.moving': 0, 'vehicle.stopped': 1, 'vehicle.parked': 2, 'cycle.with_rider': 3, 'cycle.without_rider': 4, 'pedestrian.sitting_lying_down': 5, 'pedestrian.standing': 6, 'pedestrian.moving': 7, 'default': -1, } self.idx_attribute_list = dict([ (v, k) for k, v in self.attribute_idx_list.items() ]) self.AttributeIdxLabelMapping = { "car": ['vehicle.moving', 'vehicle.stopped', 'vehicle.parked'], "truck": ['vehicle.moving', 'vehicle.stopped', 'vehicle.parked'], "bus": ['vehicle.moving', 'vehicle.stopped', 'vehicle.parked'], "trailer": ['vehicle.moving', 'vehicle.stopped', 'vehicle.parked'], "construction_vehicle": ['vehicle.moving', 'vehicle.stopped', 'vehicle.parked'], "pedestrian": [ 'pedestrian.sitting_lying_down', 'pedestrian.standing', 'pedestrian.moving' ], "motorcycle": ['cycle.with_rider', 'cycle.without_rider', ''], "bicycle": ['cycle.with_rider', 'cycle.without_rider', ''], "traffic_cone": ['', '', ''], "barrier": ['', '', ''], } self.DefaultAttribute = { "car": "vehicle.parked", "pedestrian": "pedestrian.moving", "trailer": "vehicle.parked", "truck": "vehicle.parked", "bus": "vehicle.parked", "motorcycle": "cycle.without_rider", "construction_vehicle": "vehicle.parked", "bicycle": "cycle.without_rider", "barrier": "", "traffic_cone": "", } self.cls_list = cfg.DATASET.KITTI.CLS_LIST self.idx2cls_dict = dict([(idx + 1, cls) for idx, cls in enumerate(self.cls_list)]) self.cls2idx_dict = dict([(cls, idx + 1) for idx, cls in enumerate(self.cls_list)]) self.sv_npy_path = os.path.join( cfg.ROOT_DIR, cfg.DATASET.KITTI.SAVE_NUMPY_PATH, 'NuScenes', '{}_{}'.format(img_list, self.max_sweeps)) self.train_list = os.path.join(self.sv_npy_path, 'infos.pkl') self.voxel_generator = VoxelGenerator() self.test_mode = cfg.TEST.TEST_MODE if self.test_mode == 'mAP': self.evaluation = self.evaluate_map self.logger_and_select_best = self.logger_and_select_best_map elif self.test_mode == 'Recall': self.evaluation = self.evaluate_recall self.logger_and_select_best = self.logger_and_select_best_recall else: raise Exception('No other evaluation mode.') if mode == 'loading': # data loader with 
open(self.train_list, 'rb') as f: self.train_npy_list = pickle.load(f) self.sample_num = len(self.train_npy_list) if self.is_training: self.data_augmentor = DataAugmentor( 'NuScenes', workers_num=self.workers_num) elif mode == 'preprocessing': # preprocess raw data if img_list == 'train': self.nusc = NuScenes(dataroot=self.dataset_dir, version='v1.0-trainval') self.scenes = [ scene for scene in self.nusc.scene if scene['name'] in train_scene ] elif img_list == 'val': self.nusc = NuScenes(dataroot=self.dataset_dir, version='v1.0-trainval') self.scenes = [ scene for scene in self.nusc.scene if scene['name'] in val_scene ] else: # test self.nusc = NuScenes(dataroot=self.dataset_dir, version='v1.0-test') self.scenes = self.nusc.scene self.sample_data_token_list = OrderedDict() sample_num = 0 for scene in self.scenes: # static the sample num, and save all sample_data_token self.sample_data_token_list[scene['token']] = [] all_sample = self.nusc.field2token('sample', 'scene_token', scene['token']) sample_num += len(all_sample) for sample in all_sample: # all sample token sample = self.nusc.get('sample', sample) cur_token = sample['token'] cur_data_token = sample['data']['LIDAR_TOP'] self.sample_data_token_list[scene['token']].append( cur_data_token) self.sample_num = sample_num self.extents = cfg.DATASET.POINT_CLOUD_RANGE self.extents = np.reshape(self.extents, [3, 2]) if not os.path.exists(self.sv_npy_path): os.makedirs(self.sv_npy_path) # also calculate the mean size here self.cls_size_dict = dict([(cls, np.array([0, 0, 0], dtype=np.float32)) for cls in self.cls_list]) self.cls_num_dict = dict([(cls, 0) for cls in self.cls_list]) # the save path for MixupDB if self.img_list in [ 'train', 'val', 'trainval' ] and cfg.TEST.WITH_GT and cfg.TRAIN.AUGMENTATIONS.MIXUP.OPEN: self.mixup_db_cls_path = dict() self.mixup_db_trainlist_path = dict() self.mixup_db_class = cfg.TRAIN.AUGMENTATIONS.MIXUP.CLASS for cls in self.mixup_db_class: mixup_db_cls_path = os.path.join( cfg.ROOT_DIR, cfg.DATASET.KITTI.SAVE_NUMPY_PATH, cfg.TRAIN.AUGMENTATIONS.MIXUP.SAVE_NUMPY_PATH, cfg.TRAIN.AUGMENTATIONS.MIXUP.PC_LIST, '{}'.format(cls)) mixup_db_trainlist_path = os.path.join( mixup_db_cls_path, 'train_list.txt') if not os.path.exists(mixup_db_cls_path): os.makedirs(mixup_db_cls_path) self.mixup_db_cls_path[cls] = mixup_db_cls_path self.mixup_db_trainlist_path[cls] = mixup_db_trainlist_path def __len__(self): return self.sample_num def load_samples(self, sample_idx, pipename): """ load data per thread """ pipename = int(pipename) biggest_label_num = 0 sample_dict = self.train_npy_list[sample_idx] points_path = sample_dict[maps_dict.KEY_POINT_CLOUD] sweeps = sample_dict[maps_dict.KEY_SWEEPS] sample_name = sample_dict[maps_dict.KEY_SAMPLE_NAME] cur_transformation_matrix = sample_dict[ maps_dict.KEY_TRANSFORMRATION_MATRIX] ts = sample_dict[maps_dict.KEY_TIMESTAMPS] / 1e6 # then first read points and stack points from multiple frame points = np.fromfile(points_path, dtype=np.float32) points = points.reshape((-1, 5)) points = cast_points_to_kitti(points) points[:, 3] /= 255 points[:, 4] = 0 sweep_points_list = [points] original_cur_sweep_points = points cur_sweep_points_num = points.shape[0] for sweep in sweeps: points_sweep = np.fromfile(sweep['lidar_path'], dtype=np.float32) points_sweep = points_sweep.reshape((-1, 5)) sweep_ts = sweep['timestamp'] / 1e6 points_sweep[:, 3] /= 255 points_sweep[:, :3] = points_sweep[:, :3] @ sweep[ 'sweep2lidar_rotation'].T points_sweep[:, :3] += sweep['sweep2lidar_translation'] points_sweep[:, 
4] = ts - sweep_ts points_sweep = cast_points_to_kitti(points_sweep) sweep_points_list.append(points_sweep) if cfg.DATASET.NUSCENES.INPUT_FEATURE_CHANNEL == 4: points = np.concatenate(sweep_points_list, axis=0)[:, [0, 1, 2, 4]] else: points = np.concatenate(sweep_points_list, axis=0) # then read groundtruth file if have if self.is_training or cfg.TEST.WITH_GT: label_boxes_3d = sample_dict[maps_dict.KEY_LABEL_BOXES_3D] label_boxes_3d = cast_box_3d_to_kitti_format(label_boxes_3d) label_classes_name = sample_dict[maps_dict.KEY_LABEL_CLASSES] label_classes = np.array([ self.cls2idx_dict[label_class] for label_class in label_classes_name ]) label_attributes = sample_dict[maps_dict.KEY_LABEL_ATTRIBUTES] label_velocity = sample_dict[ maps_dict.KEY_LABEL_VELOCITY] # [-1, 2] ry_cls_label, residual_angle = encode_angle2class_np( label_boxes_3d[:, -1], cfg.MODEL.ANGLE_CLS_NUM) else: # not is_training and no_gt label_boxes_3d = np.zeros([1, 7], np.float32) label_classes = np.zeros([1], np.int32) label_attributes = np.zeros([1], np.int32) label_velocity = np.zeros([1, 2], np.float32) ry_cls_label = np.zeros([1], np.int32) residual_angle = np.zeros([1], np.float32) if self.is_training: # data augmentation points, label_boxes_3d, label_classes, label_attributes, label_velocity, cur_sweep_points_num = self.data_augmentor.nuscenes_forward( points, label_boxes_3d, label_classes, pipename, label_attributes, label_velocity, cur_sweep_points_num) ry_cls_label, residual_angle = encode_angle2class_np( label_boxes_3d[:, -1], cfg.MODEL.ANGLE_CLS_NUM) cur_label_num = len(label_boxes_3d) # then randomly choose some points cur_sweep_points = points[:cur_sweep_points_num, :] # [-1, 4] other_sweep_points = points[cur_sweep_points_num:, :] # [-1, 4] if len(other_sweep_points) == 0: other_sweep_points = cur_sweep_points.copy() np.random.shuffle(cur_sweep_points) np.random.shuffle(other_sweep_points) input_sample_points, num_points_per_voxel = self.voxel_generator.generate_nusc( cur_sweep_points, other_sweep_points, cfg.DATASET.NUSCENE.MAX_CUR_SAMPLE_POINTS_NUM ) # points, [num_voxels, num_points, 5], sem_labels, [num_voxels, num_points] cur_sample_points = input_sample_points[:cfg.DATASET.NUSCENE. MAX_CUR_SAMPLE_POINTS_NUM] other_sample_points = input_sample_points[cfg.DATASET.NUSCENE. 
MAX_CUR_SAMPLE_POINTS_NUM:] biggest_label_num = max(biggest_label_num, cur_label_num) return biggest_label_num, input_sample_points, cur_sample_points, other_sample_points, label_boxes_3d, ry_cls_label, residual_angle, label_classes, label_attributes, label_velocity, sample_name, cur_transformation_matrix, sweeps, original_cur_sweep_points def load_batch(self, batch_size): perm = np.arange( self.sample_num).tolist() # a list indicates each data dp = DataFromList(perm, is_train=self.is_training, shuffle=self.is_training) dp = MultiProcessMapData(dp, self.load_samples, self.workers_num) use_list = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1] use_concat = [0, 0, 0, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0] dp = BatchDataNuscenes(dp, batch_size, use_concat=use_concat, use_list=use_list) dp.reset_state() dp = dp.get_data() return dp # Preprocess data def preprocess_samples(self, cur_scene_key, sample_data_token): sample_dicts = [] biggest_label_num = 0 cur_sample_data = self.nusc.get('sample_data', sample_data_token) cur_sample_token = cur_sample_data['sample_token'] cur_sample = self.nusc.get('sample', cur_sample_token) ego_pose = self.nusc.get('ego_pose', cur_sample_data['ego_pose_token']) calibrated_sensor = self.nusc.get( 'calibrated_sensor', cur_sample_data['calibrated_sensor_token']) l2e_r = calibrated_sensor['rotation'] l2e_t = calibrated_sensor['translation'] e2g_r = ego_pose['rotation'] e2g_t = ego_pose['translation'] l2e_r_mat = Quaternion(l2e_r).rotation_matrix e2g_r_mat = Quaternion(e2g_r).rotation_matrix cur_timestamp = cur_sample['timestamp'] cur_transformation_matrix = { 'lidar2ego_translation': l2e_t, 'lidar2ego_rotation': l2e_r, 'ego2global_translation': e2g_t, 'ego2global_rotation': e2g_r, } # get point cloud in former 0.5 second sweeps = [] while len(sweeps) < self.max_sweeps: if not cur_sample_data['prev'] == '': # has next frame cur_sample_data = self.nusc.get('sample_data', cur_sample_data['prev']) cur_ego_pose = self.nusc.get('ego_pose', cur_sample_data['ego_pose_token']) cur_calibrated_sensor = self.nusc.get( 'calibrated_sensor', cur_sample_data['calibrated_sensor_token']) cur_lidar_path, cur_sweep_boxes, _ = self.nusc.get_sample_data( cur_sample_data['token']) sweep = { "lidar_path": cur_lidar_path, "sample_data_token": cur_sample_data['token'], "lidar2ego_translation": cur_calibrated_sensor['translation'], "lidar2ego_rotation": cur_calibrated_sensor['rotation'], "ego2global_translation": cur_ego_pose['translation'], "ego2global_rotation": cur_ego_pose['rotation'], "timestamp": cur_sample_data["timestamp"] } l2e_r_s = sweep["lidar2ego_rotation"] l2e_t_s = sweep["lidar2ego_translation"] e2g_r_s = sweep["ego2global_rotation"] e2g_t_s = sweep["ego2global_translation"] # sweep->ego->global->ego'->lidar l2e_r_s_mat = Quaternion(l2e_r_s).rotation_matrix e2g_r_s_mat = Quaternion(e2g_r_s).rotation_matrix R = (l2e_r_s_mat.T @ e2g_r_s_mat.T) @ ( np.linalg.inv(e2g_r_mat).T @ np.linalg.inv(l2e_r_mat).T) T = (l2e_t_s @ e2g_r_s_mat.T + e2g_t_s) @ ( np.linalg.inv(e2g_r_mat).T @ np.linalg.inv(l2e_r_mat).T) T -= e2g_t @ (np.linalg.inv(e2g_r_mat).T @ np.linalg.inv( l2e_r_mat).T) + l2e_t @ np.linalg.inv(l2e_r_mat).T sweep["sweep2lidar_rotation"] = R.T # points @ R.T + T sweep["sweep2lidar_translation"] = T sweeps.append(sweep) else: # prev is none break # then load gt_boxes_3d if self.img_list in ['train', 'val'] and cfg.TEST.WITH_GT: cur_data_path, all_boxes, _ = self.nusc.get_sample_data( sample_data_token) # then first parse boxes labels locs = np.array([box.center for box in all_boxes]).reshape(-1, 3) 
sizes = np.array([box.wlh for box in all_boxes]).reshape(-1, 3) rots = np.array([ box.orientation.yaw_pitch_roll[0] for box in all_boxes ]).reshape(-1, 1) all_boxes_3d = np.concatenate([locs, sizes, -rots], axis=-1) annos_tokens = cur_sample['anns'] all_velocity = np.array([ self.nusc.box_velocity(ann_token)[:2] for ann_token in annos_tokens ]) # [-1, 2] for i in range(len(all_boxes)): velo = np.array([*all_velocity[i], 0.0]) velo = velo @ np.linalg.inv(e2g_r_mat).T @ np.linalg.inv( l2e_r_mat).T all_velocity[i] = velo[:2] # [-1, 2] attribute_tokens = [ self.nusc.get('sample_annotation', ann_token)['attribute_tokens'] for ann_token in annos_tokens ] all_attribute = [] for attribute_token in attribute_tokens: if len(attribute_token) == 0: all_attribute.append([]) else: all_attribute.append( self.nusc.get('attribute', attribute_token[0])['name']) # then filter these ignore labels categories = np.array([box.name for box in all_boxes]) if self.img_list == 'train': useful_idx = [ index for index, category in enumerate(categories) if self.useful_cls_dict[category] != 'ignore' ] else: useful_idx = [ index for index, category in enumerate(categories) ] if len(useful_idx) == 0: if self.img_list == 'train': return None, biggest_label_num else: all_boxes_3d = np.ones([1, 7], dtype=np.float32) all_boxes_classes = np.array(['ignore']) all_attribute = np.array([-1]) all_velocity = np.array([[0, 0]], dtype=np.float32) else: all_boxes_3d = all_boxes_3d[useful_idx] categories = categories[useful_idx] all_boxes_classes = np.array( [self.useful_cls_dict[cate] for cate in categories]) # now calculate the mean size of each box for tmp_idx, all_boxes_class in enumerate(all_boxes_classes): cur_mean_size = self.cls_size_dict[ all_boxes_class] * self.cls_num_dict[all_boxes_class] cur_cls_num = self.cls_num_dict[all_boxes_class] + 1 cur_total_size = cur_mean_size + all_boxes_3d[tmp_idx, [ 4, 5, 3 ]] # [l, w, h] cur_mean_size = cur_total_size / cur_cls_num self.cls_size_dict[all_boxes_class] = cur_mean_size self.cls_num_dict[all_boxes_class] = cur_cls_num all_attribute = [ all_attribute[tmp_idx] for tmp_idx in useful_idx ] tmp_attribute = [] for attr in all_attribute: if attr == []: tmp_attribute.append(-1) else: tmp_attribute.append(self.attribute_idx_list[attr]) all_attribute = tmp_attribute all_attribute = np.array(all_attribute, dtype=np.int32) all_velocity = [ all_velocity[tmp_idx] for tmp_idx in useful_idx ] all_velocity = np.array(all_velocity, dtype=np.float32) else: cur_data_path = self.nusc.get_sample_data_path(sample_data_token) # then generate the bev_maps if self.img_list in ['train', 'val', 'trainval'] and cfg.TEST.WITH_GT: sample_dict = { maps_dict.KEY_LABEL_BOXES_3D: all_boxes_3d, maps_dict.KEY_LABEL_CLASSES: all_boxes_classes, maps_dict.KEY_LABEL_ATTRIBUTES: all_attribute, maps_dict.KEY_LABEL_VELOCITY: all_velocity, maps_dict.KEY_LABEL_NUM: len(all_boxes_3d), maps_dict.KEY_POINT_CLOUD: cur_data_path, maps_dict.KEY_TRANSFORMRATION_MATRIX: cur_transformation_matrix, maps_dict.KEY_SAMPLE_NAME: '{}/{}/{}'.format(cur_scene_key, cur_sample_token, sample_data_token), maps_dict.KEY_SWEEPS: sweeps, maps_dict.KEY_TIMESTAMPS: cur_timestamp, } biggest_label_num = max(len(all_boxes_3d), biggest_label_num) else: # img_list is test sample_dict = { maps_dict.KEY_POINT_CLOUD: cur_data_path, maps_dict.KEY_SAMPLE_NAME: '{}/{}/{}'.format(cur_scene_key, cur_sample_token, sample_data_token), maps_dict.KEY_TRANSFORMRATION_MATRIX: cur_transformation_matrix, maps_dict.KEY_SWEEPS: sweeps, maps_dict.KEY_TIMESTAMPS: 
cur_timestamp, } return sample_dict, biggest_label_num def preprocess_batch(self): # if create_gt_dataset, then also create a boxes_numpy, saving all points if cfg.TRAIN.AUGMENTATIONS.MIXUP.OPEN: # also save mixup database mixup_label_dict = dict([(cls, []) for cls in self.mixup_db_class]) sample_dicts_list = [] for scene_key, v in tqdm.tqdm(self.sample_data_token_list.items()): for sample_data_token in v: sample_dict, tmp_biggest_label_num = self.preprocess_samples( scene_key, sample_data_token) if sample_dict is None: continue # else save the result sample_dicts_list.append(sample_dict) # create_gt_dataset if self.img_list in [ 'train', 'val', 'trainval' ] and cfg.TEST.WITH_GT and cfg.TRAIN.AUGMENTATIONS.MIXUP.OPEN: mixup_sample_dicts = self.generate_mixup_sample( sample_dict) if mixup_sample_dicts is None: continue for mixup_sample_dict in mixup_sample_dicts: cur_cls = mixup_sample_dict[ maps_dict.KEY_SAMPLED_GT_CLSES] mixup_label_dict[cur_cls].append(mixup_sample_dict) # save preprocessed data with open(self.train_list, 'wb') as f: pickle.dump(sample_dicts_list, f) for k, v in self.cls_num_dict.items(): print('class name: %s / class num: %d / mean size: (%f, %f, %f)' % (k, v, self.cls_size_dict[k][0], self.cls_size_dict[k][1], self.cls_size_dict[k][2])) # [l, w, h] if self.img_list in [ 'train', 'val', 'trainval' ] and cfg.TEST.WITH_GT and cfg.TRAIN.AUGMENTATIONS.MIXUP.OPEN: print('**** Generating groundtruth database ****') for cur_cls_name, mixup_sample_dict in mixup_label_dict.items(): cur_mixup_db_cls_path = self.mixup_db_cls_path[cur_cls_name] cur_mixup_db_trainlist_path = self.mixup_db_trainlist_path[ cur_cls_name] print('**** Class %s ****' % cur_cls_name) with open(cur_mixup_db_trainlist_path, 'w') as f: for tmp_idx, tmp_cur_mixup_sample_dict in tqdm.tqdm( enumerate(mixup_sample_dict)): f.write('%06d.npy\n' % tmp_idx) np.save( os.path.join(cur_mixup_db_cls_path, '%06d.npy' % tmp_idx), tmp_cur_mixup_sample_dict) print('Ending of the preprocess !!!') def generate_mixup_sample(self, sample_dict): """ This function is bound for generating mixup dataset """ all_boxes_3d = sample_dict[maps_dict.KEY_LABEL_BOXES_3D] all_boxes_classes = sample_dict[maps_dict.KEY_LABEL_CLASSES] point_cloud_path = sample_dict[maps_dict.KEY_POINT_CLOUD] # then we first cast all_boxes_3d to kitti format all_boxes_3d = cast_box_3d_to_kitti_format(all_boxes_3d) # load points points = np.fromfile(point_cloud_path, dtype=np.float32).reshape( (-1, 5)) points = cast_points_to_kitti(points) points[:, 3] /= 255 points[:, 4] = 0 # timestamp is zero points_mask = check_inside_points(points, all_boxes_3d) # [pts_num, gt_num] points_masks_num = np.sum(points_masks, axis=0) # [gt_num] valid_box_idx = np.where( points_masks_num >= cfg.DATASET.MIN_POINTS_NUM)[0] if len(valid_box_idx) == 0: return None valid_label_boxes_3d = all_boxes_3d[valid_box_idx] valid_label_classes = all_boxes_classes[valid_box_idx] sample_dicts = [] for index, i in enumerate(valid_box_idx): cur_points_mask = points_mask[:, i] cur_points_idx = np.where(cur_points_mask)[0] cur_inside_points = points[cur_points_idx, :] sample_dict = { # 0 timestamp and /255 reflectance maps_dict.KEY_SAMPLED_GT_POINTS: cur_inside_points, # kitti format points maps_dict.KEY_SAMPLED_GT_LABELS_3D: valid_label_boxes_3d[index], maps_dict.KEY_SAMPLED_GT_CLSES: valid_label_classes[index], } sample_dicts.append(sample_dict) return sample_dicts # Evaluation def set_evaluation_tensor(self, model): # get prediction results, bs = 1 pred_bbox_3d = 
tf.squeeze(model.output[maps_dict.PRED_3D_BBOX][-1], axis=0) pred_cls_score = tf.squeeze(model.output[maps_dict.PRED_3D_SCORE][-1], axis=0) pred_cls_category = tf.squeeze( model.output[maps_dict.PRED_3D_CLS_CATEGORY][-1], axis=0) pred_list = [pred_bbox_3d, pred_cls_score, pred_cls_category] if len(model.output[maps_dict.PRED_3D_ATTRIBUTE]) > 0: pred_attribute = tf.squeeze( model.output[maps_dict.PRED_3D_ATTRIBUTE][-1], axis=0) pred_velocity = tf.squeeze( model.output[maps_dict.PRED_3D_VELOCITY][-1], axis=0) pred_list.extend([pred_attribute, pred_velocity]) return pred_list def evaluate_map(self, sess, feeddict_producer, pred_list, val_size, cls_thresh, log_dir, placeholders=None): submissions = {} submissions['meta'] = dict() submissions['meta']['use_camera'] = False submissions['meta']['use_lidar'] = True submissions['meta']['use_radar'] = False submissions['meta']['use_map'] = False submissions['meta']['use_external'] = False submissions_results = dict() pred_attr_velo = (len(pred_list) == 5) for i in tqdm.tqdm(range(val_size)): feed_dict = feeddict_producer.create_feed_dict() if pred_attr_velo: pred_bbox_3d_op, pred_cls_score_op, pred_cls_category_op, pred_attr_op, pred_velo_op = sess.run( pred_list, feed_dict=feed_dict) else: pred_bbox_3d_op, pred_cls_score_op, pred_cls_category_op = sess.run( pred_list, feed_dict=feed_dict) pred_cls_category_op += 1 # label from 1 to n sample_name, cur_transformation_matrix, sweeps = feeddict_producer.info sample_name = sample_name[0] cur_transformation_matrix = cur_transformation_matrix[0] sweeps = sweeps[0] cur_scene_key, cur_sample_token, cur_sample_data_token = sample_name.split( '/') select_idx = np.where(pred_cls_score_op >= cls_thresh)[0] pred_cls_score_op = pred_cls_score_op[select_idx] pred_cls_category_op = pred_cls_category_op[select_idx] pred_bbox_3d_op = pred_bbox_3d_op[select_idx] if pred_attr_velo: pred_attr_op = pred_attr_op[select_idx] pred_velo_op = pred_velo_op[select_idx] else: pred_attr_op, pred_velo_op = None, None if len(pred_bbox_3d_op) > 500: arg_sort_idx = np.argsort(pred_cls_score_op)[::-1] arg_sort_idx = arg_sort_idx[:500] pred_cls_score_op = pred_cls_score_op[arg_sort_idx] pred_cls_category_op = pred_cls_category_op[arg_sort_idx] pred_bbox_3d_op = pred_bbox_3d_op[arg_sort_idx] if pred_attr_velo: pred_attr_op = pred_attr_op[arg_sort_idx] pred_velo_op = pred_velo_op[arg_sort_idx] # then transform pred_bbox_op to nuscenes_box boxes = cast_kitti_format_to_nusc_box_3d( pred_bbox_3d_op, pred_cls_score_op, pred_cls_category_op, cur_attribute=pred_attr_op, cur_velocity=pred_velo_op, classes=self.idx2cls_dict) for box in boxes: velocity = box.velocity[:2].tolist() if len(sweeps) == 0: velocity = (np.nan, np.nan) box.velocity = np.array([*velocity, 0.0]) # then cast the box from ego to global boxes = _lidar_nusc_box_to_global(cur_transformation_matrix, boxes, self.idx2cls_dict, eval_version='cvpr_2019') annos = [] for box in boxes: name = self.idx2cls_dict[box.label] if box.name == -1: attr = self.DefaultAttribute[name] else: attr = self.AttributeIdxLabelMapping[name][box.name] velocity = box.velocity[:2].tolist() nusc_anno = { "sample_token": cur_sample_token, "translation": box.center.tolist(), "size": box.wlh.tolist(), "rotation": box.orientation.elements.tolist(), "velocity": velocity, "detection_name": name, "detection_score": box.score, "attribute_name": attr, } annos.append(nusc_anno) submissions_results[info['sample_token']] = annos submissions['results'] = submissions_results res_path = os.path.join(log_dir, 
"results_nusc_1.json") with open(res_path, "w") as f: json.dump(submissions, f) eval_main_file = os.path.join(cfg.ROOT_DIR, 'lib/core/nusc_eval.py') root_path = self.dataset_dir cmd = f"python3 {str(eval_main_file)} --root_path=\"{str(root_path)}\"" cmd += f" --version={'v1.0-trainval'} --eval_version={'cvpr_2019'}" cmd += f" --res_path=\"{str(res_path)}\" --eval_set={'val'}" cmd += f" --output_dir=\"{LOG_FOUT_DIR}\"" # use subprocess can release all nusc memory after evaluation subprocess.check_output(cmd, shell=True) os.system('rm \"%s\"' % res_path) # remove former result file with open(os.path.join(log_dir, "metrics_summary.json"), "r") as f: metrics = json.load(f) return metrics def evaluate_recall(self, sess, feeddict_producer, pred_list, val_size, cls_thresh, log_dir, placeholders=None): pass def logger_and_select_best_map(self, metrics, log_string): detail = {} result = f"Nusc v1.0-trainval Evaluation\n" final_score = [] for name in self.cls_list: detail[name] = {} for k, v in metrics["label_aps"][name].items(): detail[name][f"dist@{k}"] = v tp_errs = [] tp_names = [] for k, v in metrics["label_tp_errors"][name].items(): detail[name][k] = v tp_errs.append(f"{v:.4f}") tp_names.append(k) threshs = ', '.join(list(metrics["label_aps"][name].keys())) scores = list(metrics["label_aps"][name].values()) final_score.append(np.mean(scores)) scores = ', '.join([f"{s * 100:.2f}" for s in scores]) result += f"{name} Nusc dist AP@{threshs} and TP errors\n" result += scores result += "\n" result += "mAP: %0.2f\n" % ( np.mean(list(metrics["label_aps"][name].values())) * 100) result += ', '.join(tp_names) + ": " + ', '.join(tp_errs) result += "\n" result += 'NDS score: %0.2f\n' % (metrics['nd_score'] * 100) log_string(result) cur_result = metrics['nd_score'] return cur_result def logger_and_select_best_recall(self, metrics, log_string): pass # save prediction results def save_predictions(self, sess, feeddict_producer, pred_list, val_size, cls_thresh, log_dir, placeholders=None): pass
class LogConverter: __radar_reader = scan_readers.RadarScanReader() __lidar_reader = scan_readers.LidarScanReader() __vision_reader = scan_readers.VisionScanReader() __host_data_reader = scan_readers.HostDataReader() def __init__(self, data_set_name, data_set_path, output_path=''): self.__scan_data = {} self.__writers = {} if os.path.exists(data_set_path): self.__nusc = NuScenes(version=data_set_name, dataroot=data_set_path, verbose=True) else: print('Given path: {}, does not exist'.format(data_set_path)) self.__output_path = output_path if not (self.__output_path == ''): if not os.path.exists(self.__output_path): os.makedirs(self.__output_path) def convertAllScenes(self): for scene in self.__nusc.scene: self.__scene_name = scene['name'] self.convertScene(scene) def convertScene(self, scene): first_sample_token = scene['first_sample_token'] sample = self.__nusc.get('sample', first_sample_token) while sample['next']: sample = self.__nusc.get('sample', sample['next']) for sensor in sample['data']: token = sample['data'][sensor] self.__sensor_data = self.__nusc.get('sample_data', token) self.readHostData() self.readSensorSampleData(sensor, token) self.__writeScan() self.__writeScene() def readHostData(self): ego_pose_raw = self.__nusc.get('ego_pose', self.__sensor_data['ego_pose_token']) self.__scan_data['HOST_DATA'] = self.__host_data_reader.readHostData( ego_pose_raw) def readSensorSampleData(self, sensor, token): data_file_path = self.__nusc.dataroot + '/' + self.__sensor_data[ 'filename'] calibrated_sensor_raw = self.__nusc.get( 'calibrated_sensor', self.__sensor_data['calibrated_sensor_token']) ts = self.__sensor_data['timestamp'] if 'RADAR' in sensor: self.__scan_data[sensor] = self.__radar_reader.readScan( data_file_path, ts, calibrated_sensor_raw) elif 'LIDAR' in sensor: self.__scan_data[sensor] = self.__lidar_reader.readScan( data_file_path, ts, calibrated_sensor_raw) elif 'CAM' in sensor: _, boxes, _ = self.__nusc.get_sample_data(token) self.__scan_data[sensor] = self.__vision_reader.readScan( data_file_path, ts, calibrated_sensor_raw, boxes) else: print('Sensor unknown') self.__scan_data = None def __writeScan(self): for sensor_name, sensor_data in self.__scan_data.items(): if not (sensor_name in self.__writers): self.__writers[sensor_name] = self.__createWriter(sensor_name) self.__writers[sensor_name].writeScan(sensor_data) def __createWriter(self, sensor_key): writer = None ext = None if 'RADAR' in sensor_key: writer = writers.ProtobufSceneWriter('radar') ext = '.rad' elif 'LIDAR' in sensor_key: writer = writers.ProtobufSceneWriter('lidar') ext = '.lid' elif 'CAM' in sensor_key: writer = writers.ProtobufSceneWriter('camera') ext = '.cam' elif 'HOST' in sensor_key: writer = writers.ProtobufSceneWriter('host') ext = '.host' else: writer = None if writer: file_name = self.__scene_name + '_' + sensor_key + ext writer.createFile(self.__createFilePath(file_name)) return writer def __createFilePath(self, file_name): if self.__output_path == '': return file_name else: return self.__output_path + '/' + file_name def __writeScene(self): for writer in self.__writers: self.__writers[writer].closeFile() self.__writers = {}
def create_dataset(dataroot, save_dir, width=672, height=672, grid_range=70., nusc_version='v1.0-mini', use_constant_feature=False, use_intensity_feature=True, end_id=None, augmentation_num=0, add_noise=False): """Create a learning dataset from Nuscens Parameters ---------- dataroot : str Nuscenes dataroot path. save_dir : str Dataset save directory. width : int, optional feature map width, by default 672 height : int, optional feature map height, by default 672 grid_range : float, optional feature map range, by default 70. nusc_version : str, optional Nuscenes version. v1.0-mini or v1.0-trainval, by default 'v1.0-mini' use_constant_feature : bool, optional Whether to use constant feature, by default False use_intensity_feature : bool, optional Whether to use intensity feature, by default True end_id : int, optional How many data to generate. If None, all data, by default None augmentation_num : int, optional How many data augmentations for one sample, by default 0 add_noise : bool, optional Whether to add noise to pointcloud, by default True Raises ------ Exception Width and height are not equal """ os.makedirs(os.path.join(save_dir, 'in_feature'), exist_ok=True) os.makedirs(os.path.join(save_dir, 'out_feature'), exist_ok=True) nusc = NuScenes(version=nusc_version, dataroot=dataroot, verbose=True) ref_chan = 'LIDAR_TOP' if width == height: size = width else: raise Exception( 'Currently only supported if width and height are equal') grid_length = 2. * grid_range / size z_trans_range = 0.5 sample_id = 0 data_id = 0 grid_ticks = np.arange(-grid_range, grid_range + grid_length, grid_length) grid_centers \ = (grid_ticks + grid_length / 2)[:len(grid_ticks) - 1] for my_scene in nusc.scene: first_sample_token = my_scene['first_sample_token'] token = first_sample_token # try: while (token != ''): print('sample:{} {} created_data={}'.format( sample_id, token, data_id)) my_sample = nusc.get('sample', token) sd_record = nusc.get('sample_data', my_sample['data'][ref_chan]) sample_rec = nusc.get('sample', sd_record['sample_token']) chan = sd_record['channel'] pc_raw, _ = LidarPointCloud.from_file_multisweep(nusc, sample_rec, chan, ref_chan, nsweeps=1) _, boxes_raw, _ = nusc.get_sample_data(sd_record['token'], box_vis_level=0) z_trans = 0 q = Quaternion() for augmentation_idx in range(augmentation_num + 1): pc = copy.copy(pc_raw) if add_noise: pc.points = add_noise_points(pc.points.T).T boxes = copy.copy(boxes_raw) if augmentation_idx > 0: z_trans = (np.random.rand() - 0.5) * 2 * z_trans_range pc.translate([0, 0, z_trans]) z_rad = np.random.rand() * np.pi * 2 q = Quaternion(axis=[0, 0, 1], radians=z_rad) pc.rotate(q.rotation_matrix) pc_points = pc.points.astype(np.float32) out_feature = np.zeros((size, size, 8), dtype=np.float32) for box_idx, box in enumerate(boxes): if augmentation_idx > 0: box.translate([0, 0, z_trans]) box.rotate(q) label = 0 if box.name.split('.')[0] == 'vehicle': if box.name.split('.')[1] == 'car': label = 1 elif box.name.split('.')[1] == 'bus': label = 2 elif box.name.split('.')[1] == 'truck': label = 2 elif box.name.split('.')[1] == 'construction': label = 2 elif box.name.split('.')[1] == 'emergency': label = 2 elif box.name.split('.')[1] == 'trailer': label = 2 elif box.name.split('.')[1] == 'bicycle': label = 3 elif box.name.split('.')[1] == 'motorcycle': label = 3 elif box.name.split('.')[0] == 'human': label = 4 # elif box.name.split('.')[0] == 'movable_object': # label = 1 # elif box.name.split('.')[0] == 'static_object': # label = 1 else: continue height_pt = 
np.linalg.norm(box.corners().T[0] - box.corners().T[3]) box_corners = box.corners().astype(np.float32) corners2d = box_corners[:2, :] box2d = corners2d.T[[2, 3, 7, 6]] box2d_center = box2d.mean(axis=0) yaw, pitch, roll = box.orientation.yaw_pitch_roll out_feature = generate_out_feature(size, grid_centers, box_corners, box2d, box2d_center, pc_points, height_pt, label, yaw, out_feature) # feature_generator = fg.FeatureGenerator( # grid_range, width, height, # use_constant_feature, use_intensity_feature) feature_generator = fgpb.FeatureGenerator( grid_range, size, size) in_feature = feature_generator.generate( pc_points.T, use_constant_feature, use_intensity_feature) if use_constant_feature and use_intensity_feature: channels = 8 elif use_constant_feature or use_intensity_feature: channels = 6 else: channels = 4 in_feature = np.array(in_feature).reshape( channels, size, size).astype(np.float16) in_feature = in_feature.transpose(1, 2, 0) np.save( os.path.join(save_dir, 'in_feature/{:05}'.format(data_id)), in_feature) np.save( os.path.join(save_dir, 'out_feature/{:05}'.format(data_id)), out_feature) token = my_sample['next'] data_id += 1 if data_id == end_id: return sample_id += 1
def test(estimator=None): if args.Joint: nusc = NuScenes( version='v1.0-trainval', verbose=True, dataroot=args.nuscenes_root, ) dataset_image_folder_format = os.path.join( args.nuscenes_data_root, args.type+'/'+'{}/img1', ) detection_file_name_format = os.path.join( args.nuscenes_data_root, args.type+'/'+'{}/gt/gt.txt', ) if not os.path.exists(args.log_folder): os.mkdir(args.log_folder) save_folder = args.log_folder if not os.path.exists(save_folder): os.mkdir(save_folder) saved_file_name_format = os.path.join(save_folder, 'Video'+'{}.txt') save_video_name_format = os.path.join(save_folder, 'Video'+'{}.avi') test_dataset_index = os.listdir( os.path.join(args.nuscenes_data_root, args.type), ) def f(format_str): return [ format_str.format(index) for index in test_dataset_index ] for image_folder, detection_file_name, saved_file_name, save_video_name in zip(f(dataset_image_folder_format), f(detection_file_name_format), f(saved_file_name_format), f(save_video_name_format)): if path.exists(image_folder) and os.path.getsize(detection_file_name) > 0: tracker = Tracker(estimator) reader = DataReader( image_folder=image_folder, detection_file_name=detection_file_name, ) result = list() first_run = True for i, item in enumerate(reader): if i > len(reader): break if item is None: print('item is none') continue img = item[0] det = item[1] if img is None or det is None or len(det) == 0: continue if len(det) > config['max_object']: det = det[:config['max_object'], :] h, w, _ = img.shape if first_run: vw = cv2.VideoWriter( save_video_name, cv2.VideoWriter_fourcc( 'M', 'J', 'P', 'G', ), 10, (w, h), ) first_run = False features = det[:, 6:12].astype(float) tokens = det[:, 12:] if args.Joint: tokens = tokens[0][1:] frame_token = tokens[0] first_frame_token = tokens[1] current_cam_record = nusc.get('sample_data', frame_token) current_cam_path = nusc.get_sample_data_path(frame_token) current_cam_path, boxes, current_camera_intrinsic = nusc.get_sample_data( current_cam_record['token'], ) current_cs_record = nusc.get( 'calibrated_sensor', current_cam_record['calibrated_sensor_token'], ) current_poserecord = nusc.get( 'ego_pose', current_cam_record['ego_pose_token'], ) first_cam_record = nusc.get( 'sample_data', first_frame_token, ) first_cam_path = nusc.get_sample_data_path( first_frame_token, ) first_cam_path, boxes, first_camera_intrinsic = nusc.get_sample_data( first_cam_record['token'], ) first_cs_record = nusc.get( 'calibrated_sensor', first_cam_record['calibrated_sensor_token'], ) first_poserecord = nusc.get( 'ego_pose', first_cam_record['ego_pose_token'], ) det[:, [2, 4]] /= float(w) det[:, [3, 5]] /= float(h) if args.Joint: image_org = tracker.update( args.Joint, img, det[ :, 2:6 ], args.show_image, i, features, tokens, False, current_cs_record, current_poserecord, first_cs_record, first_poserecord, first_camera_intrinsic, ) else: image_org = tracker.update( args.Joint, img, det[:, 2:6], args.show_image, i, ) vw.write(image_org) # save result for t in tracker.tracks: n = t.nodes[-1] if t.age == 1: b = n.get_box(tracker.frame_index-1, tracker.recorder) # print('n.pose ',n.pose.cpu().data.numpy()) if args.Joint: result.append( [i] + [t.id] + [ b[0]*w, b[1]*h, b[2]*w, b[3] * h, ] + list(n.pose.cpu().data.numpy()), ) else: result.append( [i] + [t.id] + [b[0]*w, b[1]*h, b[2]*w, b[3]*h], ) # save data np.savetxt(saved_file_name, np.array(result), fmt='%10f')
def create_annotations(self, version, max_sweeps): from nuscenes.nuscenes import NuScenes nusc = NuScenes(version=version, dataroot=self.cfg.DATA.ROOTDIR, verbose=True) from nuscenes.utils import splits available_vers = ["v1.0-trainval", "v1.0-test", "v1.0-mini"] assert version in available_vers if version == "v1.0-trainval": train_scenes = splits.train val_scenes = splits.val elif version == "v1.0-test": train_scenes = splits.test val_scenes = [] elif version == "v1.0-mini": train_scenes = splits.mini_train val_scenes = splits.mini_val else: raise ValueError("unknown") root_path = Path(self.cfg.DATA.ROOTDIR) available_scenes = self._get_available_scenes(nusc) available_scene_names = [s["name"] for s in available_scenes] train_scenes = list(filter(lambda x: x in available_scene_names, train_scenes)) val_scenes = list(filter(lambda x: x in available_scene_names, val_scenes)) train_scenes = set([ available_scenes[available_scene_names.index(s)]["token"] for s in train_scenes ]) val_scenes = set([ available_scenes[available_scene_names.index(s)]["token"] for s in val_scenes ]) print( f"train scene: {len(train_scenes)}, val scene: {len(val_scenes)}") self.train_infos = dict() self.val_infos = dict() train_index = 0 val_index = 0 for sample in tqdm(nusc.sample, desc="Generating train infos..."): lidar_token = sample["data"]["LIDAR_TOP"] sd_rec = nusc.get('sample_data', lidar_token) cs_record = nusc.get('calibrated_sensor', sd_rec['calibrated_sensor_token']) pose_record = nusc.get('ego_pose', sd_rec['ego_pose_token']) lidar_path, boxes, _ = nusc.get_sample_data(lidar_token) assert Path(lidar_path).exists(), ("some lidar file miss...+_+") item = { "lidar_path": lidar_path, "token": sample["token"], "sweeps": [], "calib": cs_record, "ego_pose": pose_record, "timestamp": sample["timestamp"] } l2e_t = cs_record['translation'] l2e_r = cs_record['rotation'] e2g_t = pose_record['translation'] e2g_r = pose_record['rotation'] l2e_r_mat = Quaternion(l2e_r).rotation_matrix e2g_r_mat = Quaternion(e2g_r).rotation_matrix sweeps = [] while len(sweeps) < max_sweeps: if not sd_rec['prev'] == "": sd_rec = nusc.get('sample_data', sd_rec['prev']) cs_record = nusc.get('calibrated_sensor', sd_rec['calibrated_sensor_token']) pose_record = nusc.get('ego_pose', sd_rec['ego_pose_token']) lidar_path = nusc.get_sample_data_path(sd_rec['token']) sweep = { "lidar_path": lidar_path, "sample_data_token": sd_rec['token'], "timestamp": sd_rec["timestamp"] } l2e_r_s = cs_record["rotation"] l2e_t_s = cs_record["translation"] e2g_r_s = pose_record["rotation"] e2g_t_s = pose_record["translation"] ## sweep->ego->global->ego'->lidar l2e_r_s_mat = Quaternion(l2e_r_s).rotation_matrix e2g_r_s_mat = Quaternion(e2g_r_s).rotation_matrix R = (l2e_r_s_mat.T @ e2g_r_s_mat.T) @ ( np.linalg.inv(e2g_r_mat).T @ np.linalg.inv(l2e_r_mat).T ) T = (l2e_t_s @ e2g_r_s_mat.T + e2g_t_s) @ ( np.linalg.inv(e2g_r_mat).T @ np.linalg.inv(l2e_r_mat).T ) T -= e2g_t @ (np.linalg.inv(e2g_r_mat).T @ np.linalg.inv( l2e_r_mat).T) + l2e_t @ np.linalg.inv(l2e_r_mat).T sweep["sweep2lidar_rotation"] = R.T sweep["sweep2lidar_translation"] = T sweeps.append(sweep) else: break item["sweeps"] = sweeps annotations = [ nusc.get('sample_annotation', token) for token in sample['anns'] ] locs = np.array([b.center for b in boxes]).reshape(-1, 3) dims = np.array([b.wlh for b in boxes]).reshape(-1, 3) rots = np.array([b.orientation.yaw_pitch_roll[0] for b in boxes]).reshape(-1, 1) names = [b.name for b in boxes] class_idx = [] for i in range(len(names)): if names[i] in 
NuscenesDataset.NameMapping: names[i] = NuscenesDataset.NameMapping[names[i]] class_idx.append(nuscenes_class_name_to_idx(names[i])) names = np.array(names) class_idx = np.array(class_idx) gt_boxes = np.concatenate([locs, dims, rots], axis=1) ## In the nuScenes dataset we need to filter out some rare classes mask = np.array([NuscenesDataset.Nuscenes_classes.count(s) > 0 for s in names], dtype=np.bool_) gt_boxes = gt_boxes[mask] names = names[mask] # assert len(gt_boxes) == len( # annotations), f"{len(gt_boxes)}, {len(annotations)}." assert len(gt_boxes) == len(names), f"{len(gt_boxes)}, {len(names)}" assert len(gt_boxes) == len(class_idx), f"{len(gt_boxes)}, {len(class_idx)}" item["boxes"] = gt_boxes item["class_idx"] = class_idx item["names"] = names item["num_lidar_pts"] = np.array( [a["num_lidar_pts"] for a in annotations] ) item["num_radar_pts"] = np.array( [a["num_radar_pts"] for a in annotations] ) if sample["scene_token"] in train_scenes: self.train_infos[train_index] = item train_index += 1 else: self.val_infos[val_index] = item val_index += 1
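# --- Equivalent formulation (a sketch for clarity; uses the devkit's
# transform_matrix helper rather than the hand-expanded algebra above): the
# R/T pair computed per sweep composes sweep->ego->global->ego'->lidar. The
# same 4x4 transform can be built from homogeneous matrices directly, which
# is easier to sanity-check against the closed-form version.
import numpy as np
from pyquaternion import Quaternion
from nuscenes.utils.geometry_utils import transform_matrix

def sweep_to_key_lidar(l2e_r_s, l2e_t_s, e2g_r_s, e2g_t_s,
                       l2e_r, l2e_t, e2g_r, e2g_t):
    # sweep lidar -> ego -> global ...
    sweep_to_global = (transform_matrix(e2g_t_s, Quaternion(e2g_r_s))
                       @ transform_matrix(l2e_t_s, Quaternion(l2e_r_s)))
    # ... then global -> keyframe ego -> keyframe lidar.
    global_to_key = np.linalg.inv(
        transform_matrix(e2g_t, Quaternion(e2g_r))
        @ transform_matrix(l2e_t, Quaternion(l2e_r)))
    return global_to_key @ sweep_to_global  # 4x4 homogeneous transform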
def create_dataset(dataroot, save_dir, pretrained_model, width=672, height=672, grid_range=70., nusc_version='v1.0-mini', use_constant_feature=True, use_intensity_feature=True, end_id=None): device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') bcnn_model = BCNN(in_channels=6, n_class=5).to(device) # bcnn_model = torch.nn.DataParallel(bcnn_model) # multi gpu if os.path.exists(pretrained_model): print('Use pretrained model') bcnn_model.load_state_dict( fix_model_state_dict(torch.load(pretrained_model))) bcnn_model.eval() else: return os.makedirs(os.path.join(save_dir, 'in_feature'), exist_ok=True) os.makedirs(os.path.join(save_dir, 'out_feature'), exist_ok=True) os.makedirs(os.path.join(save_dir, 'inference_feature'), exist_ok=True) nusc = NuScenes(version=nusc_version, dataroot=dataroot, verbose=True) ref_chan = 'LIDAR_TOP' if width == height: size = width else: raise Exception( 'Currently only supported if width and height are equal') grid_length = 2. * grid_range / size label_half_length = 0 data_id = 0 grid_ticks = np.arange(-grid_range, grid_range + grid_length, grid_length) grid_centers \ = (grid_ticks + grid_length / 2)[:len(grid_ticks) - 1] for my_scene in nusc.scene: first_sample_token = my_scene['first_sample_token'] token = first_sample_token # try: while (token != ''): print('--- {} '.format(data_id) + token + ' ---') my_sample = nusc.get('sample', token) sd_record = nusc.get('sample_data', my_sample['data'][ref_chan]) sample_rec = nusc.get('sample', sd_record['sample_token']) chan = sd_record['channel'] pc, times = LidarPointCloud.from_file_multisweep(nusc, sample_rec, chan, ref_chan, nsweeps=1) _, boxes, _ = nusc.get_sample_data(sd_record['token'], box_vis_level=0) out_feature = np.zeros((size, size, 8), dtype=np.float32) for box_idx, box in enumerate(boxes): label = 0 if box.name.split('.')[0] == 'vehicle': if box.name.split('.')[1] == 'car': label = 1 elif box.name.split('.')[1] == 'bus': label = 1 elif box.name.split('.')[1] == 'truck': label = 1 elif box.name.split('.')[1] == 'construction': label = 1 elif box.name.split('.')[1] == 'emergency': label = 1 elif box.name.split('.')[1] == 'trailer': label = 1 elif box.name.split('.')[1] == 'bicycle': label = 2 elif box.name.split('.')[1] == 'motorcycle': label = 2 elif box.name.split('.')[0] == 'human': label = 3 # elif box.name.split('.')[0] == 'movable_object': # label = 1 # elif box.name.split('.')[0] == 'static_object': # label = 1 else: continue height_pt = np.linalg.norm(box.corners().T[0] - box.corners().T[3]) corners2d = box.corners()[:2, :] box2d = corners2d.T[[2, 3, 7, 6]] box2d_center = box2d.mean(axis=0) yaw, pitch, roll = box.orientation.yaw_pitch_roll generate_out_feature(width, height, size, grid_centers, box2d, box2d_center, height_pt, label, label_half_length, yaw, out_feature) # import pdb; pdb.set_trace() out_feature = out_feature.astype(np.float16) in_feature_generator = Feature_generator(grid_range, width, height, use_constant_feature, use_intensity_feature) in_feature_generator.generate(pc.points.T) in_feature = in_feature_generator.feature if use_constant_feature and use_intensity_feature: in_feature = in_feature.reshape(size, size, 8) elif use_constant_feature or use_intensity_feature: in_feature = in_feature.reshape(size, size, 6) else: in_feature = in_feature.reshape(size, size, 4) # instance_pt is flipped due to flip # out_feature = np.flip(np.flip(out_feature, axis=0), axis=1) # out_feature[:, :, 1:3] *= -1 np.save(os.path.join(save_dir, 'in_feature/{:05}'.format(data_id)), 
in_feature) np.save( os.path.join(save_dir, 'out_feature/{:05}'.format(data_id)), out_feature) # inference feature # in_feature = torch.from_numpy(in_feature) in_feature = in_feature.astype(np.float32) transform = transforms.Compose([transforms.ToTensor()]) in_feature = transform(in_feature) in_feature = in_feature.to(device) in_feature = torch.unsqueeze(in_feature, 0) print(in_feature.size()) inference_feature = bcnn_model(in_feature) np.save( os.path.join(save_dir, 'inference_feature/{:05}'.format(data_id)), inference_feature.cpu().detach().numpy()) token = my_sample['next'] data_id += 1 if data_id == end_id: return
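# --- Shape sanity check (illustrative only): torchvision's ToTensor turns
# the (size, size, channels) numpy feature into a (channels, size, size)
# tensor without rescaling float inputs, and unsqueeze adds the batch
# dimension that bcnn_model expects.
import numpy as np
import torch
from torchvision import transforms

size, channels = 672, 6
dummy = np.zeros((size, size, channels), dtype=np.float32)
tensor = transforms.Compose([transforms.ToTensor()])(dummy)
tensor = torch.unsqueeze(tensor, 0)
assert tensor.shape == (1, channels, size, size)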
def main(): SCENE_SPLITS['mini-val'] = SCENE_SPLITS['val'] if not os.path.exists(OUT_PATH): os.mkdir(OUT_PATH) for split in SPLITS: data_path = DATA_PATH + '{}/'.format(SPLITS[split]) nusc = NuScenes( version=SPLITS[split], dataroot=data_path, verbose=True) out_path = OUT_PATH + '{}.json'.format(split) categories_info = [{'name': CATS[i], 'id': i + 1} for i in range(len(CATS))] ret = {'images': [], 'annotations': [], 'categories': categories_info, 'videos': [], 'attributes': ATTRIBUTE_TO_ID} num_images = 0 num_anns = 0 num_videos = 0 # A "sample" in nuScenes refers to a timestamp with 6 cameras and 1 LIDAR. for sample in nusc.sample: scene_name = nusc.get('scene', sample['scene_token'])['name'] if not (split in ['mini', 'test']) and \ not (scene_name in SCENE_SPLITS[split]): continue if sample['prev'] == '': print('scene_name', scene_name) num_videos += 1 ret['videos'].append({'id': num_videos, 'file_name': scene_name}) frame_ids = {k: 0 for k in sample['data']} track_ids = {} # We decompose a sample into 6 images in our case. for sensor_name in sample['data']: if sensor_name in USED_SENSOR: image_token = sample['data'][sensor_name] image_data = nusc.get('sample_data', image_token) num_images += 1 # Complex coordinate transform. This will take time to understand. sd_record = nusc.get('sample_data', image_token) cs_record = nusc.get( 'calibrated_sensor', sd_record['calibrated_sensor_token']) pose_record = nusc.get('ego_pose', sd_record['ego_pose_token']) global_from_car = transform_matrix(pose_record['translation'], Quaternion(pose_record['rotation']), inverse=False) car_from_sensor = transform_matrix( cs_record['translation'], Quaternion(cs_record['rotation']), inverse=False) trans_matrix = np.dot(global_from_car, car_from_sensor) _, boxes, camera_intrinsic = nusc.get_sample_data( image_token, box_vis_level=BoxVisibility.ANY) calib = np.eye(4, dtype=np.float32) calib[:3, :3] = camera_intrinsic calib = calib[:3] frame_ids[sensor_name] += 1 # image information in COCO format image_info = {'id': num_images, 'file_name': image_data['filename'], 'calib': calib.tolist(), 'video_id': num_videos, 'frame_id': frame_ids[sensor_name], 'sensor_id': SENSOR_ID[sensor_name], 'sample_token': sample['token'], 'trans_matrix': trans_matrix.tolist(), 'width': sd_record['width'], 'height': sd_record['height'], 'pose_record_trans': pose_record['translation'], 'pose_record_rot': pose_record['rotation'], 'cs_record_trans': cs_record['translation'], 'cs_record_rot': cs_record['rotation']} ret['images'].append(image_info) anns = [] for box in boxes: det_name = category_to_detection_name(box.name) if det_name is None: continue num_anns += 1 v = np.dot(box.rotation_matrix, np.array([1, 0, 0])) yaw = -np.arctan2(v[2], v[0]) box.translate(np.array([0, box.wlh[2] / 2, 0])) category_id = CAT_IDS[det_name] amodel_center = project_to_image( np.array([box.center[0], box.center[1] - box.wlh[2] / 2, box.center[2]], np.float32).reshape(1, 3), calib)[0].tolist() sample_ann = nusc.get( 'sample_annotation', box.token) instance_token = sample_ann['instance_token'] if not (instance_token in track_ids): track_ids[instance_token] = len(track_ids) + 1 attribute_tokens = sample_ann['attribute_tokens'] attributes = [nusc.get('attribute', att_token)['name'] \ for att_token in attribute_tokens] att = '' if len(attributes) == 0 else attributes[0] if len(attributes) > 1: print(attributes) import pdb; pdb.set_trace() track_id = track_ids[instance_token] vel = nusc.box_velocity(box.token) # global frame vel = 
np.dot(np.linalg.inv(trans_matrix), np.array([vel[0], vel[1], vel[2], 0], np.float32)).tolist() # instance information in COCO format ann = { 'id': num_anns, 'image_id': num_images, 'category_id': category_id, 'dim': [box.wlh[2], box.wlh[0], box.wlh[1]], 'location': [box.center[0], box.center[1], box.center[2]], 'depth': box.center[2], 'occluded': 0, 'truncated': 0, 'rotation_y': yaw, 'amodel_center': amodel_center, 'iscrowd': 0, 'track_id': track_id, 'attributes': ATTRIBUTE_TO_ID[att], 'velocity': vel } bbox = KittiDB.project_kitti_box_to_image( copy.deepcopy(box), camera_intrinsic, imsize=(1600, 900)) alpha = _rot_y2alpha(yaw, (bbox[0] + bbox[2]) / 2, camera_intrinsic[0, 2], camera_intrinsic[0, 0]) ann['bbox'] = [bbox[0], bbox[1], bbox[2] - bbox[0], bbox[3] - bbox[1]] ann['area'] = (bbox[2] - bbox[0]) * (bbox[3] - bbox[1]) ann['alpha'] = alpha anns.append(ann) # Filter out bounding boxes outside the image visable_anns = [] for i in range(len(anns)): vis = True for j in range(len(anns)): if anns[i]['depth'] - min(anns[i]['dim']) / 2 > \ anns[j]['depth'] + max(anns[j]['dim']) / 2 and \ _bbox_inside(anns[i]['bbox'], anns[j]['bbox']): vis = False break if vis: visable_anns.append(anns[i]) else: pass for ann in visable_anns: ret['annotations'].append(ann) if DEBUG: img_path = data_path + image_info['file_name'] img = cv2.imread(img_path) img_3d = img.copy() for ann in visable_anns: bbox = ann['bbox'] cv2.rectangle(img, (int(bbox[0]), int(bbox[1])), (int(bbox[2] + bbox[0]), int(bbox[3] + bbox[1])), (0, 0, 255), 3, lineType=cv2.LINE_AA) box_3d = compute_box_3d(ann['dim'], ann['location'], ann['rotation_y']) box_2d = project_to_image(box_3d, calib) img_3d = draw_box_3d(img_3d, box_2d) pt_3d = unproject_2d_to_3d(ann['amodel_center'], ann['depth'], calib) pt_3d[1] += ann['dim'][0] / 2 print('location', ann['location']) print('loc model', pt_3d) pt_2d = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2], dtype=np.float32) pt_3d = unproject_2d_to_3d(pt_2d, ann['depth'], calib) pt_3d[1] += ann['dim'][0] / 2 print('loc ', pt_3d) cv2.imshow('img', img) cv2.imshow('img_3d', img_3d) cv2.waitKey() nusc.render_sample_data(image_token) plt.show() print('reordering images') images = ret['images'] video_sensor_to_images = {} for image_info in images: tmp_seq_id = image_info['video_id'] * 20 + image_info['sensor_id'] if tmp_seq_id in video_sensor_to_images: video_sensor_to_images[tmp_seq_id].append(image_info) else: video_sensor_to_images[tmp_seq_id] = [image_info] ret['images'] = [] for tmp_seq_id in sorted(video_sensor_to_images): ret['images'] = ret['images'] + video_sensor_to_images[tmp_seq_id] print('{} {} images {} boxes'.format( split, len(ret['images']), len(ret['annotations']))) print('out_path', out_path) json.dump(ret, open(out_path, 'w'))
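# --- Reference formula (standard KITTI convention; stated here because the
# conversion above relies on _rot_y2alpha, whose body is defined elsewhere):
# the observation angle alpha is the global yaw rotation_y minus the viewing
# ray angle of the box center, wrapped back into [-pi, pi).
import numpy as np

def rot_y_to_alpha(rot_y, x, cx, fx):
    # x: horizontal image coordinate of the box center; cx, fx: intrinsics.
    alpha = rot_y - np.arctan2(x - cx, fx)
    return (alpha + np.pi) % (2 * np.pi) - np.pi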
class BaseNuScenesDataset(DatasetTemplate): NameMapping = { 'movable_object.barrier': 'Barrier', 'vehicle.bicycle': 'Bicycle', 'vehicle.bus.bendy': 'Bus', 'vehicle.bus.rigid': 'Bus', 'vehicle.car': 'Car', 'vehicle.construction': 'Construction_vehicle', 'vehicle.motorcycle': 'Motorcycle', 'human.pedestrian.adult': 'Pedestrian', 'human.pedestrian.child': 'Pedestrian', 'human.pedestrian.construction_worker': 'Pedestrian', 'human.pedestrian.police_officer': 'Pedestrian', 'movable_object.trafficcone': 'Traffic_cone', 'vehicle.trailer': 'Trailer', 'vehicle.truck': 'Truck' } DefaultAttribute = { "car": "vehicle.parked", "pedestrian": "pedestrian.moving", "trailer": "vehicle.parked", "truck": "vehicle.parked", "bus": "vehicle.parked", "motorcycle": "cycle.without_rider", "construction_vehicle": "vehicle.parked", "bicycle": "cycle.without_rider", "barrier": "", "traffic_cone": "", } def __init__(self, root_path, split='train', init_nusc=True): super().__init__() self.root_path = root_path self.split = split if (init_nusc): self.nusc = NuScenes(version='v1.0-trainval', dataroot=root_path, verbose=True) splits = create_splits_scenes() split_scenes = splits[split] all_scene_names = [scene['name'] for scene in self.nusc.scene] split_scene_tokens = [ self.nusc.scene[all_scene_names.index(scene_name)]['token'] for scene_name in split_scenes ] self.sample_id_list = self.get_sample_tokens_from_scenes( split_scene_tokens) def get_sample_tokens_from_scenes(self, scene_tokens): """ :param scene_tokens: List of scene tokens :ret: List of sample tokens """ sample_tokens = [] for token in scene_tokens: sample_tokens.append( self.nusc.get('scene', token)['first_sample_token']) sample = self.nusc.get('sample', sample_tokens[-1]) while (sample['next'] != ''): sample_tokens.append(sample['next']) sample = self.nusc.get('sample', sample_tokens[-1]) return sample_tokens def set_split(self, split): self.__init__(self.root_path, split, False) def get_lidar(self, sample_token): lidar_token = self.nusc.get('sample', sample_token)['data']['LIDAR_TOP'] lidar_file = os.path.join( self.root_path, self.nusc.get('sample_data', lidar_token)['filename']) assert os.path.exists(lidar_file) points = np.fromfile(lidar_file, dtype=np.float32).reshape(-1, 4) points[:, 3] /= 255 #points[:, 4] = 0 return points def get_image_shape(self, sample_token): img_token = self.nusc.get('sample', sample_token)['data']['CAM_FRONT'] img_file = os.path.join( self.root_path, self.nusc.get('sample_data', img_token)['filename']) assert os.path.exists(img_file) return np.array(io.imread(img_file).shape[:2], dtype=np.int32) def get_label(self, sample_token, sensor='LIDAR_TOP'): sensor_token = self.nusc.get('sample', sample_token)['data'][sensor] data_path, boxes, camera_intrinsic = self.nusc.get_sample_data( sensor_token) return boxes def get_calib(self, sample_token): cam_token = self.nusc.get('sample', sample_token)['data']['CAM_FRONT'] lidar_token = self.nusc.get('sample', sample_token)['data']['LIDAR_TOP'] cam_calibrated_token = self.nusc.get( 'sample_data', cam_token)['calibrated_sensor_token'] lidar_calibrated_token = self.nusc.get( 'sample_data', lidar_token)['calibrated_sensor_token'] ego_pose_token = self.nusc.get('sample_data', cam_token)['ego_pose_token'] cam_calibrated = self.nusc.get('calibrated_sensor', cam_calibrated_token) lidar_calibrated = self.nusc.get('calibrated_sensor', lidar_calibrated_token) ego_pose = self.nusc.get('ego_pose', ego_pose_token) return nuscenes_calibration.Calibration(ego_pose, cam_calibrated, lidar_calibrated) def 
get_road_plane(self, sample_token): """ plane_file = os.path.join(self.root_path, 'planes', '%s.txt' % idx) with open(plane_file, 'r') as f: lines = f.readlines() lines = [float(i) for i in lines[3].split()] plane = np.asarray(lines) # Ensure normal is always facing up, this is in the rectified camera coordinate if plane[1] > 0: plane = -plane norm = np.linalg.norm(plane[0:3]) plane = plane / norm return plane """ # Currently unsupported in NuScenes raise NotImplementedError def get_annotation_from_label(self, calib, sample_token): box_list = self.get_label(sample_token, sensor='LIDAR_TOP') if (len(box_list) == 0): annotations = {} annotations['name'] = annotations['num_points_in_gt'] = annotations['gt_boxes_lidar'] = \ annotations['token'] = annotations['location'] = annotations['rotation_y'] = \ annotations['dimensions'] = annotations['score'] = annotations['difficulty'] = \ annotations['truncated'] = annotations['occluded'] = annotations['alpha'] = annotations['bbox'] = np.array([]) return None annotations = {} gt_names = np.array([ self.NameMapping[box.name] if box.name in self.NameMapping else 'DontCare' for box in box_list ]) num_points_in_gt = np.array([ self.nusc.get('sample_annotation', box.token)['num_lidar_pts'] for box in box_list ]) loc_lidar = np.array([box.center for box in box_list]) dims = np.array([box.wlh for box in box_list]) #loc_lidar[:,2] -= dims[:,2] / 2 # Translate true center to bottom middle coordinate rots = np.array( [box.orientation.yaw_pitch_roll[0] for box in box_list]) gt_boxes_lidar = np.concatenate( [loc_lidar, dims, rots[..., np.newaxis]], axis=1) annotations['name'] = gt_names annotations['num_points_in_gt'] = num_points_in_gt annotations['gt_boxes_lidar'] = gt_boxes_lidar annotations['token'] = np.array([box.token for box in box_list]) # in CAM_FRONT frame. Probably meaningless as most objects aren't in frame. annotations['location'] = calib.lidar_to_rect(loc_lidar) annotations['rotation_y'] = rots annotations['dimensions'] = np.array( [[box.wlh[1], box.wlh[2], box.wlh[0]] for box in box_list]) # lhw format easy_mask = num_points_in_gt > 15 moderate_mask = num_points_in_gt > 7 occluded = np.zeros([num_points_in_gt.shape[0]]) occluded[:] = 2 occluded[moderate_mask] = 1 occluded[easy_mask] = 0 gt_boxes_camera = box_utils.boxes3d_lidar_to_camera( gt_boxes_lidar, calib) assert len(gt_boxes_camera) == len(gt_boxes_lidar) == len(box_list) # Currently unused for NuScenes, and they don't make much sense as we primarily use 360 degree 3d LIDAR boxes.
annotations['score'] = np.array([1 for _ in box_list]) annotations['difficulty'] = np.array([0 for _ in box_list], np.int32) annotations['truncated'] = np.array([0 for _ in box_list]) annotations['occluded'] = occluded annotations['alpha'] = np.array([ -np.arctan2(-gt_boxes_lidar[i][1], gt_boxes_lidar[i][0]) + gt_boxes_camera[i][6] for i in range(len(gt_boxes_camera)) ]) annotations['bbox'] = gt_boxes_camera return annotations @staticmethod def get_fov_flag(pts_rect, img_shape, calib): ''' Valid point should be in the image (and in the PC_AREA_SCOPE) :param pts_rect: :param img_shape: :return: ''' pts_img, pts_rect_depth = calib.rect_to_img(pts_rect) val_flag_1 = np.logical_and(pts_img[:, 0] >= 0, pts_img[:, 0] < img_shape[1]) val_flag_2 = np.logical_and(pts_img[:, 1] >= 0, pts_img[:, 1] < img_shape[0]) val_flag_merge = np.logical_and(val_flag_1, val_flag_2) pts_valid_flag = np.logical_and(val_flag_merge, pts_rect_depth >= 0) return pts_valid_flag def get_infos(self, num_workers=4, has_label=True, count_inside_pts=True, sample_id_list=None): import concurrent.futures as futures def process_single_scene(sample_token): print('%s sample_token: %s ' % (self.split, sample_token)) info = {} pc_info = {'num_features': 4, 'lidar_idx': sample_token} info['point_cloud'] = pc_info image_info = { 'image_idx': sample_token, 'image_shape': self.get_image_shape(sample_token) } info['image'] = image_info calib = self.get_calib(sample_token) calib_info = { 'T_IMG_CAM': calib.t_img_cam, 'T_CAR_CAM': calib.t_car_cam, 'T_CAR_LIDAR': calib.t_car_lidar, 'T_GLOBAL_CAR': calib.t_global_car } info['calib'] = calib_info if has_label: annotations = self.get_annotation_from_label( calib, sample_token) if (annotations == None): return None info['annos'] = annotations return info # temp = process_single_scene(self.sample_id_list[0]) sample_id_list = sample_id_list if sample_id_list is not None else self.sample_id_list with futures.ThreadPoolExecutor(num_workers) as executor: infos = executor.map(process_single_scene, sample_id_list) # Remove samples with no gt boxes infos = [sample for sample in infos if sample] return list(infos) def create_groundtruth_database(self, info_path=None, used_classes=None, split='train'): database_save_path = Path( self.root_path) / ('gt_database' if split == 'train' else ('gt_database_%s' % split)) db_info_save_path = Path( self.root_path) / ('nuscenes_dbinfos_%s.pkl' % split) database_save_path.mkdir(parents=True, exist_ok=True) all_db_infos = {} with open(info_path, 'rb') as f: infos = pickle.load(f) for k in range(len(infos)): print('gt_database sample: %d/%d' % (k + 1, len(infos))) info = infos[k] sample_idx = info['point_cloud']['lidar_idx'] points = self.get_lidar(sample_idx) annos = info['annos'] names = annos['name'] difficulty = annos['difficulty'] bbox = annos['bbox'] gt_boxes = annos['gt_boxes_lidar'] if (len(gt_boxes) == 0): continue num_obj = gt_boxes.shape[0] point_indices = roiaware_pool3d_utils.points_in_boxes_cpu( torch.from_numpy(points[:, 0:3]), torch.from_numpy(gt_boxes)).numpy() # (nboxes, npoints) for i in range(num_obj): filename = '%s_%s_%d.bin' % (sample_idx, names[i], i) filepath = database_save_path / filename gt_points = points[point_indices[i] > 0] gt_points[:, :3] -= gt_boxes[i, :3] with open(filepath, 'w') as f: gt_points.tofile(f) if (used_classes is None) or names[i] in used_classes: db_path = str(filepath.relative_to( self.root_path)) # gt_database/xxxxx.bin db_info = { 'name': names[i], 'path': db_path, 'image_idx': sample_idx, 'gt_idx': i, 
'box3d_lidar': gt_boxes[i], 'num_points_in_gt': gt_points.shape[0], 'difficulty': difficulty[i], 'bbox': bbox[i], 'score': annos['score'][i] } if names[i] in all_db_infos: all_db_infos[names[i]].append(db_info) else: all_db_infos[names[i]] = [db_info] for k, v in all_db_infos.items(): print('Database %s: %d' % (k, len(v))) with open(db_info_save_path, 'wb') as f: pickle.dump(all_db_infos, f) @staticmethod def generate_prediction_dict(input_dict, index, record_dict): # finally generate predictions. sample_idx = input_dict['sample_idx'][ index] if 'sample_idx' in input_dict else -1 boxes3d_lidar_preds = record_dict['boxes'].cpu().numpy() if boxes3d_lidar_preds.shape[0] == 0: return {'sample_idx': sample_idx} calib = input_dict['calib'][index] image_shape = input_dict['image_shape'][index] boxes3d_camera_preds = box_utils.boxes3d_lidar_to_camera( boxes3d_lidar_preds, calib) boxes2d_image_preds = box_utils.boxes3d_camera_to_imageboxes( boxes3d_camera_preds, calib, image_shape=image_shape) # predictions predictions_dict = { 'bbox': boxes2d_image_preds, 'box3d_camera': boxes3d_camera_preds, 'box3d_lidar': boxes3d_lidar_preds, 'scores': record_dict['scores'].cpu().numpy(), 'label_preds': record_dict['labels'].cpu().numpy(), 'sample_idx': sample_idx, } return predictions_dict @staticmethod def generate_annotations(input_dict, pred_dicts, class_names, save_to_file=False, output_dir=None): def get_empty_prediction(): ret_dict = { 'name': np.array([]), 'truncated': np.array([]), 'occluded': np.array([]), 'alpha': np.array([]), 'bbox': np.zeros([0, 4]), 'dimensions': np.zeros([0, 3]), 'location': np.zeros([0, 3]), 'rotation_y': np.array([]), 'score': np.array([]), 'boxes_lidar': np.zeros([0, 7]) } return ret_dict def generate_single_anno(idx, box_dict): num_example = 0 if 'bbox' not in box_dict: return get_empty_prediction(), num_example sample_idx = box_dict['sample_idx'] box_preds_image = box_dict['bbox'] box_preds_camera = box_dict['box3d_camera'] box_preds_lidar = box_dict['box3d_lidar'] scores = box_dict['scores'] label_preds = box_dict['label_preds'] anno = { 'name': [], 'truncated': [], 'occluded': [], 'alpha': [], 'bbox': [], 'dimensions': [], 'location': [], 'rotation_y': [], 'score': [], 'boxes_lidar': [] } for box_camera, box_lidar, bbox, score, label in zip( box_preds_camera, box_preds_lidar, box_preds_image, scores, label_preds): if not (np.all(box_lidar[3:6] > -0.1)): print('Invalid size(sample %s): ' % str(sample_idx), box_lidar) continue anno['name'].append(class_names[int(label - 1)]) anno['truncated'].append(0.0) anno['occluded'].append(0) anno['alpha'].append(-np.arctan2(-box_lidar[1], box_lidar[0]) + box_camera[6]) anno['bbox'].append(bbox) anno['dimensions'].append(box_camera[3:6]) anno['location'].append(box_camera[:3]) anno['rotation_y'].append(box_camera[6]) anno['score'].append(score) anno['boxes_lidar'].append(box_lidar) num_example += 1 if num_example != 0: anno = {k: np.stack(v) for k, v in anno.items()} else: anno = get_empty_prediction() return anno, num_example annos = [] for i, box_dict in enumerate(pred_dicts): sample_idx = box_dict['sample_idx'] single_anno, num_example = generate_single_anno(i, box_dict) single_anno['num_example'] = num_example single_anno['sample_idx'] = np.array([sample_idx] * num_example) annos.append(single_anno) if save_to_file: cur_det_file = os.path.join(output_dir, '%s.json' % (sample_idx)) boxes_lidar = single_anno['boxes_lidar'] # x y z w l h yaw pred_json = {} pred_json['cuboids'] = [] for idx in range(len(boxes_lidar)): 
pred_json['cuboids'].append({ 'label': single_anno['name'][idx], 'position': { 'x': boxes_lidar[idx][0], 'y': boxes_lidar[idx][1], 'z': boxes_lidar[idx][2], }, 'dimension': { 'x': boxes_lidar[idx][3], 'y': boxes_lidar[idx][4], 'z': boxes_lidar[idx][5], }, "yaw": boxes_lidar[idx][6], "score": single_anno['score'][idx] }) with open(cur_det_file, 'w') as f: json.dump(pred_json, f) return annos def evaluation(self, det_annos, class_names, **kwargs): eval_det_annos = copy.deepcopy(det_annos) # Create NuScenes JSON output file nusc_annos = {} for sample in eval_det_annos: try: sample_idx = sample['sample_idx'][0] except: continue sample_results = [] calib = self.get_calib(sample_idx) sample['boxes_lidar'] = np.array(sample['boxes_lidar']) positions = sample['boxes_lidar'][:, :3] dimensions = sample['boxes_lidar'][:, 3:6] rotations = sample['boxes_lidar'][:, 6] for center, dimension, yaw, label, score in zip( positions, dimensions, rotations, sample['name'], sample['score']): quaternion = Quaternion(axis=[0, 0, 1], radians=yaw) box = Box(center, dimension, quaternion) # Move box to ego vehicle coord system box.rotate(Quaternion(calib.lidar_calibrated['rotation'])) box.translate(np.array(calib.lidar_calibrated['translation'])) # Move box to global coord system box.rotate(Quaternion(calib.ego_pose['rotation'])) box.translate(np.array(calib.ego_pose['translation'])) if (float(score) < 0): score = 0 if (float(score) > 1): score = 1 if (label == 'Cyclist'): label = 'bicycle' sample_results.append({ "sample_token": sample_idx, "translation": box.center.tolist(), "size": box.wlh.tolist(), "rotation": box.orientation.elements.tolist(), "lidar_yaw": float(yaw), "velocity": (0, 0), "detection_name": label.lower(), "detection_score": float(score), "attribute_name": self.DefaultAttribute[label.lower()], }) nusc_annos[sample_idx] = sample_results for sample_id in self.sample_id_list: if sample_id not in nusc_annos: nusc_annos[sample_id] = [] nusc_submission = { "meta": { "use_camera": False, "use_lidar": True, "use_radar": False, "use_map": False, "use_external": False, }, "results": nusc_annos, } eval_file = os.path.join(kwargs['output_dir'], 'nusc_results.json') with open(eval_file, "w") as f: json.dump(nusc_submission, f, indent=2) # Call NuScenes evaluation cfg = config_factory('detection_cvpr_2019') nusc_eval = DetectionEval(self.nusc, config=cfg, result_path=eval_file, eval_set=self.split, output_dir=kwargs['output_dir'], verbose=True) metric_summary = nusc_eval.main(plot_examples=10, render_curves=True) # Reformat the metrics summary a bit for the tensorboard logger err_name_mapping = { 'trans_err': 'mATE', 'scale_err': 'mASE', 'orient_err': 'mAOE', 'vel_err': 'mAVE', 'attr_err': 'mAAE' } result = {} result['mean_ap'] = metric_summary['mean_ap'] for tp_name, tp_val in metric_summary['tp_errors'].items(): result[tp_name] = tp_val class_aps = metric_summary['mean_dist_aps'] class_tps = metric_summary['label_tp_errors'] for class_name in class_aps.keys(): result['mAP_' + class_name] = class_aps[class_name] for key, val in err_name_mapping.items(): result[val + '_' + class_name] = class_tps[class_name][key] return str(result), result
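# --- Minimal sketch (assumes the nuScenes devkit Box and pyquaternion
# imports used above): the lidar -> ego -> global chain that `evaluation`
# applies to each predicted box, isolated for a single (center, size, yaw).
import numpy as np
from pyquaternion import Quaternion
from nuscenes.utils.data_classes import Box

def lidar_box_to_global(center, size, yaw, lidar_calibrated, ego_pose):
    box = Box(center, size, Quaternion(axis=[0, 0, 1], radians=yaw))
    box.rotate(Quaternion(lidar_calibrated['rotation']))       # lidar -> ego
    box.translate(np.array(lidar_calibrated['translation']))
    box.rotate(Quaternion(ego_pose['rotation']))               # ego -> global
    box.translate(np.array(ego_pose['translation']))
    return box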
class KittiConverter: def __init__(self, nusc_kitti_dir: str = '/home/developer/nuscenes/nusc_kitti', cam_name: str = 'CAM_FRONT', lidar_name: str = 'LIDAR_TOP', image_count: int = 10, nusc_version: str = 'v1.0-mini', split: str = 'mini_train'): """ :param nusc_kitti_dir: Where to write the KITTI-style annotations. :param cam_name: Name of the camera to export. Note that only one camera is allowed in KITTI. :param lidar_name: Name of the lidar sensor. :param image_count: Number of images to convert. :param nusc_version: nuScenes version to use. :param split: Dataset split to use. """ self.nusc_kitti_dir = os.path.expanduser(nusc_kitti_dir) self.cam_name = cam_name self.lidar_name = lidar_name self.image_count = image_count self.nusc_version = nusc_version self.split = split # Create nusc_kitti_dir. if not os.path.isdir(self.nusc_kitti_dir): os.makedirs(self.nusc_kitti_dir) # Select subset of the data to look at. self.nusc = NuScenes(version=nusc_version, dataroot='/home/developer/nuscenes') def nuscenes_gt_to_kitti(self) -> None: """ Converts nuScenes GT annotations to KITTI format. """ kitti_to_nu_lidar = Quaternion(axis=(0, 0, 1), angle=np.pi / 2) kitti_to_nu_lidar_inv = kitti_to_nu_lidar.inverse imsize = (1600, 900) token_idx = 0 # Start tokens from 0. # Get assignment of scenes to splits. split_logs = create_splits_logs(self.split, self.nusc) # Create output folders. label_folder = os.path.join(self.nusc_kitti_dir, self.split, 'label_2') calib_folder = os.path.join(self.nusc_kitti_dir, self.split, 'calib') image_folder = os.path.join(self.nusc_kitti_dir, self.split, 'image_2') lidar_folder = os.path.join(self.nusc_kitti_dir, self.split, 'velodyne') for folder in [label_folder, calib_folder, image_folder, lidar_folder]: if not os.path.isdir(folder): os.makedirs(folder) # Use only the samples from the current split. sample_tokens = self._split_to_samples(split_logs) sample_tokens = sample_tokens[:self.image_count] tokens = [] for sample_token in sample_tokens: # Get sample data. sample = self.nusc.get('sample', sample_token) sample_annotation_tokens = sample['anns'] cam_front_token = sample['data'][self.cam_name] lidar_token = sample['data'][self.lidar_name] # Retrieve sensor records. sd_record_cam = self.nusc.get('sample_data', cam_front_token) sd_record_lid = self.nusc.get('sample_data', lidar_token) cs_record_cam = self.nusc.get( 'calibrated_sensor', sd_record_cam['calibrated_sensor_token']) cs_record_lid = self.nusc.get( 'calibrated_sensor', sd_record_lid['calibrated_sensor_token']) # Combine transformations and convert to KITTI format. # Note: cam uses same conventions in KITTI and nuScenes. lid_to_ego = transform_matrix(cs_record_lid['translation'], Quaternion( cs_record_lid['rotation']), inverse=False) ego_to_cam = transform_matrix(cs_record_cam['translation'], Quaternion( cs_record_cam['rotation']), inverse=True) velo_to_cam = np.dot(ego_to_cam, lid_to_ego) # Convert from KITTI to nuScenes LIDAR coordinates, where we apply velo_to_cam. velo_to_cam_kitti = np.dot(velo_to_cam, kitti_to_nu_lidar.transformation_matrix) # Currently not used. imu_to_velo_kitti = np.zeros((3, 4)) # Dummy values. r0_rect = Quaternion(axis=[1, 0, 0], angle=0) # Dummy values. # Projection matrix. p_left_kitti = np.zeros((3, 4)) p_left_kitti[:3, :3] = cs_record_cam[ 'camera_intrinsic'] # Cameras are always rectified. # Create KITTI style transforms. velo_to_cam_rot = velo_to_cam_kitti[:3, :3] velo_to_cam_trans = velo_to_cam_kitti[:3, 3] # Check that the rotation has the same format as in KITTI. 
assert (velo_to_cam_rot.round(0) == np.array([[0, -1, 0], [0, 0, -1], [1, 0, 0]])).all() assert (velo_to_cam_trans[1:3] < 0).all() # Retrieve the token from the lidar. # Note that this may be confusing as the filename of the camera will include the timestamp of the lidar, # not the camera. filename_cam_full = sd_record_cam['filename'] filename_lid_full = sd_record_lid['filename'] # token = '%06d' % token_idx # Alternative to use KITTI names. token_idx += 1 # Convert image (jpg to png). src_im_path = os.path.join(self.nusc.dataroot, filename_cam_full) dst_im_path = os.path.join(image_folder, sample_token + '.png') if not os.path.exists(dst_im_path): im = Image.open(src_im_path) im.save(dst_im_path, "PNG") # Convert lidar. # Note that we are only using a single sweep, instead of the commonly used n sweeps. src_lid_path = os.path.join(self.nusc.dataroot, filename_lid_full) dst_lid_path = os.path.join(lidar_folder, sample_token + '.bin') assert not dst_lid_path.endswith('.pcd.bin') pcl = LidarPointCloud.from_file(src_lid_path) pcl.rotate( kitti_to_nu_lidar_inv.rotation_matrix) # In KITTI lidar frame. with open(dst_lid_path, "w") as lid_file: pcl.points.T.tofile(lid_file) # Add to tokens. tokens.append(sample_token) # Create calibration file. kitti_transforms = dict() kitti_transforms['P0'] = np.zeros((3, 4)) # Dummy values. kitti_transforms['P1'] = np.zeros((3, 4)) # Dummy values. kitti_transforms['P2'] = p_left_kitti # Left camera transform. kitti_transforms['P3'] = np.zeros((3, 4)) # Dummy values. kitti_transforms[ 'R0_rect'] = r0_rect.rotation_matrix # Cameras are already rectified. kitti_transforms['Tr_velo_to_cam'] = np.hstack( (velo_to_cam_rot, velo_to_cam_trans.reshape(3, 1))) kitti_transforms['Tr_imu_to_velo'] = imu_to_velo_kitti calib_path = os.path.join(calib_folder, sample_token + '.txt') with open(calib_path, "w") as calib_file: for (key, val) in kitti_transforms.items(): val = val.flatten() val_str = '%.12e' % val[0] for v in val[1:]: val_str += ' %.12e' % v calib_file.write('%s: %s\n' % (key, val_str)) # Write label file. label_path = os.path.join(label_folder, sample_token + '.txt') if os.path.exists(label_path): print('Skipping existing file: %s' % label_path) continue else: print('Writing file: %s' % label_path) with open(label_path, "w") as label_file: for sample_annotation_token in sample_annotation_tokens: sample_annotation = self.nusc.get('sample_annotation', sample_annotation_token) # Get box in LIDAR frame. _, box_lidar_nusc, _ = self.nusc.get_sample_data( lidar_token, box_vis_level=BoxVisibility.NONE, selected_anntokens=[sample_annotation_token]) box_lidar_nusc = box_lidar_nusc[0] # Truncated: Set all objects to 0 which means untruncated. truncated = 0.0 # Occluded: Set all objects to full visibility as this information is not available in nuScenes. occluded = 0 # Convert nuScenes category to nuScenes detection challenge category. detection_name = category_to_detection_name( sample_annotation['category_name']) # Skip categories that are not part of the nuScenes detection challenge. if detection_name is None: continue # Convert from nuScenes to KITTI box format. box_cam_kitti = KittiDB.box_nuscenes_to_kitti( box_lidar_nusc, Quaternion(matrix=velo_to_cam_rot), velo_to_cam_trans, r0_rect) # Project 3d box to 2d box in image, ignore box if it does not fall inside. bbox_2d = KittiDB.project_kitti_box_to_image(box_cam_kitti, p_left_kitti, imsize=imsize) if bbox_2d is None: continue # Set dummy score so we can use this file as result. 
box_cam_kitti.score = 0 # Convert box to output string format. output = KittiDB.box_to_string(name=detection_name, box=box_cam_kitti, bbox_2d=bbox_2d, truncation=truncated, occlusion=occluded) # Write to disk. label_file.write(output + '\n') def render_kitti(self, render_2d: bool) -> None: """ Renders the annotations in the KITTI dataset from a lidar and a camera view. :param render_2d: Whether to render 2d boxes (only works for camera data). """ if render_2d: print('Rendering 2d boxes from KITTI format') else: print('Rendering 3d boxes projected from 3d KITTI format') # Load the KITTI dataset. kitti = KittiDB(root=self.nusc_kitti_dir, splits=(self.split, )) # Create output folder. render_dir = os.path.join(self.nusc_kitti_dir, 'render') if not os.path.isdir(render_dir): os.mkdir(render_dir) # Render each image. for token in kitti.tokens[:self.image_count]: for sensor in ['lidar', 'camera']: out_path = os.path.join(render_dir, '%s_%s.png' % (token, sensor)) print('Rendering file to disk: %s' % out_path) kitti.render_sample_data(token, sensor_modality=sensor, out_path=out_path, render_2d=render_2d) plt.close( ) # Close the windows to avoid a warning of too many open windows. def kitti_res_to_nuscenes(self, meta: Dict[str, bool] = None) -> None: """ Converts a KITTI detection result to the nuScenes detection results format. :param meta: Meta data describing the method used to generate the result. See nuscenes.org/object-detection. """ # Dummy meta data, please adjust accordingly. if meta is None: meta = { 'use_camera': False, 'use_lidar': True, 'use_radar': False, 'use_map': False, 'use_external': False, } # Init. results = {} # Load the KITTI dataset. kitti = KittiDB(root=self.nusc_kitti_dir, splits=(self.split, )) # Get assignment of scenes to splits. split_logs = create_splits_logs(self.split, self.nusc) # Use only the samples from the current split. sample_tokens = self._split_to_samples(split_logs) sample_tokens = sample_tokens[:self.image_count] for sample_token in sample_tokens: # Get the KITTI boxes we just generated in LIDAR frame. kitti_token = '%s_%s' % (self.split, sample_token) boxes = kitti.get_boxes(token=kitti_token) # Convert KITTI boxes to nuScenes detection challenge result format. sample_results = [ self._box_to_sample_result(sample_token, box) for box in boxes ] # Store all results for this image. results[sample_token] = sample_results # Store submission file to disk. submission = {'meta': meta, 'results': results} submission_path = os.path.join(self.nusc_kitti_dir, 'submission.json') print('Writing submission to: %s' % submission_path) with open(submission_path, 'w') as f: json.dump(submission, f, indent=2) def _box_to_sample_result(self, sample_token: str, box: Box, attribute_name: str = '') -> Dict[str, Any]: # Prepare data translation = box.center size = box.wlh rotation = box.orientation.q velocity = box.velocity detection_name = box.name detection_score = box.score # Create result dict sample_result = dict() sample_result['sample_token'] = sample_token sample_result['translation'] = translation.tolist() sample_result['size'] = size.tolist() sample_result['rotation'] = rotation.tolist() sample_result['velocity'] = velocity.tolist()[:2] # Only need vx, vy. sample_result['detection_name'] = detection_name sample_result['detection_score'] = detection_score sample_result['attribute_name'] = attribute_name return sample_result def _split_to_samples(self, split_logs: List[str]) -> List[str]: """ Convenience function to get the samples in a particular split. 
:param split_logs: A list of the log names in this split. :return: The list of samples. """ samples = [] for sample in self.nusc.sample: scene = self.nusc.get('scene', sample['scene_token']) log = self.nusc.get('log', scene['log_token']) logfile = log['logfile'] if logfile in split_logs: samples.append(sample['token']) return samples
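# --- Usage sketch (version, split and image_count are placeholders): run
# the GT conversion, then render the exported frames for a quick visual check.
converter = KittiConverter(nusc_kitti_dir='/home/developer/nuscenes/nusc_kitti',
                           nusc_version='v1.0-mini',
                           split='mini_train',
                           image_count=10)
converter.nuscenes_gt_to_kitti()
converter.render_kitti(render_2d=True)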
def create_dataset(dataroot, save_dir, width=672, height=672, grid_range=70., nusc_version='v1.0-mini', use_constant_feature=True, use_intensity_feature=True, end_id=None): os.makedirs(os.path.join(save_dir, 'in_feature'), exist_ok=True) os.makedirs(os.path.join(save_dir, 'out_feature'), exist_ok=True) nusc = NuScenes(version=nusc_version, dataroot=dataroot, verbose=True) ref_chan = 'LIDAR_TOP' if width == height: size = width else: raise Exception( 'Currently only supported if width and height are equal') grid_length = 2. * grid_range / size label_half_length = 0 data_id = 0 grid_ticks = np.arange(-grid_range, grid_range + grid_length, grid_length) grid_centers \ = (grid_ticks + grid_length / 2)[:len(grid_ticks) - 1] for my_scene in nusc.scene: first_sample_token = my_scene['first_sample_token'] token = first_sample_token # try: while (token != ''): print('--- {} '.format(data_id) + token + ' ---') my_sample = nusc.get('sample', token) sd_record = nusc.get('sample_data', my_sample['data'][ref_chan]) sample_rec = nusc.get('sample', sd_record['sample_token']) chan = sd_record['channel'] pc, times = LidarPointCloud.from_file_multisweep(nusc, sample_rec, chan, ref_chan, nsweeps=1) _, boxes, _ = nusc.get_sample_data(sd_record['token'], box_vis_level=0) out_feature = np.zeros((size, size, 8), dtype=np.float32) start = time.time() for box_idx, box in enumerate(boxes): label = 0 if box.name.split('.')[0] == 'vehicle': if box.name.split('.')[1] == 'car': label = 1 elif box.name.split('.')[1] == 'bus': label = 2 elif box.name.split('.')[1] == 'truck': label = 2 elif box.name.split('.')[1] == 'construction': label = 2 elif box.name.split('.')[1] == 'emergency': label = 2 elif box.name.split('.')[1] == 'trailer': label = 2 elif box.name.split('.')[1] == 'bicycle': label = 3 elif box.name.split('.')[1] == 'motorcycle': label = 3 elif box.name.split('.')[0] == 'human': label = 4 # elif box.name.split('.')[0] == 'movable_object': # label = 1 # elif box.name.split('.')[0] == 'static_object': # label = 1 else: continue height_pt = np.linalg.norm(box.corners().T[0] - box.corners().T[3]) box_corners = box.corners().astype(np.float32) corners2d = box_corners[:2, :] # corners2d = box.corners()[:2, :].astype(np.float32) box2d = corners2d.T[[2, 3, 7, 6]] box2d_center = box2d.mean(axis=0) yaw, pitch, roll = box.orientation.yaw_pitch_roll # print('--') # print(box.name) p1_reshape = box_corners out_feature = generate_out_feature( width, height, size, grid_centers, box_corners, box2d, box2d_center, pc.points.astype(np.float32), height_pt, label, label_half_length, yaw, out_feature) # generate_out_feature(width, height, size, grid_centers, pc.points, # box.corners(), height_pt, # label, label_half_length, yaw, # out_feature) # out_feature = out_feature.astype(np.float32) out_end = time.time() # feature_generator = fg.FeatureGenerator( # grid_range, width, height, # use_constant_feature, use_intensity_feature) feature_generator = fgpb.FeatureGenerator(grid_range, width, height) in_feature = feature_generator.generate(pc.points.T, use_constant_feature, use_intensity_feature) in_end = time.time() print('time total {} out {} in {}'.format(in_end - start, out_end - start, in_end - out_end)) if use_constant_feature and use_intensity_feature: channels = 8 elif use_constant_feature or use_intensity_feature: channels = 6 else: channels = 4 in_feature = np.array(in_feature).reshape(channels, size, size).astype(np.float16) in_feature = in_feature.transpose(1, 2, 0) # instance_pt is flipped due to flip # out_feature = 
np.flip(np.flip(out_feature, axis=0), axis=1) # out_feature[:, :, 1:3] *= -1 np.save(os.path.join(save_dir, 'in_feature/{:05}'.format(data_id)), in_feature) np.save( os.path.join(save_dir, 'out_feature/{:05}'.format(data_id)), out_feature) token = my_sample['next'] data_id += 1 if data_id == end_id: return
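# --- Usage sketch (dataroot and save_dir are placeholders): generate
# feature pairs for the mini split, stopping after 100 samples.
create_dataset(dataroot='/data/sets/nuscenes',
               save_dir='/data/sets/nuscenes_features',
               nusc_version='v1.0-mini',
               use_constant_feature=False,
               use_intensity_feature=True,
               end_id=100)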
class nuScenesDataset(Dataset): def __init__(self, cfg, cam_name: str = 'CAM_FRONT', lidar_name: str = 'LIDAR_TOP', nusc_version: str = 'v1.0-trainval', is_train=True, transforms=None): super(nuScenesDataset, self).__init__() self.split = cfg.DATASETS.TRAIN_SPLIT if is_train else cfg.DATASETS.TEST_SPLIT if self.split == "train_detect": self.image_count = 14059 elif self.split == "val": self.image_count = 6019 self.cam_name = cam_name self.lidar_name = lidar_name self.nusc_version = nusc_version # Select subset of the data to look at. self.nusc = NuScenes(version=nusc_version) self.is_train = is_train self.transforms = transforms self.classes = cfg.DATASETS.DETECT_CLASSES self.flip_prob = cfg.INPUT.FLIP_PROB_TRAIN if is_train else 0 self.aug_prob = cfg.INPUT.SHIFT_SCALE_PROB_TRAIN if is_train else 0 self.shift_scale = cfg.INPUT.SHIFT_SCALE_TRAIN self.num_classes = len(self.classes) self.input_width = cfg.INPUT.WIDTH_TRAIN self.input_height = cfg.INPUT.HEIGHT_TRAIN self.output_width = self.input_width // cfg.MODEL.BACKBONE.DOWN_RATIO self.output_height = self.input_height // cfg.MODEL.BACKBONE.DOWN_RATIO self.max_objs = cfg.DATASETS.MAX_OBJECTS self.logger = logging.getLogger(__name__) self.logger.info( "Initializing nuScenes {} set with {} files loaded".format( self.split, self.image_count)) # Get assignment of scenes to splits. self.split_logs = create_splits_logs(self.split, self.nusc) self.sample_tokens = self._split_to_samples(self.split_logs) def __len__(self): return self.image_count def __getitem__(self, idx): sample_token = self.sample_tokens[idx] sample = self.nusc.get('sample', sample_token) cam_front_token = sample['data'][self.cam_name] sd_record_cam = self.nusc.get('sample_data', cam_front_token) filename_cam_full = sd_record_cam['filename'] src_im_path = os.path.join(self.nusc.dataroot, filename_cam_full) img = Image.open(src_im_path) anns, K = self.load_annotations(idx) center = np.array([i / 2 for i in img.size], dtype=np.float32) size = np.array([i for i in img.size], dtype=np.float32) """ resize, horizontal flip, and affine augmentation are performed here. since it is complicated to compute heatmap w.r.t transform. 
""" flipped = False if (self.is_train) and (random.random() < self.flip_prob): flipped = True img = img.transpose(Image.FLIP_LEFT_RIGHT) center[0] = size[0] - center[0] - 1 K[0, 2] = size[0] - K[0, 2] - 1 affine = False if (self.is_train) and (random.random() < self.aug_prob): affine = True shift, scale = self.shift_scale[0], self.shift_scale[1] shift_ranges = np.arange(-shift, shift + 0.1, 0.1) center[0] += size[0] * random.choice(shift_ranges) center[1] += size[1] * random.choice(shift_ranges) scale_ranges = np.arange(1 - scale, 1 + scale + 0.1, 0.1) size *= random.choice(scale_ranges) center_size = [center, size] trans_affine = get_transfrom_matrix( center_size, [self.input_width, self.input_height]) trans_affine_inv = np.linalg.inv(trans_affine) img = img.transform( (self.input_width, self.input_height), method=Image.AFFINE, data=trans_affine_inv.flatten()[:6], resample=Image.BILINEAR, ) trans_mat = get_transfrom_matrix( center_size, [self.output_width, self.output_height]) if not self.is_train: # for inference we parametrize with original size target = ParamsList(image_size=size, is_train=self.is_train) target.add_field("trans_mat", trans_mat) target.add_field("K", K) if self.transforms is not None: img, target = self.transforms(img, target) return img, target, sample_token heat_map = np.zeros( [self.num_classes, self.output_height, self.output_width], dtype=np.float32) regression = np.zeros([self.max_objs, 3, 8], dtype=np.float32) cls_ids = np.zeros([self.max_objs], dtype=np.int32) proj_points = np.zeros([self.max_objs, 2], dtype=np.int32) p_offsets = np.zeros([self.max_objs, 2], dtype=np.float32) dimensions = np.zeros([self.max_objs, 3], dtype=np.float32) locations = np.zeros([self.max_objs, 3], dtype=np.float32) rotys = np.zeros([self.max_objs], dtype=np.float32) reg_mask = np.zeros([self.max_objs], dtype=np.uint8) flip_mask = np.zeros([self.max_objs], dtype=np.uint8) for i, a in enumerate(anns): a = a.copy() cls = a["label"] locs = np.array(a["locations"]) rot_y = np.array(a["rot_y"]) if flipped: locs[0] *= -1 rot_y *= -1 point, box2d, box3d = encode_label(K, rot_y, a["dimensions"], locs) point = affine_transform(point, trans_mat) box2d[:2] = affine_transform(box2d[:2], trans_mat) box2d[2:] = affine_transform(box2d[2:], trans_mat) box2d[[0, 2]] = box2d[[0, 2]].clip(0, self.output_width - 1) box2d[[1, 3]] = box2d[[1, 3]].clip(0, self.output_height - 1) h, w = box2d[3] - box2d[1], box2d[2] - box2d[0] if (0 < point[0] < self.output_width) and (0 < point[1] < self.output_height): point_int = point.astype(np.int32) p_offset = point - point_int radius = gaussian_radius(h, w) radius = max(0, int(radius)) heat_map[cls] = draw_umich_gaussian(heat_map[cls], point_int, radius) cls_ids[i] = cls regression[i] = box3d proj_points[i] = point_int p_offsets[i] = p_offset dimensions[i] = np.array(a["dimensions"]) locations[i] = locs rotys[i] = rot_y reg_mask[i] = 1 if not affine else 0 flip_mask[i] = 1 if not affine and flipped else 0 target = ParamsList(image_size=img.size, is_train=self.is_train) target.add_field("hm", heat_map) target.add_field("reg", regression) target.add_field("cls_ids", cls_ids) target.add_field("proj_p", proj_points) target.add_field("dimensions", dimensions) target.add_field("locations", locations) target.add_field("rotys", rotys) target.add_field("trans_mat", trans_mat) target.add_field("K", K) target.add_field("reg_mask", reg_mask) target.add_field("flip_mask", flip_mask) if self.transforms is not None: img, target = self.transforms(img, target) return img, target, 
sample_token def load_annotations(self, idx): annotations = [] """ Converts nuScenes GT annotations to KITTI format. """ kitti_to_nu_lidar = Quaternion(axis=(0, 0, 1), angle=np.pi / 2) kitti_to_nu_lidar_inv = kitti_to_nu_lidar.inverse sample_token = self.sample_tokens[idx] sample = self.nusc.get('sample', sample_token) sample_annotation_tokens = sample['anns'] cam_front_token = sample['data'][self.cam_name] lidar_token = sample['data'][self.lidar_name] # Retrieve sensor records. sd_record_cam = self.nusc.get('sample_data', cam_front_token) sd_record_lid = self.nusc.get('sample_data', lidar_token) cs_record_cam = self.nusc.get('calibrated_sensor', sd_record_cam['calibrated_sensor_token']) cs_record_lid = self.nusc.get('calibrated_sensor', sd_record_lid['calibrated_sensor_token']) # Combine transformations and convert to KITTI format. # Note: cam uses same conventions in KITTI and nuScenes. lid_to_ego = transform_matrix(cs_record_lid['translation'], Quaternion(cs_record_lid['rotation']), inverse=False) ego_to_cam = transform_matrix(cs_record_cam['translation'], Quaternion(cs_record_cam['rotation']), inverse=True) velo_to_cam = np.dot(ego_to_cam, lid_to_ego) # Convert from KITTI to nuScenes LIDAR coordinates, where we apply velo_to_cam. velo_to_cam_kitti = np.dot(velo_to_cam, kitti_to_nu_lidar.transformation_matrix) # Currently not used. imu_to_velo_kitti = np.zeros((3, 4), dtype=np.float32) # Dummy values. r0_rect = Quaternion(axis=[1, 0, 0], angle=0) # Dummy values. # Projection matrix. p_left_kitti = np.zeros((3, 4), dtype=np.float32) p_left_kitti[:3, :3] = cs_record_cam[ 'camera_intrinsic'] # Cameras are always rectified. p_left_kitti = p_left_kitti[:3, :3] # Create KITTI style transforms. velo_to_cam_rot = velo_to_cam_kitti[:3, :3] velo_to_cam_trans = velo_to_cam_kitti[:3, 3] # Check that the rotation has the same format as in KITTI. assert (velo_to_cam_rot.round(0) == np.array([[0, -1, 0], [0, 0, -1], [1, 0, 0]])).all() assert (velo_to_cam_trans[1:3] < 0).all() # Retrieve the token from the lidar. # Note that this may be confusing as the filename of the camera will include the timestamp of the lidar, # not the camera. filename_cam_full = sd_record_cam['filename'] filename_lid_full = sd_record_lid['filename'] if self.is_train: for sample_annotation_token in sample_annotation_tokens: sample_annotation = self.nusc.get('sample_annotation', sample_annotation_token) # Convert nuScenes category to nuScenes detection challenge category. detection_name = category_to_detection_name( sample_annotation['category_name']) if detection_name in self.classes: # Get box in LIDAR frame. _, box_lidar_nusc, _ = self.nusc.get_sample_data( lidar_token, box_vis_level=BoxVisibility.NONE, selected_anntokens=[sample_annotation_token]) box_lidar_nusc = box_lidar_nusc[0] # Convert from nuScenes to KITTI box format. box_cam_kitti = KittiDB.box_nuscenes_to_kitti( box_lidar_nusc, Quaternion(matrix=velo_to_cam_rot), velo_to_cam_trans, r0_rect) # Convert quaternion to yaw angle. 
v = np.dot(box_cam_kitti.rotation_matrix, np.array([1, 0, 0])) yaw = -np.arctan2(v[2], v[0]) annotations.append({ "class": detection_name, "label": TYPE_ID_CONVERSION[detection_name], "dimensions": [ float(box_cam_kitti.wlh[2]), float(box_cam_kitti.wlh[0]), float(box_cam_kitti.wlh[1]) ], "locations": [ float(box_cam_kitti.center[0]), float(box_cam_kitti.center[1]), float(box_cam_kitti.center[2]) ], "rot_y": float(yaw) }) return annotations, p_left_kitti def _split_to_samples(self, split_logs: List[str]) -> List[str]: """ Convenience function to get the samples in a particular split. :param split_logs: A list of the log names in this split. :return: The list of samples. """ samples = [] for sample in self.nusc.sample: scene = self.nusc.get('scene', sample['scene_token']) log = self.nusc.get('log', scene['log_token']) logfile = log['logfile'] if logfile in split_logs: samples.append(sample['token']) return samples
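# --- Usage sketch (cfg stands for whatever config object the surrounding
# project provides; batching the ParamsList targets needs the project's own
# collator, so this only demonstrates the per-sample interface, assuming
# ParamsList exposes get_field as the add_field calls above suggest).
def peek_first_sample(cfg):
    dataset = nuScenesDataset(cfg, is_train=True)
    img, target, sample_token = dataset[0]
    return img.size, target.get_field("K"), sample_token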
os.listdir( '/home/marcel/Documents/Datasets/NuScenes/samples/CAM_FRONT')): if i < 3: asdf.append([ sd for sd in nusc.sample_data if sd['filename'] == os.path.join('samples/CAM_FRONT', file) ][0]) #sd2_im = [sd for sd in nusc.sample_data if sd['filename'] == 'samples/CAM_FRONT/n015-2018-07-18-11-18-34+0800__CAM_FRONT__1531884168362460.jpg'][0] sd1_im = asdf[0] s1 = nusc.get('sample', sd1_im['sample_token']) #s2 = nusc.get('sample', sd2_im['sample_token']) # Camera sd1 = sd1_im #sd2 = sd2_im _, boxes1, intrinsics = nusc.get_sample_data(sd1['token'], box_vis_level=0) #_, boxes2, _ = nusc.get_sample_data(sd2['token'], box_vis_level=0) box1 = boxes1[0] #box2 = boxes2[3] box2d = box1.box_2d(intrinsics) #yaw2 = quaternion_yaw(box2.orientation) / np.pi * 180 #yaw_diff = yaw1 - yaw2 print(box2d) #print(yaw2) #print(yaw_diff) nusc.render_sample_data(sd1['token'], box_vis_level=0) rect = patches.Rectangle((box2d[0], box2d[1]), box2d[2] - box2d[0], box2d[3] - box2d[1], linewidth=1, edgecolor='r',
class NuscenesReader: scene_split_lists = {"train": set(split_train), "val": set(split_val)} sample_id_template = "ns_{}_{:02d}" np_str_to_cat = np.vectorize(nuscenes_category_to_object_class, otypes=[np.int64]) np_str_to_sem = np.vectorize(nuscenes_category_to_semantic_class, otypes=[np.int64]) # Nuscenes LiDAR has x-axis to vehicle right and y-axis to front. # Turn this 90 degrees to have x-axis facing the front turn_matrix = np.linalg.inv( np.asarray([[0, 1, 0, 0], [-1, 0, 0, 0], [0, 0, 1, 0], [0, 0, 0, 1]], dtype=np.float64)) turn_quaternion = Quaternion(axis=(0.0, 0.0, 1.0), radians=-np.pi / 2.0) def __init__(self, nuscenes_root, version="v1.0-trainval", max_scenes=None, *, read_radar: bool = True, read_camera: bool = True, read_semantics: bool = True, read_bounding_boxes: bool = True): self.nusc = NuScenes(version=version, dataroot=nuscenes_root, verbose=False) self.root = pathlib.Path(nuscenes_root) # global counter to sanity-check if we calculate the same number of points # within boxes as the dataset authors self.ns_lidar_pts_to_calculated_diff = 0 # flags defining the data entries to return from 'read' self.read_radar = read_radar self.read_camera = read_camera self.read_semantics = read_semantics self.read_bounding_boxes = read_bounding_boxes if self.read_semantics and not hasattr(self.nusc, "lidarseg"): raise RuntimeError("Error: nuScenes-lidarseg not installed!") # assert that the training targets range from 0 - (|mapping| - 1) assert len(set( NUSCENES_SEM_CLASSES.values())) == len(NUSCENES_SEM_CLASSES) assert all(a == b for a, b in zip(sorted(NUSCENES_SEM_CLASSES.values()), range(len(NUSCENES_SEM_CLASSES)))) split_name = { "v1.0-trainval": "nuscenes_default", "v1.0-mini": "nuscenes_mini", }.get(version, "nuscenes_{}".format(version)) # create split dict self.split = { "name": split_name, "data": {k: [] for k in self.scene_split_lists.keys()}, } for i, scene in enumerate(self.nusc.scene): if max_scenes is not None and i > max_scenes: break name = scene["name"] for k, v in self.scene_split_lists.items(): if name in v: split_list = self.split["data"][k] split_list.extend([ self.sample_id_template.format(name, i) for i in range(0, scene["nbr_samples"]) ]) break else: raise RuntimeError( "Found scene that is not in a split: {}".format(name)) def _make_path(self, filename: str): return str(self.root / filename) def __iter__(self): self._scene_iter = self.nusc.scene.__iter__() self._next_scene() return self def __next__(self) -> typing.Tuple[dict, int]: try: return self._sample_iter.__next__() except StopIteration: self._next_scene() return self._sample_iter.__next__() def make_sample_id(self, sample: typing.Tuple[dict, int]) -> str: scene = self.nusc.get("scene", sample[0]["scene_token"]) return self.sample_id_template.format(scene["name"], sample[1]) def _next_scene(self): self._current_scene = self._scene_iter.__next__() class SampleIteration: """Iterate over nuscenes samples of a scene. Add an additional sample index. 
""" def __init__(self, scene, nusc): self.nusc = nusc self._pos = scene["first_sample_token"] self._index = itertools.count().__iter__() def __next__(self): if not self._pos: raise StopIteration() sample = self.nusc.get("sample", self._pos) self._pos = sample["next"] return sample, self._index.__next__() self._sample_iter = SampleIteration(self._current_scene, self.nusc) def read(self, sample: typing.Tuple[dict, int], _sample_id: str): d = self._read_sample(sample) def convert(entry): """Convert data types to be compatible with tfrecords features.""" if isinstance(entry, str): return entry.encode("utf-8") if isinstance(entry, np.ndarray): entry = entry.flatten() if not np.issubdtype(entry.dtype, np.number): entry = tf.io.serialize_tensor(tf.convert_to_tensor(entry)) elif entry.dtype == np.float64: entry = entry.astype(np.float32) return entry return entry return {k: convert(v) for k, v in d.items()} def _read_sample(self, sample: typing.Tuple[dict, int]): radars = ["RADAR_FRONT", "RADAR_FRONT_LEFT", "RADAR_FRONT_RIGHT"] cameras = ["CAM_FRONT"] sample_id = self.make_sample_id(sample) sample, sample_index = sample sample_data_token: str = sample["data"]["LIDAR_TOP"] lidar_sample_data = self.nusc.get("sample_data", sample_data_token) assert lidar_sample_data["is_key_frame"] data_path, box_list, cam_intrinsic = self.nusc.get_sample_data( sample_data_token) # POINT CLOUD [(x y z I) x P]. Intensity from 0.0 to 255.0 point_cloud_orig = LidarPointCloud.from_file(data_path) if point_cloud_orig.points.dtype != np.float32: raise RuntimeError("Point cloud has wrong data type.") # -> [P x (x y z I)] point_cloud_orig = point_cloud_orig.points.transpose() camera_data = {} radar_data = {} box_data = {} lidarseg_data = {} if self.read_semantics: lidarseg_data = self._read_lidarseg_data(sample_data_token, point_cloud_orig) if self.read_bounding_boxes: box_data = self._read_bounding_boxes(box_list, point_cloud_orig, sample) # rotate point cloud 90 degrees around z-axis, # so that x-axis faces front of vehicle x = point_cloud_orig[:, 0:1] y = point_cloud_orig[:, 1:2] point_cloud = np.concatenate((y, -x, point_cloud_orig[:, 2:4]), axis=-1) # LiDAR extrinsic calibration calibration_lidar = self.nusc.get( "calibrated_sensor", lidar_sample_data["calibrated_sensor_token"]) tf_lidar = transform_matrix(calibration_lidar["translation"], Quaternion(calibration_lidar["rotation"])) # vehicle -> point cloud coords (turned by 90 degrees) tf_vehicle_pc = np.linalg.inv( np.dot(tf_lidar, NuscenesReader.turn_matrix)) if self.read_camera: # CAMERAS camera_data: [{ str: typing.Any }] = [ self._read_sensor_data_and_extrinsics(sample, cam_name, tf_vehicle_pc, self._read_camera_data) for cam_name in cameras ] camera_data = {k: v for d in camera_data for k, v in d.items()} if self.read_radar: # RADARS radar_data: [{ str: typing.Any }] = [ self._read_sensor_data_and_extrinsics(sample, radar_name, tf_vehicle_pc, self._read_radar_data) for radar_name in radars ] radar_data = {k: v for d in radar_data for k, v in d.items()} assert box_data.keys().isdisjoint(camera_data.keys()) assert box_data.keys().isdisjoint(radar_data.keys()) assert box_data.keys().isdisjoint(lidarseg_data.keys()) # return feature array. Add sample ID. 
d = { "sample_id": sample_id, "point_cloud": point_cloud, **box_data, **camera_data, **radar_data, **lidarseg_data, } return d def _read_lidarseg_data(self, sample_data_token: str, point_cloud_orig: np.ndarray): # self.nusc.lidarseg_idx2name_mapping lidarseg_labels_filename = ( pathlib.Path(self.nusc.dataroot) / self.nusc.get("lidarseg", sample_data_token)["filename"]) # Load labels from .bin file. points_label = np.fromfile(str(lidarseg_labels_filename), dtype=np.uint8) # [num_points] if points_label.shape[0] != point_cloud_orig.shape[0]: raise ValueError("Semantic labels do not match point cloud.") return {"semantic_labels": tf.io.serialize_tensor(points_label)} def _read_bounding_boxes(self, box_list, point_cloud_orig, sample): # create transform matrices for all boxes dt = np.float32 if box_list: r = np.asarray([x.rotation_matrix for x in box_list]) c = np.asarray([x.center for x in box_list]) center_pos = np.asarray([x.center for x in box_list], dtype=dt) # NuScenes format: width, length, height. Reorder to l w h box_dims_lwh = np.asarray([x.wlh for x in box_list], dtype=np.float64)[:, [1, 0, 2]] box_rot = np.asarray([(self.turn_quaternion * x.orientation).q for x in box_list], dtype=dt) else: r = np.zeros(shape=[0, 3, 3], dtype=np.float64) c = np.zeros(shape=[0, 3], dtype=np.float64) center_pos = np.zeros(shape=[0, 3], dtype=np.float64) box_dims_lwh = np.zeros(shape=[0, 3], dtype=np.float64) box_rot = np.zeros(shape=[0, 4], dtype=np.float64) rc = np.concatenate((r, c[:, :, None]), axis=-1) tfs = np.concatenate( ( rc, np.broadcast_to( tf.constant([0, 0, 0, 1], dtype=rc.dtype)[None, None, :], [rc.shape[0], 1, 4], ), ), axis=1, ) total_points_per_box, mapping = assign_point_cloud_to_bounding_boxes( point_cloud_orig, bounding_boxes_tfs=tfs, bounding_boxes_dims=box_dims_lwh, ) # 3D BOXES IN LIDAR COORDS [(x y z) (w x y z) (l w h)] bboxes_spatial = np.empty(shape=(len(box_list), 10), dtype=dt) bboxes_spatial[:, 0] = center_pos[:, 1] bboxes_spatial[:, 1] = -center_pos[:, 0] bboxes_spatial[:, 2] = center_pos[:, 2] bboxes_spatial[:, 3:7] = box_rot bboxes_spatial[:, 7:10] = box_dims_lwh.astype(dt) object_str = np.array([x.name for x in box_list], dtype=np.str_) # np.unicode was removed from newer NumPy object_category = NuscenesReader.np_str_to_cat(object_str).astype( np.int64) class_value = NuscenesReader.np_str_to_sem(object_str).astype(np.int64) lidar_pts = np.asarray([ x["num_lidar_pts"] for x in [ self.nusc.get("sample_annotation", ann_token) for ann_token in sample["anns"] ] ]) # rotate box transforms (same as bboxes_spatial) tfs = np.matmul(np.linalg.inv(self.turn_matrix)[None, ...], tfs) # track to see if box calculation gives the same results as the # lidar points counter from the nuscenes dataset diff = np.sum(np.abs(lidar_pts - total_points_per_box)) self.ns_lidar_pts_to_calculated_diff += diff return { "bounding_boxes_3d_spatial": bboxes_spatial, "bounding_boxes_3d_transforms": tfs, "bounding_boxes_class": class_value, "bounding_boxes_category": object_category, "bounding_boxes_class_str": object_str, "bounding_boxes_point_counter": total_points_per_box.astype(np.int64), "bounding_boxes_point_mapping": mapping, } @staticmethod def _read_sensor_data_and_extrinsics(sample, sensor_name: str, coord_transform: np.ndarray, reader_func): data, extrinsics = reader_func(sample["data"][sensor_name]) extrinsics = np.matmul(coord_transform, extrinsics) return { **{k.format(sensor_name.lower()): v for k, v in data.items()}, "{}_extrinsics".format(sensor_name.lower()): extrinsics, } def _read_camera_data(self, sample_data_token):
sd_record = self.nusc.get("sample_data", sample_data_token) cs_record = self.nusc.get("calibrated_sensor", sd_record["calibrated_sensor_token"]) sensor_record = self.nusc.get("sensor", cs_record["sensor_token"]) assert sensor_record["modality"] == "camera" # currently using only keyframes assert sd_record["is_key_frame"] data_path = self.nusc.get_sample_data_path(sample_data_token) imsize = (sd_record["width"], sd_record["height"]) cam_intrinsic = np.array(cs_record["camera_intrinsic"]) cam_extrinsics = transform_matrix(cs_record["translation"], Quaternion(cs_record["rotation"])) with open(data_path, "rb") as in_file: img_bytes_jpg = in_file.read() return ( { "{}_jpg": img_bytes_jpg, "{}_size": np.asarray(imsize, dtype=np.int64), "{}_intrinsics": cam_intrinsic.astype(np.float32), }, cam_extrinsics, ) def _read_radar_data(self, sample_data_token): sd_record = self.nusc.get("sample_data", sample_data_token) cs_record = self.nusc.get("calibrated_sensor", sd_record["calibrated_sensor_token"]) sensor_record = self.nusc.get("sensor", cs_record["sensor_token"]) assert sensor_record["modality"] == "radar" # currently using only keyframes assert sd_record["is_key_frame"] data_path = self.nusc.get_sample_data_path(sample_data_token) radar_point_cloud = RadarPointCloud.from_file(data_path) points = tf.convert_to_tensor(radar_point_cloud.points.transpose()) radar_extrinsics = transform_matrix(cs_record["translation"], Quaternion(cs_record["rotation"])) return {"{}_points": tf.io.serialize_tensor(points)}, radar_extrinsics
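# How a reader like the one above is typically driven; the dataroot is
# hypothetical and semantics are disabled in case nuScenes-lidarseg is not
# installed. Each iteration yields a (sample_dict, index_in_scene) tuple:
reader = NuscenesReader("/data/nuscenes", version="v1.0-mini",
                        read_semantics=False)
for sample in reader:
    sample_id = reader.make_sample_id(sample)
    features = reader.read(sample, sample_id)
    print(sample_id, features["point_cloud"].shape)
    break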
def main(): SCENE_SPLITS["mini-val"] = SCENE_SPLITS["val"] if not os.path.exists(DATA_PATH): os.mkdir(DATA_PATH) if not os.path.exists(OUT_PATH): os.mkdir(OUT_PATH) for split in SPLITS: data_path = DATA_PATH # + '{}/'.format(SPLITS[split]) nusc = NuScenes(version=SPLITS[split], dataroot=data_path, verbose=True) out_path = OUT_PATH + "{}.json".format(split) categories_info = [{ "name": CATS[i], "id": i + 1 } for i in range(len(CATS))] ret = { "images": [], "annotations": [], "categories": categories_info, "videos": [], "attributes": ATTRIBUTE_TO_ID, } num_images = 0 num_anns = 0 num_videos = 0 # A "sample" in nuScenes refers to a timestamp with 6 cameras and 1 LIDAR. for sample in nusc.sample: scene_name = nusc.get("scene", sample["scene_token"])["name"] if not (split in ["mini", "test" ]) and not (scene_name in SCENE_SPLITS[split]): continue if sample["prev"] == "": print("scene_name", scene_name) num_videos += 1 ret["videos"].append({ "id": num_videos, "file_name": scene_name }) frame_ids = {k: 0 for k in sample["data"]} track_ids = {} # We decompose a sample into 6 images in our case. for sensor_name in sample["data"]: if sensor_name in USED_SENSOR: image_token = sample["data"][sensor_name] image_data = nusc.get("sample_data", image_token) num_images += 1 # Complex coordinate transform. This will take time to understand. sd_record = nusc.get("sample_data", image_token) cs_record = nusc.get("calibrated_sensor", sd_record["calibrated_sensor_token"]) pose_record = nusc.get("ego_pose", sd_record["ego_pose_token"]) global_from_car = transform_matrix( pose_record["translation"], Quaternion(pose_record["rotation"]), inverse=False, ) car_from_sensor = transform_matrix( cs_record["translation"], Quaternion(cs_record["rotation"]), inverse=False, ) trans_matrix = np.dot(global_from_car, car_from_sensor) _, boxes, camera_intrinsic = nusc.get_sample_data( image_token, box_vis_level=BoxVisibility.ANY) calib = np.eye(4, dtype=np.float32) calib[:3, :3] = camera_intrinsic calib = calib[:3] frame_ids[sensor_name] += 1 # image information in COCO format image_info = { "id": num_images, "file_name": image_data["filename"], "calib": calib.tolist(), "video_id": num_videos, "frame_id": frame_ids[sensor_name], "sensor_id": SENSOR_ID[sensor_name], "sample_token": sample["token"], "trans_matrix": trans_matrix.tolist(), "width": sd_record["width"], "height": sd_record["height"], "pose_record_trans": pose_record["translation"], "pose_record_rot": pose_record["rotation"], "cs_record_trans": cs_record["translation"], "cs_record_rot": cs_record["rotation"], } ret["images"].append(image_info) anns = [] for box in boxes: det_name = category_to_detection_name(box.name) if det_name is None: continue num_anns += 1 v = np.dot(box.rotation_matrix, np.array([1, 0, 0])) yaw = -np.arctan2(v[2], v[0]) box.translate(np.array([0, box.wlh[2] / 2, 0])) category_id = CAT_IDS[det_name] amodel_center = project_to_image( np.array( [ box.center[0], box.center[1] - box.wlh[2] / 2, box.center[2], ], np.float32, ).reshape(1, 3), calib, )[0].tolist() sample_ann = nusc.get("sample_annotation", box.token) instance_token = sample_ann["instance_token"] if not (instance_token in track_ids): track_ids[instance_token] = len(track_ids) + 1 attribute_tokens = sample_ann["attribute_tokens"] attributes = [ nusc.get("attribute", att_token)["name"] for att_token in attribute_tokens ] att = "" if len(attributes) == 0 else attributes[0] if len(attributes) > 1: print(attributes) import pdb pdb.set_trace() track_id = track_ids[instance_token] vel = 
nusc.box_velocity(box.token) # global frame vel = np.dot( np.linalg.inv(trans_matrix), np.array([vel[0], vel[1], vel[2], 0], np.float32), ).tolist() # instance information in COCO format ann = { "id": num_anns, "image_id": num_images, "category_id": category_id, "dim": [box.wlh[2], box.wlh[0], box.wlh[1]], "location": [box.center[0], box.center[1], box.center[2]], "depth": box.center[2], "occluded": 0, "truncated": 0, "rotation_y": yaw, "amodel_center": amodel_center, "iscrowd": 0, "track_id": track_id, "attributes": ATTRIBUTE_TO_ID[att], "velocity": vel, } bbox = KittiDB.project_kitti_box_to_image( copy.deepcopy(box), camera_intrinsic, imsize=(1600, 900)) alpha = _rot_y2alpha( yaw, (bbox[0] + bbox[2]) / 2, camera_intrinsic[0, 2], camera_intrinsic[0, 0], ) ann["bbox"] = [ bbox[0], bbox[1], bbox[2] - bbox[0], bbox[3] - bbox[1], ] ann["area"] = (bbox[2] - bbox[0]) * (bbox[3] - bbox[1]) ann["alpha"] = alpha anns.append(ann) # Filter out bounding boxes outside the image visable_anns = [] for i in range(len(anns)): vis = True for j in range(len(anns)): if anns[i]["depth"] - min(anns[i][ "dim"]) / 2 > anns[j]["depth"] + max( anns[j]["dim"]) / 2 and _bbox_inside( anns[i]["bbox"], anns[j]["bbox"]): vis = False break if vis: visable_anns.append(anns[i]) else: pass for ann in visable_anns: ret["annotations"].append(ann) if DEBUG: img_path = data_path + image_info["file_name"] img = cv2.imread(img_path) img_3d = img.copy() for ann in visable_anns: bbox = ann["bbox"] cv2.rectangle( img, (int(bbox[0]), int(bbox[1])), (int(bbox[2] + bbox[0]), int(bbox[3] + bbox[1])), (0, 0, 255), 3, lineType=cv2.LINE_AA, ) box_3d = compute_box_3d(ann["dim"], ann["location"], ann["rotation_y"]) box_2d = project_to_image(box_3d, calib) img_3d = draw_box_3d(img_3d, box_2d) pt_3d = unproject_2d_to_3d(ann["amodel_center"], ann["depth"], calib) pt_3d[1] += ann["dim"][0] / 2 print("location", ann["location"]) print("loc model", pt_3d) pt_2d = np.array( [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2], dtype=np.float32, ) pt_3d = unproject_2d_to_3d(pt_2d, ann["depth"], calib) pt_3d[1] += ann["dim"][0] / 2 print("loc ", pt_3d) cv2.imshow("img", img) cv2.imshow("img_3d", img_3d) cv2.waitKey() nusc.render_sample_data(image_token) plt.show() print("reordering images") images = ret["images"] video_sensor_to_images = {} for image_info in images: tmp_seq_id = image_info["video_id"] * 20 + image_info["sensor_id"] if tmp_seq_id in video_sensor_to_images: video_sensor_to_images[tmp_seq_id].append(image_info) else: video_sensor_to_images[tmp_seq_id] = [image_info] ret["images"] = [] for tmp_seq_id in sorted(video_sensor_to_images): ret["images"] = ret["images"] + video_sensor_to_images[tmp_seq_id] print("{} {} images {} boxes".format(split, len(ret["images"]), len(ret["annotations"]))) print("out_path", out_path) json.dump(ret, open(out_path, "w"))
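# _rot_y2alpha above presumably implements the standard KITTI relation
# between global yaw (rot_y) and observation angle (alpha): subtract the
# viewing-ray angle of the box center and wrap to [-pi, pi). A sketch under
# that assumption (u is the box center's pixel column, cx/fx come from the
# camera intrinsics):
import numpy as np

def rot_y_to_alpha(rot_y, u, cx, fx):
    alpha = rot_y - np.arctan2(u - cx, fx)
    # wrap into [-pi, pi)
    return float((alpha + np.pi) % (2 * np.pi) - np.pi)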
) current_time = first_sample_rec['timestamp'] while current_time < last_sample_rec['timestamp']: current_time += time_step for channel, sd_rec in current_recs.items(): while sd_rec['timestamp'] < current_time and sd_rec[ 'next'] != '': sd_rec = nusc.get('sample_data', sd_rec['next']) current_recs[channel] = sd_rec count_frames = count_frames + 1 cam_token = sd_rec['token'] cam_record = nusc.get('sample_data', cam_token) cam_path = nusc.get_sample_data_path(cam_token) cam_path, boxes, camera_intrinsic = nusc.get_sample_data( sd_rec['token'], ) im = Image.open(cam_path) im.save( data_directory + spl + str(scene_id) + camera_channel + '/img1/' + str(count_frames) + '.jpg', 'JPEG', ) TL_found = False bb = [] patch_radius = 700 sample_record = nusc.get('sample', sample_token) log_record = nusc.get('log', scene_record['log_token']) log_location = log_record['location'] nusc_map = NuScenesMap( dataroot=opt.nuscenes_root, map_name=log_location,
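# The inner while-loop above advances one channel's sample_data pointer
# until its timestamp reaches current_time; the same logic as a standalone
# helper (nusc is a NuScenes instance, timestamps are in microseconds):
def advance_to_time(nusc, sd_rec, target_us):
    while sd_rec['timestamp'] < target_us and sd_rec['next'] != '':
        sd_rec = nusc.get('sample_data', sd_rec['next'])
    return sd_rec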
for present_sample in nusc.sample: # converting image data from 6 cameras (in the sensor list) for idx, present_sensor in enumerate(sensor_list): img_output_root = img_i_output_root.format(idx) label_output_root = label_i_output_root.format(idx) print('SENSOR: {}, img out: {}, label out: {}, idx: {}'.format(present_sensor, img_output_root, label_output_root, idx)) os.makedirs(img_output_root, exist_ok=True) os.makedirs(label_output_root, exist_ok=True) # each sensor_data corresponds to one specific image in the dataset sensor_data = nusc.get('sample_data', present_sample['data'][present_sensor]) data_path, box_list, cam_intrinsic = nusc.get_sample_data(present_sample['data'][present_sensor], BoxVisibility.ALL) img_file = os.path.join(data_root, sensor_data['filename']) seqname = str(frame_counter).zfill(6) output_label_file = os.path.join(label_output_root, seqname + '.txt') with open(output_label_file, 'a') as output_f: for box in box_list: # obtaining visibility level of each 3D box; visibility_token is a string ('1'-'4'), so cast before comparing present_visibility_token = nusc.get('sample_annotation', box.token)['visibility_token'] if int(present_visibility_token) > min_visibility_level: if not (category_reflection[box.name] == 'DontCare' and delete_dontcare_objects): w, l, h = box.wlh x, y, z = box.center yaw, pitch, roll = box.orientation.yaw_pitch_roll; yaw = -yaw alpha = yaw - np.arctan2(x, z)
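# nuScenes stores per-annotation visibility as a string token '1'-'4',
# bucketing the fraction of the object visible across the six cameras
# (0-40%, 40-60%, 60-80%, 80-100%). A small helper for the gate used above:
def is_visible_enough(nusc, box, min_visibility_level=2):
    token = nusc.get('sample_annotation', box.token)['visibility_token']
    return int(token) > min_visibility_level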
class NuScenesDataset(Dataset): def __init__(self, cfg, root, is_train=True, transforms=None): super(NuScenesDataset, self).__init__() self.root = root self.image_dir = os.path.join(root, "image_2") self.label_dir = os.path.join(root, "label_2") self.calib_dir = os.path.join(root, "calib") self.split = cfg.DATASETS.TRAIN_SPLIT if is_train else cfg.DATASETS.TEST_SPLIT self.is_train = is_train self.transforms = transforms if self.split == "train": imageset_txt = os.path.join(root, "ImageSets", "train.txt") elif self.split == "val": imageset_txt = os.path.join(root, "ImageSets", "val.txt") elif self.split == "trainval": imageset_txt = os.path.join(root, "ImageSets", "trainval.txt") elif self.split == "test": imageset_txt = os.path.join(root, "ImageSets", "test.txt") else: raise ValueError("Invalid split!") image_files = [] for line in open(imageset_txt, "r"): base_name = line.replace("\n", "") image_name = base_name + ".png" image_files.append(image_name) self.image_files = image_files self.label_files = [ i.replace(".png", ".txt") for i in self.image_files ] self.num_samples = len(self.image_files) self.classes = cfg.DATASETS.DETECT_CLASSES self.flip_prob = cfg.INPUT.FLIP_PROB_TRAIN if is_train else 0 self.aug_prob = cfg.INPUT.SHIFT_SCALE_PROB_TRAIN if is_train else 0 self.shift_scale = cfg.INPUT.SHIFT_SCALE_TRAIN self.num_classes = len(self.classes) self.input_width = cfg.INPUT.WIDTH_TRAIN self.input_height = cfg.INPUT.HEIGHT_TRAIN self.output_width = self.input_width // cfg.MODEL.BACKBONE.DOWN_RATIO self.output_height = self.input_height // cfg.MODEL.BACKBONE.DOWN_RATIO self.max_objs = cfg.DATASETS.MAX_OBJECTS self.logger = logging.getLogger(__name__) self.logger.info( "Initializing KITTI {} set with {} files loaded".format( self.split, self.num_samples)) # NuScenes specific stuff here # KITTI unwanted lines not yet deleted above # Select subset of the data to look at. nusc_version = 'v1.0' self.nusc = NuScenes(version=nusc_version) # sensor channels read by _split_to_samples, __getitem__ and load_annotations self.cam_name = 'CAM_FRONT' self.lidar_name = 'LIDAR_TOP' # Get assignment of scenes to splits. self.split_logs = create_split_logs(self.split, self.nusc) # Use only the samples from the current split. self.sample_tokens = self._split_to_samples(self.split_logs) self.image_count = len(self.sample_tokens) self.image_count = 100 # cap to the first 100 samples while experimenting self.sample_tokens = self.sample_tokens[:self.image_count] def __len__(self): return self.image_count def _split_to_samples(self, split_logs: List[str]) -> List[str]: """ Convenience function to get the samples in a particular split. :param split_logs: A list of the log names in this split. :return: The list of samples. """ samples = [] for sample in self.nusc.sample: scene = self.nusc.get('scene', sample['scene_token']) log = self.nusc.get('log', scene['log_token']) logfile = log['logfile'] if logfile in split_logs: samples.append(sample['token']) return samples def __getitem__(self, idx): # set defaults here kitti_to_nu_lidar = Quaternion(axis=(0, 0, 1), angle=np.pi / 2) kitti_to_nu_lidar_inv = kitti_to_nu_lidar.inverse imsize = (1600, 900) # sample_token based on index sample_token = self.sample_tokens[idx] # Get sample data. sample = self.nusc.get('sample', sample_token) sample_annotation_tokens = sample['anns'] cam_front_token = sample['data'][self.cam_name] lidar_token = sample['data'][self.lidar_name] # Retrieve sensor records.
sd_record_cam = self.nusc.get('sample_data', cam_front_token) sd_record_lid = self.nusc.get('sample_data', lidar_token) cs_record_cam = self.nusc.get('calibrated_sensor', sd_record_cam['calibrated_sensor_token']) cs_record_lid = self.nusc.get('calibrated_sensor', sd_record_lid['calibrated_sensor_token']) # Combine transformations and convert to KITTI format. # Note: cam uses same conventions in KITTI and nuScenes. lid_to_ego = transform_matrix(cs_record_lid['translation'], Quaternion(cs_record_lid['rotation']), inverse=False) ego_to_cam = transform_matrix(cs_record_cam['translation'], Quaternion(cs_record_cam['rotation']), inverse=True) velo_to_cam = np.dot(ego_to_cam, lid_to_ego) # Convert from KITTI to nuScenes LIDAR coordinates, where we apply velo_to_cam. velo_to_cam_kitti = np.dot(velo_to_cam, kitti_to_nu_lidar.transformation_matrix) r0_rect = Quaternion(axis=[1, 0, 0], angle=0) # Dummy values. # Projection matrix. p_left_kitti = np.zeros((3, 4)) p_left_kitti[:3, :3] = cs_record_cam[ 'camera_intrinsic'] # Cameras are always rectified. # Create KITTI style transforms. velo_to_cam_rot = velo_to_cam_kitti[:3, :3] velo_to_cam_trans = velo_to_cam_kitti[:3, 3] # Check that the rotation has the same format as in KITTI. assert (velo_to_cam_rot.round(0) == np.array([[0, -1, 0], [0, 0, -1], [1, 0, 0]])).all() assert (velo_to_cam_trans[1:3] < 0).all() # Retrieve the token from the lidar. # Note that this may be confusing as the filename of the camera will include the timestamp of the lidar, # not the camera. filename_cam_full = sd_record_cam['filename'] # set the img variable to its data here src_im_path = os.path.join(self.nusc.dataroot, filename_cam_full) img = Image.open(src_im_path) # Create calibration matrix. K = p_left_kitti K = [float(i) for i in K] K = np.array(K, dtype=np.float32).reshape(3, 4) K = K[:3, :3] # populate the list of object annotations for this sample anns = [] for sample_annotation_token in sample_annotation_tokens: sample_annotation = self.nusc.get('sample_annotation', sample_annotation_token) # Get box in LIDAR frame. _, box_lidar_nusc, _ = self.nusc.get_sample_data( lidar_token, box_vis_level=BoxVisibility.NONE, selected_anntokens=[sample_annotation_token]) box_lidar_nusc = box_lidar_nusc[0] # Truncated: Set all objects to 0 which means untruncated. truncated = 0.0 # Occluded: Set all objects to full visibility as this information is not available in nuScenes. occluded = 0 # Convert nuScenes category to nuScenes detection challenge category. detection_name = category_to_detection_name( sample_annotation['category_name']) # Skip categories that are not part of the nuScenes detection challenge. if detection_name is None: continue # Convert from nuScenes to KITTI box format. box_cam_kitti = KittiDB.box_nuscenes_to_kitti( box_lidar_nusc, Quaternion(matrix=velo_to_cam_rot), velo_to_cam_trans, r0_rect) # Project 3d box to 2d box in image, ignore box if it does not fall inside. bbox_2d = KittiDB.project_kitti_box_to_image(box_cam_kitti, p_left_kitti, imsize=imsize) if bbox_2d is None: continue # Set dummy score so we can use this file as result. box_cam_kitti.score = 0 # Convert box to output string format. 
output = KittiDB.box_to_string(name=detection_name, box=box_cam_kitti, bbox_2d=bbox_2d, truncation=truncated, occlusion=occluded) fieldnames = [ 'type', 'truncated', 'occluded', 'alpha', 'xmin', 'ymin', 'xmax', 'ymax', 'dh', 'dw', 'dl', 'lx', 'ly', 'lz', 'ry' ] if self.is_train: f = StringIO(output) reader = csv.DictReader(f, delimiter=' ', fieldnames=fieldnames) for line, row in enumerate(reader): if row["type"] in self.classes: anns.append({ "class": row["type"], "label": TYPE_ID_CONVERSION[row["type"]], "truncation": float(row["truncated"]), "occlusion": float(row["occluded"]), "alpha": float(row["alpha"]), "dimensions": [ float(row['dl']), float(row['dh']), float(row['dw']) ], "locations": [ float(row['lx']), float(row['ly']), float(row['lz']) ], "rot_y": float(row["ry"]) }) center = np.array([i / 2 for i in img.size], dtype=np.float32) size = np.array([i for i in img.size], dtype=np.float32) """ resize, horizontal flip, and affine augmentation are performed here. since it is complicated to compute heatmap w.r.t transform. """ flipped = False if (self.is_train) and (random.random() < self.flip_prob): flipped = True img = img.transpose(Image.FLIP_LEFT_RIGHT) center[0] = size[0] - center[0] - 1 K[0, 2] = size[0] - K[0, 2] - 1 affine = False if (self.is_train) and (random.random() < self.aug_prob): affine = True shift, scale = self.shift_scale[0], self.shift_scale[1] shift_ranges = np.arange(-shift, shift + 0.1, 0.1) center[0] += size[0] * random.choice(shift_ranges) center[1] += size[1] * random.choice(shift_ranges) scale_ranges = np.arange(1 - scale, 1 + scale + 0.1, 0.1) size *= random.choice(scale_ranges) center_size = [center, size] trans_affine = get_transfrom_matrix( center_size, [self.input_width, self.input_height]) trans_affine_inv = np.linalg.inv(trans_affine) img = img.transform( (self.input_width, self.input_height), method=Image.AFFINE, data=trans_affine_inv.flatten()[:6], resample=Image.BILINEAR, ) trans_mat = get_transfrom_matrix( center_size, [self.output_width, self.output_height]) if not self.is_train: # for inference we parametrize with original size target = ParamsList(image_size=size, is_train=self.is_train) target.add_field("trans_mat", trans_mat) target.add_field("K", K) if self.transforms is not None: img, target = self.transforms(img, target) return img, target, idx heat_map = np.zeros( [self.num_classes, self.output_height, self.output_width], dtype=np.float32) regression = np.zeros([self.max_objs, 3, 8], dtype=np.float32) cls_ids = np.zeros([self.max_objs], dtype=np.int32) proj_points = np.zeros([self.max_objs, 2], dtype=np.int32) p_offsets = np.zeros([self.max_objs, 2], dtype=np.float32) dimensions = np.zeros([self.max_objs, 3], dtype=np.float32) locations = np.zeros([self.max_objs, 3], dtype=np.float32) rotys = np.zeros([self.max_objs], dtype=np.float32) reg_mask = np.zeros([self.max_objs], dtype=np.uint8) flip_mask = np.zeros([self.max_objs], dtype=np.uint8) for i, a in enumerate(anns): a = a.copy() cls = a["label"] locs = np.array(a["locations"]) rot_y = np.array(a["rot_y"]) if flipped: locs[0] *= -1 rot_y *= -1 point, box2d, box3d = encode_label(K, rot_y, a["dimensions"], locs) point = affine_transform(point, trans_mat) box2d[:2] = affine_transform(box2d[:2], trans_mat) box2d[2:] = affine_transform(box2d[2:], trans_mat) box2d[[0, 2]] = box2d[[0, 2]].clip(0, self.output_width - 1) box2d[[1, 3]] = box2d[[1, 3]].clip(0, self.output_height - 1) h, w = box2d[3] - box2d[1], box2d[2] - box2d[0] if (0 < point[0] < self.output_width) and (0 < point[1] < 
self.output_height): point_int = point.astype(np.int32) p_offset = point - point_int radius = gaussian_radius(h, w) radius = max(0, int(radius)) heat_map[cls] = draw_umich_gaussian(heat_map[cls], point_int, radius) cls_ids[i] = cls regression[i] = box3d proj_points[i] = point_int p_offsets[i] = p_offset dimensions[i] = np.array(a["dimensions"]) locations[i] = locs rotys[i] = rot_y reg_mask[i] = 1 if not affine else 0 flip_mask[i] = 1 if not affine and flipped else 0 target = ParamsList(image_size=img.size, is_train=self.is_train) target.add_field("hm", heat_map) target.add_field("reg", regression) target.add_field("cls_ids", cls_ids) target.add_field("proj_p", proj_points) target.add_field("dimensions", dimensions) target.add_field("locations", locations) target.add_field("rotys", rotys) target.add_field("trans_mat", trans_mat) target.add_field("K", K) target.add_field("reg_mask", reg_mask) target.add_field("flip_mask", flip_mask) if self.transforms is not None: img, target = self.transforms(img, target) return img, target, idx
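# gaussian_radius / draw_umich_gaussian above follow the CenterNet recipe:
# pick a radius such that corners shifted within it still give a high IoU
# with the ground-truth box, then splat an unnormalized 2D gaussian onto the
# class heatmap, keeping the element-wise maximum. A minimal sketch of the
# splat step (not necessarily this repo's exact implementation):
import numpy as np

def draw_gaussian(heatmap, center, radius):
    x, y = int(center[0]), int(center[1])
    diameter = 2 * radius + 1
    sigma = diameter / 6.0
    ys, xs = np.ogrid[-radius:radius + 1, -radius:radius + 1]
    gaussian = np.exp(-(xs * xs + ys * ys) / (2 * sigma * sigma))
    h, w = heatmap.shape
    # clip the stamp against the heatmap borders
    l, r = min(x, radius), min(w - x, radius + 1)
    t, b = min(y, radius), min(h - y, radius + 1)
    heatmap[y - t:y + b, x - l:x + r] = np.maximum(
        heatmap[y - t:y + b, x - l:x + r],
        gaussian[radius - t:radius + b, radius - l:radius + r])
    return heatmap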
nbr_samples = nusc.scene[scene_index]['nbr_samples'] first_sample_token = my_scene['first_sample_token'] curr_sample = nusc.get('sample', first_sample_token) prog = 0 for _ in range(nbr_samples): sample_tokens[prog] = curr_sample['token'] if curr_sample['next']: next_token = curr_sample['next'] curr_sample = nusc.get('sample', next_token) prog += 1 my_sample_token = sample_tokens[sample_index] my_sample = nusc.get('sample', my_sample_token) sensor_channel = "RADAR_FRONT" my_sample_data = nusc.get('sample_data', my_sample['data'][sensor_channel]) radar_data = get_sensor_sample_data(nusc, my_sample, sensor_channel) my_annotation_token = my_sample['anns'][18] # note that the annotation here is in the global coordinate system my_annotation_metadata = nusc.get('sample_annotation', my_annotation_token) # pay attention to the Box class in nuscenes-devkit: # get_sample_data transforms the coordinates from the global to the sensor frame _, boxes, _ = nusc.get_sample_data(my_sample_data['token']) box_list = [] for box in boxes: mask = points_in_box(box, radar_data[0:3, :]) # calculate the number of points contained in each bounding box num_of_pts = np.count_nonzero(mask) if num_of_pts != 0: box_list.append((box.name, num_of_pts)) print(box_list)
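# The point-count check above generalizes to any point cloud in the sensor
# frame, since get_sample_data returns the boxes already transformed into
# that frame; a compact sketch with the stock devkit API:
from nuscenes.utils.data_classes import RadarPointCloud
from nuscenes.utils.geometry_utils import points_in_box

def count_points_per_box(nusc, sd_token):
    path, boxes, _ = nusc.get_sample_data(sd_token)
    pc = RadarPointCloud.from_file(path)
    return {box.token: int(points_in_box(box, pc.points[:3, :]).sum())
            for box in boxes}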
'static_object.bicycle_rack': 'ignore', } for sample in nusc.sample: cam_token = sample['data']['CAM_FRONT'] pc_token = sample['data']['LIDAR_TOP'] pointsensor = nusc.get('sample_data', pc_token) # pcl_path = osp.join(nusc.dataroot, pointsensor['filename']) cam = nusc.get('sample_data', cam_token) # print(cam) # im = Image.open(osp.join(nusc.dataroot, cam['filename'])) # print(im.width, im.height) data_path, box_list, cam_intrinsic = nusc.get_sample_data(pc_token) # print(data_path) # print(pcl_path) # fname = pcl_path.split('/')[-1].split('.')[0] + '.txt' # print(fname) # if sample['scene_token'] in train_scenes if sample['scene_token'] in val_scenes: fout = open( osp.join(os.getcwd(), 'validation', 'label_2', pc_token + '.txt'), 'w') # pc = LidarPointCloud.from_file(pcl_path)
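# train_scenes / val_scenes above typically come from the official scene
# splits; a sketch of how such token sets are usually built, assuming the
# stock devkit split helper:
from nuscenes.utils.splits import create_splits_scenes

def scene_token_sets(nusc):
    splits = create_splits_scenes()  # {'train': [scene names], 'val': [...], ...}
    name_to_token = {s['name']: s['token'] for s in nusc.scene}
    train = {name_to_token[n] for n in splits['train'] if n in name_to_token}
    val = {name_to_token[n] for n in splits['val'] if n in name_to_token}
    return train, val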
class NuScenesPrepare(): NameMapping = { 'movable_object.barrier': 'barrier', 'movable_object.trafficcone': 'barrier', 'vehicle.bicycle': 'cyclist', 'vehicle.motorcycle': 'cyclist', 'vehicle.bus.bendy': 'vehicle', 'vehicle.bus.rigid': 'vehicle', 'vehicle.car': 'vehicle', 'vehicle.trailer': 'vehicle', 'vehicle.truck': 'vehicle', 'vehicle.construction': 'vehicle', 'human.pedestrian.adult': 'pedestrian', 'human.pedestrian.child': 'pedestrian', 'human.pedestrian.construction_worker': 'pedestrian', 'human.pedestrian.police_officer': 'pedestrian', } labelmapping=LabelEncoder() labelmapping.fit(['barrier','cyclist','pedestrian','vehicle']) DefaultAttribute = { "car": "vehicle.parked", "pedestrian": "pedestrian.moving", "trailer": "vehicle.parked", "truck": "vehicle.parked", "bus": "vehicle.parked", "motorcycle": "cycle.without_rider", "construction_vehicle": "vehicle.parked", "bicycle": "cycle.without_rider", "barrier": "", "traffic_cone": "", } def __init__(self,args): self.args_dp=args['DataPrepare'] self.args_vg=args['VoxelGenerator'] self.cache_path=self.args_dp.data_root+'/'+self.args_dp.cache_name if os.path.exists(self.cache_path): self._Data_frags=pickle.load(open(self.cache_path, 'rb')) else: self.nusc = NuScenes(version=self.args_dp.version, dataroot=self.args_dp.data_root, verbose=self.args_dp.verbose) self._Data_frags=self.getFragAnnotations() pickle.dump(self._Data_frags,open(self.cache_path, 'wb')) if True: self._Data_frags=[item for scene_data in self._Data_frags for item in scene_data] def __len__(self): return len(self._Data_frags) def __getitem__(self, idx): c_frag = self._Data_frags[idx] res = { "lidar": { "type": "lidar", "points": None, }, "gt_boxes": { "obs": None, "pred": None, }, "metadata": { "token": c_frag['Data_frags'][self.args_dp.obs_length-1] # token of last observed frame }, } sweep_voxels,sweep_coords,sweep_num_voxels,sweep_num_points=[],[],[],[] bev_imgs, cam_imgs = [], [] gt_boxes=[] #ts = c_frag['Data_frags'][self.args.obs_length-1]["timestamp"] / 1e6 #get BEV sweeps for fi,c_frame in enumerate(c_frag['Data_frags']): if fi < self.args_dp.obs_length: # get Annotations gt_boxes.append(c_frame['boxes']) # load lidar points lidar_path = c_frame['lidar_path'] points = np.fromfile( str(Path(self.args_dp.data_root)/lidar_path), dtype=np.float32, count=-1).reshape([-1, 5]) points[:, 3] /= 255 points[:, :3] = points[:, :3] @ c_frame["R_cl2rl"].T points[:, :3] += c_frame["T_cl2rl"] # generate voxel bev voxel_res=self.getVoxel(points) if 'bev_img' in self.args_vg.bev_data: bev_img_size = [np.int(np.ceil((self.args_vg.point_cloud_range[3 + i] - self.args_vg.point_cloud_range[i]) / self.args_vg.voxel_size[i])) for i in range(3)] bev_img = np.zeros(bev_img_size) bev_img[voxel_res['coordinates'][:, 2], voxel_res['coordinates'][:, 1], voxel_res['coordinates'][:, 0]]\ =voxel_res["num_points_per_voxel"] bev_imgs.append(bev_img) if 'bev_index' in self.args_vg.bev_data: sweep_voxels.append(voxel_res['voxels']) sweep_coords.append(voxel_res['coordinates']) sweep_num_voxels.append(np.array([voxel_res['voxels'].shape[0]], dtype=np.int64)) sweep_num_points.append( voxel_res["num_points_per_voxel"]) # Load image if self.args_dp.use_image == 'last_image': # only use image of the last observed frame load_image = fi == self.args_dp.obs_length - 1 elif self.args_dp.use_image == 'key_images': # use image of all key frames load_image = 'cam_path' in c_frame.keys() if load_image: if Path(c_frame['cam_path']).exists(): with open(str(c_frame['cam_path']), 'rb') as f: image_str = f.read() else: 
image_str=None cam_imgs.append(image_str) res["lidar"]["voxel_sweeps"] = np.concatenate(sweep_voxels, axis=0) res["lidar"]["bev_imgs"] = np.stack(bev_imgs, axis=0) res["lidar"]["coordinates"] = np.concatenate(sweep_coords, axis=0) res["lidar"]["num_voxels"] = np.concatenate(sweep_num_voxels, axis=0) res["lidar"]["num_points"] = np.concatenate(sweep_num_points, axis=0) res["cam"] = { "type": "camera", "data": image_str, "datatype": Path(c_frag['Data_frags'][self.args_dp.obs_length-1]['cam_path']).suffix[1:], } gt_boxes=np.stack(gt_boxes,axis=0) res["gt_boxes"]["obs"]=gt_boxes[:self.args_dp.obs_length] res["gt_boxes"]["pred"] =gt_boxes[self.args_dp.obs_length:] res['cls_label']=self.labelmapping.transform(c_frag['names']) # encoder is already fitted on the full class list; transform only return res #Ground Truth (by instance) #res["GT"]["obs"]= #image of last def getVoxel(self,points): max_voxels=100000 voxel_generator = VoxelGeneratorV2( voxel_size=list(self.args_vg.voxel_size), point_cloud_range=list(self.args_vg.point_cloud_range), max_num_points=self.args_vg.max_number_of_points_per_voxel, max_voxels=max_voxels, full_mean=self.args_vg.full_empty_part_with_mean, block_filtering=self.args_vg.block_filtering, block_factor=self.args_vg.block_factor, block_size=self.args_vg.block_size, height_threshold=self.args_vg.height_threshold) res = voxel_generator.generate( points, max_voxels) return res def getSamplebyFrame(self): ################# interpolate data at 10 Hz scene_all=[] for si,scene in enumerate(self.nusc.scene): sample_interp_all = [] first_sample = self.nusc.get('sample', scene['first_sample_token']) sd_rec = self.nusc.get('sample_data', first_sample['data']["LIDAR_TOP"]) sample_interp_all.append(sd_rec) while sd_rec['next'] != '': sd_rec = self.nusc.get('sample_data', sd_rec['next']) sample_interp_all.append(sd_rec) scene_all.append(sample_interp_all) return scene_all def getFragAnnotations(self): scene_frames = self.getSamplebyFrame() Data_frags=[] key_slide_window=int(self.args_dp.obs_length*2) #find key frame in this time window si_start=0 if os.path.exists(self.cache_path): Data_frags=pickle.load(open(self.cache_path, 'rb')) si_start=len(Data_frags) print('-------------Preparing fragments--------------') for si in range(si_start,len(scene_frames)): scene_data=scene_frames[si] start = time.time() scene_frags = [] for di,sample_data in enumerate(scene_data): frag_info={} if sample_data['is_key_frame']: if di <= self.args_dp.obs_length or di >= len(scene_data)-self.args_dp.pred_length*self.args_dp.interval: continue cur_frag_index=[i+1 for i in range(di-self.args_dp.obs_length,di+self.args_dp.pred_length)]#the fragment index if di !=cur_frag_index[self.args_dp.obs_length-1]: print('error') start_key=max(0,min(di-self.args_dp.obs_length,di - key_slide_window)) end_key=min(len(scene_data)-1,max(di+self.args_dp.pred_length,di + key_slide_window)) cur_key_index = [i+1 for i in range(start_key,end_key)]#find key frame in this index ## Get reference coordinates refer_frame = sample_data refer_cs_rec = self.nusc.get('calibrated_sensor', refer_frame['calibrated_sensor_token']) refer_pos_rec = self.nusc.get('ego_pose', refer_frame['ego_pose_token']) R_rl2re, T_rl2re = refer_cs_rec['rotation'], refer_cs_rec['translation'] R_re2g, T_re2g = refer_pos_rec['rotation'], refer_pos_rec['translation'] R_rl2re_mat = Quaternion(R_rl2re).rotation_matrix R_re2g_mat = Quaternion(R_re2g).rotation_matrix # get key frame location key_frame_flag = np.zeros((len(cur_key_index),), dtype='bool') key_frame_index = [] for i, d in enumerate(cur_key_index): try: if
scene_data[d]['is_key_frame'] == True: key_frame_index.append(i) key_frame_flag[i] = True except: print('error') key_frames = np.array(scene_data[cur_key_index[0]:(cur_key_index[-1]+1)])[key_frame_flag] # only key frame has annotations, so firstly get key frame infos key_sample, key_sample_token, key_instances, key_annotations, key_velocity = [], [], [], [], [] for k, key_frame in enumerate(key_frames): sample_token = key_frame['sample_token'] sample = self.nusc.get('sample', sample_token) annotations = [ self.nusc.get('sample_annotation', token) for token in sample['anns'] ] velocity = np.array( [self.nusc.box_velocity(token)[:2] for token in sample['anns']]) key_sample_token.append(sample_token) key_instances.append([anno['instance_token'] for anno in annotations]) key_sample.append(sample) key_annotations.append(annotations) key_velocity.append(velocity) # get full presented instance token in the candidate fragments instances_intersect = list(set.intersection(*[set(i) for i in key_instances])) #instances_union = list(set.union(*[set(i) for i in key_instances])) # full presented instance flags valid_inst_flags = [np.zeros(len(kinst), dtype='bool') for kinst in key_instances] for kinst, key_inst in enumerate(key_instances): for vkinst, valid_inst in enumerate(key_inst): if valid_inst in instances_intersect: valid_inst_flags[kinst][vkinst] = True ########################################## ## Prepare fragments Database ########################################## cur_key_frame_index = [] for i, d in enumerate(cur_frag_index): if scene_data[d]['is_key_frame'] == True: cur_key_frame_index.append(i) frag_info['Data_frags'] = [] frag_info['instance_token'] = instances_intersect frag_info['key_frame_index'] = cur_key_frame_index frag_info['last_obs_frame']=di frag_info['scene_No'] = si for i, d in enumerate(cur_frag_index): frag_data = {} sample_data=scene_data[d] sample_token = sample_data['sample_token'] sample = self.nusc.get('sample', sample_token) # find the key sample this frame data belongs to try: key_sample_ind = key_sample_token.index(sample_token) except: print('can not find corresponding key frame at scene {} frame {}'.format(si,d)) valid_inst_flag = valid_inst_flags[key_sample_ind] ## Pose matrix: lidar2ego and ego2global s_cs_rec = self.nusc.get('calibrated_sensor', sample_data['calibrated_sensor_token']) s_pos_rec = self.nusc.get('ego_pose', sample_data['ego_pose_token']) R_cl2ce, T_cl2ce = s_cs_rec['rotation'], s_cs_rec['translation'] R_ce2g, T_ce2g = s_pos_rec['rotation'], s_pos_rec['translation'] R_cl2ce_mat = Quaternion(R_cl2ce).rotation_matrix R_ce2g_mat = Quaternion(R_ce2g).rotation_matrix # Data_frag['Info_frags']['T_l2e'],Data_frag['Info_frags']['R_l2e'] = cs_record['translation'],cs_record['rotation'] # Data_frag['Info_frags']['T_e2g'],Data_frag['Info_frags']['R_e2g'] = pose_record['translation'], pose_record['rotation'] ## Get Relative Pose: R_cl2rl,T_cl2rl, based on R/T_rl2re, R/T_re2g, R/T_cl2ce, R/T_ce2g # r: reference, c: current, l: lidar, e: ego, g: global # Attention: R_b2a = inv(R_a2b), T_b2a = - T_b2a * inv(R_b2a), # R_cl2rl = R_cl2se * R_ce2g * [R_g2rl] # R_g2rl= R_g2re * R_re2rl = inv(R_re2g) * inv(R_rl2re) R_cl2rl = (R_cl2ce_mat.T @ R_ce2g_mat.T) @ ( np.linalg.inv(R_re2g_mat).T @ np.linalg.inv(R_rl2re_mat).T) # T_cl2rl = (T_cl2ce * R_ce2g + T_ce2g) * [R_g2rl] + [T_g2rl] # T_g2rl = (T_g2re * R_re2rl + T_re2rl) = - T_re2g * inv(R_re2g) - T_rl2re * inv(R_rl2re) T_cl2rl = (T_cl2ce @ R_ce2g_mat.T + T_ce2g) @ ( np.linalg.inv(R_re2g_mat).T @ 
np.linalg.inv(R_rl2re_mat).T) \ - T_re2g @ (np.linalg.inv(R_re2g_mat).T @ np.linalg.inv( R_rl2re_mat).T) - T_rl2re @ np.linalg.inv(R_rl2re_mat).T frag_data['R_cl2rl'], frag_data['T_cl2rl'] = R_cl2rl, T_cl2rl ### Get valid boxes, then transform to the reference coordinates boxes = self.nusc.get_boxes(sample_data['token']) # At global coordinate for box in boxes: # Move box to referred coord system box.translate(-np.array(refer_pos_rec['translation'])) box.rotate(Quaternion(refer_pos_rec['rotation']).inverse) box.translate(-np.array(refer_cs_rec['translation'])) box.rotate(Quaternion(refer_cs_rec['rotation']).inverse) boxes = np.array(boxes) # At reference coordinate try: valid_boxes = boxes[valid_inst_flag] except: print('cannot find valid box at scene {} frame {}'.format(si, d)) ## Transform Boxes to [location,dimension,rotation] locs = np.array([b.center for b in valid_boxes]).reshape(-1, 3) dims = np.array([b.wlh for b in valid_boxes]).reshape(-1, 3) rots = np.array([b.orientation.yaw_pitch_roll[0] for b in valid_boxes]).reshape(-1, 1) gt_boxes = np.concatenate([locs, dims, -rots - np.pi / 2], axis=1) frag_data['boxes'] = gt_boxes ## Datapath if i<self.args_dp.obs_length: frag_data['lidar_path'] = sample_data['filename'] if sample_data['is_key_frame']: cam_front_token = sample["data"]["CAM_FRONT"] cam_path, _, _ = self.nusc.get_sample_data(cam_front_token) frag_data['cam_path'] = cam_path ## Object name if 'names' not in frag_info.keys(): names = [b.name for b in valid_boxes] for ni in range(len(names)): # separate index so the frame index i above is not clobbered if names[ni] in self.NameMapping: names[ni] = self.NameMapping[names[ni]] names = np.array(names) frag_info['names'] = names ## Velocity (without interpolation) valid_velo = key_velocity[key_sample_ind][valid_inst_flag] # convert velo from global to current lidar for bi in range(len(valid_boxes)): velo = np.array([*valid_velo[bi], 0.0]) velo = velo @ np.linalg.inv(R_ce2g_mat).T @ np.linalg.inv(R_cl2ce_mat).T valid_velo[bi] = velo[:2] frag_data['Velocity'] = valid_velo frag_data['FrameNo.'] = d frag_data['Token'] = sample_data['token'] frag_data['timestamp'] = sample_data['timestamp'] frag_info['Data_frags'].append(frag_data) scene_frags.append(frag_info) Data_frags.append(scene_frags) end = time.time() print('scene {}/{}: total frags: {} time: {} '.format(si, len(scene_frames), len(scene_frags), end - start)) if si%200==0 and si>0: pickle.dump(Data_frags, open(self.cache_path, 'wb')) return Data_frags
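# The hand-derived R_cl2rl / T_cl2rl above chain current-lidar -> current-ego
# -> global -> reference-ego -> reference-lidar. The same transform is easier
# to audit as a product of homogeneous 4x4 matrices via the devkit's
# transform_matrix; a sketch (each record is a calibrated_sensor / ego_pose
# dict with 'translation' and 'rotation' fields, as used throughout this file):
import numpy as np
from pyquaternion import Quaternion
from nuscenes.utils.geometry_utils import transform_matrix

def lidar_to_reference_lidar(cs_cur, pose_cur, cs_ref, pose_ref):
    cl2ce = transform_matrix(cs_cur['translation'], Quaternion(cs_cur['rotation']))
    ce2g = transform_matrix(pose_cur['translation'], Quaternion(pose_cur['rotation']))
    g2re = transform_matrix(pose_ref['translation'], Quaternion(pose_ref['rotation']), inverse=True)
    re2rl = transform_matrix(cs_ref['translation'], Quaternion(cs_ref['rotation']), inverse=True)
    # full chain: current lidar -> current ego -> global -> ref ego -> ref lidar
    return re2rl @ g2re @ ce2g @ cl2ce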