def __call__(self, input_dict):
    """Call function to filter points by the range.

    Args:
        input_dict (dict): Result dict from loading pipeline.

    Returns:
        dict: Results after filtering, 'points' keys are updated \
            in the result dict.
    """
    points = input_dict['points']
    gt_bboxes_3d = input_dict['gt_bboxes_3d']

    # BUGFIX: `tensor.numpy()` shares memory with the underlying torch
    # tensor, so writing the gravity centers into the array below used to
    # silently overwrite the ground-truth boxes in `input_dict`.
    # Clone first so filtering has no side effect on the annotations.
    gt_bboxes_3d_np = gt_bboxes_3d.tensor.clone().numpy()
    gt_bboxes_3d_np[:, :3] = gt_bboxes_3d.gravity_center.clone().numpy()
    enlarged_gt_bboxes_3d = gt_bboxes_3d_np.copy()
    enlarged_gt_bboxes_3d[:, 3:6] += self.bbox_enlarge_range
    foreground_masks = box_np_ops.points_in_rbbox(points, gt_bboxes_3d_np)
    enlarge_foreground_masks = box_np_ops.points_in_rbbox(
        points, enlarged_gt_bboxes_3d)
    # Reduce over boxes: a point is foreground if it is inside any box.
    foreground_masks = foreground_masks.max(1)
    enlarge_foreground_masks = enlarge_foreground_masks.max(1)
    # Drop the points that lie only in the enlarged boxes, i.e. in the
    # thin shell around each ground-truth box; keep everything else.
    valid_masks = ~np.logical_and(~foreground_masks, enlarge_foreground_masks)

    input_dict['points'] = points[valid_masks]
    pts_instance_mask = input_dict.get('pts_instance_mask', None)
    if pts_instance_mask is not None:
        input_dict['pts_instance_mask'] = pts_instance_mask[valid_masks]

    pts_semantic_mask = input_dict.get('pts_semantic_mask', None)
    if pts_semantic_mask is not None:
        input_dict['pts_semantic_mask'] = pts_semantic_mask[valid_masks]
    return input_dict
def remove_points_in_boxes(points, boxes):
    """Remove the points in the sampled bounding boxes.

    Args:
        points (:obj:`BasePoints`): Input point cloud array.
        boxes (np.ndarray): Sampled ground truth boxes.

    Returns:
        np.ndarray: Points with those in the boxes removed.
    """
    # Boolean membership matrix of shape (num_points, num_boxes).
    inside = box_np_ops.points_in_rbbox(points.coord.numpy(), boxes)
    # Keep only the points that fall inside none of the boxes.
    keep = ~inside.any(-1)
    return points[keep]
def _calculate_num_points_in_gt(data_path,
                                infos,
                                relative_path,
                                remove_outside=True,
                                num_features=4):
    """Annotate each info dict with the lidar point count per GT box.

    For every frame, loads the velodyne point cloud, optionally drops the
    points outside the camera frustum, converts the camera-frame boxes to
    the lidar frame and counts the points falling inside each box.
    Ignored ('DontCare') objects get a count of -1. Results are written
    in place to ``info['annos']['num_points_in_gt']``.
    """
    for info in mmcv.track_iter_progress(infos):
        pc_info = info['point_cloud']
        image_info = info['image']
        calib = info['calib']

        velodyne_path = pc_info['velodyne_path']
        if relative_path:
            velodyne_path = str(Path(data_path) / velodyne_path)
        cloud = np.fromfile(velodyne_path, dtype=np.float32,
                            count=-1).reshape([-1, num_features])

        rect = calib['R0_rect']
        Trv2c = calib['Tr_velo_to_cam']
        P2 = calib['P2']
        if remove_outside:
            # Keep only the points visible in the camera image.
            cloud = box_np_ops.remove_outside_points(
                cloud, rect, Trv2c, P2, image_info['image_shape'])

        annos = info['annos']
        # Valid objects come first in the KITTI annotation arrays;
        # trailing entries are 'DontCare'.
        num_obj = len([n for n in annos['name'] if n != 'DontCare'])
        boxes_camera = np.concatenate(
            [
                annos['location'][:num_obj],
                annos['dimensions'][:num_obj],
                annos['rotation_y'][:num_obj][..., np.newaxis],
            ],
            axis=1)
        boxes_lidar = box_np_ops.box_camera_to_lidar(boxes_camera, rect,
                                                     Trv2c)
        inside = box_np_ops.points_in_rbbox(cloud[:, :3], boxes_lidar)
        counts = inside.sum(0)
        # Mark the ignored ('DontCare') tail with -1.
        num_ignored = len(annos['dimensions']) - num_obj
        counts = np.concatenate([counts, -np.ones([num_ignored])])
        annos['num_points_in_gt'] = counts.astype(np.int32)
def create_groundtruth_database(dataset_class_name,
                                data_path,
                                info_prefix,
                                info_path=None,
                                mask_anno_path=None,
                                used_classes=None,
                                database_save_path=None,
                                db_info_save_path=None,
                                relative_path=True,
                                add_rgb=False,
                                lidar_only=False,
                                bev_only=False,
                                coors_range=None,
                                with_mask=False):
    """Given the raw data, generate the ground truth database.

    For every annotated object, the points inside its 3D box are cropped,
    re-centered on the box center and dumped to an individual ``.bin``
    file; a per-class list of metadata dicts is pickled alongside.

    Args:
        dataset_class_name (str): Name of the input dataset.
        data_path (str): Path of the data.
        info_prefix (str): Prefix of the info file.
        info_path (str): Path of the info file. Default: None.
        mask_anno_path (str): Path of the mask_anno. Default: None.
        used_classes (list[str]): Classes have been used. Default: None.
        database_save_path (str): Path to save database. Default: None.
        db_info_save_path (str): Path to save db_info. Default: None.
        relative_path (bool): Whether to use relative path. Default: True.
        add_rgb (bool): Unused here; kept for interface compatibility.
        lidar_only (bool): Unused here; kept for interface compatibility.
        bev_only (bool): Unused here; kept for interface compatibility.
        coors_range: Unused here; kept for interface compatibility.
        with_mask (bool): Whether to use mask. Default: False.
    """
    print(f'Create GT Database of {dataset_class_name}')
    dataset_cfg = dict(type=dataset_class_name,
                       data_root=data_path,
                       ann_file=info_path)
    # Per-dataset loading pipeline: only points and 3D annotations are
    # needed to build the database.
    if dataset_class_name == 'KittiDataset':
        file_client_args = dict(backend='disk')
        dataset_cfg.update(test_mode=False,
                           split='training',
                           modality=dict(
                               use_lidar=True,
                               use_depth=False,
                               use_lidar_intensity=True,
                               use_camera=with_mask,
                           ),
                           pipeline=[
                               dict(type='LoadPointsFromFile',
                                    coord_type='LIDAR',
                                    load_dim=4,
                                    use_dim=4,
                                    file_client_args=file_client_args),
                               dict(type='LoadAnnotations3D',
                                    with_bbox_3d=True,
                                    with_label_3d=True,
                                    file_client_args=file_client_args)
                           ])
    elif dataset_class_name == 'NuScenesDataset':
        dataset_cfg.update(use_valid_flag=True,
                           pipeline=[
                               dict(type='LoadPointsFromFile',
                                    load_dim=5,
                                    use_dim=5),
                               dict(type='LoadPointsFromMultiSweeps',
                                    coord_type='LIDAR',
                                    sweeps_num=10,
                                    use_dim=[0, 1, 2, 3, 4],
                                    pad_empty_sweeps=True,
                                    remove_close=True),
                               dict(type='LoadAnnotations3D',
                                    with_bbox_3d=True,
                                    with_label_3d=True)
                           ])
    elif dataset_class_name == 'WaymoDataset':
        file_client_args = dict(backend='disk')
        # Waymo stores 6 dims per point but only 5 are used downstream.
        dataset_cfg.update(test_mode=False,
                           split='training',
                           modality=dict(
                               use_lidar=True,
                               use_depth=False,
                               use_lidar_intensity=True,
                               use_camera=False,
                           ),
                           pipeline=[
                               dict(type='LoadPointsFromFile',
                                    coord_type='LIDAR',
                                    load_dim=6,
                                    use_dim=5,
                                    file_client_args=file_client_args),
                               dict(type='LoadAnnotations3D',
                                    with_bbox_3d=True,
                                    with_label_3d=True,
                                    file_client_args=file_client_args)
                           ])

    dataset = build_dataset(dataset_cfg)

    if database_save_path is None:
        database_save_path = osp.join(data_path, f'{info_prefix}_gt_database')
    if db_info_save_path is None:
        db_info_save_path = osp.join(data_path,
                                     f'{info_prefix}_dbinfos_train.pkl')
    mmcv.mkdir_or_exist(database_save_path)
    all_db_infos = dict()
    if with_mask:
        # Map COCO image file names to ids so each sample's 2D instance
        # masks can be looked up by file name in the main loop below.
        coco = COCO(osp.join(data_path, mask_anno_path))
        imgIds = coco.getImgIds()
        file2id = dict()
        for i in imgIds:
            info = coco.loadImgs([i])[0]
            file2id.update({info['file_name']: i})

    group_counter = 0
    for j in track_iter_progress(list(range(len(dataset)))):
        input_dict = dataset.get_data_info(j)
        dataset.pre_pipeline(input_dict)
        example = dataset.pipeline(input_dict)
        annos = example['ann_info']
        image_idx = example['sample_idx']
        points = example['points'].tensor.numpy()
        gt_boxes_3d = annos['gt_bboxes_3d'].tensor.numpy()
        names = annos['gt_names']
        group_dict = dict()
        if 'group_ids' in annos:
            group_ids = annos['group_ids']
        else:
            # No grouping information: treat every box as its own group.
            group_ids = np.arange(gt_boxes_3d.shape[0], dtype=np.int64)
        difficulty = np.zeros(gt_boxes_3d.shape[0], dtype=np.int32)
        if 'difficulty' in annos:
            difficulty = annos['difficulty']

        num_obj = gt_boxes_3d.shape[0]
        # (num_points, num_obj) boolean membership matrix.
        point_indices = box_np_ops.points_in_rbbox(points, gt_boxes_3d)

        if with_mask:
            # prepare masks
            gt_boxes = annos['gt_bboxes']
            img_path = osp.split(example['img_info']['filename'])[-1]
            if img_path not in file2id.keys():
                print(f'skip image {img_path} for empty mask')
                continue
            img_id = file2id[img_path]
            kins_annIds = coco.getAnnIds(imgIds=img_id)
            kins_raw_info = coco.loadAnns(kins_annIds)
            kins_ann_info = _parse_coco_ann_info(kins_raw_info)
            h, w = annos['img_shape'][:2]
            gt_masks = [
                _poly2mask(mask, h, w) for mask in kins_ann_info['masks']
            ]
            # get mask inds based on iou mapping
            bbox_iou = bbox_overlaps(kins_ann_info['bboxes'], gt_boxes)
            mask_inds = bbox_iou.argmax(axis=0)
            # A COCO mask is accepted for a GT box only on a confident
            # (IoU > 0.5) match.
            valid_inds = (bbox_iou.max(axis=0) > 0.5)

            # mask the image
            # use more precise crop when it is ready
            # object_img_patches = np.ascontiguousarray(
            #     np.stack(object_img_patches, axis=0).transpose(0, 3, 1, 2))
            # crop image patches using roi_align
            # object_img_patches = crop_image_patch_v2(
            #     torch.Tensor(gt_boxes),
            #     torch.Tensor(mask_inds).long(), object_img_patches)
            object_img_patches, object_masks = crop_image_patch(
                gt_boxes, gt_masks, mask_inds, annos['img'])

        for i in range(num_obj):
            filename = f'{image_idx}_{names[i]}_{i}.bin'
            abs_filepath = osp.join(database_save_path, filename)
            rel_filepath = osp.join(f'{info_prefix}_gt_database', filename)

            # save point clouds and image patches for each object
            gt_points = points[point_indices[:, i]]
            # Store object points relative to their box center.
            gt_points[:, :3] -= gt_boxes_3d[i, :3]

            if with_mask:
                if object_masks[i].sum() == 0 or not valid_inds[i]:
                    # Skip object for empty or invalid mask
                    continue
                img_patch_path = abs_filepath + '.png'
                mask_patch_path = abs_filepath + '.mask.png'
                mmcv.imwrite(object_img_patches[i], img_patch_path)
                mmcv.imwrite(object_masks[i], mask_patch_path)

            # NOTE(review): text-mode open with a binary tofile() happens
            # to work on POSIX but would corrupt data on Windows — 'wb'
            # looks intended; confirm before changing.
            with open(abs_filepath, 'w') as f:
                gt_points.tofile(f)

            if (used_classes is None) or names[i] in used_classes:
                db_info = {
                    'name': names[i],
                    'path': rel_filepath,
                    'image_idx': image_idx,
                    'gt_idx': i,
                    'box3d_lidar': gt_boxes_3d[i],
                    'num_points_in_gt': gt_points.shape[0],
                    'difficulty': difficulty[i],
                }
                local_group_id = group_ids[i]
                # if local_group_id >= 0:
                if local_group_id not in group_dict:
                    # First time this group id is seen: assign the next
                    # globally unique group counter.
                    group_dict[local_group_id] = group_counter
                    group_counter += 1
                db_info['group_id'] = group_dict[local_group_id]
                if 'score' in annos:
                    db_info['score'] = annos['score'][i]
                if with_mask:
                    db_info.update({'box2d_camera': gt_boxes[i]})
                if names[i] in all_db_infos:
                    all_db_infos[names[i]].append(db_info)
                else:
                    all_db_infos[names[i]] = [db_info]

    for k, v in all_db_infos.items():
        print(f'load {len(v)} {k} database infos')

    with open(db_info_save_path, 'wb') as f:
        pickle.dump(all_db_infos, f)
def _fill_trainval_infos(nusc,
                         train_scenes,
                         val_scenes,
                         test=False,
                         max_sweeps=10,
                         trans_data_for_show=False,
                         root_save_path=None,
                         need_val_scenes=True):
    """Generate the train/val infos from the raw data.

    Args:
        nusc (:obj:`NuScenes`): Dataset class in the nuScenes dataset.
        train_scenes (list[str]): Basic information of training scenes.
        val_scenes (list[str]): Basic information of validation scenes.
        test (bool): Whether use the test mode. In the test mode, no
            annotations can be accessed. Default: False.
        max_sweeps (int): Max number of sweeps. Default: 10.
        trans_data_for_show (bool): Whether to also export data for
            visualization (calib files, image-name lists, optionally raw
            points / seg masks / boxes). Default: False.
        root_save_path (str): Directory for the exported visualization
            files. Default: None, which falls back to a built-in debug
            path.
        need_val_scenes (bool): When True, samples of training scenes are
            skipped entirely, so only validation infos are produced.
            Default: True.

    Returns:
        tuple[list[dict]]: Information of training set and validation set
            that will be saved to the info file.
    """
    train_nusc_infos = []
    val_nusc_infos = []

    if trans_data_for_show:
        # Map general lidarseg labels onto the reduced segmentation set.
        point_label_mapping = []
        for name in NuScenesDataset.POINT_CLASS_GENERAL:
            point_label_mapping.append(
                NuScenesDataset.POINT_CLASS_SEG.index(
                    NuScenesDataset.PointClassMapping[name]))
        point_label_mapping = np.array(point_label_mapping, dtype=np.uint8)

    set_idx = 0
    L_cam_path = []
    R_cam_path = []
    # BUGFIX: this used to unconditionally overwrite the caller-supplied
    # `root_save_path` with a hard-coded debug directory; only fall back
    # to the debug path when no path was supplied.
    if root_save_path is None:
        root_save_path = '/data/jk_save_dir/temp_dir'
    assert root_save_path is not None
    from mmdet3d.core.bbox import box_np_ops as box_np_ops
    for sample in mmcv.track_iter_progress(nusc.sample):
        lidar_token = sample['data']['LIDAR_TOP']
        sd_rec = nusc.get('sample_data', sample['data']['LIDAR_TOP'])
        cs_record = nusc.get('calibrated_sensor',
                             sd_rec['calibrated_sensor_token'])
        pose_record = nusc.get('ego_pose', sd_rec['ego_pose_token'])
        lidar_path, boxes, _ = nusc.get_sample_data(lidar_token)

        mmcv.check_file_exist(lidar_path)
        # NOTE: with need_val_scenes=True every training-scene sample is
        # skipped here, so train_nusc_infos stays empty.
        if need_val_scenes and sample['scene_token'] in train_scenes:
            continue

        info = {
            'lidar_path': lidar_path,
            'token': sample['token'],
            'sweeps': [],
            'cams': dict(),
            'lidar2ego_translation': cs_record['translation'],
            'lidar2ego_rotation': cs_record['rotation'],
            'ego2global_translation': pose_record['translation'],
            'ego2global_rotation': pose_record['rotation'],
            'timestamp': sample['timestamp'],
        }

        l2e_r = info['lidar2ego_rotation']
        l2e_t = info['lidar2ego_translation']
        e2g_r = info['ego2global_rotation']
        e2g_t = info['ego2global_translation']
        l2e_r_mat = Quaternion(l2e_r).rotation_matrix
        e2g_r_mat = Quaternion(e2g_r).rotation_matrix

        # obtain 6 image's information per frame
        camera_types = [
            'CAM_FRONT',
            'CAM_FRONT_RIGHT',
            'CAM_FRONT_LEFT',
            'CAM_BACK',
            'CAM_BACK_LEFT',
            'CAM_BACK_RIGHT',
        ]
        for cam in camera_types:
            cam_token = sample['data'][cam]
            cam_path, _, cam_intrinsic = nusc.get_sample_data(cam_token)
            cam_info = obtain_sensor2top(nusc, cam_token, l2e_t, l2e_r_mat,
                                         e2g_t, e2g_r_mat, cam)
            cam_info.update(cam_intrinsic=cam_intrinsic)
            info['cams'].update({cam: cam_info})

        # obtain sweeps for a single key-frame
        sd_rec = nusc.get('sample_data', sample['data']['LIDAR_TOP'])
        sweeps = []
        while len(sweeps) < max_sweeps:
            if not sd_rec['prev'] == '':
                sweep = obtain_sensor2top(nusc, sd_rec['prev'], l2e_t,
                                          l2e_r_mat, e2g_t, e2g_r_mat,
                                          'lidar')
                sweeps.append(sweep)
                sd_rec = nusc.get('sample_data', sd_rec['prev'])
            else:
                break
        info['sweeps'] = sweeps
        # obtain annotation
        if not test:
            annotations = [
                nusc.get('sample_annotation', token)
                for token in sample['anns']
            ]
            locs = np.array([b.center for b in boxes]).reshape(-1, 3)
            dims = np.array([b.wlh for b in boxes]).reshape(-1, 3)
            rots = np.array([b.orientation.yaw_pitch_roll[0]
                             for b in boxes]).reshape(-1, 1)
            velocity = np.array(
                [nusc.box_velocity(token)[:2] for token in sample['anns']])
            valid_flag = np.array(
                [(anno['num_lidar_pts'] + anno['num_radar_pts']) > 0
                 for anno in annotations],
                dtype=bool).reshape(-1)
            # convert velo from global to lidar
            for i in range(len(boxes)):
                velo = np.array([*velocity[i], 0.0])
                velo = velo @ np.linalg.inv(e2g_r_mat).T @ np.linalg.inv(
                    l2e_r_mat).T
                velocity[i] = velo[:2]

            names = [b.name for b in boxes]
            for i in range(len(names)):
                if names[i] in NuScenesDataset.NameMapping:
                    names[i] = NuScenesDataset.NameMapping[names[i]]
            names = np.array(names)
            # we need to convert rot to SECOND format.
            gt_boxes = np.concatenate([locs, dims, -rots - np.pi / 2],
                                      axis=1)
            assert len(gt_boxes) == len(
                annotations), f'{len(gt_boxes)}, {len(annotations)}'
            info['gt_boxes'] = gt_boxes
            info['gt_names'] = names
            info['gt_velocity'] = velocity.reshape(-1, 2)
            info['num_lidar_pts'] = np.array(
                [a['num_lidar_pts'] for a in annotations])
            info['num_radar_pts'] = np.array(
                [a['num_radar_pts'] for a in annotations])
            info['valid_flag'] = valid_flag

            # lidarseg labels
            lidarseg_labels_filepath = os.path.join(
                nusc.dataroot,
                nusc.get('lidarseg', lidar_token)['filename'])
            info['lidar_pts_labels_filepath'] = lidarseg_labels_filepath

        if trans_data_for_show:
            # Hard-coded export switches: only the calib files are dumped.
            need_raw_data, need_calib, need_pts_label, need_bbox = False, True, False, False
            points = np.fromfile(lidar_path,
                                 dtype=np.float32).reshape(-1, 5)
            file_name = str(set_idx).zfill(6)
            if need_raw_data:
                # point cloud
                pts_save_path = os.path.join(root_save_path, 'point_cloud')
                mmcv.mkdir_or_exist(pts_save_path)
                postfix = '.bin'
                points.tofile(pts_save_path + '/' + file_name + postfix)

            if need_pts_label:
                lidarseg_labels_filepath = os.path.join(
                    nusc.dataroot,
                    nusc.get('lidarseg', lidar_token)['filename'])
                pts_semantic_mask = np.fromfile(lidarseg_labels_filepath,
                                                dtype=np.uint8).reshape(
                                                    [-1, 1])
                pts_semantic_mask = point_label_mapping[pts_semantic_mask]
                need_ins_label = True
                if need_ins_label:
                    # for instance label
                    isinstance_label = np.zeros_like(pts_semantic_mask)
                    point_indices = box_np_ops.points_in_rbbox(
                        points, gt_boxes, origin=(0.5, 0.5, 0.5))
                    point_indices = point_indices.transpose()
                    for idx, cls_name in enumerate(names):
                        # ins label from 1, not 0
                        isinstance_label[point_indices[idx]] = idx + 1
                    pts_semantic_mask = np.concatenate(
                        (pts_semantic_mask, isinstance_label), axis=-1)

                sem_save_path = os.path.join(root_save_path, 'sem_ins_mask')
                mmcv.mkdir_or_exist(sem_save_path)
                postfix = '.bin'
                # we use uint8 as the mask label file type
                pts_semantic_mask = np.array(pts_semantic_mask,
                                             dtype=np.uint8)
                pts_semantic_mask.tofile(sem_save_path + '/' + file_name +
                                         postfix)

            if need_calib:
                # cam_list = ['CAM_FRONT', 'CAM_BACK']
                cam_list = ['CAM_FRONT_RIGHT', 'CAM_FRONT_LEFT']
                for cam_name in cam_list:
                    # Stack rotation (3x3), translation (1x3) and
                    # intrinsics (3x3) into one text file per camera.
                    calib_s2l_r = info['cams'][cam_name][
                        'sensor2lidar_rotation']
                    calib_s2l_t = info['cams'][cam_name][
                        'sensor2lidar_translation'].reshape(1, 3)
                    calib_s2l_i = info['cams'][cam_name]['cam_intrinsic']
                    calib = np.concatenate(
                        (calib_s2l_r, calib_s2l_t, calib_s2l_i), axis=0)
                    postfix = '.txt'
                    calib_save_path = os.path.join(root_save_path, cam_name)
                    mmcv.mkdir_or_exist(calib_save_path)
                    np.savetxt(calib_save_path + '/' + file_name + postfix,
                               calib,
                               fmt='%.16f')
                    # need name??
                    _, image_name = os.path.split(
                        info['cams'][cam_name]['data_path'])
                    line = image_name + ' ' + file_name + '.jpg'
                    if cam_name == 'CAM_FRONT_LEFT':
                        L_cam_path.append(line)
                    elif cam_name == 'CAM_FRONT_RIGHT':
                        R_cam_path.append(line)

            if need_bbox:
                save_boxes = np.zeros((0, 10))
                num_bbox = len(info['gt_names'])
                if num_bbox > 0:
                    # Keep only the classes known to nus_categories.
                    mask = [
                        name in nus_categories for name in info['gt_names']
                    ]
                    mask = np.array(mask)
                    save_gt_names = info['gt_names'][mask]
                    save_gt_boxes = info['gt_boxes'][mask]
                    save_gt_boxes = save_gt_boxes.reshape(-1, 7)
                    save_label = [
                        nus_categories.index(name)
                        for name in save_gt_names
                    ]
                    save_label = np.array(save_label).reshape(-1, 1)
                    # Reorder dims: (x, y, z, h, w, l, yaw).
                    save_boxes = save_gt_boxes[:, [0, 1, 2, 5, 3, 4, 6]]
                    save_boxes = save_boxes.reshape(-1, 7)
                    save_conf = np.zeros_like(save_label)
                    save_id = np.zeros_like(save_conf)
                    # need velocity?
                    save_gt_velocity = info['gt_velocity'][mask]
                    save_gt_velocity = save_gt_velocity.reshape(-1, 2)
                    save_velocity_scale = np.linalg.norm(save_gt_velocity,
                                                         axis=1,
                                                         keepdims=True)
                    save_velocity_dir = np.arctan2(save_gt_velocity[:, 1],
                                                   save_gt_velocity[:, 0])
                    save_velocity_dir = save_velocity_dir.reshape(-1, 1)
                    # Missing velocities come through as NaN; zero them.
                    save_velocity_scale[np.isnan(save_velocity_scale)] = 0.
                    save_velocity_dir[np.isnan(save_velocity_dir)] = 0.
                    save_boxes = np.concatenate(
                        (save_label, save_boxes, save_conf, save_id,
                         save_velocity_scale, save_velocity_dir),
                        axis=-1)
                postfix = '.txt'
                bbox_save_path = os.path.join(root_save_path,
                                              'bbox_with_velocity')
                mmcv.mkdir_or_exist(bbox_save_path)
                np.savetxt(bbox_save_path + '/' + file_name + postfix,
                           save_boxes,
                           fmt='%.3f')
            set_idx = set_idx + 1

        if sample['scene_token'] in train_scenes:
            train_nusc_infos.append(info)
        else:
            val_nusc_infos.append(info)

    L_cam_path = np.array(L_cam_path)
    R_cam_path = np.array(R_cam_path)
    np.savetxt(root_save_path + '/' + 'L_image_name.txt',
               L_cam_path,
               fmt='%s')
    np.savetxt(root_save_path + '/' + 'R_image_name.txt',
               R_cam_path,
               fmt='%s')
    return train_nusc_infos, val_nusc_infos
def map_func(idx):
    """Build the info dict (point cloud / image / calib / annotations)
    for one sample index.

    Relies on names from the enclosing scope (``root_path``,
    ``label_info``, ``pc_type`` and the ``get_*`` helpers) — presumably a
    closure inside an info-gathering routine; confirm against the caller.

    Args:
        idx (int): Sample index.

    Returns:
        dict: Info dict with 'point_cloud', 'image' and 'calib' keys, plus
            'annos' when labels are loaded.
    """
    info = {}
    pc_info = {'num_features': 4, 'pc_idx': idx}
    info['point_cloud'] = pc_info
    image_info = {'image_idx': idx,
                  'image_shape': get_image_shape(root_path, idx)}
    info['image'] = image_info
    calib = get_calib(root_path, idx)
    info['calib'] = calib

    if label_info:
        obj_list = get_label(root_path, idx)
        for obj in obj_list:
            # Derive the camera-frame pose and the 2D image box from the
            # radar-frame annotation.
            obj.from_radar_to_camera(calib)
            obj.from_radar_to_image(calib)

        # KITTI-style annotation arrays, one entry per object.
        # NOTE(review): np.concatenate below raises on an empty obj_list —
        # presumably every labeled frame has at least one object; confirm.
        annotations = {'name': np.array([obj.cls_type for obj in obj_list]),
                       'occluded': np.array([obj.occlusion for obj in obj_list]),
                       # Observation angle from location and camera yaw.
                       'alpha': np.array([-np.arctan2(obj.loc[1], obj.loc[0]) + obj.rot_camera for obj in obj_list]),
                       'bbox': np.concatenate([obj.box2d.reshape(1, 4) for obj in obj_list], axis=0),
                       'dimensions': np.array([[obj.l, obj.h, obj.w] for obj in obj_list]),
                       'location': np.concatenate([obj.loc_camera.reshape(1, 3) for obj in obj_list], axis=0),
                       'rotation_y': np.array([obj.rot_camera for obj in obj_list]),
                       'score': np.array([obj.score for obj in obj_list]),
                       'difficulty': np.array([obj.level for obj in obj_list], np.int32),
                       # Truncation is not available here; mark with -1.
                       'truncated': -np.ones(len(obj_list))}

        num_objects = len([obj.cls_type for obj in obj_list if obj.cls_type != 'DontCare'])
        num_gt = len(annotations['name'])
        # Valid objects come first; trailing 'DontCare' entries get -1.
        index = list(range(num_objects)) + [-1] * (num_gt - num_objects)
        annotations['index'] = np.array(index, dtype=np.int32)

        # Boxes expressed in the frame matching the point cloud type.
        if pc_type == 'radar':
            gt_boxes = np.array([[*obj.loc, obj.w, obj.l, obj.h, obj.rot] for obj in obj_list])
        else:
            for obj in obj_list:
                obj.from_radar_to_lidar(calib)
            gt_boxes = np.array([[*obj.loc_lidar, obj.w, obj.l, obj.h, obj.rot_lidar] for obj in obj_list])
        annotations['gt_boxes'] = gt_boxes

        # Count the points inside each box; ignored entries get -1.
        points = get_pointcloud(idx, pc_type)
        indices = box_np_ops.points_in_rbbox(points[:, :3], gt_boxes)
        num_points_in_gt = indices.sum(0)
        num_ignored = num_gt - num_objects
        num_points_in_gt = np.concatenate(
            [num_points_in_gt, -np.ones([num_ignored])])
        annotations['num_points_in_gt'] = num_points_in_gt

        # NOTE(review): `annotations` is always non-None at this point,
        # so this guard is redundant.
        if annotations is not None:
            info['annos'] = annotations
            # add_difficulty_to_annos(info)
    return info