def test_BottomUpGenerateHeatmapTarget():
    data_prefix = 'tests/data/coco/'
    ann_file = osp.join(data_prefix, 'test_coco.json')
    coco = COCO(ann_file)

    ann_info = {}
    ann_info['heatmap_size'] = np.array([128, 256])
    ann_info['num_joints'] = 17
    ann_info['num_scales'] = 2
    ann_info['scale_aware_sigma'] = False

    ann_ids = coco.getAnnIds(785)
    anno = coco.loadAnns(ann_ids)
    mask = _get_mask(coco, anno, 785)

    anno = [
        obj for obj in anno if obj['iscrowd'] == 0 or obj['num_keypoints'] > 0
    ]
    joints = _get_joints(anno, ann_info, False)

    mask_list = [mask.copy() for _ in range(ann_info['num_scales'])]
    joints_list = [joints.copy() for _ in range(ann_info['num_scales'])]

    results = {}
    results['dataset'] = 'coco'
    results['image_file'] = osp.join(data_prefix, '000000000785.jpg')
    results['mask'] = mask_list
    results['joints'] = joints_list
    results['ann_info'] = ann_info

    generate_heatmap_target = BottomUpGenerateHeatmapTarget(2)
    results_generate_heatmap_target = generate_heatmap_target(results)
    assert 'target' in results_generate_heatmap_target
    assert len(results_generate_heatmap_target['target']
               ) == results['ann_info']['num_scales']
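
# The test above assumes `_get_mask` and `_get_joints` helpers defined
# elsewhere in the test module. A minimal sketch of what they could look
# like, mirroring the dataset methods later in this file; the third
# argument of `_get_joints` is assumed to toggle integer sigmas and is
# unused when `scale_aware_sigma` is False.
def _get_mask(coco, anno, img_id):
    """Build a float ignore mask for one image from crowd segmentations."""
    img_info = coco.loadImgs(img_id)[0]
    m = np.zeros((img_info['height'], img_info['width']), dtype=np.float32)
    for obj in anno:
        if obj['iscrowd']:
            rle = xtcocotools.mask.frPyObjects(obj['segmentation'],
                                               img_info['height'],
                                               img_info['width'])
            m += xtcocotools.mask.decode(rle)
    return m < 0.5


def _get_joints(anno, ann_info, int_sigma):
    """Stack (K, 3) keypoint arrays for all people in one image."""
    num_people = len(anno)
    joints = np.zeros((num_people, ann_info['num_joints'], 3),
                      dtype=np.float32)
    for i, obj in enumerate(anno):
        joints[i, :, :3] = np.array(obj['keypoints']).reshape([-1, 3])
    return joints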
class COCO_WHOLEBODYDataset(JointsDataset):
    """CocoWholeBodyDataset dataset for top-down pose estimation.

    `Whole-Body Human Pose Estimation in the Wild' ECCV'2020
    More details can be found in the `paper
    <https://arxiv.org/abs/2007.11858>`__ .

    The dataset loads raw features and applies specified transforms
    to return a dict containing the image tensors and other information.

    In total, we have 133 keypoints for wholebody pose estimation.

    COCO-WholeBody keypoint indexes::

        0-16: 17 body keypoints
        17-22: 6 foot keypoints
        23-90: 68 face keypoints
        91-132: 42 hand keypoints

    Args:
        cfg: Experiment config object.
        root (str): Path to the dataset root.
        image_set (str): Image subset, e.g. 'train2017' or 'val2017'.
        is_train (bool): Store True when building the training dataset.
        transform (callable): Transform applied to each sample.
            Default: None.
    """

    def __init__(self, cfg, root, image_set, is_train, transform=None):
        super().__init__(cfg, root, image_set, is_train, transform)

        self.nms_thre = cfg.TEST.NMS_THRE
        self.image_thre = cfg.TEST.IMAGE_THRE
        self.soft_nms = cfg.TEST.SOFT_NMS
        self.oks_thre = cfg.TEST.OKS_THRE
        self.in_vis_thre = cfg.TEST.IN_VIS_THRE
        self.bbox_file = cfg.TEST.COCO_BBOX_FILE
        self.use_gt_bbox = cfg.TEST.USE_GT_BBOX
        self.image_width = cfg.MODEL.IMAGE_SIZE[0]
        self.image_height = cfg.MODEL.IMAGE_SIZE[1]
        self.aspect_ratio = self.image_width * 1.0 / self.image_height
        self.pixel_std = 200

        self.coco = COCO(self._get_ann_file_keypoint())

        # deal with class names
        cats = [
            cat['name'] for cat in self.coco.loadCats(self.coco.getCatIds())
        ]
        self.classes = ['__background__'] + cats
        logger.info('=> classes: {}'.format(self.classes))
        self.num_classes = len(self.classes)
        self._class_to_ind = dict(zip(self.classes, range(self.num_classes)))
        self._class_to_coco_ind = dict(zip(cats, self.coco.getCatIds()))
        self._coco_ind_to_class_ind = dict([(self._class_to_coco_ind[cls],
                                             self._class_to_ind[cls])
                                            for cls in self.classes[1:]])

        # load image file names
        self.image_set_index = self._load_image_set_index()
        self.num_images = len(self.image_set_index)
        self.dataset_name = 'coco_wholebody'
        logger.info('=> num_images: {}'.format(self.num_images))

        self.num_joints = 133
        self.body_num = 17
        self.foot_num = 6
        self.face_num = 68
        self.left_hand_num = 21
        self.right_hand_num = 21

        self.sigmas_body = [
            0.026, 0.025, 0.025, 0.035, 0.035, 0.079, 0.079, 0.072, 0.072,
            0.062, 0.062, 0.107, 0.107, 0.087, 0.087, 0.089, 0.089
        ]
        self.sigmas_foot = [0.068, 0.066, 0.066, 0.092, 0.094, 0.094]
        self.sigmas_face = [
            0.042, 0.043, 0.044, 0.043, 0.040, 0.035, 0.031, 0.025, 0.020,
            0.023, 0.029, 0.032, 0.037, 0.038, 0.043, 0.041, 0.045, 0.013,
            0.012, 0.011, 0.011, 0.012, 0.012, 0.011, 0.011, 0.013, 0.015,
            0.009, 0.007, 0.007, 0.007, 0.012, 0.009, 0.008, 0.016, 0.010,
            0.017, 0.011, 0.009, 0.011, 0.009, 0.007, 0.013, 0.008, 0.011,
            0.012, 0.010, 0.034, 0.008, 0.008, 0.009, 0.008, 0.008, 0.007,
            0.010, 0.008, 0.009, 0.009, 0.009, 0.007, 0.007, 0.008, 0.011,
            0.008, 0.008, 0.008, 0.01, 0.008
        ]
        self.sigmas_lefthand = [
            0.029, 0.022, 0.035, 0.037, 0.047, 0.026, 0.025, 0.024, 0.035,
            0.018, 0.024, 0.022, 0.026, 0.017, 0.021, 0.021, 0.032, 0.02,
            0.019, 0.022, 0.031
        ]
        self.sigmas_righthand = [
            0.029, 0.022, 0.035, 0.037, 0.047, 0.026, 0.025, 0.024, 0.035,
            0.018, 0.024, 0.022, 0.026, 0.017, 0.021, 0.021, 0.032, 0.02,
            0.019, 0.022, 0.031
        ]

        self.sigmas_wholebody = (
            self.sigmas_body + self.sigmas_foot + self.sigmas_face +
            self.sigmas_lefthand + self.sigmas_righthand)
        self.sigmas = np.array(self.sigmas_wholebody)

        self.flip_pairs = self._make_flip_pairs()
        self.parent_ids = None
        self.upper_body_ids = (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
        self.lower_body_ids = (11, 12, 13, 14, 15, 16)

        self.use_different_joints_weight = False
        self.joints_weight = np.ones(self.num_joints, dtype=np.float32)

        # self.db = self._get_db()
        # [Cache Point]
        self.cache_root = cfg.DATASET.CACHE_ROOT
        db_file = os.path.join(
            self.cache_root,
            '{}_cached_{}_db.pkl'.format(cfg.DATASET.DATASET, self.image_set))
        if os.path.exists(db_file):
            with open(db_file, 'rb') as fd:
                self.db = pickle.load(fd)
        else:
            self.db = self._get_db()
            os.makedirs(self.cache_root, exist_ok=True)
            with open(db_file, 'wb') as fd:
                pickle.dump(self.db, fd)

        if is_train and cfg.DATASET.SELECT_DATA:
            self.db = self.select_data(self.db)

        logger.info('=> load {} samples'.format(len(self.db)))

    def _make_flip_pairs(self):
        body = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12], [13, 14],
                [15, 16]]
        foot = [[17, 20], [18, 21], [19, 22]]
        face = [[23, 39], [24, 38], [25, 37], [26, 36], [27, 35], [28, 34],
                [29, 33], [30, 32], [40, 49], [41, 48], [42, 47], [43, 46],
                [44, 45], [54, 58], [55, 57], [59, 68], [60, 67], [61, 66],
                [62, 65], [63, 70], [64, 69], [71, 77], [72, 76], [73, 75],
                [78, 82], [79, 81], [83, 87], [84, 86], [88, 90]]
        hand = [[91, 112], [92, 113], [93, 114], [94, 115], [95, 116],
                [96, 117], [97, 118], [98, 119], [99, 120], [100, 121],
                [101, 122], [102, 123], [103, 124], [104, 125], [105, 126],
                [106, 127], [107, 128], [108, 129], [109, 130], [110, 131],
                [111, 132]]
        return body + foot + face + hand

    def _get_ann_file_keypoint(self):
        """self.root / annotations / person_keypoints_train2017.json"""
        prefix = 'coco_wholebody' \
            if 'test' not in self.image_set else 'image_info'
        return os.path.join('data/coco_wholebody',
                            prefix + '_' + self.image_set + '_v1.0.json')

    def _load_image_set_index(self):
        """image id: int"""
        image_ids = self.coco.getImgIds()
        return image_ids

    def _get_db(self):
        if self.is_train or self.use_gt_bbox:
            # use ground truth bbox
            gt_db = self._load_coco_keypoint_annotations()
        else:
            # use bbox from detection
            gt_db = self._load_coco_person_detection_results()
        return gt_db

    def _load_coco_keypoint_annotations(self):
        """Ground truth bbox and keypoints."""
        gt_db = []
        for index in self.image_set_index:
            gt_db.extend(self._load_coco_keypoint_annotation_kernel(index))
        return gt_db

    def _load_coco_keypoint_annotation_kernel(self, index):
        """Load annotation from COCOAPI.
        Note:
            bbox: [x1, y1, w, h]

        Args:
            index: coco image id

        Returns:
            dict: db entry
        """
        im_ann = self.coco.loadImgs(index)[0]
        width = im_ann['width']
        height = im_ann['height']
        num_joints = self.num_joints

        annIds = self.coco.getAnnIds(imgIds=index, iscrowd=False)
        objs = self.coco.loadAnns(annIds)

        # sanitize bboxes
        valid_objs = []
        for obj in objs:
            x, y, w, h = obj['bbox']
            x1 = np.max((0, x))
            y1 = np.max((0, y))
            x2 = np.min((width - 1, x1 + np.max((0, w - 1))))
            y2 = np.min((height - 1, y1 + np.max((0, h - 1))))
            if obj['area'] > 0 and x2 >= x1 and y2 >= y1:
                obj['clean_bbox'] = [x1, y1, x2 - x1, y2 - y1]
                valid_objs.append(obj)
        objs = valid_objs

        rec = []
        bbox_id = 0
        for obj in objs:
            cls = self._coco_ind_to_class_ind[obj['category_id']]
            if cls != 1:
                continue
            # ignore objs without keypoints annotation
            if max(obj['keypoints']) == 0:
                continue

            joints_3d = np.zeros((self.num_joints, 3), dtype=np.float32)
            joints_3d_vis = np.zeros((self.num_joints, 3), dtype=np.float32)

            keypoints = np.array(obj['keypoints'] + obj['foot_kpts'] +
                                 obj['face_kpts'] + obj['lefthand_kpts'] +
                                 obj['righthand_kpts']).reshape(-1, 3)
            joints_3d[:, :2] = keypoints[:, :2]
            joints_3d_vis[:, :2] = np.minimum(1, keypoints[:, 2:3] > 0)

            center, scale = self._box2cs(obj['clean_bbox'][:4])
            rec.append({
                'image': self.image_path_from_index(index),
                'center': center,
                'scale': scale,
                'rotation': 0,
                'joints_3d': joints_3d,
                'joints_3d_vis': joints_3d_vis,
                'dataset': self.dataset_name,
                'bbox_score': 1,
                'bbox_id': bbox_id
            })
            bbox_id = bbox_id + 1

        return rec

    def _box2cs(self, box):
        x, y, w, h = box[:4]
        return self._xywh2cs(x, y, w, h)

    def _xywh2cs(self, x, y, w, h):
        center = np.zeros((2), dtype=np.float32)
        center[0] = x + w * 0.5
        center[1] = y + h * 0.5

        if w > self.aspect_ratio * h:
            h = w * 1.0 / self.aspect_ratio
        elif w < self.aspect_ratio * h:
            w = h * self.aspect_ratio
        scale = np.array([w * 1.0 / self.pixel_std, h * 1.0 / self.pixel_std],
                         dtype=np.float32)
        if center[0] != -1:
            scale = scale * 1.25

        return center, scale

    def image_path_from_index(self, index):
        """example: images / train2017 / 000000119993.jpg"""
        file_name = '%012d.jpg' % index
        if '2014' in self.image_set:
            file_name = 'COCO_%s_' % self.image_set + file_name
        prefix = 'test2017' if 'test' in self.image_set else self.image_set
        data_name = prefix + '.zip@' if self.data_format == 'zip' else prefix
        image_path = os.path.join(self.root, 'images', data_name, file_name)
        return image_path

    def _load_coco_person_detection_results(self):
        all_boxes = None
        with open(self.bbox_file, 'r') as f:
            all_boxes = json.load(f)

        if not all_boxes:
            logger.error('=> Load %s fail!' % self.bbox_file)
            return None
        logger.info('=> Total boxes: {}'.format(len(all_boxes)))

        kpt_db = []
        num_boxes = 0
        for n_img in range(0, len(all_boxes)):
            det_res = all_boxes[n_img]
            if det_res['category_id'] != 1:
                continue
            img_name = self.image_path_from_index(det_res['image_id'])
            box = det_res['bbox']
            score = det_res['score']
            if score < self.image_thre:
                continue
            num_boxes = num_boxes + 1

            center, scale = self._box2cs(box)
            joints_3d = np.zeros((self.num_joints, 3), dtype=np.float32)
            joints_3d_vis = np.ones((self.num_joints, 3), dtype=np.float32)
            kpt_db.append({
                'image': img_name,
                'center': center,
                'scale': scale,
                'score': score,
                'joints_3d': joints_3d,
                'joints_3d_vis': joints_3d_vis,
            })

        logger.info('=> Total boxes after filtering low score@{}: {}'.format(
            self.image_thre, num_boxes))
        return kpt_db

    def evaluate(self, cfg, preds, output_dir, all_boxes, img_path, *args,
                 **kwargs):
        rank = cfg.RANK

        res_folder = os.path.join(output_dir, 'results')
        if not os.path.exists(res_folder):
            try:
                os.makedirs(res_folder)
            except Exception:
                logger.error('Fail to make {}'.format(res_folder))

        res_file = os.path.join(
            res_folder,
            'keypoints_{}_results_{}.json'.format(self.image_set, rank))

        # person x (keypoints)
        _kpts = []
        for idx, kpt in enumerate(preds):
            _kpts.append({
                'keypoints': kpt,
                'center': all_boxes[idx][0:2],
                'scale': all_boxes[idx][2:4],
                'area': all_boxes[idx][4],
                'score': all_boxes[idx][5],
                'image': int(img_path[idx][-16:-4])
            })
        # image x person x (keypoints)
        kpts = defaultdict(list)
        for kpt in _kpts:
            kpts[kpt['image']].append(kpt)

        # rescoring and oks nms
        num_joints = self.num_joints
        in_vis_thre = self.in_vis_thre
        oks_thre = self.oks_thre
        oks_nmsed_kpts = []
        for img in kpts.keys():
            img_kpts = kpts[img]
            for n_p in img_kpts:
                box_score = n_p['score']
                kpt_score = 0
                valid_num = 0
                for n_jt in range(0, num_joints):
                    t_s = n_p['keypoints'][n_jt][2]
                    if t_s > in_vis_thre:
                        kpt_score = kpt_score + t_s
                        valid_num = valid_num + 1
                if valid_num != 0:
                    kpt_score = kpt_score / valid_num
                # rescoring
                n_p['score'] = kpt_score * box_score

            if self.soft_nms:
                keep = soft_oks_nms(
                    [img_kpts[i] for i in range(len(img_kpts))], oks_thre)
            else:
                keep = oks_nms([img_kpts[i] for i in range(len(img_kpts))],
                               oks_thre)

            if len(keep) == 0:
                oks_nmsed_kpts.append(img_kpts)
            else:
                oks_nmsed_kpts.append([img_kpts[_keep] for _keep in keep])

        self._write_coco_keypoint_results(oks_nmsed_kpts, res_file)
        if 'test' not in self.image_set:
            info_str = self._do_python_keypoint_eval(res_file, res_folder)
            name_value = OrderedDict(info_str)
            return name_value, name_value['AP']
        else:
            return {'Null': 0}, 0

    def _write_coco_keypoint_results(self, keypoints, res_file):
        data_pack = [{
            'cat_id': self._class_to_coco_ind[cls],
            'cls_ind': cls_ind,
            'cls': cls,
            'ann_type': 'keypoints',
            'keypoints': keypoints
        } for cls_ind, cls in enumerate(self.classes)
                     if not cls == '__background__']

        results = self._coco_keypoint_results_one_category_kernel(
            data_pack[0])
        logger.info('=> writing results json to %s' % res_file)
        with open(res_file, 'w') as f:
            json.dump(results, f, sort_keys=True, indent=4)
        try:
            json.load(open(res_file))
        except Exception:
            content = []
            with open(res_file, 'r') as f:
                for line in f:
                    content.append(line)
            content[-1] = ']'
            with open(res_file, 'w') as f:
                for c in content:
                    f.write(c)

    def _coco_keypoint_results_one_category_kernel(self, data_pack):
        """Get coco keypoint results."""
        cat_id = data_pack['cat_id']
        keypoints = data_pack['keypoints']
        cat_results = []

        for img_kpts in keypoints:
            if len(img_kpts) == 0:
                continue

            _key_points = np.array(
                [img_kpts[k]['keypoints'] for k in range(len(img_kpts))])
            key_points = np.zeros(
                (_key_points.shape[0], self.num_joints * 3),
                dtype=np.float32)

            for ipt in range(self.num_joints):
                key_points[:, ipt * 3 + 0] = _key_points[:, ipt, 0]
                key_points[:, ipt * 3 + 1] = _key_points[:, ipt, 1]
                # keypoints score.
                key_points[:, ipt * 3 + 2] = _key_points[:, ipt, 2]

            cuts = np.cumsum([
                0, self.body_num, self.foot_num, self.face_num,
                self.left_hand_num, self.right_hand_num
            ]) * 3

            result = [{
                'image_id': img_kpts[k]['image'],
                'category_id': cat_id,
                'keypoints': list(key_points[k][cuts[0]:cuts[1]]),
                'foot_kpts': list(key_points[k][cuts[1]:cuts[2]]),
                'face_kpts': list(key_points[k][cuts[2]:cuts[3]]),
                'lefthand_kpts': list(key_points[k][cuts[3]:cuts[4]]),
                'righthand_kpts': list(key_points[k][cuts[4]:cuts[5]]),
                'score': img_kpts[k]['score'],
                'center': list(img_kpts[k]['center']),
                'scale': list(img_kpts[k]['scale'])
            } for k in range(len(img_kpts))]
            cat_results.extend(result)

        return cat_results

    def _do_python_keypoint_eval(self, res_file, res_folder):
        """Keypoint evaluation using COCOAPI."""
        coco_dt = self.coco.loadRes(res_file)

        coco_eval = COCOeval(
            self.coco,
            coco_dt,
            'keypoints_body',
            np.array(self.sigmas_body),
            use_area=True)
        coco_eval.params.useSegm = None
        coco_eval.evaluate()
        coco_eval.accumulate()
        coco_eval.summarize()

        coco_eval = COCOeval(
            self.coco,
            coco_dt,
            'keypoints_foot',
            np.array(self.sigmas_foot),
            use_area=True)
        coco_eval.params.useSegm = None
        coco_eval.evaluate()
        coco_eval.accumulate()
        coco_eval.summarize()

        coco_eval = COCOeval(
            self.coco,
            coco_dt,
            'keypoints_face',
            np.array(self.sigmas_face),
            use_area=True)
        coco_eval.params.useSegm = None
        coco_eval.evaluate()
        coco_eval.accumulate()
        coco_eval.summarize()

        coco_eval = COCOeval(
            self.coco,
            coco_dt,
            'keypoints_lefthand',
            np.array(self.sigmas_lefthand),
            use_area=True)
        coco_eval.params.useSegm = None
        coco_eval.evaluate()
        coco_eval.accumulate()
        coco_eval.summarize()

        coco_eval = COCOeval(
            self.coco,
            coco_dt,
            'keypoints_righthand',
            np.array(self.sigmas_righthand),
            use_area=True)
        coco_eval.params.useSegm = None
        coco_eval.evaluate()
        coco_eval.accumulate()
        coco_eval.summarize()

        coco_eval = COCOeval(
            self.coco,
            coco_dt,
            'keypoints_wholebody',
            np.array(self.sigmas_wholebody),
            use_area=True)
        coco_eval.params.useSegm = None
        coco_eval.evaluate()
        coco_eval.accumulate()
        coco_eval.summarize()

        stats_names = [
            'AP', 'AP .5', 'AP .75', 'AP (M)', 'AP (L)', 'AR', 'AR .5',
            'AR .75', 'AR (M)', 'AR (L)'
        ]

        info_str = []
        for ind, name in enumerate(stats_names):
            info_str.append((name, coco_eval.stats[ind]))

        return info_str
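
# Worked example of the `cuts` indexing used by
# _coco_keypoint_results_one_category_kernel above: with part sizes
# (17, 6, 68, 21, 21), np.cumsum([0, 17, 6, 68, 21, 21]) * 3 gives
# [0, 51, 69, 273, 336, 399], which slices a flattened (133 * 3,)
# keypoint vector into body/foot/face/left-hand/right-hand segments.
# A self-contained sketch using a stand-in vector:
def _demo_wholebody_cuts():
    cuts = np.cumsum([0, 17, 6, 68, 21, 21]) * 3
    flat = np.arange(133 * 3, dtype=np.float32)  # one person's keypoints
    names = [
        'keypoints', 'foot_kpts', 'face_kpts', 'lefthand_kpts',
        'righthand_kpts'
    ]
    parts = {
        name: flat[lo:hi]
        for name, lo, hi in zip(names, cuts[:-1], cuts[1:])
    }
    assert sum(len(v) for v in parts.values()) == 133 * 3
    return parts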
class BottomUpForkliftDataset(BottomUpBaseDataset):
    """Forklift dataset for bottom-up pose estimation.

    The dataset loads raw features and applies specified transforms
    to return a dict containing the image tensors and other information.

    The annotations follow the COCO keypoint format, with 6 forklift
    keypoints per instance (see ``flip_index`` and ``joint_weights``
    below).

    Args:
        ann_file (str): Path to the annotation file.
        img_prefix (str): Path to a directory where images are held.
            Default: None.
        data_cfg (dict): config
        pipeline (list[dict | callable]): A sequence of data transforms.
        test_mode (bool): Store True when building test or
            validation dataset. Default: False.
    """

    def __init__(self,
                 ann_file,
                 img_prefix,
                 data_cfg,
                 pipeline,
                 test_mode=False):
        super().__init__(ann_file, img_prefix, data_cfg, pipeline, test_mode)

        self.ann_info['flip_index'] = [1, 0, 3, 2, 5, 4]

        self.ann_info['use_different_joint_weights'] = False
        self.ann_info['joint_weights'] = np.array(
            [1., 1., 1., 1., 1., 1.],
            dtype=np.float32).reshape((self.ann_info['num_joints'], 1))

        # 'https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/'
        # 'pycocotools/cocoeval.py#L523'
        self.sigmas = np.array([1.] * 6) / 10.0

        self.coco = COCO(ann_file)

        cats = [
            cat['name'] for cat in self.coco.loadCats(self.coco.getCatIds())
        ]
        self.classes = ['__background__'] + cats
        self.num_classes = len(self.classes)
        self._class_to_ind = dict(zip(self.classes, range(self.num_classes)))
        self._class_to_coco_ind = dict(zip(cats, self.coco.getCatIds()))
        self._coco_ind_to_class_ind = dict(
            (self._class_to_coco_ind[cls], self._class_to_ind[cls])
            for cls in self.classes[1:])

        self.img_ids = self.coco.getImgIds()
        if not test_mode:
            self.img_ids = [
                img_id for img_id in self.img_ids
                if len(self.coco.getAnnIds(imgIds=img_id, iscrowd=None)) > 0
            ]
        self.num_images = len(self.img_ids)
        self.id2name, self.name2id = self._get_mapping_id_name(self.coco.imgs)
        self.dataset_name = 'coco'

        print(f'=> num_images: {self.num_images}')

    @staticmethod
    def _get_mapping_id_name(imgs):
        """
        Args:
            imgs (dict): dict of image info.

        Returns:
            tuple: Image name & id mapping dicts.

                - id2name (dict): Mapping image id to name.
                - name2id (dict): Mapping image name to id.
        """
        id2name = {}
        name2id = {}
        for image_id, image in imgs.items():
            file_name = image['file_name']
            id2name[image_id] = file_name
            name2id[file_name] = image_id

        return id2name, name2id

    def _get_single(self, idx):
        """Get anno for a single image.
        Args:
            idx (int): image idx

        Returns:
            dict: info for model training
        """
        coco = self.coco
        img_id = self.img_ids[idx]
        ann_ids = coco.getAnnIds(imgIds=img_id)
        anno = coco.loadAnns(ann_ids)

        mask = self._get_mask(anno, idx)
        anno = [
            obj for obj in anno
            if obj['iscrowd'] == 0 or obj['num_keypoints'] > 0
        ]

        joints = self._get_joints(anno)
        mask_list = [mask.copy() for _ in range(self.ann_info['num_scales'])]
        joints_list = [
            joints.copy() for _ in range(self.ann_info['num_scales'])
        ]

        db_rec = {}
        db_rec['dataset'] = self.dataset_name
        db_rec['image_file'] = os.path.join(self.img_prefix,
                                            self.id2name[img_id])
        db_rec['mask'] = mask_list
        db_rec['joints'] = joints_list

        return db_rec

    def _get_joints(self, anno):
        """Get joints for all people in an image."""
        num_people = len(anno)

        if self.ann_info['scale_aware_sigma']:
            joints = np.zeros((num_people, self.ann_info['num_joints'], 4),
                              dtype=np.float32)
        else:
            joints = np.zeros((num_people, self.ann_info['num_joints'], 3),
                              dtype=np.float32)

        for i, obj in enumerate(anno):
            joints[i, :self.ann_info['num_joints'], :3] = \
                np.array(obj['keypoints']).reshape([-1, 3])
            if self.ann_info['scale_aware_sigma']:
                # get person box
                box = obj['bbox']
                size = max(box[2], box[3])
                sigma = size / self.base_size * self.base_sigma
                if self.int_sigma:
                    sigma = int(np.ceil(sigma))
                assert sigma > 0, sigma
                joints[i, :, 3] = sigma

        return joints

    def _get_mask(self, anno, idx):
        """Get ignore masks to mask out losses."""
        coco = self.coco
        img_info = coco.loadImgs(self.img_ids[idx])[0]

        m = np.zeros((img_info['height'], img_info['width']),
                     dtype=np.float32)

        for obj in anno:
            if 'segmentation' in obj:
                if obj['iscrowd']:
                    rle = xtcocotools.mask.frPyObjects(obj['segmentation'],
                                                       img_info['height'],
                                                       img_info['width'])
                    m += xtcocotools.mask.decode(rle)
                elif obj['num_keypoints'] == 0:
                    rles = xtcocotools.mask.frPyObjects(
                        obj['segmentation'], img_info['height'],
                        img_info['width'])
                    for rle in rles:
                        m += xtcocotools.mask.decode(rle)

        return m < 0.5

    def evaluate(self, outputs, res_folder, metric='mAP', **kwargs):
        """Evaluate coco keypoint results. The pose prediction results will
        be saved in `${res_folder}/result_keypoints.json`.

        Note:
            num_people: P
            num_keypoints: K

        Args:
            outputs (list(preds, scores, image_path, heatmap)):

                * preds (list[np.ndarray(P, K, 3+tag_num)]):
                  Pose predictions for all people in images.
                * scores (list[P]):
                * image_path (list[str]): For example,
                  ['coco/images/val2017/000000397133.jpg']
                * heatmap (np.ndarray[N, K, H, W]): model outputs.

            res_folder (str): Path of directory to save the results.
            metric (str | list[str]): Metric to be performed.
                Defaults: 'mAP'.

        Returns:
            dict: Evaluation results for evaluation metric.
""" metrics = metric if isinstance(metric, list) else [metric] allowed_metrics = ['mAP'] for metric in metrics: if metric not in allowed_metrics: raise KeyError(f'metric {metric} is not supported') res_file = os.path.join(res_folder, 'result_keypoints.json') preds = [] scores = [] image_paths = [] for output in outputs: preds.append(output['preds']) scores.append(output['scores']) image_paths.append(output['image_paths'][0]) kpts = defaultdict(list) # iterate over images for idx, _preds in enumerate(preds): str_image_path = image_paths[idx] image_id = self.name2id[os.path.basename(str_image_path)] # iterate over people for idx_person, kpt in enumerate(_preds): # use bbox area area = (np.max(kpt[:, 0]) - np.min(kpt[:, 0])) * ( np.max(kpt[:, 1]) - np.min(kpt[:, 1])) kpts[image_id].append({ 'keypoints': kpt[:, 0:3], 'score': scores[idx][idx_person], 'tags': kpt[:, 3], 'image_id': image_id, 'area': area, }) oks_nmsed_kpts = [] for img in kpts.keys(): img_kpts = kpts[img] keep = [] if len(keep) == 0: oks_nmsed_kpts.append(img_kpts) else: oks_nmsed_kpts.append([img_kpts[_keep] for _keep in keep]) self._write_coco_keypoint_results(oks_nmsed_kpts, res_file) info_str = self._do_python_keypoint_eval(res_file) name_value = OrderedDict(info_str) return name_value def _write_coco_keypoint_results(self, keypoints, res_file): """Write results into a json file.""" data_pack = [{ 'cat_id': self._class_to_coco_ind[cls], 'cls_ind': cls_ind, 'cls': cls, 'ann_type': 'keypoints', 'keypoints': keypoints } for cls_ind, cls in enumerate(self.classes) if not cls == '__background__'] results = self._coco_keypoint_results_one_category_kernel(data_pack[0]) with open(res_file, 'w') as f: json.dump(results, f, sort_keys=True, indent=4) def _coco_keypoint_results_one_category_kernel(self, data_pack): """Get coco keypoint results.""" cat_id = data_pack['cat_id'] keypoints = data_pack['keypoints'] cat_results = [] for img_kpts in keypoints: if len(img_kpts) == 0: continue _key_points = np.array( [img_kpt['keypoints'] for img_kpt in img_kpts]) key_points = _key_points.reshape(-1, self.ann_info['num_joints'] * 3) for img_kpt, key_point in zip(img_kpts, key_points): kpt = key_point.reshape((self.ann_info['num_joints'], 3)) left_top = np.amin(kpt, axis=0) right_bottom = np.amax(kpt, axis=0) w = right_bottom[0] - left_top[0] h = right_bottom[1] - left_top[1] cat_results.append({ 'image_id': img_kpt['image_id'], 'category_id': cat_id, 'keypoints': key_point.tolist(), 'score': img_kpt['score'], 'bbox': [left_top[0], left_top[1], w, h] }) return cat_results def _do_python_keypoint_eval(self, res_file): """Keypoint evaluation using COCOAPI.""" stats_names = [ 'AP', 'AP .5', 'AP .75', 'AP (M)', 'AP (L)', 'AR', 'AR .5', 'AR .75', 'AR (M)', 'AR (L)' ] with open(res_file, 'r') as file: res_json = json.load(file) if not res_json: info_str = list(zip(stats_names, [ 0, ] * len(stats_names))) return info_str coco_det = self.coco.loadRes(res_file) coco_eval = COCOeval(self.coco, coco_det, 'keypoints', self.sigmas) coco_eval.params.useSegm = None coco_eval.evaluate() coco_eval.accumulate() coco_eval.summarize() info_str = list(zip(stats_names, coco_eval.stats)) return info_str
class TopDownCocoDataset(TopDownBaseDataset):
    """CocoDataset dataset for top-down pose estimation.

    `Microsoft COCO: Common Objects in Context' ECCV'2014
    More details can be found in the `paper
    <https://arxiv.org/abs/1405.0312>`_ .

    The dataset loads raw features and applies specified transforms
    to return a dict containing the image tensors and other information.

    COCO keypoint indexes::

        0: 'nose',
        1: 'left_eye',
        2: 'right_eye',
        3: 'left_ear',
        4: 'right_ear',
        5: 'left_shoulder',
        6: 'right_shoulder',
        7: 'left_elbow',
        8: 'right_elbow',
        9: 'left_wrist',
        10: 'right_wrist',
        11: 'left_hip',
        12: 'right_hip',
        13: 'left_knee',
        14: 'right_knee',
        15: 'left_ankle',
        16: 'right_ankle'

    Args:
        ann_file (str): Path to the annotation file.
        img_prefix (str): Path to a directory where images are held.
            Default: None.
        data_cfg (dict): config
        pipeline (list[dict | callable]): A sequence of data transforms.
        test_mode (bool): Store True when building test or
            validation dataset. Default: False.
    """

    def __init__(self,
                 ann_file,
                 img_prefix,
                 data_cfg,
                 pipeline,
                 test_mode=False):
        super().__init__(
            ann_file, img_prefix, data_cfg, pipeline, test_mode=test_mode)

        self.use_gt_bbox = data_cfg['use_gt_bbox']
        self.bbox_file = data_cfg['bbox_file']
        self.image_thr = data_cfg['image_thr']
        self.soft_nms = data_cfg['soft_nms']
        self.nms_thr = data_cfg['nms_thr']
        self.oks_thr = data_cfg['oks_thr']
        self.vis_thr = data_cfg['vis_thr']
        self.bbox_thr = data_cfg['bbox_thr']

        self.ann_info['flip_pairs'] = [[1, 2], [3, 4], [5, 6], [7, 8],
                                       [9, 10], [11, 12], [13, 14], [15, 16]]

        self.ann_info['upper_body_ids'] = (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
        self.ann_info['lower_body_ids'] = (11, 12, 13, 14, 15, 16)

        self.ann_info['use_different_joint_weights'] = False
        self.ann_info['joint_weights'] = np.array(
            [
                1., 1., 1., 1., 1., 1., 1., 1.2, 1.2, 1.5, 1.5, 1., 1., 1.2,
                1.2, 1.5, 1.5
            ],
            dtype=np.float32).reshape((self.ann_info['num_joints'], 1))

        self.coco = COCO(ann_file)

        cats = [
            cat['name'] for cat in self.coco.loadCats(self.coco.getCatIds())
        ]
        self.classes = ['__background__'] + cats
        self.num_classes = len(self.classes)
        self._class_to_ind = dict(zip(self.classes, range(self.num_classes)))
        self._class_to_coco_ind = dict(zip(cats, self.coco.getCatIds()))
        self._coco_ind_to_class_ind = dict(
            (self._class_to_coco_ind[cls], self._class_to_ind[cls])
            for cls in self.classes[1:])
        self.image_set_index = self.coco.getImgIds()
        self.num_images = len(self.image_set_index)
        self.db = self._get_db()

        print(f'=> num_images: {self.num_images}')
        print(f'=> load {len(self.db)} samples')

    def _get_db(self):
        """Load dataset."""
        if (not self.test_mode) or self.use_gt_bbox:
            # use ground truth bbox
            gt_db = self._load_coco_keypoint_annotations()
        else:
            # use bbox from detection
            gt_db = self._load_coco_person_detection_results()
        return gt_db

    def _load_coco_keypoint_annotations(self):
        """Ground truth bbox and keypoints."""
        gt_db = []
        for index in self.image_set_index:
            gt_db.extend(self._load_coco_keypoint_annotation_kernel(index))
        return gt_db

    def _load_coco_keypoint_annotation_kernel(self, index):
        """Load annotation from COCOAPI.
        Note:
            bbox: [x1, y1, w, h]

        Args:
            index: coco image id

        Returns:
            db entry
        """
        im_ann = self.coco.loadImgs(index)[0]
        width = im_ann['width']
        height = im_ann['height']
        num_joints = self.ann_info['num_joints']

        ann_ids = self.coco.getAnnIds(imgIds=index, iscrowd=False)
        objs = self.coco.loadAnns(ann_ids)

        # sanitize bboxes
        valid_objs = []
        for obj in objs:
            x, y, w, h = obj['bbox']
            x1 = max(0, x)
            y1 = max(0, y)
            x2 = min(width - 1, x1 + max(0, w - 1))
            y2 = min(height - 1, y1 + max(0, h - 1))
            if obj['area'] > 0 and x2 >= x1 and y2 >= y1:
                obj['clean_bbox'] = [x1, y1, x2 - x1, y2 - y1]
                valid_objs.append(obj)
        objs = valid_objs

        rec = []
        for obj in objs:
            if max(obj['keypoints']) == 0:
                continue
            joints_3d = np.zeros((num_joints, 3), dtype=np.float32)
            joints_3d_visible = np.zeros((num_joints, 3), dtype=np.float32)
            for ipt in range(num_joints):
                joints_3d[ipt, 0] = obj['keypoints'][ipt * 3 + 0]
                joints_3d[ipt, 1] = obj['keypoints'][ipt * 3 + 1]
                joints_3d[ipt, 2] = 0
                t_vis = obj['keypoints'][ipt * 3 + 2]
                if t_vis > 1:
                    t_vis = 1
                joints_3d_visible[ipt, 0] = t_vis
                joints_3d_visible[ipt, 1] = t_vis
                joints_3d_visible[ipt, 2] = 0

            center, scale = self._xywh2cs(*obj['clean_bbox'][:4])
            rec.append({
                'image_file': self._image_path_from_index(index),
                'center': center,
                'scale': scale,
                'rotation': 0,
                'joints_3d': joints_3d,
                'joints_3d_visible': joints_3d_visible,
                'dataset': 'coco',
                'bbox_score': 1
            })

        return rec

    def _xywh2cs(self, x, y, w, h):
        """This encodes a bbox (x, y, w, h) into (center, scale).

        Args:
            x, y, w, h

        Returns:
            center (np.ndarray[float32](2,)): center of the bbox (x, y).
            scale (np.ndarray[float32](2,)): scale of the bbox w & h.
        """
        aspect_ratio = self.ann_info['image_size'][0] / self.ann_info[
            'image_size'][1]
        center = np.array([x + w * 0.5, y + h * 0.5], dtype=np.float32)

        if (not self.test_mode) and np.random.rand() < 0.3:
            center += 0.4 * (np.random.rand(2) - 0.5) * [w, h]

        if w > aspect_ratio * h:
            h = w * 1.0 / aspect_ratio
        elif w < aspect_ratio * h:
            w = h * aspect_ratio

        # pixel std is 200.0
        scale = np.array([w / 200.0, h / 200.0], dtype=np.float32)

        scale = scale * 1.25

        return center, scale

    def _image_path_from_index(self, index):
        """example: images/train2017/000000119993.jpg"""
        image_path = os.path.join(self.img_prefix, '%012d.jpg' % index)
        return image_path

    def _load_coco_person_detection_results(self):
        """Load coco person detection results."""
        num_joints = self.ann_info['num_joints']
        all_boxes = None
        with open(self.bbox_file, 'r') as f:
            all_boxes = json.load(f)

        if not all_boxes:
            raise ValueError('=> Load %s fail!' % self.bbox_file)

        print(f'=> Total boxes: {len(all_boxes)}')

        kpt_db = []
        num_boxes = 0
        for det_res in all_boxes:
            if det_res['category_id'] != 1:
                continue
            img_name = self._image_path_from_index(det_res['image_id'])
            box = det_res['bbox']
            score = det_res['score']
            if score < self.image_thr:
                continue
            num_boxes = num_boxes + 1

            center, scale = self._xywh2cs(*box[:4])
            joints_3d = np.zeros((num_joints, 3), dtype=np.float32)
            joints_3d_visible = np.ones((num_joints, 3), dtype=np.float32)
            kpt_db.append({
                'image_file': img_name,
                'center': center,
                'scale': scale,
                'rotation': 0,
                'bbox_score': score,
                'dataset': 'coco',
                'joints_3d': joints_3d,
                'joints_3d_visible': joints_3d_visible
            })

        print(f'=> Total boxes after filter '
              f'low score@{self.image_thr}: {num_boxes}')
        return kpt_db

    def evaluate(self, outputs, res_folder, metric='mAP', **kwargs):
        """Evaluate coco keypoint results. The pose prediction results will
        be saved in `${res_folder}/result_keypoints.json`.
        Note:
            num_keypoints: K

        Args:
            outputs (list(preds, boxes, image_path))
                :preds (np.ndarray[1,K,3]): The first two dimensions are
                    coordinates, score is the third dimension of the array.
                :boxes (np.ndarray[1,6]): [center[0], center[1], scale[0],
                    scale[1], area, score]
                :image_path (list[str]): For example, [ '/', 'v', 'a', 'l',
                    '2', '0', '1', '7', '/', '0', '0', '0', '0', '0', '0',
                    '3', '9', '7', '1', '3', '3', '.', 'j', 'p', 'g']
            res_folder (str): Path of directory to save the results.
            metric (str): Metric to be performed. Defaults: 'mAP'.

        Returns:
            name_value (dict): Evaluation results for evaluation metric.
        """
        assert metric == 'mAP'

        res_file = os.path.join(res_folder, 'result_keypoints.json')

        kpts = defaultdict(list)
        for preds, boxes, image_path in outputs:
            str_image_path = ''.join(image_path)
            image_id = int(osp.basename(osp.splitext(str_image_path)[0]))

            kpts[image_id].append({
                'keypoints': preds[0],
                'center': boxes[0][0:2],
                'scale': boxes[0][2:4],
                'area': boxes[0][4],
                'score': boxes[0][5],
                'image_id': image_id,
            })

        # rescoring and oks nms
        num_joints = self.ann_info['num_joints']
        vis_thr = self.vis_thr
        oks_thr = self.oks_thr
        oks_nmsed_kpts = []
        for img in kpts.keys():
            img_kpts = kpts[img]
            for n_p in img_kpts:
                box_score = n_p['score']
                kpt_score = 0
                valid_num = 0
                for n_jt in range(0, num_joints):
                    t_s = n_p['keypoints'][n_jt][2]
                    if t_s > vis_thr:
                        kpt_score = kpt_score + t_s
                        valid_num = valid_num + 1
                if valid_num != 0:
                    kpt_score = kpt_score / valid_num
                # rescoring
                n_p['score'] = kpt_score * box_score

            if self.soft_nms:
                keep = soft_oks_nms(
                    [img_kpts[i] for i in range(len(img_kpts))], oks_thr)
            else:
                keep = oks_nms([img_kpts[i] for i in range(len(img_kpts))],
                               oks_thr)

            if len(keep) == 0:
                oks_nmsed_kpts.append(img_kpts)
            else:
                oks_nmsed_kpts.append([img_kpts[_keep] for _keep in keep])

        self._write_coco_keypoint_results(oks_nmsed_kpts, res_file)

        info_str = self._do_python_keypoint_eval(res_file)
        name_value = OrderedDict(info_str)

        return name_value

    def _write_coco_keypoint_results(self, keypoints, res_file):
        """Write results into a json file."""
        data_pack = [{
            'cat_id': self._class_to_coco_ind[cls],
            'cls_ind': cls_ind,
            'cls': cls,
            'ann_type': 'keypoints',
            'keypoints': keypoints
        } for cls_ind, cls in enumerate(self.classes)
                     if not cls == '__background__']

        results = self._coco_keypoint_results_one_category_kernel(
            data_pack[0])

        with open(res_file, 'w') as f:
            json.dump(results, f, sort_keys=True, indent=4)

    def _coco_keypoint_results_one_category_kernel(self, data_pack):
        """Get coco keypoint results."""
        cat_id = data_pack['cat_id']
        keypoints = data_pack['keypoints']
        cat_results = []

        for img_kpts in keypoints:
            if len(img_kpts) == 0:
                continue

            _key_points = np.array(
                [img_kpt['keypoints'] for img_kpt in img_kpts])
            key_points = _key_points.reshape(-1,
                                             self.ann_info['num_joints'] * 3)

            result = [{
                'image_id': img_kpt['image_id'],
                'category_id': cat_id,
                'keypoints': key_point.tolist(),
                'score': float(img_kpt['score']),
                'center': img_kpt['center'].tolist(),
                'scale': img_kpt['scale'].tolist()
            } for img_kpt, key_point in zip(img_kpts, key_points)]

            cat_results.extend(result)

        return cat_results

    def _do_python_keypoint_eval(self, res_file):
        """Keypoint evaluation using COCOAPI."""
        coco_dt = self.coco.loadRes(res_file)
        coco_eval = COCOeval(self.coco, coco_dt, 'keypoints')
        coco_eval.params.useSegm = None
        coco_eval.evaluate()
        coco_eval.accumulate()
        coco_eval.summarize()

        stats_names = [
            'AP', 'AP .5', 'AP .75', 'AP (M)', 'AP (L)', 'AR', 'AR .5',
            'AR .75', 'AR (M)', 'AR (L)'
        ]

        info_str = []
        for ind, name in enumerate(stats_names):
            info_str.append((name, coco_eval.stats[ind]))

        return info_str
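
# Worked example of the (x, y, w, h) -> (center, scale) encoding in
# _xywh2cs above, with the random center shift disabled: the box is
# centered, padded to the model aspect ratio, divided by the 200-pixel
# std, then enlarged by 1.25. The 192x256 input size is an assumption
# for illustration.
def _demo_xywh2cs():
    x, y, w, h = 50., 100., 100., 100.
    aspect_ratio = 192 / 256  # image_size[0] / image_size[1] = 0.75
    center = np.array([x + w * 0.5, y + h * 0.5], dtype=np.float32)
    if w > aspect_ratio * h:  # 100 > 75: box is too wide, so grow h
        h = w * 1.0 / aspect_ratio  # h becomes 133.33
    elif w < aspect_ratio * h:
        w = h * aspect_ratio
    scale = np.array([w / 200.0, h / 200.0], dtype=np.float32) * 1.25
    # center == [100., 150.]; scale == [0.625, 0.8333...]
    return center, scale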
class TopDownOCHumanDataset(TopDownCocoDataset):
    """OChuman dataset for top-down pose estimation.

    `Pose2Seg: Detection Free Human Instance Segmentation' CVPR'2019
    More details can be found in the `paper
    <https://arxiv.org/abs/1803.10683>`_ .

    The "Occluded Human (OCHuman)" dataset contains 8110 heavily occluded
    human instances within 4731 images. OCHuman dataset is designed for
    validation and testing. To evaluate on OCHuman, the model should be
    trained on the COCO training set, and then tested for robustness to
    occlusion on OCHuman.

    OCHuman keypoint indexes (same as COCO)::

        0: 'nose',
        1: 'left_eye',
        2: 'right_eye',
        3: 'left_ear',
        4: 'right_ear',
        5: 'left_shoulder',
        6: 'right_shoulder',
        7: 'left_elbow',
        8: 'right_elbow',
        9: 'left_wrist',
        10: 'right_wrist',
        11: 'left_hip',
        12: 'right_hip',
        13: 'left_knee',
        14: 'right_knee',
        15: 'left_ankle',
        16: 'right_ankle'

    Args:
        ann_file (str): Path to the annotation file.
        img_prefix (str): Path to a directory where images are held.
            Default: None.
        data_cfg (dict): config
        pipeline (list[dict | callable]): A sequence of data transforms.
        test_mode (bool): Store True when building test or
            validation dataset. Default: False.
    """

    def __init__(self,
                 ann_file,
                 img_prefix,
                 data_cfg,
                 pipeline,
                 test_mode=False):
        super(TopDownCocoDataset, self).__init__(
            ann_file, img_prefix, data_cfg, pipeline, test_mode=test_mode)

        self.use_gt_bbox = data_cfg['use_gt_bbox']
        self.bbox_file = data_cfg['bbox_file']
        self.image_thr = data_cfg['image_thr']
        self.soft_nms = data_cfg['soft_nms']
        self.nms_thr = data_cfg['nms_thr']
        self.oks_thr = data_cfg['oks_thr']
        self.vis_thr = data_cfg['vis_thr']
        self.bbox_thr = data_cfg['bbox_thr']

        self.ann_info['flip_pairs'] = [[1, 2], [3, 4], [5, 6], [7, 8],
                                       [9, 10], [11, 12], [13, 14], [15, 16]]

        self.ann_info['upper_body_ids'] = (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
        self.ann_info['lower_body_ids'] = (11, 12, 13, 14, 15, 16)

        self.ann_info['use_different_joint_weights'] = False
        self.ann_info['joint_weights'] = np.array(
            [
                1., 1., 1., 1., 1., 1., 1., 1.2, 1.2, 1.5, 1.5, 1., 1., 1.2,
                1.2, 1.5, 1.5
            ],
            dtype=np.float32).reshape((self.ann_info['num_joints'], 1))

        self.coco = COCO(ann_file)

        cats = [
            cat['name'] for cat in self.coco.loadCats(self.coco.getCatIds())
        ]
        self.classes = ['__background__'] + cats
        self.num_classes = len(self.classes)
        self._class_to_ind = dict(zip(self.classes, range(self.num_classes)))
        self._class_to_coco_ind = dict(zip(cats, self.coco.getCatIds()))
        self._coco_ind_to_class_ind = dict(
            (self._class_to_coco_ind[cls], self._class_to_ind[cls])
            for cls in self.classes[1:])
        self.image_set_index = self.coco.getImgIds()
        self.num_images = len(self.image_set_index)
        self.id2name, self.name2id = _get_mapping_id_name(self.coco.imgs)
        self.db = self._get_db()

        print(f'=> num_images: {self.num_images}')
        print(f'=> load {len(self.db)} samples')

    def _get_db(self):
        """Load dataset."""
        assert self.use_gt_bbox
        gt_db = self._load_coco_keypoint_annotations()
        return gt_db

    def _load_coco_keypoint_annotation_kernel(self, index):
        """Load annotation from COCOAPI.
        Note:
            bbox: [x1, y1, w, h]

        Args:
            index: coco image id

        Returns:
            db entry
        """
        im_ann = self.coco.loadImgs(index)[0]
        width = im_ann['width']
        height = im_ann['height']
        num_joints = self.ann_info['num_joints']

        ann_ids = self.coco.getAnnIds(imgIds=index, iscrowd=False)
        objs = self.coco.loadAnns(ann_ids)

        # sanitize bboxes
        valid_objs = []
        for obj in objs:
            x, y, w, h = obj['bbox']
            x1 = max(0, x)
            y1 = max(0, y)
            x2 = min(width - 1, x1 + max(0, w - 1))
            y2 = min(height - 1, y1 + max(0, h - 1))
            if obj['area'] > 0 and x2 >= x1 and y2 >= y1:
                obj['clean_bbox'] = [x1, y1, x2 - x1, y2 - y1]
                valid_objs.append(obj)
        objs = valid_objs

        rec = []
        for obj in objs:
            if max(obj['keypoints']) == 0:
                continue
            joints_3d = np.zeros((num_joints, 3), dtype=np.float32)
            joints_3d_visible = np.zeros((num_joints, 3), dtype=np.float32)

            keypoints = np.array(obj['keypoints']).reshape(-1, 3)
            joints_3d[:, :2] = keypoints[:, :2]
            joints_3d_visible[:, :2] = np.minimum(1, keypoints[:, 2:3])

            center, scale = self._xywh2cs(*obj['clean_bbox'][:4])

            image_file = os.path.join(self.img_prefix, self.id2name[index])
            rec.append({
                'image_file': image_file,
                'center': center,
                'scale': scale,
                'rotation': 0,
                'joints_3d': joints_3d,
                'joints_3d_visible': joints_3d_visible,
                'dataset': 'ochuman',
                'bbox_score': 1
            })

        return rec

    def evaluate(self, outputs, res_folder, metric='mAP', **kwargs):
        """Evaluate coco keypoint results. The pose prediction results will
        be saved in `${res_folder}/result_keypoints.json`.

        Note:
            num_keypoints: K

        Args:
            outputs (list(preds, boxes, image_path))
                :preds (np.ndarray[1,K,3]): The first two dimensions are
                    coordinates, score is the third dimension of the array.
                :boxes (np.ndarray[1,6]): [center[0], center[1], scale[0],
                    scale[1], area, score]
                :image_path (list[str]): For example, [ '/', 'v', 'a', 'l',
                    '2', '0', '1', '7', '/', '0', '0', '0', '0', '0', '0',
                    '3', '9', '7', '1', '3', '3', '.', 'j', 'p', 'g']
            res_folder (str): Path of directory to save the results.
            metric (str): Metric to be performed. Defaults: 'mAP'.

        Returns:
            name_value (dict): Evaluation results for evaluation metric.
        """
        assert metric == 'mAP'

        res_file = os.path.join(res_folder, 'result_keypoints.json')

        kpts = defaultdict(list)
        for preds, boxes, image_path in outputs:
            str_image_path = ''.join(image_path)
            image_id = self.name2id[os.path.basename(str_image_path)]

            kpts[image_id].append({
                'keypoints': preds[0],
                'center': boxes[0][0:2],
                'scale': boxes[0][2:4],
                'area': boxes[0][4],
                'score': boxes[0][5],
                'image_id': image_id,
            })

        # rescoring and oks nms
        num_joints = self.ann_info['num_joints']
        vis_thr = self.vis_thr
        oks_thr = self.oks_thr
        oks_nmsed_kpts = []
        for img in kpts.keys():
            img_kpts = kpts[img]
            for n_p in img_kpts:
                box_score = n_p['score']
                kpt_score = 0
                valid_num = 0
                for n_jt in range(0, num_joints):
                    t_s = n_p['keypoints'][n_jt][2]
                    if t_s > vis_thr:
                        kpt_score = kpt_score + t_s
                        valid_num = valid_num + 1
                if valid_num != 0:
                    kpt_score = kpt_score / valid_num
                # rescoring
                n_p['score'] = kpt_score * box_score

            if self.soft_nms:
                keep = soft_oks_nms(
                    [img_kpts[i] for i in range(len(img_kpts))], oks_thr)
            else:
                keep = oks_nms([img_kpts[i] for i in range(len(img_kpts))],
                               oks_thr)

            if len(keep) == 0:
                oks_nmsed_kpts.append(img_kpts)
            else:
                oks_nmsed_kpts.append([img_kpts[_keep] for _keep in keep])

        self._write_coco_keypoint_results(oks_nmsed_kpts, res_file)

        info_str = self._do_python_keypoint_eval(res_file)
        name_value = OrderedDict(info_str)

        return name_value
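
# The rescoring step that recurs in the evaluate() methods above, factored
# out as a standalone sketch: the final instance score is the detector's
# box score multiplied by the mean confidence of keypoints above the
# visibility threshold (keypoints below the threshold are ignored).
def _rescore(box_score, keypoints, vis_thr):
    """keypoints: (K, 3) array whose third column is the confidence."""
    conf = np.asarray(keypoints)[:, 2]
    valid = conf[conf > vis_thr]
    kpt_score = valid.mean() if valid.size > 0 else 0
    return kpt_score * box_score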
class TopDownCocoWholeBodyDataset(TopDownCocoDataset):
    """CocoWholeBodyDataset dataset for top-down pose estimation.

    `Whole-Body Human Pose Estimation in the Wild' ECCV'2020
    More details can be found in the `paper
    <https://arxiv.org/abs/2007.11858>`__ .

    The dataset loads raw features and applies specified transforms
    to return a dict containing the image tensors and other information.

    In total, we have 133 keypoints for wholebody pose estimation.

    COCO-WholeBody keypoint indexes::

        0-16: 17 body keypoints
        17-22: 6 foot keypoints
        23-90: 68 face keypoints
        91-132: 42 hand keypoints

    Args:
        ann_file (str): Path to the annotation file.
        img_prefix (str): Path to a directory where images are held.
            Default: None.
        data_cfg (dict): config
        pipeline (list[dict | callable]): A sequence of data transforms.
        test_mode (bool): Store True when building test or
            validation dataset. Default: False.
    """

    def __init__(self,
                 ann_file,
                 img_prefix,
                 data_cfg,
                 pipeline,
                 test_mode=False):
        super(TopDownCocoDataset, self).__init__(
            ann_file, img_prefix, data_cfg, pipeline, test_mode=test_mode)

        self.use_gt_bbox = data_cfg['use_gt_bbox']
        self.bbox_file = data_cfg['bbox_file']
        self.det_bbox_thr = data_cfg.get('det_bbox_thr', 0.0)
        if 'image_thr' in data_cfg:
            warnings.warn(
                'image_thr is deprecated, '
                'please use det_bbox_thr instead', DeprecationWarning)
            self.det_bbox_thr = data_cfg['image_thr']
        self.use_nms = data_cfg.get('use_nms', True)
        self.soft_nms = data_cfg['soft_nms']
        self.nms_thr = data_cfg['nms_thr']
        self.oks_thr = data_cfg['oks_thr']
        self.vis_thr = data_cfg['vis_thr']

        self.ann_info['flip_pairs'] = self._make_flip_pairs()

        self.ann_info['upper_body_ids'] = (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
        self.ann_info['lower_body_ids'] = (11, 12, 13, 14, 15, 16)

        self.ann_info['use_different_joint_weights'] = False
        self.ann_info['joint_weights'] = \
            np.ones((self.ann_info['num_joints'], 1), dtype=np.float32)

        self.body_num = 17
        self.foot_num = 6
        self.face_num = 68
        self.left_hand_num = 21
        self.right_hand_num = 21

        # 'https://github.com/jin-s13/COCO-WholeBody/blob/master/'
        # 'evaluation/myeval_wholebody.py#L170'
        self.sigmas_body = [
            0.026, 0.025, 0.025, 0.035, 0.035, 0.079, 0.079, 0.072, 0.072,
            0.062, 0.062, 0.107, 0.107, 0.087, 0.087, 0.089, 0.089
        ]
        self.sigmas_foot = [0.068, 0.066, 0.066, 0.092, 0.094, 0.094]
        self.sigmas_face = [
            0.042, 0.043, 0.044, 0.043, 0.040, 0.035, 0.031, 0.025, 0.020,
            0.023, 0.029, 0.032, 0.037, 0.038, 0.043, 0.041, 0.045, 0.013,
            0.012, 0.011, 0.011, 0.012, 0.012, 0.011, 0.011, 0.013, 0.015,
            0.009, 0.007, 0.007, 0.007, 0.012, 0.009, 0.008, 0.016, 0.010,
            0.017, 0.011, 0.009, 0.011, 0.009, 0.007, 0.013, 0.008, 0.011,
            0.012, 0.010, 0.034, 0.008, 0.008, 0.009, 0.008, 0.008, 0.007,
            0.010, 0.008, 0.009, 0.009, 0.009, 0.007, 0.007, 0.008, 0.011,
            0.008, 0.008, 0.008, 0.01, 0.008
        ]
        self.sigmas_lefthand = [
            0.029, 0.022, 0.035, 0.037, 0.047, 0.026, 0.025, 0.024, 0.035,
            0.018, 0.024, 0.022, 0.026, 0.017, 0.021, 0.021, 0.032, 0.02,
            0.019, 0.022, 0.031
        ]
        self.sigmas_righthand = [
            0.029, 0.022, 0.035, 0.037, 0.047, 0.026, 0.025, 0.024, 0.035,
            0.018, 0.024, 0.022, 0.026, 0.017, 0.021, 0.021, 0.032, 0.02,
            0.019, 0.022, 0.031
        ]

        self.sigmas_wholebody = (
            self.sigmas_body + self.sigmas_foot + self.sigmas_face +
            self.sigmas_lefthand + self.sigmas_righthand)

        self.sigmas = np.array(self.sigmas_wholebody)

        self.coco = COCO(ann_file)

        cats = [
            cat['name'] for cat in self.coco.loadCats(self.coco.getCatIds())
        ]
        self.classes = ['__background__'] + cats
        self.num_classes = len(self.classes)
        self._class_to_ind = dict(zip(self.classes, range(self.num_classes)))
        self._class_to_coco_ind = dict(zip(cats, self.coco.getCatIds()))
        self._coco_ind_to_class_ind = dict(
            (self._class_to_coco_ind[cls], self._class_to_ind[cls])
            for cls in self.classes[1:])
        self.img_ids = self.coco.getImgIds()
        self.num_images = len(self.img_ids)
        self.id2name, self.name2id = self._get_mapping_id_name(self.coco.imgs)
        self.dataset_name = 'coco_wholebody'

        self.db = self._get_db()

        print(f'=> num_images: {self.num_images}')
        print(f'=> load {len(self.db)} samples')

    @staticmethod
    def _make_flip_pairs():
        body = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12], [13, 14],
                [15, 16]]
        foot = [[17, 20], [18, 21], [19, 22]]
        face = [[23, 39], [24, 38], [25, 37], [26, 36], [27, 35], [28, 34],
                [29, 33], [30, 32], [40, 49], [41, 48], [42, 47], [43, 46],
                [44, 45], [54, 58], [55, 57], [59, 68], [60, 67], [61, 66],
                [62, 65], [63, 70], [64, 69], [71, 77], [72, 76], [73, 75],
                [78, 82], [79, 81], [83, 87], [84, 86], [88, 90]]
        hand = [[91, 112], [92, 113], [93, 114], [94, 115], [95, 116],
                [96, 117], [97, 118], [98, 119], [99, 120], [100, 121],
                [101, 122], [102, 123], [103, 124], [104, 125], [105, 126],
                [106, 127], [107, 128], [108, 129], [109, 130], [110, 131],
                [111, 132]]
        return body + foot + face + hand

    def _load_coco_keypoint_annotation_kernel(self, img_id):
        """Load annotation from COCOAPI.

        Note:
            bbox: [x1, y1, w, h]

        Args:
            img_id: coco image id

        Returns:
            dict: db entry
        """
        img_ann = self.coco.loadImgs(img_id)[0]
        width = img_ann['width']
        height = img_ann['height']
        num_joints = self.ann_info['num_joints']

        ann_ids = self.coco.getAnnIds(imgIds=img_id, iscrowd=False)
        objs = self.coco.loadAnns(ann_ids)

        # sanitize bboxes
        valid_objs = []
        for obj in objs:
            x, y, w, h = obj['bbox']
            x1 = max(0, x)
            y1 = max(0, y)
            x2 = min(width - 1, x1 + max(0, w - 1))
            y2 = min(height - 1, y1 + max(0, h - 1))
            if ('area' not in obj or obj['area'] > 0) and x2 > x1 and y2 > y1:
                obj['clean_bbox'] = [x1, y1, x2 - x1, y2 - y1]
                valid_objs.append(obj)
        objs = valid_objs

        rec = []
        bbox_id = 0
        for obj in objs:
            if max(obj['keypoints']) == 0:
                continue
            joints_3d = np.zeros((num_joints, 3), dtype=np.float32)
            joints_3d_visible = np.zeros((num_joints, 3), dtype=np.float32)

            keypoints = np.array(obj['keypoints'] + obj['foot_kpts'] +
                                 obj['face_kpts'] + obj['lefthand_kpts'] +
                                 obj['righthand_kpts']).reshape(-1, 3)
            joints_3d[:, :2] = keypoints[:, :2]
            joints_3d_visible[:, :2] = np.minimum(1, keypoints[:, 2:3] > 0)

            center, scale = self._xywh2cs(*obj['clean_bbox'][:4])

            image_file = os.path.join(self.img_prefix, self.id2name[img_id])
            rec.append({
                'image_file': image_file,
                'center': center,
                'scale': scale,
                'rotation': 0,
                'joints_3d': joints_3d,
                'joints_3d_visible': joints_3d_visible,
                'dataset': self.dataset_name,
                'bbox_score': 1,
                'bbox_id': bbox_id
            })
            bbox_id = bbox_id + 1

        return rec

    def _coco_keypoint_results_one_category_kernel(self, data_pack):
        """Get coco keypoint results."""
        cat_id = data_pack['cat_id']
        keypoints = data_pack['keypoints']
        cat_results = []

        for img_kpts in keypoints:
            if len(img_kpts) == 0:
                continue

            _key_points = np.array(
                [img_kpt['keypoints'] for img_kpt in img_kpts])
            key_points = _key_points.reshape(-1,
                                             self.ann_info['num_joints'] * 3)

            cuts = np.cumsum([
                0, self.body_num, self.foot_num, self.face_num,
                self.left_hand_num, self.right_hand_num
            ]) * 3

            result = [{
                'image_id': img_kpt['image_id'],
                'category_id': cat_id,
                'keypoints': key_point[cuts[0]:cuts[1]].tolist(),
                'foot_kpts': key_point[cuts[1]:cuts[2]].tolist(),
                'face_kpts': key_point[cuts[2]:cuts[3]].tolist(),
                'lefthand_kpts': key_point[cuts[3]:cuts[4]].tolist(),
                'righthand_kpts': key_point[cuts[4]:cuts[5]].tolist(),
                'score': float(img_kpt['score']),
                'center': img_kpt['center'].tolist(),
                'scale': img_kpt['scale'].tolist()
            } for img_kpt, key_point in zip(img_kpts, key_points)]

            cat_results.extend(result)

        return cat_results

    def _do_python_keypoint_eval(self, res_file):
        """Keypoint evaluation using COCOAPI."""
        coco_det = self.coco.loadRes(res_file)

        coco_eval = COCOeval(
            self.coco,
            coco_det,
            'keypoints_body',
            np.array(self.sigmas_body),
            use_area=True)
        coco_eval.params.useSegm = None
        coco_eval.evaluate()
        coco_eval.accumulate()
        coco_eval.summarize()

        coco_eval = COCOeval(
            self.coco,
            coco_det,
            'keypoints_foot',
            np.array(self.sigmas_foot),
            use_area=True)
        coco_eval.params.useSegm = None
        coco_eval.evaluate()
        coco_eval.accumulate()
        coco_eval.summarize()

        coco_eval = COCOeval(
            self.coco,
            coco_det,
            'keypoints_face',
            np.array(self.sigmas_face),
            use_area=True)
        coco_eval.params.useSegm = None
        coco_eval.evaluate()
        coco_eval.accumulate()
        coco_eval.summarize()

        coco_eval = COCOeval(
            self.coco,
            coco_det,
            'keypoints_lefthand',
            np.array(self.sigmas_lefthand),
            use_area=True)
        coco_eval.params.useSegm = None
        coco_eval.evaluate()
        coco_eval.accumulate()
        coco_eval.summarize()

        coco_eval = COCOeval(
            self.coco,
            coco_det,
            'keypoints_righthand',
            np.array(self.sigmas_righthand),
            use_area=True)
        coco_eval.params.useSegm = None
        coco_eval.evaluate()
        coco_eval.accumulate()
        coco_eval.summarize()

        coco_eval = COCOeval(
            self.coco,
            coco_det,
            'keypoints_wholebody',
            np.array(self.sigmas_wholebody),
            use_area=True)
        coco_eval.params.useSegm = None
        coco_eval.evaluate()
        coco_eval.accumulate()
        coco_eval.summarize()

        stats_names = [
            'AP', 'AP .5', 'AP .75', 'AP (M)', 'AP (L)', 'AR', 'AR .5',
            'AR .75', 'AR (M)', 'AR (L)'
        ]

        info_str = list(zip(stats_names, coco_eval.stats))

        return info_str
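
# The six back-to-back COCOeval passes in _do_python_keypoint_eval follow
# the COCO-WholeBody reference evaluation; only the final (wholebody) stats
# are returned. A loop-based equivalent with the same behavior could look
# like the sketch below; `_run_wholebody_evals` and `parts` are
# hypothetical names for illustration.
def _run_wholebody_evals(coco, coco_det, parts):
    """parts: list of (iou_type, sigmas) pairs, wholebody last."""
    for iou_type, sigmas in parts:
        coco_eval = COCOeval(
            coco, coco_det, iou_type, np.array(sigmas), use_area=True)
        coco_eval.params.useSegm = None
        coco_eval.evaluate()
        coco_eval.accumulate()
        coco_eval.summarize()
    # stats of the last (wholebody) pass, matching the method above
    return coco_eval.stats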
class Kpt2dSviewRgbImgBottomUpDataset(Dataset, metaclass=ABCMeta):
    """Base class for bottom-up datasets.

    All datasets should subclass it.
    All subclasses should overwrite:
        Methods: `_get_single`

    Args:
        ann_file (str): Path to the annotation file.
        img_prefix (str): Path to a directory where images are held.
            Default: None.
        data_cfg (dict): config
        pipeline (list[dict | callable]): A sequence of data transforms.
        dataset_info (DatasetInfo): A class containing all dataset info.
        coco_style (bool): Whether the annotation json is coco-style.
            Default: True.
        test_mode (bool): Store True when building test or
            validation dataset. Default: False.
    """

    def __init__(self,
                 ann_file,
                 img_prefix,
                 data_cfg,
                 pipeline,
                 dataset_info=None,
                 coco_style=True,
                 test_mode=False):

        self.image_info = {}
        self.ann_info = {}

        self.ann_file = ann_file
        self.img_prefix = img_prefix
        self.pipeline = pipeline
        self.test_mode = test_mode

        # bottom-up
        self.base_size = data_cfg['base_size']
        self.base_sigma = data_cfg['base_sigma']
        self.int_sigma = False

        self.ann_info['image_size'] = np.array(data_cfg['image_size'])
        self.ann_info['heatmap_size'] = np.array(data_cfg['heatmap_size'])
        self.ann_info['num_joints'] = data_cfg['num_joints']
        self.ann_info['num_scales'] = data_cfg['num_scales']
        self.ann_info['scale_aware_sigma'] = data_cfg['scale_aware_sigma']

        self.ann_info['inference_channel'] = data_cfg['inference_channel']
        self.ann_info['dataset_channel'] = data_cfg['dataset_channel']

        self.use_nms = data_cfg.get('use_nms', False)
        self.soft_nms = data_cfg.get('soft_nms', True)
        self.oks_thr = data_cfg.get('oks_thr', 0.9)

        if dataset_info is None:
            raise ValueError(
                'Check https://github.com/open-mmlab/mmpose/pull/663 '
                'for details.')

        dataset_info = DatasetInfo(dataset_info)

        assert self.ann_info['num_joints'] == dataset_info.keypoint_num
        self.ann_info['flip_pairs'] = dataset_info.flip_pairs
        self.ann_info['flip_index'] = dataset_info.flip_index
        self.ann_info['upper_body_ids'] = dataset_info.upper_body_ids
        self.ann_info['lower_body_ids'] = dataset_info.lower_body_ids
        self.ann_info['joint_weights'] = dataset_info.joint_weights
        self.ann_info['skeleton'] = dataset_info.skeleton
        self.sigmas = dataset_info.sigmas
        self.dataset_name = dataset_info.dataset_name

        if coco_style:
            self.coco = COCO(ann_file)
            if 'categories' in self.coco.dataset:
                cats = [
                    cat['name']
                    for cat in self.coco.loadCats(self.coco.getCatIds())
                ]
                self.classes = ['__background__'] + cats
                self.num_classes = len(self.classes)
                self._class_to_ind = dict(
                    zip(self.classes, range(self.num_classes)))
                self._class_to_coco_ind = dict(
                    zip(cats, self.coco.getCatIds()))
                self._coco_ind_to_class_ind = dict(
                    (self._class_to_coco_ind[cls], self._class_to_ind[cls])
                    for cls in self.classes[1:])
            self.img_ids = self.coco.getImgIds()
            if not test_mode:
                self.img_ids = [
                    img_id for img_id in self.img_ids if len(
                        self.coco.getAnnIds(imgIds=img_id, iscrowd=None)) > 0
                ]
            self.num_images = len(self.img_ids)
            self.id2name, self.name2id = self._get_mapping_id_name(
                self.coco.imgs)

        self.pipeline = Compose(self.pipeline)

    @staticmethod
    def _get_mapping_id_name(imgs):
        """
        Args:
            imgs (dict): dict of image info.

        Returns:
            tuple: Image name & id mapping dicts.

                - id2name (dict): Mapping image id to name.
                - name2id (dict): Mapping image name to id.
""" id2name = {} name2id = {} for image_id, image in imgs.items(): file_name = image['file_name'] id2name[image_id] = file_name name2id[file_name] = image_id return id2name, name2id def _get_mask(self, anno, idx): """Get ignore masks to mask out losses.""" coco = self.coco img_info = coco.loadImgs(self.img_ids[idx])[0] m = np.zeros((img_info['height'], img_info['width']), dtype=np.float32) for obj in anno: if 'segmentation' in obj: if obj['iscrowd']: rle = xtcocotools.mask.frPyObjects(obj['segmentation'], img_info['height'], img_info['width']) m += xtcocotools.mask.decode(rle) elif obj['num_keypoints'] == 0: rles = xtcocotools.mask.frPyObjects( obj['segmentation'], img_info['height'], img_info['width']) for rle in rles: m += xtcocotools.mask.decode(rle) return m < 0.5 @abstractmethod def _get_single(self, idx): """Get anno for a single image.""" raise NotImplementedError @abstractmethod def evaluate(self, cfg, outputs, res_folder, metric, *args, **kwargs): """Evaluate keypoint results.""" raise NotImplementedError def prepare_train_img(self, idx): """Prepare image for training given the index.""" results = copy.deepcopy(self._get_single(idx)) results['ann_info'] = self.ann_info return self.pipeline(results) def prepare_test_img(self, idx): """Prepare image for testing given the index.""" results = copy.deepcopy(self._get_single(idx)) results['ann_info'] = self.ann_info return self.pipeline(results) def __len__(self): """Get dataset length.""" return len(self.img_ids) def __getitem__(self, idx): """Get the sample for either training or testing given index.""" if self.test_mode: return self.prepare_test_img(idx) return self.prepare_train_img(idx)
class TopDownJhmdbDataset(TopDownCocoDataset):
    """JhmdbDataset dataset for top-down pose estimation.

    `Towards understanding action recognition
    <https://openaccess.thecvf.com/content_iccv_2013/papers/
    Jhuang_Towards_Understanding_Action_2013_ICCV_paper.pdf>`__

    The dataset loads raw features and applies specified transforms
    to return a dict containing the image tensors and other information.

    sub-JHMDB keypoint indexes::

        0: "neck",
        1: "belly",
        2: "head",
        3: "right_shoulder",
        4: "left_shoulder",
        5: "right_hip",
        6: "left_hip",
        7: "right_elbow",
        8: "left_elbow",
        9: "right_knee",
        10: "left_knee",
        11: "right_wrist",
        12: "left_wrist",
        13: "right_ankle",
        14: "left_ankle"

    Args:
        ann_file (str): Path to the annotation file.
        img_prefix (str): Path to a directory where images are held.
            Default: None.
        data_cfg (dict): config
        pipeline (list[dict | callable]): A sequence of data transforms.
        test_mode (bool): Store True when building test or
            validation dataset. Default: False.
    """

    def __init__(self,
                 ann_file,
                 img_prefix,
                 data_cfg,
                 pipeline,
                 test_mode=False):
        super(TopDownCocoDataset, self).__init__(
            ann_file, img_prefix, data_cfg, pipeline, test_mode=test_mode)

        self.use_gt_bbox = data_cfg['use_gt_bbox']
        self.bbox_file = data_cfg['bbox_file']
        self.det_bbox_thr = data_cfg.get('det_bbox_thr', 0.0)
        if 'image_thr' in data_cfg:
            warnings.warn(
                'image_thr is deprecated, '
                'please use det_bbox_thr instead', DeprecationWarning)
            self.det_bbox_thr = data_cfg['image_thr']
        self.soft_nms = data_cfg['soft_nms']
        self.nms_thr = data_cfg['nms_thr']
        self.oks_thr = data_cfg['oks_thr']
        self.vis_thr = data_cfg['vis_thr']

        self.ann_info['flip_pairs'] = [[3, 4], [5, 6], [7, 8], [9, 10],
                                       [11, 12], [13, 14]]

        self.ann_info['upper_body_ids'] = (0, 1, 2, 3, 4, 7, 8, 11, 12)
        self.ann_info['lower_body_ids'] = (5, 6, 9, 10, 13, 14)

        self.ann_info['use_different_joint_weights'] = False
        self.ann_info['joint_weights'] = np.array(
            [
                1., 1., 1., 1., 1., 1., 1., 1.2, 1.2, 1.2, 1.2, 1.5, 1.5,
                1.5, 1.5
            ],
            dtype=np.float32).reshape((self.ann_info['num_joints'], 1))

        self.sigmas = np.array([
            .25, 1.07, .25, .79, .79, 1.07, 1.07, .72, .72, .87, .87, .62,
            .62, .89, .89
        ]) / 10.0

        self.coco = COCO(ann_file)

        cats = [
            cat['name'] for cat in self.coco.loadCats(self.coco.getCatIds())
        ]
        self.classes = ['__background__'] + cats
        self.num_classes = len(self.classes)
        self._class_to_ind = dict(zip(self.classes, range(self.num_classes)))
        self._class_to_coco_ind = dict(zip(cats, self.coco.getCatIds()))
        self._coco_ind_to_class_ind = dict(
            (self._class_to_coco_ind[cls], self._class_to_ind[cls])
            for cls in self.classes[1:])
        self.img_ids = self.coco.getImgIds()
        self.num_images = len(self.img_ids)
        self.id2name, self.name2id = self._get_mapping_id_name(self.coco.imgs)
        self.dataset_name = 'jhmdb'

        self.db = self._get_db()

        print(f'=> num_images: {self.num_images}')
        print(f'=> load {len(self.db)} samples')

    def _get_db(self):
        """Load dataset."""
        assert self.use_gt_bbox
        gt_db = self._load_coco_keypoint_annotations()
        return gt_db

    def _load_coco_keypoint_annotation_kernel(self, img_id):
        """Load annotation from COCOAPI.
Note: bbox:[x1, y1, w, h] Args: img_id: coco image id Returns: dict: db entry """ img_ann = self.coco.loadImgs(img_id)[0] width = img_ann['width'] height = img_ann['height'] num_joints = self.ann_info['num_joints'] ann_ids = self.coco.getAnnIds(imgIds=img_id, iscrowd=False) objs = self.coco.loadAnns(ann_ids) # sanitize bboxes valid_objs = [] for obj in objs: if 'bbox' not in obj: continue x, y, w, h = obj['bbox'] # JHMDB uses matlab format, index is 1-based, # we should first convert to 0-based index x -= 1 y -= 1 x1 = max(0, x) y1 = max(0, y) x2 = min(width - 1, x1 + max(0, w - 1)) y2 = min(height - 1, y1 + max(0, h - 1)) if ('area' not in obj or obj['area'] > 0) and x2 > x1 and y2 > y1: obj['clean_bbox'] = [x1, y1, x2 - x1, y2 - y1] valid_objs.append(obj) objs = valid_objs rec = [] bbox_id = 0 for obj in objs: if 'keypoints' not in obj: continue if max(obj['keypoints']) == 0: continue if 'num_keypoints' in obj and obj['num_keypoints'] == 0: continue joints_3d = np.zeros((num_joints, 3), dtype=np.float32) joints_3d_visible = np.zeros((num_joints, 3), dtype=np.float32) keypoints = np.array(obj['keypoints']).reshape(-1, 3) # JHMDB uses matlab format, index is 1-based, # we should first convert to 0-based index joints_3d[:, :2] = keypoints[:, :2] - 1 joints_3d_visible[:, :2] = np.minimum(1, keypoints[:, 2:3]) center, scale = self._xywh2cs(*obj['clean_bbox'][:4]) image_file = os.path.join(self.img_prefix, self.id2name[img_id]) rec.append({ 'image_file': image_file, 'center': center, 'scale': scale, 'bbox': obj['clean_bbox'][:4], 'rotation': 0, 'joints_3d': joints_3d, 'joints_3d_visible': joints_3d_visible, 'dataset': self.dataset_name, 'bbox_score': 1, 'bbox_id': f'{img_id}_{bbox_id:03}' }) bbox_id = bbox_id + 1 return rec def _write_keypoint_results(self, keypoints, res_file): """Write results into a json file.""" with open(res_file, 'w') as f: json.dump(keypoints, f, sort_keys=True, indent=4) def _report_metric(self, res_file, metrics, pck_thr=0.2): """Keypoint evaluation. Args: res_file (str): Json file stored prediction results. metrics (str | list[str]): Metric to be performed. Options: 'PCK', 'PCKh', 'AUC', 'EPE'. pck_thr (float): PCK threshold, default as 0.2. pckh_thr (float): PCKh threshold, default as 0.7. auc_nor (float): AUC normalization factor, default as 30 pixel. Returns: dict: Evaluation results for evaluation metric. 
""" info_str = [] with open(res_file, 'r') as fin: preds = json.load(fin) assert len(preds) == len(self.db) outputs = [] gts = [] masks = [] threshold_bbox = [] threshold_torso = [] for pred, item in zip(preds, self.db): outputs.append(np.array(pred['keypoints'])[:, :-1]) gts.append(np.array(item['joints_3d'])[:, :-1]) masks.append((np.array(item['joints_3d_visible'])[:, 0]) > 0) if 'PCK' in metrics: bbox = np.array(item['bbox']) bbox_thr = np.max(bbox[2:]) threshold_bbox.append(np.array([bbox_thr, bbox_thr])) if 'tPCK' in metrics: torso_thr = np.linalg.norm(item['joints_3d'][4, :2] - item['joints_3d'][5, :2]) if torso_thr < 1: torso_thr = np.linalg.norm( np.array(pred['keypoints'])[4, :2] - np.array(pred['keypoints'])[5, :2]) warnings.warn('Torso Size < 1.') threshold_torso.append(np.array([torso_thr, torso_thr])) outputs = np.array(outputs) gts = np.array(gts) masks = np.array(masks) threshold_bbox = np.array(threshold_bbox) threshold_torso = np.array(threshold_torso) if 'PCK' in metrics: pck_p, pck, _ = keypoint_pck_accuracy(outputs, gts, masks, pck_thr, threshold_bbox) stats_names = [ 'Head PCK', 'Sho PCK', 'Elb PCK', 'Wri PCK', 'Hip PCK', 'Knee PCK', 'Ank PCK', 'Mean PCK' ] stats = [ pck_p[2], 0.5 * pck_p[3] + 0.5 * pck_p[4], 0.5 * pck_p[7] + 0.5 * pck_p[8], 0.5 * pck_p[11] + 0.5 * pck_p[12], 0.5 * pck_p[5] + 0.5 * pck_p[6], 0.5 * pck_p[9] + 0.5 * pck_p[10], 0.5 * pck_p[13] + 0.5 * pck_p[14], pck ] info_str.extend(list(zip(stats_names, stats))) if 'tPCK' in metrics: pck_p, pck, _ = keypoint_pck_accuracy(outputs, gts, masks, pck_thr, threshold_torso) stats_names = [ 'Head tPCK', 'Sho tPCK', 'Elb tPCK', 'Wri tPCK', 'Hip tPCK', 'Knee tPCK', 'Ank tPCK', 'Mean tPCK' ] stats = [ pck_p[2], 0.5 * pck_p[3] + 0.5 * pck_p[4], 0.5 * pck_p[7] + 0.5 * pck_p[8], 0.5 * pck_p[11] + 0.5 * pck_p[12], 0.5 * pck_p[5] + 0.5 * pck_p[6], 0.5 * pck_p[9] + 0.5 * pck_p[10], 0.5 * pck_p[13] + 0.5 * pck_p[14], pck ] info_str.extend(list(zip(stats_names, stats))) return info_str def evaluate(self, outputs, res_folder, metric='PCK', **kwargs): """Evaluate onehand10k keypoint results. The pose prediction results will be saved in `${res_folder}/result_keypoints.json`. Note: batch_size: N num_keypoints: K heatmap height: H heatmap width: W Args: outputs (list(preds, boxes, image_path, output_heatmap)) :preds (np.ndarray[1,K,3]): The first two dimensions are coordinates, score is the third dimension of the array. :boxes (np.ndarray[1,6]): [center[0], center[1], scale[0] , scale[1],area, score] :image_path (list[str]) :output_heatmap (np.ndarray[N, K, H, W]): model outpus. res_folder (str): Path of directory to save the results. metric (str | list[str]): Metric to be performed. Options: 'PCK', 'tPCK'. PCK means normalized by the bounding boxes, while tPCK means normalized by the torso size. Returns: dict: Evaluation results for evaluation metric. """ metrics = metric if isinstance(metric, list) else [metric] allowed_metrics = ['PCK', 'tPCK'] for metric in metrics: if metric not in allowed_metrics: raise KeyError(f'metric {metric} is not supported') res_file = os.path.join(res_folder, 'result_keypoints.json') kpts = [] for preds, boxes, image_paths, _, bbox_ids in outputs: # convert 0-based index to 1-based index, # and get the first two dimensions. 
preds[..., :2] += 1.0 batch_size = len(image_paths) for i in range(batch_size): image_id = self.name2id[image_paths[i][len(self.img_prefix):]] kpts.append({ 'keypoints': preds[i], 'center': boxes[i][0:2], 'scale': boxes[i][2:4], 'area': boxes[i][4], 'score': boxes[i][5], 'image_id': image_id, 'bbox_id': bbox_ids[i] }) kpts = self._sort_and_unique_bboxes(kpts) self._write_keypoint_results(kpts, res_file) info_str = self._report_metric(res_file, metrics) name_value = OrderedDict(info_str) return name_value def _sort_and_unique_bboxes(self, kpts, key='bbox_id'): """sort kpts and remove the repeated ones.""" kpts = sorted(kpts, key=lambda x: x[key]) num = len(kpts) for i in range(num - 1, 0, -1): if kpts[i][key] == kpts[i - 1][key]: del kpts[i] return kpts
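# Flip pairs like the sub-JHMDB list above are easy to get wrong: a pair that
# is listed twice gets swapped twice during flip augmentation, which silently
# leaves those joints un-mirrored. A small sanity check (a sketch, not part
# of mmpose's API):
def validate_flip_pairs(flip_pairs, num_joints):
    """Assert every joint index appears in at most one flip pair."""
    seen = set()
    for left, right in flip_pairs:
        assert 0 <= left < num_joints and 0 <= right < num_joints
        assert left != right, f'self-pair: {left}'
        assert left not in seen and right not in seen, \
            f'joint duplicated in flip pairs: {(left, right)}'
        seen.update((left, right))

# The sub-JHMDB pairs pass:
validate_flip_pairs([[3, 4], [5, 6], [7, 8], [9, 10], [11, 12], [13, 14]], 15)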
class BottomUpCrowdPoseDataset(BottomUpCocoDataset): """CrowdPose dataset for bottom-up pose estimation. The dataset loads raw features and apply specified transforms to return a dict containing the image tensors and other information. CrowdPose keypoint indexes:: 0: 'left_shoulder', 1: 'right_shoulder', 2: 'left_elbow', 3: 'right_elbow', 4: 'left_wrist', 5: 'right_wrist', 6: 'left_hip', 7: 'right_hip', 8: 'left_knee', 9: 'right_knee', 10: 'left_ankle', 11: 'right_ankle', 12: 'top_head', 13: 'neck' Args: ann_file (str): Path to the annotation file. img_prefix (str): Path to a directory where images are held. Default: None. data_cfg (dict): config pipeline (list[dict | callable]): A sequence of data transforms. test_mode (bool): Store True when building test or validation dataset. Default: False. """ def __init__(self, ann_file, img_prefix, data_cfg, pipeline, test_mode=False): super(BottomUpCocoDataset, self).__init__(ann_file, img_prefix, data_cfg, pipeline, test_mode=test_mode) self.ann_info['flip_index'] = [ 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 12, 13 ] self.ann_info['use_different_joint_weights'] = False self.ann_info['joint_weights'] = np.array( [ 0.2, 0.2, 0.2, 1.3, 1.5, 0.2, 1.3, 1.5, 0.2, 0.2, 0.5, 0.2, 0.2, 0.5 ], dtype=np.float32).reshape((self.ann_info['num_joints'], 1)) self.sigmas = np.array([ .79, .79, .72, .72, .62, .62, 1.07, 1.07, .87, .87, .89, .89, .79, .79 ]) / 10.0 self.coco = COCO(ann_file) cats = [ cat['name'] for cat in self.coco.loadCats(self.coco.getCatIds()) ] self.classes = ['__background__'] + cats self.num_classes = len(self.classes) self._class_to_ind = dict(zip(self.classes, range(self.num_classes))) self._class_to_coco_ind = dict(zip(cats, self.coco.getCatIds())) self._coco_ind_to_class_ind = dict( (self._class_to_coco_ind[cls], self._class_to_ind[cls]) for cls in self.classes[1:]) self.img_ids = self.coco.getImgIds() if not test_mode: self.img_ids = [ img_id for img_id in self.img_ids if len(self.coco.getAnnIds(imgIds=img_id, iscrowd=None)) > 0 ] self.num_images = len(self.img_ids) self.id2name, self.name2id = self._get_mapping_id_name(self.coco.imgs) self.dataset_name = 'crowdpose' print(f'=> num_images: {self.num_images}') def _do_python_keypoint_eval(self, res_file): """Keypoint evaluation using COCOAPI.""" stats_names = [ 'AP', 'AP .5', 'AP .75', 'AR', 'AR .5', 'AR .75', 'AP(E)', 'AP(M)', 'AP(H)' ] with open(res_file, 'r') as file: res_json = json.load(file) if not res_json: info_str = list(zip(stats_names, [ 0, ] * len(stats_names))) return info_str coco_det = self.coco.loadRes(res_file) coco_eval = COCOeval(self.coco, coco_det, 'keypoints_crowd', self.sigmas, use_area=False) coco_eval.params.useSegm = None coco_eval.evaluate() coco_eval.accumulate() coco_eval.summarize() info_str = list(zip(stats_names, coco_eval.stats)) return info_str
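# A hedged, standalone sketch of the OKS similarity that COCOeval derives from
# sigmas like the ones above (this is the textbook formula, not mmpose's own
# code): oks = mean over visible joints of
# exp(-d_i^2 / (2 * area * (2 * sigma_i)^2)).
import numpy as np

def object_keypoint_similarity(gt, pred, visible, area, sigmas):
    k2 = (2 * sigmas) ** 2
    d2 = np.sum((gt - pred) ** 2, axis=-1)
    e = d2 / (2 * area * k2 + np.spacing(1))
    return np.exp(-e)[visible > 0].mean()

crowdpose_sigmas = np.array([
    .79, .79, .72, .72, .62, .62, 1.07, 1.07, .87, .87, .89, .89, .79, .79
]) / 10.0
gt = np.random.rand(14, 2) * 100
visible = np.ones(14)
# A perfect prediction scores 1.0:
assert np.isclose(
    object_keypoint_similarity(gt, gt, visible, 100 * 100, crowdpose_sigmas),
    1.0)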
def main(): """Visualize the demo images. Require the json_file containing boxes. """ parser = ArgumentParser() parser.add_argument('pose_config', help='Config file for detection') parser.add_argument('pose_checkpoint', help='Checkpoint file') parser.add_argument('--img-root', type=str, default='', help='Image root') parser.add_argument('--json-file', type=str, default='', help='Json file containing image info.') parser.add_argument('--show', action='store_true', default=False, help='whether to show img') parser.add_argument('--out-img-root', type=str, default='', help='Root of the output img file. ' 'Default not saving the visualization images.') parser.add_argument('--device', default='cuda:0', help='Device used for inference') parser.add_argument('--kpt-thr', type=float, default=0.3, help='Keypoint score threshold') args = parser.parse_args() assert args.show or (args.out_img_root != '') skeleton = [[16, 14], [14, 12], [17, 15], [15, 13], [12, 13], [6, 12], [7, 13], [6, 7], [6, 8], [7, 9], [8, 10], [9, 11], [2, 3], [1, 2], [1, 3], [2, 4], [3, 5], [4, 6], [5, 7]] coco = COCO(args.json_file) # build the pose model from a config file and a checkpoint file pose_model = init_pose_model(args.pose_config, args.pose_checkpoint, device=args.device) img_keys = list(coco.imgs.keys()) # process each image for i in range(len(img_keys)): # get bounding box annotations image_id = img_keys[i] image = coco.loadImgs(image_id)[0] image_name = os.path.join(args.img_root, image['file_name']) ann_ids = coco.getAnnIds(image_id) # make person bounding boxes person_bboxes = [] for ann_id in ann_ids: ann = coco.anns[ann_id] # bbox format is 'xywh' bbox = ann['bbox'] person_bboxes.append(bbox) # test a single image, with a list of bboxes. pose_results = inference_top_down_pose_model(pose_model, image_name, person_bboxes, format='xywh') if args.out_img_root == '': out_file = None else: os.makedirs(args.out_img_root, exist_ok=True) out_file = os.path.join(args.out_img_root, f'vis_{i}.jpg') # show the results vis_pose_result(pose_model, image_name, pose_results, skeleton=skeleton, kpt_score_thr=args.kpt_thr, show=args.show, out_file=out_file)
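# The demo above feeds raw COCO annotation boxes with format='xywh'. For
# reference, a sketch of converting between the two common box layouts; note
# that some codebases (including parts of mmpose) treat corners as inclusive
# and add/subtract 1, so check the convention before reusing this.
def xywh2xyxy(bbox):
    x, y, w, h = bbox
    return [x, y, x + w, y + h]

def xyxy2xywh(bbox):
    x1, y1, x2, y2 = bbox
    return [x1, y1, x2 - x1, y2 - y1]

assert xyxy2xywh(xywh2xyxy([10, 20, 30, 40])) == [10, 20, 30, 40]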
class BottomUpCowaCarDataset(BottomUpCocoDataset): """CowaCar dataset for bottom-up vehicle grounding point estimation. The dataset loads raw features and apply specified transforms to return a dict containing the image tensors and other information. CowaCar keypoint indexes:: 0: 'left_back', 1: 'right_back', 2: 'left_front', 3: 'right_front' Args: ann_file (str): Path to the annotation file. img_prefix (str): Path to a directory where images are held. Default: None. data_cfg (dict): config pipeline (list[dict | callable]): A sequence of data transforms. test_mode (bool): Store True when building test or validation dataset. Default: False. """ def __init__(self, ann_file, img_prefix, data_cfg, pipeline, test_mode=False): super(BottomUpCocoDataset, self).__init__(ann_file, img_prefix, data_cfg, pipeline, test_mode=test_mode) self.ann_info['flip_index'] = [1, 0, 3, 2] self.ann_info['use_different_joint_weights'] = False self.ann_info['joint_weights'] = np.array( [1., 1., 1., 1.], dtype=np.float32).reshape( (self.ann_info['num_joints'], 1)) self.sigmas = np.array([1., 1., 1., 1.]) / 10.0 self.coco = COCO(ann_file) cats = [ cat['name'] for cat in self.coco.loadCats(self.coco.getCatIds()) ] self.classes = ['__background__'] + cats self.num_classes = len(self.classes) self._class_to_ind = dict(zip(self.classes, range(self.num_classes))) self._class_to_coco_ind = dict(zip(cats, self.coco.getCatIds())) self._coco_ind_to_class_ind = dict( (self._class_to_coco_ind[cls], self._class_to_ind[cls]) for cls in self.classes[1:]) self.img_ids = self.coco.getImgIds() if not test_mode: self.img_ids = [ img_id for img_id in self.img_ids if len(self.coco.getAnnIds(imgIds=img_id, iscrowd=None)) > 0 ] self.num_images = len(self.img_ids) self.id2name, self.name2id = self._get_mapping_id_name(self.coco.imgs) self.dataset_name = 'cowacar' print(f'=> num_images: {self.num_images}') def _do_python_keypoint_eval(self, res_file): """Keypoint evaluation using COCOAPI.""" coco_det = self.coco.loadRes(res_file) coco_eval = COCOeval(self.coco, coco_det, 'keypoints_crowd', self.sigmas, use_area=False) coco_eval.params.useSegm = None coco_eval.evaluate() coco_eval.accumulate() coco_eval.summarize() stats_names = [ 'AP', 'AP .5', 'AP .75', 'AR', 'AR .5', 'AR .75', 'AP(E)', 'AP(M)', 'AP(H)' ] info_str = list(zip(stats_names, coco_eval.stats)) return info_str
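# Sketch of how a flip_index such as [1, 0, 3, 2] above is typically applied
# during horizontal flipping (illustrative; the actual transform lives in the
# data pipeline): mirror the x coordinates, then reorder the joints so that
# 'left_*' and 'right_*' labels stay correct.
import numpy as np

def flip_keypoints(kpts, flip_index, img_width):
    flipped = kpts[flip_index].copy()              # swap left/right channels
    flipped[:, 0] = img_width - 1 - flipped[:, 0]  # mirror x coordinates
    return flipped

kpts = np.array([[10., 5., 1.], [90., 5., 1.], [12., 50., 1.], [88., 50., 1.]])
flipped = flip_keypoints(kpts, [1, 0, 3, 2], img_width=100)
assert flipped[0, 0] == 100 - 1 - 90  # slot 0 now holds the mirrored pair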
def main(): """Visualize the demo images. Require the json_file containing boxes. """ parser = ArgumentParser() parser.add_argument('pose_config', help='Config file for detection') parser.add_argument('pose_checkpoint', help='Checkpoint file') parser.add_argument('--img-root', type=str, default='', help='Image root') parser.add_argument('--json-file', type=str, default='', help='Json file containing image info.') parser.add_argument('--show', action='store_true', default=False, help='whether to show img') parser.add_argument('--out-img-root', type=str, default='', help='Root of the output img file. ' 'Default not saving the visualization images.') parser.add_argument('--device', default='cuda:0', help='Device used for inference') parser.add_argument('--kpt-thr', type=float, default=0.3, help='Keypoint score threshold') parser.add_argument('--radius', type=int, default=4, help='Keypoint radius for visualization') parser.add_argument('--thickness', type=int, default=1, help='Link thickness for visualization') args = parser.parse_args() assert args.show or (args.out_img_root != '') coco = COCO(args.json_file) # build the pose model from a config file and a checkpoint file pose_model = init_pose_model(args.pose_config, args.pose_checkpoint, device=args.device.lower()) dataset = pose_model.cfg.data['test']['type'] dataset_info = pose_model.cfg.data['test'].get('dataset_info', None) if dataset_info is None: warnings.warn( 'Please set `dataset_info` in the config.' 'Check https://github.com/open-mmlab/mmpose/pull/663 for details.', DeprecationWarning) else: dataset_info = DatasetInfo(dataset_info) img_keys = list(coco.imgs.keys()) # optional return_heatmap = False # e.g. use ('backbone', ) to return backbone feature output_layer_names = None # process each image for i in mmcv.track_iter_progress(range(len(img_keys))): # get bounding box annotations image_id = img_keys[i] image = coco.loadImgs(image_id)[0] image_name = os.path.join(args.img_root, image['file_name']) ann_ids = coco.getAnnIds(image_id) # make person bounding boxes person_results = [] for ann_id in ann_ids: person = {} ann = coco.anns[ann_id] # bbox format is 'xywh' person['bbox'] = ann['bbox'] person_results.append(person) # test a single image, with a list of bboxes pose_results, returned_outputs = inference_top_down_pose_model( pose_model, image_name, person_results, bbox_thr=None, format='xywh', dataset=dataset, dataset_info=dataset_info, return_heatmap=return_heatmap, outputs=output_layer_names) if args.out_img_root == '': out_file = None else: os.makedirs(args.out_img_root, exist_ok=True) out_file = os.path.join(args.out_img_root, f'vis_{i}.jpg') vis_pose_result(pose_model, image_name, pose_results, dataset=dataset, dataset_info=dataset_info, kpt_score_thr=args.kpt_thr, radius=args.radius, thickness=args.thickness, show=args.show, out_file=out_file)
def main(): """Visualize the demo images. Require the json_file containing boxes. """ parser = ArgumentParser() parser.add_argument('pose_config', help='Config file for detection') parser.add_argument('pose_checkpoint', help='Checkpoint file') parser.add_argument('--img-root', type=str, default='', help='Image root') parser.add_argument('--json-file', type=str, default='', help='Json file containing image info.') parser.add_argument('--show', action='store_true', default=False, help='whether to show img') parser.add_argument('--out-img-root', type=str, default='', help='Root of the output img file. ' 'Default not saving the visualization images.') parser.add_argument('--device', default='cuda:0', help='Device used for inference') args = parser.parse_args() assert args.show or (args.out_img_root != '') coco = COCO(args.json_file) # build the pose model from a config file and a checkpoint file pose_model = init_pose_model(args.pose_config, args.pose_checkpoint, device=args.device.lower()) dataset = pose_model.cfg.data['test']['type'] img_keys = list(coco.imgs.keys()) # process each image for i in mmcv.track_iter_progress(range(len(img_keys))): # get bounding box annotations image_id = img_keys[i] image = coco.loadImgs(image_id)[0] image_name = os.path.join(args.img_root, image['file_name']) ann_ids = coco.getAnnIds(image_id) # make person bounding boxes person_results = [] for ann_id in ann_ids: person = {} ann = coco.anns[ann_id] # bbox format is 'xywh' person['bbox'] = ann['bbox'] person_results.append(person) # test a single image, with a list of bboxes pose_results = inference_mesh_model(pose_model, image_name, person_results, bbox_thr=None, format='xywh', dataset=dataset) if args.out_img_root == '': out_file = None else: os.makedirs(args.out_img_root, exist_ok=True) out_file = os.path.join(args.out_img_root, f'vis_{i}.jpg') vis_3d_mesh_result(pose_model, pose_results, image_name, show=args.show, out_file=out_file)
def main(): """Visualize the demo images. pose_keypoints require the json_file containing boxes. """ parser = ArgumentParser() parser.add_argument('pose_config', help='Config file for detection') parser.add_argument('pose_checkpoint', help='Checkpoint file') parser.add_argument('--img-root', type=str, default='', help='Image root') parser.add_argument( '--json-file', type=str, default='', help='Json file containing image person bboxes in COCO format.') parser.add_argument( '--out-json-file', type=str, default='', help='Output json contains pseudolabeled annotation') parser.add_argument( '--show', action='store_true', default=False, help='whether to show img') parser.add_argument( '--device', default='cuda:0', help='Device used for inference') parser.add_argument( '--kpt-thr', type=float, default=0.3, help='Keypoint score threshold') args = parser.parse_args() coco = COCO(args.json_file) # build the pose model from a config file and a checkpoint file pose_model = init_pose_model( args.pose_config, args.pose_checkpoint, device=args.device.lower()) dataset = pose_model.cfg.data['test']['type'] img_keys = list(coco.imgs.keys()) # optional return_heatmap = False # e.g. use ('backbone', ) to return backbone feature output_layer_names = None categories = [{'id': 1, 'name': 'person'}] img_anno_dict = {'images': [], 'annotations': [], 'categories': categories} # process each image ann_uniq_id = int(0) for i in track_iter_progress(range(len(img_keys))): # get bounding box annotations image_id = img_keys[i] image = coco.loadImgs(image_id)[0] image_name = os.path.join(args.img_root, image['file_name']) width, height = Image.open(image_name).size ann_ids = coco.getAnnIds(image_id) # make person bounding boxes person_results = [] for ann_id in ann_ids: person = {} ann = coco.anns[ann_id] # bbox format is 'xywh' person['bbox'] = ann['bbox'] person_results.append(person) pose_results, returned_outputs = inference_top_down_pose_model( pose_model, image_name, person_results, bbox_thr=None, format='xywh', dataset=dataset, return_heatmap=return_heatmap, outputs=output_layer_names) # add output of model and bboxes to dict for indx, i in enumerate(pose_results): pose_results[indx]['keypoints'][ pose_results[indx]['keypoints'][:, 2] < args.kpt_thr, :3] = 0 pose_results[indx]['keypoints'][ pose_results[indx]['keypoints'][:, 2] >= args.kpt_thr, 2] = 2 x = int(pose_results[indx]['bbox'][0]) y = int(pose_results[indx]['bbox'][1]) w = int(pose_results[indx]['bbox'][2] - pose_results[indx]['bbox'][0]) h = int(pose_results[indx]['bbox'][3] - pose_results[indx]['bbox'][1]) bbox = [x, y, w, h] area = round((w * h), 0) images = { 'file_name': image_name.split('/')[-1], 'height': height, 'width': width, 'id': int(image_id) } annotations = { 'keypoints': [ int(i) for i in pose_results[indx]['keypoints'].reshape( -1).tolist() ], 'num_keypoints': len(pose_results[indx]['keypoints']), 'area': area, 'iscrowd': 0, 'image_id': int(image_id), 'bbox': bbox, 'category_id': 1, 'id': ann_uniq_id, } img_anno_dict['annotations'].append(annotations) ann_uniq_id += 1 img_anno_dict['images'].append(images) # create json with open(args.out_json_file, 'w') as outfile: json.dump(img_anno_dict, outfile, indent=2)
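# The pseudo-labelling loop above maps predicted scores onto COCO visibility
# flags: keypoints scoring below --kpt-thr are zeroed out (v=0, "not
# labelled"), the rest get v=2 ("labelled and visible"). The same logic in
# isolation:
import numpy as np

def scores_to_coco_flags(keypoints, kpt_thr=0.3):
    kpts = keypoints.copy()
    low = kpts[:, 2] < kpt_thr
    kpts[low, :3] = 0   # unreliable: drop x, y and the flag
    kpts[~low, 2] = 2   # reliable: mark as labelled and visible
    return kpts

pred = np.array([[10., 20., 0.9], [30., 40., 0.1]])
out = scores_to_coco_flags(pred)
assert out[1].tolist() == [0., 0., 0.] and out[0, 2] == 2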
def main(): parser = ArgumentParser() parser.add_argument('pose_lifter_config', help='Config file for the 2nd stage pose lifter model') parser.add_argument( 'pose_lifter_checkpoint', help='Checkpoint file for the 2nd stage pose lifter model') parser.add_argument('--pose-detector-config', type=str, default=None, help='Config file for the 1st stage 2D pose detector') parser.add_argument( '--pose-detector-checkpoint', type=str, default=None, help='Checkpoint file for the 1st stage 2D pose detector') parser.add_argument('--img-root', type=str, default='', help='Image root') parser.add_argument( '--json-file', type=str, default=None, help='Json file containing image and bbox information. Optionally, ' 'the Json file can also contain 2D pose information. See ' '"--only-second-stage".') parser.add_argument( '--camera-param-file', type=str, default=None, help='Camera parameter file for converting 3D pose predictions from ' 'the camera space to the world space. If None, no conversion will be ' 'applied.') parser.add_argument( '--only-second-stage', action='store_true', help='If true, load 2D pose detection result from the Json file and ' 'skip the 1st stage. The pose detection model will be ignored.') parser.add_argument( '--rebase-keypoint-height', action='store_true', help='Rebase the predicted 3D pose so its lowest keypoint has a ' 'height of 0 (landing on the ground). This is useful for ' 'visualization when the model does not predict the global position ' 'of the 3D pose.') parser.add_argument( '--show-ground-truth', action='store_true', help='If True, show ground truth if it is available. The ground truth ' 'should be contained in the annotations in the Json file with the key ' '"keypoints_3d" for each instance.') parser.add_argument('--show', action='store_true', default=False, help='whether to show img') parser.add_argument('--out-img-root', type=str, default=None, help='Root of the output visualization images. 
' 'Default not saving the visualization images.') parser.add_argument('--device', default='cuda:0', help='Device for inference') parser.add_argument('--kpt-thr', type=float, default=0.3) parser.add_argument('--radius', type=int, default=4, help='Keypoint radius for visualization') parser.add_argument('--thickness', type=int, default=1, help='Link thickness for visualization') args = parser.parse_args() assert args.show or (args.out_img_root != '') coco = COCO(args.json_file) # First stage: 2D pose detection pose_det_results_list = [] if args.only_second_stage: from mmpose.apis.inference import _xywh2xyxy print('Stage 1: load 2D pose results from Json file.') for image_id, image in coco.imgs.items(): image_name = osp.join(args.img_root, image['file_name']) ann_ids = coco.getAnnIds(image_id) pose_det_results = [] for ann_id in ann_ids: ann = coco.anns[ann_id] keypoints = np.array(ann['keypoints']).reshape(-1, 3) keypoints[..., 2] = keypoints[..., 2] >= 1 keypoints_3d = np.array(ann['keypoints_3d']).reshape(-1, 4) keypoints_3d[..., 3] = keypoints_3d[..., 3] >= 1 bbox = np.array(ann['bbox']).reshape(1, -1) pose_det_result = { 'image_name': image_name, 'bbox': _xywh2xyxy(bbox), 'keypoints': keypoints, 'keypoints_3d': keypoints_3d } pose_det_results.append(pose_det_result) pose_det_results_list.append(pose_det_results) else: print('Stage 1: 2D pose detection.') pose_det_model = init_pose_model(args.pose_detector_config, args.pose_detector_checkpoint, device=args.device.lower()) assert pose_det_model.cfg.model.type == 'TopDown', 'Only "TopDown"' \ 'model is supported for the 1st stage (2D pose detection)' dataset = pose_det_model.cfg.data['test']['type'] img_keys = list(coco.imgs.keys()) for i in mmcv.track_iter_progress(range(len(img_keys))): # get bounding box annotations image_id = img_keys[i] image = coco.loadImgs(image_id)[0] image_name = osp.join(args.img_root, image['file_name']) ann_ids = coco.getAnnIds(image_id) # make person results for single image person_results = [] for ann_id in ann_ids: person = {} ann = coco.anns[ann_id] person['bbox'] = ann['bbox'] person_results.append(person) pose_det_results, _ = inference_top_down_pose_model( pose_det_model, image_name, person_results, bbox_thr=None, format='xywh', dataset=dataset, return_heatmap=False, outputs=None) for res in pose_det_results: res['image_name'] = image_name pose_det_results_list.append(pose_det_results) # Second stage: Pose lifting print('Stage 2: 2D-to-3D pose lifting.') pose_lift_model = init_pose_model(args.pose_lifter_config, args.pose_lifter_checkpoint, device=args.device.lower()) assert pose_lift_model.cfg.model.type == 'PoseLifter', 'Only' \ '"PoseLifter" model is supported for the 2nd stage ' \ '(2D-to-3D lifting)' dataset = pose_lift_model.cfg.data['test']['type'] camera_params = None if args.camera_param_file is not None: camera_params = mmcv.load(args.camera_param_file) for i, pose_det_results in enumerate( mmcv.track_iter_progress(pose_det_results_list)): # 2D-to-3D pose lifting # Note that the pose_det_results are regarded as a single-frame pose # sequence pose_lift_results = inference_pose_lifter_model( pose_lift_model, pose_results_2d=[pose_det_results], dataset=dataset, with_track_id=False) image_name = pose_det_results[0]['image_name'] # Pose processing pose_lift_results_vis = [] for idx, res in enumerate(pose_lift_results): keypoints_3d = res['keypoints_3d'] # project to world space if camera_params is not None: keypoints_3d = _keypoint_camera_to_world( keypoints_3d, camera_params=camera_params, 
image_name=image_name, dataset=dataset) # rebase height (z-axis) if args.rebase_keypoint_height: keypoints_3d[..., 2] -= np.min(keypoints_3d[..., 2], axis=-1, keepdims=True) res['keypoints_3d'] = keypoints_3d # Add title det_res = pose_det_results[idx] instance_id = det_res.get('track_id', idx) res['title'] = f'Prediction ({instance_id})' pose_lift_results_vis.append(res) # Add ground truth if args.show_ground_truth: if 'keypoints_3d' not in det_res: print('Fail to show ground truth. Please make sure that' ' the instance annotations from the Json file' ' contain "keypoints_3d".') else: gt = res.copy() gt['keypoints_3d'] = det_res['keypoints_3d'] gt['title'] = f'Ground truth ({instance_id})' pose_lift_results_vis.append(gt) # Visualization if args.out_img_root is None: out_file = None else: os.makedirs(args.out_img_root, exist_ok=True) out_file = osp.join(args.out_img_root, f'vis_{i}.jpg') vis_3d_pose_result(pose_lift_model, result=pose_lift_results_vis, img=pose_lift_results[0]['image_name'], out_file=out_file)
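# The --rebase-keypoint-height option above is a pure visualization aid: each
# pose is shifted so its lowest keypoint sits at z = 0, which helps when the
# lifter predicts only root-relative coordinates. The operation in isolation:
import numpy as np

keypoints_3d = np.array([[0.0, 0.0, 1.2], [0.1, 0.0, 1.8], [0.2, 0.0, 0.9]])
keypoints_3d[..., 2] -= np.min(keypoints_3d[..., 2], axis=-1, keepdims=True)
assert np.allclose(keypoints_3d[:, 2], [0.3, 0.9, 0.0])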
class BottomUpCocoWholeBodyDataset(BottomUpCocoDataset): """CocoWholeBodyDataset dataset for bottom-up pose estimation. `Whole-Body Human Pose Estimation in the Wild' ECCV'2020 More details can be found in the `paper <https://arxiv.org/abs/2007.11858>`__ . The dataset loads raw features and apply specified transforms to return a dict containing the image tensors and other information. In total, we have 133 keypoints for wholebody pose estimation. COCO-WholeBody keypoint indexes:: 0-16: 17 body keypoints 17-22: 6 foot keypoints 23-90: 68 face keypoints 91-132: 42 hand keypoints Args: ann_file (str): Path to the annotation file. img_prefix (str): Path to a directory where images are held. Default: None. data_cfg (dict): config pipeline (list[dict | callable]): A sequence of data transforms. test_mode (bool): Store True when building test or validation dataset. Default: False. """ def __init__(self, ann_file, img_prefix, data_cfg, pipeline, test_mode=False): super(BottomUpCocoDataset, self).__init__(ann_file, img_prefix, data_cfg, pipeline, test_mode) self.ann_info['flip_pairs'] = self._make_flip_pairs() self.ann_info['flip_index'] = self.get_flip_index_from_flip_pairs( self.ann_info['flip_pairs']) self.ann_info['use_different_joint_weights'] = False self.ann_info['joint_weights'] = \ np.ones((self.ann_info['num_joints'], 1), dtype=np.float32) self.body_num = 17 self.foot_num = 6 self.face_num = 68 self.left_hand_num = 21 self.right_hand_num = 21 # 'https://github.com/jin-s13/COCO-WholeBody/blob/master/' # 'evaluation/myeval_wholebody.py#L170' self.sigmas_body = [ 0.026, 0.025, 0.025, 0.035, 0.035, 0.079, 0.079, 0.072, 0.072, 0.062, 0.062, 0.107, 0.107, 0.087, 0.087, 0.089, 0.089 ] self.sigmas_foot = [0.068, 0.066, 0.066, 0.092, 0.094, 0.094] self.sigmas_face = [ 0.042, 0.043, 0.044, 0.043, 0.040, 0.035, 0.031, 0.025, 0.020, 0.023, 0.029, 0.032, 0.037, 0.038, 0.043, 0.041, 0.045, 0.013, 0.012, 0.011, 0.011, 0.012, 0.012, 0.011, 0.011, 0.013, 0.015, 0.009, 0.007, 0.007, 0.007, 0.012, 0.009, 0.008, 0.016, 0.010, 0.017, 0.011, 0.009, 0.011, 0.009, 0.007, 0.013, 0.008, 0.011, 0.012, 0.010, 0.034, 0.008, 0.008, 0.009, 0.008, 0.008, 0.007, 0.010, 0.008, 0.009, 0.009, 0.009, 0.007, 0.007, 0.008, 0.011, 0.008, 0.008, 0.008, 0.01, 0.008 ] self.sigmas_lefthand = [ 0.029, 0.022, 0.035, 0.037, 0.047, 0.026, 0.025, 0.024, 0.035, 0.018, 0.024, 0.022, 0.026, 0.017, 0.021, 0.021, 0.032, 0.02, 0.019, 0.022, 0.031 ] self.sigmas_righthand = [ 0.029, 0.022, 0.035, 0.037, 0.047, 0.026, 0.025, 0.024, 0.035, 0.018, 0.024, 0.022, 0.026, 0.017, 0.021, 0.021, 0.032, 0.02, 0.019, 0.022, 0.031 ] self.sigmas_wholebody = (self.sigmas_body + self.sigmas_foot + self.sigmas_face + self.sigmas_lefthand + self.sigmas_righthand) self.sigmas = np.array(self.sigmas_wholebody) self.coco = COCO(ann_file) cats = [ cat['name'] for cat in self.coco.loadCats(self.coco.getCatIds()) ] self.classes = ['__background__'] + cats self.num_classes = len(self.classes) self._class_to_ind = dict(zip(self.classes, range(self.num_classes))) self._class_to_coco_ind = dict(zip(cats, self.coco.getCatIds())) self._coco_ind_to_class_ind = dict( (self._class_to_coco_ind[cls], self._class_to_ind[cls]) for cls in self.classes[1:]) self.img_ids = self.coco.getImgIds() if not test_mode: self.img_ids = [ img_id for img_id in self.img_ids if len(self.coco.getAnnIds(imgIds=img_id, iscrowd=None)) > 0 ] self.num_images = len(self.img_ids) self.id2name, self.name2id = self._get_mapping_id_name(self.coco.imgs) self.dataset_name = 'coco_wholebody' print(f'=> 
num_images: {self.num_images}') @staticmethod def _make_flip_pairs(): body = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12], [13, 14], [15, 16]] foot = [[17, 20], [18, 21], [19, 22]] face = [[23, 39], [24, 38], [25, 37], [26, 36], [27, 35], [28, 34], [29, 33], [30, 32], [40, 49], [41, 48], [42, 47], [43, 46], [44, 45], [54, 58], [55, 57], [59, 68], [60, 67], [61, 66], [62, 65], [63, 70], [64, 69], [71, 77], [72, 76], [73, 75], [78, 82], [79, 81], [83, 87], [84, 86], [88, 90]] hand = [[91, 112], [92, 113], [93, 114], [94, 115], [95, 116], [96, 117], [97, 118], [98, 119], [99, 120], [100, 121], [101, 122], [102, 123], [103, 124], [104, 125], [105, 126], [106, 127], [107, 128], [108, 129], [109, 130], [110, 131], [111, 132]] return body + foot + face + hand def _get_joints(self, anno): """Get joints for all people in an image.""" num_people = len(anno) if self.ann_info['scale_aware_sigma']: joints = np.zeros((num_people, self.ann_info['num_joints'], 4), dtype=np.float32) else: joints = np.zeros((num_people, self.ann_info['num_joints'], 3), dtype=np.float32) for i, obj in enumerate(anno): keypoints = np.array(obj['keypoints'] + obj['foot_kpts'] + obj['face_kpts'] + obj['lefthand_kpts'] + obj['righthand_kpts']).reshape(-1, 3) joints[i, :self.ann_info['num_joints'], :3] = keypoints if self.ann_info['scale_aware_sigma']: # get person box box = obj['bbox'] size = max(box[2], box[3]) sigma = size / self.base_size * self.base_sigma if self.int_sigma: sigma = int(np.ceil(sigma)) assert sigma > 0, sigma joints[i, :, 3] = sigma return joints def _coco_keypoint_results_one_category_kernel(self, data_pack): """Get coco keypoint results.""" cat_id = data_pack['cat_id'] keypoints = data_pack['keypoints'] cat_results = [] for img_kpts in keypoints: if len(img_kpts) == 0: continue _key_points = np.array( [img_kpt['keypoints'] for img_kpt in img_kpts]) key_points = _key_points.reshape(-1, self.ann_info['num_joints'] * 3) cuts = np.cumsum([ 0, self.body_num, self.foot_num, self.face_num, self.left_hand_num, self.right_hand_num ]) * 3 for img_kpt, key_point in zip(img_kpts, key_points): kpt = key_point.reshape((self.ann_info['num_joints'], 3)) left_top = np.amin(kpt, axis=0) right_bottom = np.amax(kpt, axis=0) w = right_bottom[0] - left_top[0] h = right_bottom[1] - left_top[1] cat_results.append({ 'image_id': img_kpt['image_id'], 'category_id': cat_id, 'keypoints': key_point[cuts[0]:cuts[1]].tolist(), 'foot_kpts': key_point[cuts[1]:cuts[2]].tolist(), 'face_kpts': key_point[cuts[2]:cuts[3]].tolist(), 'lefthand_kpts': key_point[cuts[3]:cuts[4]].tolist(), 'righthand_kpts': key_point[cuts[4]:cuts[5]].tolist(), 'score': img_kpt['score'], 'bbox': [left_top[0], left_top[1], w, h] }) return cat_results def _do_python_keypoint_eval(self, res_file): """Keypoint evaluation using COCOAPI.""" coco_det = self.coco.loadRes(res_file) coco_eval = COCOeval(self.coco, coco_det, 'keypoints_body', np.array(self.sigmas_body), use_area=True) coco_eval.params.useSegm = None coco_eval.evaluate() coco_eval.accumulate() coco_eval.summarize() coco_eval = COCOeval(self.coco, coco_det, 'keypoints_foot', np.array(self.sigmas_foot), use_area=True) coco_eval.params.useSegm = None coco_eval.evaluate() coco_eval.accumulate() coco_eval.summarize() coco_eval = COCOeval(self.coco, coco_det, 'keypoints_face', np.array(self.sigmas_face), use_area=True) coco_eval.params.useSegm = None coco_eval.evaluate() coco_eval.accumulate() coco_eval.summarize() coco_eval = COCOeval(self.coco, coco_det, 'keypoints_lefthand', np.array(self.sigmas_lefthand), 
use_area=True) coco_eval.params.useSegm = None coco_eval.evaluate() coco_eval.accumulate() coco_eval.summarize() coco_eval = COCOeval(self.coco, coco_det, 'keypoints_righthand', np.array(self.sigmas_righthand), use_area=True) coco_eval.params.useSegm = None coco_eval.evaluate() coco_eval.accumulate() coco_eval.summarize() coco_eval = COCOeval(self.coco, coco_det, 'keypoints_wholebody', np.array(self.sigmas_wholebody), use_area=True) coco_eval.params.useSegm = None coco_eval.evaluate() coco_eval.accumulate() coco_eval.summarize() stats_names = [ 'AP', 'AP .5', 'AP .75', 'AP (M)', 'AP (L)', 'AR', 'AR .5', 'AR .75', 'AR (M)', 'AR (L)' ] info_str = list(zip(stats_names, coco_eval.stats)) return info_str
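# The wholebody results writer above splits one flat 133 x 3 keypoint vector
# into body/foot/face/hand chunks using cumulative cuts. The same indexing in
# isolation:
import numpy as np

counts = [17, 6, 68, 21, 21]            # body, foot, face, left/right hand
cuts = np.cumsum([0] + counts) * 3      # offsets into the flattened array
flat = np.arange(133 * 3)
body, foot, face, lhand, rhand = (
    flat[cuts[i]:cuts[i + 1]] for i in range(5))
assert len(face) == 68 * 3 and rhand[-1] == flat[-1]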
class BottomUpMhpDataset(BottomUpCocoDataset): """MHPv2.0 dataset for bottom-up pose estimation. `The Multi-Human Parsing project of Learning and Vision (LV) Group, National University of Singapore (NUS) aims to push the frontiers of fine-grained visual understanding of humans in crowded scenes. <https://lv-mhp.github.io/>` The dataset loads raw features and applies specified transforms to return a dict containing the image tensors and other information. MHP keypoint indexes:: 0: "right ankle", 1: "right knee", 2: "right hip", 3: "left hip", 4: "left knee", 5: "left ankle", 6: "pelvis", 7: "thorax", 8: "upper neck", 9: "head top", 10: "right wrist", 11: "right elbow", 12: "right shoulder", 13: "left shoulder", 14: "left elbow", 15: "left wrist" Args: ann_file (str): Path to the annotation file. img_prefix (str): Path to a directory where images are held. Default: None. data_cfg (dict): config pipeline (list[dict | callable]): A sequence of data transforms. test_mode (bool): Store True when building test or validation dataset. Default: False. """ def __init__(self, ann_file, img_prefix, data_cfg, pipeline, test_mode=False): super(BottomUpCocoDataset, self).__init__(ann_file, img_prefix, data_cfg, pipeline, test_mode=test_mode) self.ann_info['flip_index'] = [ 5, 4, 3, 2, 1, 0, 6, 7, 8, 9, 15, 14, 13, 12, 11, 10 ] self.ann_info['use_different_joint_weights'] = False self.ann_info['joint_weights'] = np.array( [ 1.5, 1.2, 1., 1., 1.2, 1.5, 1., 1., 1., 1., 1.5, 1.2, 1., 1., 1.2, 1.5 ], dtype=np.float32).reshape((self.ann_info['num_joints'], 1)) # Adapted from COCO dataset self.sigmas = np.array([ .89, .83, 1.07, 1.07, .83, .89, .26, .26, .26, .26, .62, .72, 1.79, 1.79, .72, .62 ]) / 10.0 self.coco = COCO(ann_file) cats = [ cat['name'] for cat in self.coco.loadCats(self.coco.getCatIds()) ] self.classes = ['__background__'] + cats self.num_classes = len(self.classes) self._class_to_ind = dict(zip(self.classes, range(self.num_classes))) self._class_to_coco_ind = dict(zip(cats, self.coco.getCatIds())) self._coco_ind_to_class_ind = dict( (self._class_to_coco_ind[cls], self._class_to_ind[cls]) for cls in self.classes[1:]) self.img_ids = self.coco.getImgIds() if not test_mode: self.img_ids = [ img_id for img_id in self.img_ids if len(self.coco.getAnnIds(imgIds=img_id, iscrowd=None)) > 0 ] self.num_images = len(self.img_ids) self.id2name, self.name2id = self._get_mapping_id_name(self.coco.imgs) self.dataset_name = 'mhp' print(f'=> num_images: {self.num_images}') def _do_python_keypoint_eval(self, res_file): """Keypoint evaluation using COCOAPI.""" stats_names = [ 'AP', 'AP .5', 'AP .75', 'AP (M)', 'AP (L)', 'AR', 'AR .5', 'AR .75', 'AR (M)', 'AR (L)' ] with open(res_file, 'r') as file: res_json = json.load(file) if not res_json: info_str = list(zip(stats_names, [ 0, ] * len(stats_names))) return info_str coco_det = self.coco.loadRes(res_file) coco_eval = COCOeval(self.coco, coco_det, 'keypoints', self.sigmas, use_area=False) coco_eval.params.useSegm = None coco_eval.evaluate() coco_eval.accumulate() coco_eval.summarize() info_str = list(zip(stats_names, coco_eval.stats)) return info_str
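# joint_weights arrays like the one above only take effect when
# use_different_joint_weights is enabled; conceptually they rescale the
# per-joint loss so that hard joints (ankles, wrists) count more. A hedged
# sketch of the weighting, not mmpose's loss implementation:
import numpy as np

joint_weights = np.array([
    1.5, 1.2, 1., 1., 1.2, 1.5, 1., 1., 1., 1., 1.5, 1.2, 1., 1., 1.2, 1.5
], dtype=np.float32).reshape(16, 1)
per_joint_loss = np.random.rand(16, 1).astype(np.float32)
weighted_loss = (per_joint_loss * joint_weights).mean()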
def test_top_down_pipeline(): # test loading data_prefix = 'tests/data/coco/' ann_file = osp.join(data_prefix, 'test_coco.json') coco = COCO(ann_file) results = dict(image_file=osp.join(data_prefix, '000000000785.jpg')) transform = LoadImageFromFile() results = transform(copy.deepcopy(results)) assert results['image_file'] == osp.join(data_prefix, '000000000785.jpg') assert results['img'].shape == (425, 640, 3) image_size = (425, 640) ann_ids = coco.getAnnIds(785) ann = coco.anns[ann_ids[0]] num_joints = 17 joints_3d = np.zeros((num_joints, 3), dtype=np.float32) joints_3d_visible = np.zeros((num_joints, 3), dtype=np.float32) for ipt in range(num_joints): joints_3d[ipt, 0] = ann['keypoints'][ipt * 3 + 0] joints_3d[ipt, 1] = ann['keypoints'][ipt * 3 + 1] joints_3d[ipt, 2] = 0 t_vis = ann['keypoints'][ipt * 3 + 2] if t_vis > 1: t_vis = 1 joints_3d_visible[ipt, 0] = t_vis joints_3d_visible[ipt, 1] = t_vis joints_3d_visible[ipt, 2] = 0 center, scale = _box2cs(ann['bbox'][:4], image_size) results['joints_3d'] = joints_3d results['joints_3d_visible'] = joints_3d_visible results['center'] = center results['scale'] = scale results['bbox_score'] = 1 results['ann_info'] = {} results['ann_info']['flip_pairs'] = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12], [13, 14], [15, 16]] results['ann_info']['num_joints'] = num_joints results['ann_info']['upper_body_ids'] = (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10) results['ann_info']['lower_body_ids'] = (11, 12, 13, 14, 15, 16) results['ann_info']['use_different_joint_weights'] = False results['ann_info']['joint_weights'] = np.array([ 1., 1., 1., 1., 1., 1., 1., 1.2, 1.2, 1.5, 1.5, 1., 1., 1.2, 1.2, 1.5, 1.5 ], dtype=np.float32).reshape( (num_joints, 1)) results['ann_info']['image_size'] = np.array([192, 256]) results['ann_info']['heatmap_size'] = np.array([48, 64]) # test filp random_flip = TopDownRandomFlip(flip_prob=1.) results_flip = random_flip(copy.deepcopy(results)) assert _check_flip(results['img'], results_flip['img']) # test halfbody transform halfbody_transform = TopDownHalfBodyTransform(num_joints_half_body=8, prob_half_body=1.) 
results_halfbody = halfbody_transform(copy.deepcopy(results)) assert (results_halfbody['scale'] <= results['scale']).all() affine_transform = TopDownAffine() results['rotation'] = 90 results_affine = affine_transform(copy.deepcopy(results)) assert results_affine['img'].shape == (256, 192, 3) results = results_affine to_tensor = ToTensor() results_tensor = to_tensor(copy.deepcopy(results)) assert isinstance(results_tensor['img'], torch.Tensor) assert results_tensor['img'].shape == torch.Size([3, 256, 192]) norm_cfg = {} norm_cfg['mean'] = [0.485, 0.456, 0.406] norm_cfg['std'] = [0.229, 0.224, 0.225] normalize = NormalizeTensor(mean=norm_cfg['mean'], std=norm_cfg['std']) results_normalize = normalize(copy.deepcopy(results_tensor)) _check_normalize(results_tensor['img'].data.numpy(), results_normalize['img'].data.numpy(), norm_cfg) generate_target = TopDownGenerateTarget(sigma=2, unbiased_encoding=False) results_target = generate_target(copy.deepcopy(results_tensor)) assert 'target' in results_target assert results_target['target'].shape == ( num_joints, results['ann_info']['heatmap_size'][1], results['ann_info']['heatmap_size'][0]) assert 'target_weight' in results_target assert results_target['target_weight'].shape == (num_joints, 1) collect = Collect(keys=['img', 'target', 'target_weight'], meta_keys=[ 'image_file', 'center', 'scale', 'rotation', 'bbox_score', 'flip_pairs' ]) results_final = collect(results_target) assert 'img_size' not in results_final['img_metas'].data assert 'image_file' in results_final['img_metas'].data
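# A minimal sketch of the Gaussian target that TopDownGenerateTarget(sigma=2,
# unbiased_encoding=False) stamps per joint (rounded-center variant; the real
# transform additionally handles truncation at borders and target weights):
import numpy as np

def gaussian_heatmap(joint_xy, heatmap_size, sigma=2.0):
    w, h = heatmap_size
    xs = np.arange(w)[None, :]
    ys = np.arange(h)[:, None]
    mu_x, mu_y = np.round(joint_xy)
    return np.exp(-((xs - mu_x) ** 2 + (ys - mu_y) ** 2) / (2 * sigma ** 2))

hm = gaussian_heatmap((24.3, 31.7), heatmap_size=(48, 64))
assert hm.shape == (64, 48)            # (H, W), matching the asserts above
assert hm.argmax() == 32 * 48 + 24     # peak at the rounded joint location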
def main(): parser = ArgumentParser() parser.add_argument('pose_config', help='Config file for pose network') parser.add_argument('pose_checkpoint', help='Checkpoint file') parser.add_argument('--img-root', type=str, default='', help='Image root') parser.add_argument('--json-file', type=str, default='', help='Json file containing image info.') parser.add_argument( '--camera-param-file', type=str, default=None, help='Camera parameter file for converting 3D pose predictions from ' 'the pixel space to the camera space. If None, keypoints in pixel ' 'space will be visualized.') parser.add_argument( '--gt-joints-file', type=str, default=None, help='Optional argument. Ground truth 3D keypoint parameter file. ' 'If None, gt keypoints will not be shown and keypoints in pixel ' 'space will be visualized.') parser.add_argument( '--rebase-keypoint-height', action='store_true', help='Rebase the predicted 3D pose so its lowest keypoint has a ' 'height of 0 (landing on the ground). This is useful for ' 'visualization when the model does not predict the global position ' 'of the 3D pose.') parser.add_argument( '--show-ground-truth', action='store_true', help='If True, show ground truth keypoint if it is available.') parser.add_argument('--show', action='store_true', default=False, help='whether to show img') parser.add_argument('--out-img-root', type=str, default=None, help='Root of the output visualization images. ' 'Default not saving the visualization images.') parser.add_argument('--device', default='cuda:0', help='Device for inference') parser.add_argument('--kpt-thr', type=float, default=0.3, help='Keypoint score threshold') parser.add_argument('--radius', type=int, default=4, help='Keypoint radius for visualization') parser.add_argument('--thickness', type=int, default=1, help='Link thickness for visualization') args = parser.parse_args() assert args.show or (args.out_img_root != '') coco = COCO(args.json_file) # build the pose model from a config file and a checkpoint file pose_model = init_pose_model(args.pose_config, args.pose_checkpoint, device=args.device.lower()) dataset = pose_model.cfg.data['test']['type'] # load camera parameters camera_params = None if args.camera_param_file is not None: camera_params = mmcv.load(args.camera_param_file) # load ground truth joints parameters gt_joint_params = None if args.gt_joints_file is not None: gt_joint_params = mmcv.load(args.gt_joints_file) # load hand bounding boxes det_results_list = [] for image_id, image in coco.imgs.items(): image_name = osp.join(args.img_root, image['file_name']) ann_ids = coco.getAnnIds(image_id) det_results = [] capture_key = str(image['capture']) camera_key = image['camera'] frame_idx = image['frame_idx'] for ann_id in ann_ids: ann = coco.anns[ann_id] if camera_params is not None: camera_param = { key: camera_params[capture_key][key][camera_key] for key in camera_params[capture_key].keys() } camera_param = _transform_interhand_camera_param(camera_param) else: camera_param = None if gt_joint_params is not None: joint_param = gt_joint_params[capture_key][str(frame_idx)] gt_joint = np.concatenate([ np.array(joint_param['world_coord']), np.array(joint_param['joint_valid']) ], axis=-1) else: gt_joint = None det_result = { 'image_name': image_name, 'bbox': ann['bbox'], # bbox format is 'xywh' 'camera_param': camera_param, 'keypoints_3d_gt': gt_joint } det_results.append(det_result) det_results_list.append(det_results) for i, det_results in enumerate( mmcv.track_iter_progress(det_results_list)): image_name = 
det_results[0]['image_name'] pose_results = inference_interhand_3d_model(pose_model, image_name, det_results, dataset=dataset) # Post processing pose_results_vis = [] for idx, res in enumerate(pose_results): keypoints_3d = res['keypoints_3d'] # normalize kpt score if keypoints_3d[:, 3].max() > 1: keypoints_3d[:, 3] /= 255 # get 2D keypoints in pixel space res['keypoints'] = keypoints_3d[:, [0, 1, 3]] # For model-predicted keypoints, channel 0 and 1 are coordinates # in pixel space, and channel 2 is the depth (in mm) relative # to root joints. # If both camera parameter and absolute depth of root joints are # provided, we can transform keypoint to camera space for better # visualization. camera_param = res['camera_param'] keypoints_3d_gt = res['keypoints_3d_gt'] if camera_param is not None and keypoints_3d_gt is not None: # build camera model camera = SimpleCamera(camera_param) # transform gt joints from world space to camera space keypoints_3d_gt[:, :3] = camera.world_to_camera( keypoints_3d_gt[:, :3]) # transform relative depth to absolute depth keypoints_3d[:21, 2] += keypoints_3d_gt[20, 2] keypoints_3d[21:, 2] += keypoints_3d_gt[41, 2] # transform keypoints from pixel space to camera space keypoints_3d[:, :3] = camera.pixel_to_camera( keypoints_3d[:, :3]) # rotate the keypoint to make z-axis correspondent to height # for better visualization vis_R = np.array([[1, 0, 0], [0, 0, -1], [0, 1, 0]]) keypoints_3d[:, :3] = keypoints_3d[:, :3] @ vis_R if keypoints_3d_gt is not None: keypoints_3d_gt[:, :3] = keypoints_3d_gt[:, :3] @ vis_R # rebase height (z-axis) if args.rebase_keypoint_height: valid = keypoints_3d[..., 3] > 0 keypoints_3d[..., 2] -= np.min(keypoints_3d[valid, 2], axis=-1, keepdims=True) res['keypoints_3d'] = keypoints_3d res['keypoints_3d_gt'] = keypoints_3d_gt # Add title instance_id = res.get('track_id', idx) res['title'] = f'Prediction ({instance_id})' pose_results_vis.append(res) # Add ground truth if args.show_ground_truth: if keypoints_3d_gt is None: print('Fail to show ground truth. Please make sure that' ' gt-joints-file is provided.') else: gt = res.copy() if args.rebase_keypoint_height: valid = keypoints_3d_gt[..., 3] > 0 keypoints_3d_gt[..., 2] -= np.min(keypoints_3d_gt[valid, 2], axis=-1, keepdims=True) gt['keypoints_3d'] = keypoints_3d_gt gt['title'] = f'Ground truth ({instance_id})' pose_results_vis.append(gt) # Visualization if args.out_img_root is None: out_file = None else: os.makedirs(args.out_img_root, exist_ok=True) out_file = osp.join(args.out_img_root, f'vis_{i}.jpg') vis_3d_pose_result( pose_model, result=pose_results_vis, img=det_results[0]['image_name'], out_file=out_file, dataset=dataset, show=args.show, kpt_score_thr=args.kpt_thr, radius=args.radius, thickness=args.thickness, )
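# SimpleCamera.pixel_to_camera above back-projects pixels once absolute depth
# is known. Conceptually, for a pinhole camera with focal lengths (fx, fy)
# and principal point (cx, cy):
#   x = (u - cx) * z / fx,  y = (v - cy) * z / fy.
# A standalone sketch with made-up intrinsics:
import numpy as np

def pixel_to_camera(uvz, fx, fy, cx, cy):
    u, v, z = uvz.T
    return np.stack([(u - cx) * z / fx, (v - cy) * z / fy, z], axis=-1)

pts = np.array([[320., 240., 500.], [100., 50., 650.]])
cam = pixel_to_camera(pts, fx=600., fy=600., cx=320., cy=240.)
assert np.allclose(cam[0], [0., 0., 500.])  # principal point maps to the axis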
def split_train_val(work_dir, trainval_file, train_file, val_file, val_ann_num): """Split a combined train-val json file into separate training and validation files. :param work_dir: path to load the train-val json file and to save the split files. :param trainval_file: The input json file combining both train and val. :param train_file: The output json file for training. :param val_file: The output json file for validation. :param val_ann_num: the number of validation annotations. """ coco = COCO(os.path.join(work_dir, trainval_file)) img_list = list(coco.imgs.keys()) np.random.shuffle(img_list) count = 0 images_train = [] images_val = [] annotations_train = [] annotations_val = [] for img_id in img_list: ann_ids = coco.getAnnIds(img_id) if count + len(ann_ids) <= val_ann_num: # for validation count += len(ann_ids) images_val.append(coco.imgs[img_id]) for ann_id in ann_ids: annotations_val.append(coco.anns[ann_id]) else: images_train.append(coco.imgs[img_id]) for ann_id in ann_ids: annotations_train.append(coco.anns[ann_id]) if count == val_ann_num: print(f'We have found {count} annotations for validation.') else: warnings.warn( f'We only found {count} annotations, instead of {val_ann_num}.') cocotype_train = {} cocotype_val = {} keypoints_info, skeleton_info, category_info = get_anno_info() cocotype_train['info'] = {} cocotype_train['info'][ 'description'] = 'AnimalPose dataset Generated by MMPose Team' cocotype_train['info']['version'] = '1.0' cocotype_train['info']['year'] = time.strftime('%Y', time.localtime()) cocotype_train['info']['date_created'] = time.strftime( '%Y/%m/%d', time.localtime()) cocotype_train['images'] = images_train cocotype_train['annotations'] = annotations_train cocotype_train['categories'] = category_info json.dump(cocotype_train, open(os.path.join(work_dir, train_file), 'w'), indent=4) print('=========================================================') print('number of images:', len(images_train)) print('number of annotations:', len(annotations_train)) print(f'done {train_file}') cocotype_val['info'] = {} cocotype_val['info'][ 'description'] = 'AnimalPose dataset Generated by MMPose Team' cocotype_val['info']['version'] = '1.0' cocotype_val['info']['year'] = time.strftime('%Y', time.localtime()) cocotype_val['info']['date_created'] = time.strftime( '%Y/%m/%d', time.localtime()) cocotype_val['images'] = images_val cocotype_val['annotations'] = annotations_val cocotype_val['categories'] = category_info json.dump(cocotype_val, open(os.path.join(work_dir, val_file), 'w'), indent=4) print('=========================================================') print('number of images:', len(images_val)) print('number of annotations:', len(annotations_val)) print(f'done {val_file}')
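# Hypothetical invocation of split_train_val (the directory, file names and
# annotation count below are placeholders, not the dataset's real layout):
if __name__ == '__main__':
    split_train_val(
        work_dir='data/animalpose/annotations',
        trainval_file='animalpose_trainval.json',
        train_file='animalpose_train.json',
        val_file='animalpose_val.json',
        val_ann_num=1000)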
def test_bottomup_pipeline(): data_prefix = 'tests/data/coco/' ann_file = osp.join(data_prefix, 'test_coco.json') coco = COCO(ann_file) ann_info = {} ann_info['flip_pairs'] = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12], [13, 14], [15, 16]] ann_info['flip_index'] = [ 0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15 ] ann_info['use_different_joint_weights'] = False ann_info['joint_weights'] = np.array([ 1., 1., 1., 1., 1., 1., 1., 1.2, 1.2, 1.5, 1.5, 1., 1., 1.2, 1.2, 1.5, 1.5 ], dtype=np.float32).reshape((17, 1)) ann_info['image_size'] = np.array(512) ann_info['heatmap_size'] = np.array([128, 256]) ann_info['num_joints'] = 17 ann_info['num_scales'] = 2 ann_info['scale_aware_sigma'] = False ann_ids = coco.getAnnIds(785) anno = coco.loadAnns(ann_ids) mask = _get_mask(coco, anno, 785) anno = [ obj for obj in anno if obj['iscrowd'] == 0 or obj['num_keypoints'] > 0 ] joints = _get_joints(anno, ann_info, False) mask_list = [mask.copy() for _ in range(ann_info['num_scales'])] joints_list = [joints.copy() for _ in range(ann_info['num_scales'])] results = {} results['dataset'] = 'coco' results['image_file'] = osp.join(data_prefix, '000000000785.jpg') results['mask'] = mask_list results['joints'] = joints_list results['ann_info'] = ann_info transform = LoadImageFromFile() results = transform(copy.deepcopy(results)) assert results['img'].shape == (425, 640, 3) # test HorizontalFlip random_horizontal_flip = BottomUpRandomFlip(flip_prob=1.) results_horizontal_flip = random_horizontal_flip(copy.deepcopy(results)) assert _check_flip(results['img'], results_horizontal_flip['img']) random_horizontal_flip = BottomUpRandomFlip(flip_prob=0.) results_horizontal_flip = random_horizontal_flip(copy.deepcopy(results)) assert (results['img'] == results_horizontal_flip['img']).all() results_copy = copy.deepcopy(results) results_copy['mask'] = mask_list[0] with pytest.raises(AssertionError): results_horizontal_flip = random_horizontal_flip( copy.deepcopy(results_copy)) results_copy = copy.deepcopy(results) results_copy['joints'] = joints_list[0] with pytest.raises(AssertionError): results_horizontal_flip = random_horizontal_flip( copy.deepcopy(results_copy)) results_copy = copy.deepcopy(results) results_copy['joints'] = joints_list[:1] with pytest.raises(AssertionError): results_horizontal_flip = random_horizontal_flip( copy.deepcopy(results_copy)) results_copy = copy.deepcopy(results) results_copy['mask'] = mask_list[:1] with pytest.raises(AssertionError): results_horizontal_flip = random_horizontal_flip( copy.deepcopy(results_copy)) # test TopDownAffine random_affine_transform = BottomUpRandomAffine(30, [0.75, 1.5], 'short', 0) results_affine_transform = random_affine_transform(copy.deepcopy(results)) assert results_affine_transform['img'].shape == (512, 512, 3) random_affine_transform = BottomUpRandomAffine(30, [0.75, 1.5], 'short', 40) results_affine_transform = random_affine_transform(copy.deepcopy(results)) assert results_affine_transform['img'].shape == (512, 512, 3) results_copy = copy.deepcopy(results) results_copy['ann_info']['scale_aware_sigma'] = True joints = _get_joints(anno, results_copy['ann_info'], False) results_copy['joints'] = \ [joints.copy() for _ in range(results_copy['ann_info']['num_scales'])] results_affine_transform = random_affine_transform(results_copy) assert results_affine_transform['img'].shape == (512, 512, 3) results_copy = copy.deepcopy(results) results_copy['mask'] = mask_list[0] with pytest.raises(AssertionError): results_horizontal_flip = random_affine_transform( 
copy.deepcopy(results_copy)) results_copy = copy.deepcopy(results) results_copy['joints'] = joints_list[0] with pytest.raises(AssertionError): results_horizontal_flip = random_affine_transform( copy.deepcopy(results_copy)) results_copy = copy.deepcopy(results) results_copy['joints'] = joints_list[:1] with pytest.raises(AssertionError): results_horizontal_flip = random_affine_transform( copy.deepcopy(results_copy)) results_copy = copy.deepcopy(results) results_copy['mask'] = mask_list[:1] with pytest.raises(AssertionError): results_horizontal_flip = random_affine_transform( copy.deepcopy(results_copy)) random_affine_transform = BottomUpRandomAffine(30, [0.75, 1.5], 'long', 40) results_affine_transform = random_affine_transform(copy.deepcopy(results)) assert results_affine_transform['img'].shape == (512, 512, 3) with pytest.raises(ValueError): random_affine_transform = BottomUpRandomAffine(30, [0.75, 1.5], 'short-long', 40) results_affine_transform = random_affine_transform( copy.deepcopy(results)) # test BottomUpGenerateTarget generate_multi_target = BottomUpGenerateTarget(2, 30) results_generate_multi_target = generate_multi_target( copy.deepcopy(results)) assert 'targets' in results_generate_multi_target assert len(results_generate_multi_target['targets'] ) == results['ann_info']['num_scales'] # test BottomUpGetImgSize when W > H get_multi_scale_size = BottomUpGetImgSize([1]) results_get_multi_scale_size = get_multi_scale_size(copy.deepcopy(results)) assert 'test_scale_factor' in results_get_multi_scale_size['ann_info'] assert 'base_size' in results_get_multi_scale_size['ann_info'] assert 'center' in results_get_multi_scale_size['ann_info'] assert 'scale' in results_get_multi_scale_size['ann_info'] assert results_get_multi_scale_size['ann_info']['base_size'][1] == 512 # test BottomUpResizeAlign transforms = [ dict(type='ToTensor'), dict( type='NormalizeTensor', mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), ] resize_align_multi_scale = BottomUpResizeAlign(transforms=transforms) results_copy = copy.deepcopy(results_get_multi_scale_size) results_resize_align_multi_scale = resize_align_multi_scale(results_copy) assert 'aug_data' in results_resize_align_multi_scale['ann_info'] # test BottomUpGetImgSize when W < H results_copy = copy.deepcopy(results) results_copy['img'] = np.random.rand(640, 425, 3) results_get_multi_scale_size = get_multi_scale_size(results_copy) assert results_get_multi_scale_size['ann_info']['base_size'][0] == 512
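# Minimal skeleton of the `results` dict that the bottom-up transforms above
# consume: per-scale lists of masks and joints plus the ann_info metadata
# (values mirror the test fixture, with a blank stand-in image):
import numpy as np

num_scales, num_joints = 2, 17
ann_info = dict(
    image_size=np.array(512),
    heatmap_size=np.array([128, 256]),
    num_joints=num_joints,
    num_scales=num_scales,
    scale_aware_sigma=False)
results = dict(
    dataset='coco',
    img=np.zeros((425, 640, 3), dtype=np.uint8),
    mask=[np.ones((425, 640), dtype=bool) for _ in range(num_scales)],
    joints=[np.zeros((1, num_joints, 3), np.float32) for _ in range(num_scales)],
    ann_info=ann_info)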
class TopDownCocoDataset(TopDownBaseDataset): """CocoDataset dataset for top-down pose estimation. `Microsoft COCO: Common Objects in Context' ECCV'2014 More details can be found in the `paper <https://arxiv.org/abs/1405.0312>`__ . The dataset loads raw features and apply specified transforms to return a dict containing the image tensors and other information. COCO keypoint indexes:: 0: 'nose', 1: 'left_eye', 2: 'right_eye', 3: 'left_ear', 4: 'right_ear', 5: 'left_shoulder', 6: 'right_shoulder', 7: 'left_elbow', 8: 'right_elbow', 9: 'left_wrist', 10: 'right_wrist', 11: 'left_hip', 12: 'right_hip', 13: 'left_knee', 14: 'right_knee', 15: 'left_ankle', 16: 'right_ankle' Args: ann_file (str): Path to the annotation file. img_prefix (str): Path to a directory where images are held. Default: None. data_cfg (dict): config pipeline (list[dict | callable]): A sequence of data transforms. test_mode (bool): Store True when building test or validation dataset. Default: False. """ def __init__(self, ann_file, img_prefix, data_cfg, pipeline, test_mode=False): super().__init__(ann_file, img_prefix, data_cfg, pipeline, test_mode=test_mode) self.use_gt_bbox = data_cfg['use_gt_bbox'] self.bbox_file = data_cfg['bbox_file'] self.det_bbox_thr = data_cfg.get('det_bbox_thr', 0.0) if 'image_thr' in data_cfg: warnings.warn( 'image_thr is deprecated, ' 'please use det_bbox_thr instead', DeprecationWarning) self.det_bbox_thr = data_cfg['image_thr'] self.use_nms = data_cfg.get('use_nms', True) self.soft_nms = data_cfg['soft_nms'] self.nms_thr = data_cfg['nms_thr'] self.oks_thr = data_cfg['oks_thr'] self.vis_thr = data_cfg['vis_thr'] self.ann_info['flip_pairs'] = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12], [13, 14], [15, 16], [18, 19], [22, 23]] self.ann_info['upper_body_ids'] = (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 17, 18, 19, 20, 21) self.ann_info['lower_body_ids'] = (11, 12, 13, 14, 15, 16, 23, 24) self.ann_info['use_different_joint_weights'] = False self.ann_info['joint_weights'] = np.array( [ 1., 1., 1., 1., 1., 1., 1., 1.2, 1.2, 1.5, 1.5, 1., 1., 1.2, 1.2, 1.5, 1.5, 1., 1.6, 1.6, 1., 1., 1.6, 1.6 ], dtype=np.float32).reshape((self.ann_info['num_joints'], 1)) # 'https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/' # 'pycocotools/cocoeval.py#L523' self.sigmas = np.array([ .26, .25, .25, .35, .35, .79, .79, .72, .72, .62, .62, 1.07, 1.07, .87, .87, .89, .89, .28, .61, .61, .30, .32, .90, .90 ]) / 10.0 self.coco = COCO(ann_file) cats = [ cat['name'] for cat in self.coco.loadCats(self.coco.getCatIds()) ] self.classes = ['__background__'] + cats self.num_classes = len(self.classes) self._class_to_ind = dict(zip(self.classes, range(self.num_classes))) self._class_to_coco_ind = dict(zip(cats, self.coco.getCatIds())) self._coco_ind_to_class_ind = dict( (self._class_to_coco_ind[cls], self._class_to_ind[cls]) for cls in self.classes[1:]) self.img_ids = self.coco.getImgIds() self.num_images = len(self.img_ids) self.id2name, self.name2id = self._get_mapping_id_name(self.coco.imgs) self.dataset_name = 'coco' self.db = self._get_db() print(f'=> num_images: {self.num_images}') print(f'=> load {len(self.db)} samples') @staticmethod def _get_mapping_id_name(imgs): """ Args: imgs (dict): dict of image info. Returns: tuple: Image name & id mapping dicts. - id2name (dict): Mapping image id to name. - name2id (dict): Mapping image name to id. 
""" id2name = {} name2id = {} for image_id, image in imgs.items(): file_name = image['file_name'] id2name[image_id] = file_name name2id[file_name] = image_id return id2name, name2id def _get_db(self): """Load dataset.""" if (not self.test_mode) or self.use_gt_bbox: # use ground truth bbox gt_db = self._load_coco_keypoint_annotations() else: # use bbox from detection gt_db = self._load_coco_person_detection_results() return gt_db def _load_coco_keypoint_annotations(self): """Ground truth bbox and keypoints.""" gt_db = [] for img_id in self.img_ids: gt_db.extend(self._load_coco_keypoint_annotation_kernel(img_id)) return gt_db def _load_coco_keypoint_annotation_kernel(self, img_id): """load annotation from COCOAPI. Note: bbox:[x1, y1, w, h] Args: img_id: coco image id Returns: dict: db entry """ img_ann = self.coco.loadImgs(img_id)[0] width = img_ann['width'] height = img_ann['height'] num_joints = self.ann_info['num_joints'] ann_ids = self.coco.getAnnIds(imgIds=img_id, iscrowd=False) objs = self.coco.loadAnns(ann_ids) # sanitize bboxes valid_objs = [] for obj in objs: if 'bbox' not in obj: continue x, y, w, h = obj['bbox'] x1 = max(0, x) y1 = max(0, y) x2 = min(width - 1, x1 + max(0, w - 1)) y2 = min(height - 1, y1 + max(0, h - 1)) if ('area' not in obj or obj['area'] > 0) and x2 > x1 and y2 > y1: obj['clean_bbox'] = [x1, y1, x2 - x1, y2 - y1] valid_objs.append(obj) objs = valid_objs bbox_id = 0 rec = [] for obj in objs: if 'keypoints' not in obj: continue if max(obj['keypoints']) == 0: continue if 'num_keypoints' in obj and obj['num_keypoints'] == 0: continue joints_3d = np.zeros((num_joints, 3), dtype=np.float32) joints_3d_visible = np.zeros((num_joints, 3), dtype=np.float32) keypoints = np.array(obj['keypoints']).reshape(-1, 3) joints_3d[:, :2] = keypoints[:, :2] joints_3d_visible[:, :2] = np.minimum(1, keypoints[:, 2:3]) center, scale = self._xywh2cs(*obj['clean_bbox'][:4]) image_file = os.path.join(self.img_prefix, self.id2name[img_id]) rec.append({ 'image_file': image_file, 'center': center, 'scale': scale, 'bbox': obj['clean_bbox'][:4], 'rotation': 0, 'joints_3d': joints_3d, 'joints_3d_visible': joints_3d_visible, 'dataset': self.dataset_name, 'bbox_score': 1, 'bbox_id': bbox_id }) bbox_id = bbox_id + 1 return rec def _xywh2cs(self, x, y, w, h): """This encodes bbox(x,y,w,w) into (center, scale) Args: x, y, w, h Returns: tuple: A tuple containing center and scale. - center (np.ndarray[float32](2,)): center of the bbox (x, y). - scale (np.ndarray[float32](2,)): scale of the bbox w & h. """ aspect_ratio = self.ann_info['image_size'][0] / self.ann_info[ 'image_size'][1] center = np.array([x + w * 0.5, y + h * 0.5], dtype=np.float32) if (not self.test_mode) and np.random.rand() < 0.3: center += 0.4 * (np.random.rand(2) - 0.5) * [w, h] if w > aspect_ratio * h: h = w * 1.0 / aspect_ratio elif w < aspect_ratio * h: w = h * aspect_ratio # pixel std is 200.0 scale = np.array([w / 200.0, h / 200.0], dtype=np.float32) # padding to include proper amount of context scale = scale * 1.25 return center, scale def _load_coco_person_detection_results(self): """Load coco person detection results.""" num_joints = self.ann_info['num_joints'] all_boxes = None with open(self.bbox_file, 'r') as f: all_boxes = json.load(f) if not all_boxes: raise ValueError('=> Load %s fail!' 
                             % self.bbox_file)

        print(f'=> Total boxes: {len(all_boxes)}')

        kpt_db = []
        bbox_id = 0
        for det_res in all_boxes:
            if det_res['category_id'] != 1:
                continue

            # resolve the image file name from the detection's image id
            image_file = os.path.join(self.img_prefix,
                                      self.id2name[det_res['image_id']])
            box = det_res['bbox']
            score = det_res['score']

            if score < self.det_bbox_thr:
                continue

            center, scale = self._xywh2cs(*box[:4])
            joints_3d = np.zeros((num_joints, 3), dtype=np.float32)
            joints_3d_visible = np.ones((num_joints, 3), dtype=np.float32)
            kpt_db.append({
                'image_file': image_file,
                'center': center,
                'scale': scale,
                'rotation': 0,
                'bbox': box[:4],
                'bbox_score': score,
                'dataset': self.dataset_name,
                'joints_3d': joints_3d,
                'joints_3d_visible': joints_3d_visible,
                'bbox_id': bbox_id
            })
            bbox_id = bbox_id + 1
        print(f'=> Total boxes after filter '
              f'low score@{self.det_bbox_thr}: {bbox_id}')

        return kpt_db

    def evaluate(self, outputs, res_folder, metric='mAP', **kwargs):
        """Evaluate coco keypoint results.

        The pose prediction results will be saved in
        `${res_folder}/result_keypoints.json`.

        Note:
            batch_size: N
            num_keypoints: K
            heatmap height: H
            heatmap width: W

        Args:
            outputs (list(dict))
                :preds (np.ndarray[N,K,3]): The first two dimensions are
                    coordinates, score is the third dimension of the array.
                :boxes (np.ndarray[N,6]): [center[0], center[1], scale[0],
                    scale[1], area, score]
                :image_paths (list[str]): For example, ['data/coco/val2017
                    /000000393226.jpg']
                :heatmap (np.ndarray[N, K, H, W]): model output heatmap
                :bbox_id (list(int)).
            res_folder (str): Path of directory to save the results.
            metric (str | list[str]): Metric to be performed. Defaults: 'mAP'.

        Returns:
            dict: Evaluation results for evaluation metric.
        """
        metrics = metric if isinstance(metric, list) else [metric]
        allowed_metrics = ['mAP']
        for metric in metrics:
            if metric not in allowed_metrics:
                raise KeyError(f'metric {metric} is not supported')

        res_file = os.path.join(res_folder, 'result_keypoints.json')

        kpts = defaultdict(list)

        for output in outputs:
            preds = output['preds']
            boxes = output['boxes']
            image_paths = output['image_paths']
            bbox_ids = output['bbox_ids']

            batch_size = len(image_paths)
            for i in range(batch_size):
                image_id = self.name2id[image_paths[i][len(self.img_prefix):]]
                kpts[image_id].append({
                    'keypoints': preds[i],
                    'center': boxes[i][0:2],
                    'scale': boxes[i][2:4],
                    'area': boxes[i][4],
                    'score': boxes[i][5],
                    'image_id': image_id,
                    'bbox_id': bbox_ids[i]
                })
        kpts = self._sort_and_unique_bboxes(kpts)

        # rescoring and oks nms
        num_joints = self.ann_info['num_joints']
        vis_thr = self.vis_thr
        oks_thr = self.oks_thr
        valid_kpts = []
        for image_id in kpts.keys():
            img_kpts = kpts[image_id]
            for n_p in img_kpts:
                box_score = n_p['score']
                kpt_score = 0
                valid_num = 0
                for n_jt in range(0, num_joints):
                    t_s = n_p['keypoints'][n_jt][2]
                    if t_s > vis_thr:
                        kpt_score = kpt_score + t_s
                        valid_num = valid_num + 1
                if valid_num != 0:
                    kpt_score = kpt_score / valid_num
                # rescoring
                n_p['score'] = kpt_score * box_score
            if self.use_nms:
                nms = soft_oks_nms if self.soft_nms else oks_nms
                keep = nms(list(img_kpts), oks_thr, sigmas=self.sigmas)
                valid_kpts.append([img_kpts[_keep] for _keep in keep])
            else:
                valid_kpts.append(img_kpts)

        self._write_coco_keypoint_results(valid_kpts, res_file)

        info_str = self._do_python_keypoint_eval(res_file)
        name_value = OrderedDict(info_str)

        return name_value

    def _write_coco_keypoint_results(self, keypoints, res_file):
        """Write results into a json file."""
        data_pack = [{
            'cat_id': self._class_to_coco_ind[cls],
            'cls_ind': cls_ind,
            'cls': cls,
            'ann_type': 'keypoints',
            'keypoints':
keypoints } for cls_ind, cls in enumerate(self.classes) if not cls == '__background__'] results = self._coco_keypoint_results_one_category_kernel(data_pack[0]) with open(res_file, 'w') as f: json.dump(results, f, sort_keys=True, indent=4) def _coco_keypoint_results_one_category_kernel(self, data_pack): """Get coco keypoint results.""" cat_id = data_pack['cat_id'] keypoints = data_pack['keypoints'] cat_results = [] for img_kpts in keypoints: if len(img_kpts) == 0: continue _key_points = np.array( [img_kpt['keypoints'] for img_kpt in img_kpts]) key_points = _key_points.reshape(-1, self.ann_info['num_joints'] * 3) result = [{ 'image_id': img_kpt['image_id'], 'category_id': cat_id, 'keypoints': key_point.tolist(), 'score': float(img_kpt['score']), 'center': img_kpt['center'].tolist(), 'scale': img_kpt['scale'].tolist() } for img_kpt, key_point in zip(img_kpts, key_points)] cat_results.extend(result) return cat_results def _do_python_keypoint_eval(self, res_file): """Keypoint evaluation using COCOAPI.""" coco_det = self.coco.loadRes(res_file) coco_eval = COCOeval(self.coco, coco_det, 'keypoints', self.sigmas) coco_eval.params.useSegm = None coco_eval.evaluate() coco_eval.accumulate() coco_eval.summarize() stats_names = [ 'AP', 'AP .5', 'AP .75', 'AP (M)', 'AP (L)', 'AR', 'AR .5', 'AR .75', 'AR (M)', 'AR (L)' ] info_str = list(zip(stats_names, coco_eval.stats)) return info_str def _sort_and_unique_bboxes(self, kpts, key='bbox_id'): """sort kpts and remove the repeated ones.""" for img_id, persons in kpts.items(): num = len(persons) kpts[img_id] = sorted(kpts[img_id], key=lambda x: x[key]) for i in range(num - 1, 0, -1): if kpts[img_id][i][key] == kpts[img_id][i - 1][key]: del kpts[img_id][i] return kpts
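# A minimal construction sketch for TopDownCocoDataset (paths are
# placeholders; data_cfg carries the keys read in __init__ above, and
# num_joints must match the 24-entry joint_weights/sigmas tables hard-coded
# there so that the reshape succeeds -- any further keys consumed by the base
# class are assumptions):
example_data_cfg = dict(
    image_size=[192, 256],
    heatmap_size=[48, 64],
    num_joints=24,
    use_gt_bbox=True,
    bbox_file='',
    det_bbox_thr=0.0,
    use_nms=True,
    soft_nms=False,
    nms_thr=1.0,
    oks_thr=0.9,
    vis_thr=0.2,
)
# dataset = TopDownCocoDataset(
#     ann_file='data/coco/annotations/person_keypoints_val2017.json',
#     img_prefix='data/coco/val2017/',
#     data_cfg=example_data_cfg,
#     pipeline=[],
#     test_mode=True)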
class TopDownCowaCarDataset(TopDownCocoDataset): """CowaCarDataset dataset for top-down pose estimation. The dataset loads raw features and apply specified transforms to return a dict containing the image tensors and other information. CowaCar keypoint indexes:: 0: 'left_back', 1: 'right_back', 2: 'left_front', 3: 'right_front' Args: ann_file (str): Path to the annotation file. img_prefix (str): Path to a directory where images are held. Default: None. data_cfg (dict): config pipeline (list[dict | callable]): A sequence of data transforms. test_mode (bool): Store True when building test or validation dataset. Default: False. """ def __init__(self, ann_file, img_prefix, data_cfg, pipeline, test_mode=False): super(TopDownCocoDataset, self).__init__(ann_file, img_prefix, data_cfg, pipeline, test_mode=test_mode) self.use_gt_bbox = data_cfg['use_gt_bbox'] self.bbox_file = data_cfg['bbox_file'] self.image_thr = data_cfg['image_thr'] self.soft_nms = data_cfg['soft_nms'] self.nms_thr = data_cfg['nms_thr'] self.oks_thr = data_cfg['oks_thr'] self.vis_thr = data_cfg['vis_thr'] self.bbox_thr = data_cfg['bbox_thr'] self.ann_info['flip_pairs'] = [[0, 1], [2, 3]] self.ann_info['joint_to_joint'] = {} for pair in self.ann_info['flip_pairs']: self.ann_info['joint_to_joint'][pair[0]] = pair[1] self.ann_info['joint_to_joint'][pair[1]] = pair[0] self.ann_info['upper_body_ids'] = (0, 2) self.ann_info['lower_body_ids'] = (1, 3) self.ann_info['use_different_joint_weights'] = False self.ann_info['joint_weights'] = np.array( [1., 1., 1., 1.], dtype=np.float32).reshape( (self.ann_info['num_joints'], 1)) self.sigmas = np.array([1., 1., 1., 1.]) / 10.0 self.coco = COCO(ann_file) cats = [ cat['name'] for cat in self.coco.loadCats(self.coco.getCatIds()) ] self.classes = ['__background__'] + cats self.num_classes = len(self.classes) self._class_to_ind = dict(zip(self.classes, range(self.num_classes))) self._class_to_coco_ind = dict(zip(cats, self.coco.getCatIds())) self._coco_ind_to_class_ind = dict( (self._class_to_coco_ind[cls], self._class_to_ind[cls]) for cls in self.classes[1:]) self.img_ids = self.coco.getImgIds() self.num_images = len(self.img_ids) self.id2name, self.name2id = self._get_mapping_id_name(self.coco.imgs) self.dataset_name = 'cowacar' self.db = self._get_db() print(f'=> num_images: {self.num_images}') print(f'=> load {len(self.db)} samples') def _load_coco_keypoint_annotation_kernel(self, img_id): """load annotation from COCOAPI. 
Note: bbox:[x1, y1, w, h] Args: img_id: coco image id Returns: dict: db entry """ img_ann = self.coco.loadImgs(img_id)[0] width = img_ann['width'] height = img_ann['height'] num_joints = self.ann_info['num_joints'] ann_ids = self.coco.getAnnIds(imgIds=img_id, iscrowd=False) objs = self.coco.loadAnns(ann_ids) # sanitize bboxes valid_objs = [] for obj in objs: x, y, w, h = obj['bbox'] x1 = max(0, x) y1 = max(0, y) x2 = min(width - 1, x1 + max(0, w - 1)) y2 = min(height - 1, y1 + max(0, h - 1)) if ('area' not in obj or obj['area'] > 0) and x2 > x1 and y2 > y1: obj['clean_bbox'] = [x1, y1, x2 - x1, y2 - y1] valid_objs.append(obj) objs = valid_objs rec = [] for obj in objs: if max(obj['keypoints']) == 0: continue joints_3d = np.zeros((num_joints, 3), dtype=np.float32) joints_3d_visible = np.zeros((num_joints, 3), dtype=np.float32) keypoints = np.array(obj['keypoints']).reshape(-1, 3) joints_3d[:, :2] = keypoints[:, :2] joints_3d_visible[:, :2] = np.minimum( 1, keypoints[:, 2:3] - int(self.vis_thr)) center, scale = self._xywh2cs(*obj['clean_bbox'][:4]) image_file = os.path.join(self.img_prefix, self.id2name[img_id]) rec.append({ 'image_file': image_file, 'center': center, 'scale': scale, 'rotation': 0, 'joints_3d': joints_3d, 'joints_3d_visible': joints_3d_visible, 'dataset': self.dataset_name, 'bbox_score': 1 }) return rec def _xywh2cs(self, x, y, w, h): """This encodes bbox(x,y,w,w) into (center, scale) Args: x, y, w, h Returns: tuple: A tuple containing center and scale. - center (np.ndarray[float32](2,)): center of the bbox (x, y). - scale (np.ndarray[float32](2,)): scale of the bbox w & h. """ aspect_ratio = self.ann_info['image_size'][0] / self.ann_info[ 'image_size'][1] center = np.array([x + w * 0.5, y + h * 0.5], dtype=np.float32) if (not self.test_mode) and np.random.rand() < 0.3: center += 0.4 * (np.random.rand(2) - 0.5) * [w, h] if w > aspect_ratio * h: h = w * 1.0 / aspect_ratio elif w < aspect_ratio * h: w = h * aspect_ratio # pixel std is 200.0 scale = np.array([w / 200.0, h / 200.0], dtype=np.float32) scale = scale * 1.1 return center, scale
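# Worked example of the (center, scale) encoding in _xywh2cs above, assuming
# ann_info['image_size'] = [256, 256] (aspect_ratio = 1.0) and test_mode=True
# (so no random center shift):
#   bbox (x, y, w, h) = (10, 20, 100, 50)
#   center = (10 + 100 * 0.5, 20 + 50 * 0.5) = (60.0, 45.0)
#   w > aspect_ratio * h, so h becomes 100 / 1.0 = 100
#   scale = (100 / 200, 100 / 200) * 1.1 = (0.55, 0.55)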
class AnimalPoseDataset(AnimalBaseDataset): """Animal-Pose dataset for animal pose estimation. `Cross-domain Adaptation For Animal Pose Estimation’ ICCV'2019 More details can be found in the `paper <https://arxiv.org/abs/1908.05806>`__ . The dataset loads raw features and apply specified transforms to return a dict containing the image tensors and other information. Animal-Pose keypoint indexes:: 0: 'L_Eye', 1: 'R_Eye', 2: 'L_EarBase', 3: 'R_EarBase', 4: 'Nose', 5: 'Throat', 6: 'TailBase', 7: 'Withers', 8: 'L_F_Elbow', 9: 'R_F_Elbow', 10: 'L_B_Elbow', 11: 'R_B_Elbow', 12: 'L_F_Knee', 13: 'R_F_Knee', 14: 'L_B_Knee', 15: 'R_B_Knee', 16: 'L_F_Paw', 17: 'R_F_Paw', 18: 'L_B_Paw', 19: 'R_B_Paw' Args: ann_file (str): Path to the annotation file. img_prefix (str): Path to a directory where images are held. Default: None. data_cfg (dict): config pipeline (list[dict | callable]): A sequence of data transforms. test_mode (bool): Store True when building test or validation dataset. Default: False. """ def __init__(self, ann_file, img_prefix, data_cfg, pipeline, test_mode=False): super().__init__(ann_file, img_prefix, data_cfg, pipeline, test_mode=test_mode) self.use_gt_bbox = data_cfg['use_gt_bbox'] self.bbox_file = data_cfg['bbox_file'] self.det_bbox_thr = data_cfg.get('det_bbox_thr', 0.0) if 'image_thr' in data_cfg: warnings.warn( 'image_thr is deprecated, ' 'please use det_bbox_thr instead', DeprecationWarning) self.det_bbox_thr = data_cfg['image_thr'] self.use_nms = data_cfg.get('use_nms', True) self.soft_nms = data_cfg['soft_nms'] self.nms_thr = data_cfg['nms_thr'] self.oks_thr = data_cfg['oks_thr'] self.vis_thr = data_cfg['vis_thr'] self.ann_info['flip_pairs'] = [[0, 1], [2, 3], [8, 9], [10, 11], [12, 13], [14, 15], [16, 17], [18, 19]] self.ann_info['upper_body_ids'] = (0, 1, 2, 3, 4, 5, 7, 8, 9, 12, 13, 16, 17) self.ann_info['lower_body_ids'] = (6, 10, 11, 14, 15, 18, 19) self.ann_info['use_different_joint_weights'] = False self.ann_info['joint_weights'] = np.array( [ 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.2, 1.2, 1.2, 1.2, 1.5, 1.5, 1.5, 1.5 ], dtype=np.float32).reshape((self.ann_info['num_joints'], 1)) # Note: The original paper did not provide enough information about # the sigmas. 
We modified from 'https://github.com/cocodataset/' # 'cocoapi/blob/master/PythonAPI/pycocotools/cocoeval.py#L523' self.sigmas = np.array([ .25, .25, .26, .35, .35, 1.0, 1.0, 1.0, 1.07, 1.07, 1.07, 1.07, .87, .87, .87, .87, .89, .89, .89, .89 ]) / 10.0 self.coco = COCO(ann_file) cats = [ cat['name'] for cat in self.coco.loadCats(self.coco.getCatIds()) ] self.classes = ['__background__'] + cats self.num_classes = len(self.classes) self._class_to_ind = dict(zip(self.classes, range(self.num_classes))) self._class_to_coco_ind = dict(zip(cats, self.coco.getCatIds())) self._coco_ind_to_class_ind = dict( (self._class_to_coco_ind[cls], self._class_to_ind[cls]) for cls in self.classes[1:]) self.img_ids = self.coco.getImgIds() self.num_images = len(self.img_ids) self.id2name, self.name2id = self._get_mapping_id_name(self.coco.imgs) self.dataset_name = 'animalpose' self.db = self._get_db() print(f'=> num_images: {self.num_images}') print(f'=> load {len(self.db)} samples') def _get_db(self): """Load dataset.""" assert self.use_gt_bbox gt_db = self._load_coco_keypoint_annotations() return gt_db def _load_coco_keypoint_annotations(self): """Ground truth bbox and keypoints.""" gt_db = [] for img_id in self.img_ids: gt_db.extend(self._load_coco_keypoint_annotation_kernel(img_id)) return gt_db def _load_coco_keypoint_annotation_kernel(self, img_id): """load annotation from COCOAPI. Note: bbox:[x1, y1, w, h] Args: img_id: coco image id Returns: dict: db entry """ img_ann = self.coco.loadImgs(img_id)[0] width = img_ann['width'] height = img_ann['height'] num_joints = self.ann_info['num_joints'] ann_ids = self.coco.getAnnIds(imgIds=img_id, iscrowd=False) objs = self.coco.loadAnns(ann_ids) # sanitize bboxes valid_objs = [] for obj in objs: if 'bbox' not in obj: continue x, y, w, h = obj['bbox'] x1 = max(0, x) y1 = max(0, y) x2 = min(width - 1, x1 + max(0, w - 1)) y2 = min(height - 1, y1 + max(0, h - 1)) if ('area' not in obj or obj['area'] > 0) and x2 > x1 and y2 > y1: obj['clean_bbox'] = [x1, y1, x2 - x1, y2 - y1] valid_objs.append(obj) objs = valid_objs bbox_id = 0 rec = [] for obj in objs: if 'keypoints' not in obj: continue if max(obj['keypoints']) == 0: continue if 'num_keypoints' in obj and obj['num_keypoints'] == 0: continue joints_3d = np.zeros((num_joints, 3), dtype=np.float32) joints_3d_visible = np.zeros((num_joints, 3), dtype=np.float32) keypoints = np.array(obj['keypoints']).reshape(-1, 3) joints_3d[:, :2] = keypoints[:, :2] joints_3d_visible[:, :2] = np.minimum(1, keypoints[:, 2:3]) center, scale = self._xywh2cs(*obj['clean_bbox'][:4]) image_file = os.path.join(self.img_prefix, self.id2name[img_id]) rec.append({ 'image_file': image_file, 'center': center, 'scale': scale, 'bbox': obj['clean_bbox'][:4], 'rotation': 0, 'joints_3d': joints_3d, 'joints_3d_visible': joints_3d_visible, 'dataset': self.dataset_name, 'bbox_score': 1, 'bbox_id': bbox_id }) bbox_id = bbox_id + 1 return rec def evaluate(self, outputs, res_folder, metric='mAP', **kwargs): """Evaluate coco keypoint results. The pose prediction results will be saved in `${res_folder}/result_keypoints.json`. Note: batch_size: N num_keypoints: K heatmap height: H heatmap width: W Args: outputs (list(dict)) :preds (np.ndarray[N,K,3]): The first two dimensions are coordinates, score is the third dimension of the array. 
:boxes (np.ndarray[N,6]): [center[0], center[1], scale[0] , scale[1],area, score] :image_paths (list[str]): For example, ['data/coco/val2017 /000000393226.jpg'] :heatmap (np.ndarray[N, K, H, W]): model output heatmap :bbox_id (list(int)). res_folder (str): Path of directory to save the results. metric (str | list[str]): Metric to be performed. Defaults: 'mAP'. Returns: dict: Evaluation results for evaluation metric. """ metrics = metric if isinstance(metric, list) else [metric] allowed_metrics = ['mAP'] for metric in metrics: if metric not in allowed_metrics: raise KeyError(f'metric {metric} is not supported') res_file = os.path.join(res_folder, 'result_keypoints.json') kpts = defaultdict(list) for output in outputs: preds = output['preds'] boxes = output['boxes'] image_paths = output['image_paths'] bbox_ids = output['bbox_ids'] batch_size = len(image_paths) for i in range(batch_size): image_id = self.name2id[image_paths[i][len(self.img_prefix):]] kpts[image_id].append({ 'keypoints': preds[i], 'center': boxes[i][0:2], 'scale': boxes[i][2:4], 'area': boxes[i][4], 'score': boxes[i][5], 'image_id': image_id, 'bbox_id': bbox_ids[i] }) kpts = self._sort_and_unique_bboxes(kpts) # rescoring and oks nms num_joints = self.ann_info['num_joints'] vis_thr = self.vis_thr oks_thr = self.oks_thr valid_kpts = [] for image_id in kpts.keys(): img_kpts = kpts[image_id] for n_p in img_kpts: box_score = n_p['score'] kpt_score = 0 valid_num = 0 for n_jt in range(0, num_joints): t_s = n_p['keypoints'][n_jt][2] if t_s > vis_thr: kpt_score = kpt_score + t_s valid_num = valid_num + 1 if valid_num != 0: kpt_score = kpt_score / valid_num # rescoring n_p['score'] = kpt_score * box_score if self.use_nms: nms = soft_oks_nms if self.soft_nms else oks_nms keep = nms(list(img_kpts), oks_thr, sigmas=self.sigmas) valid_kpts.append([img_kpts[_keep] for _keep in keep]) else: valid_kpts.append(img_kpts) self._write_coco_keypoint_results(valid_kpts, res_file) info_str = self._do_python_keypoint_eval(res_file) name_value = OrderedDict(info_str) return name_value def _write_coco_keypoint_results(self, keypoints, res_file): """Write results into a json file.""" data_pack = [{ 'cat_id': self._class_to_coco_ind[cls], 'cls_ind': cls_ind, 'cls': cls, 'ann_type': 'keypoints', 'keypoints': keypoints } for cls_ind, cls in enumerate(self.classes) if not cls == '__background__'] results = self._coco_keypoint_results_one_category_kernel(data_pack[0]) with open(res_file, 'w') as f: json.dump(results, f, sort_keys=True, indent=4) def _coco_keypoint_results_one_category_kernel(self, data_pack): """Get coco keypoint results.""" cat_id = data_pack['cat_id'] keypoints = data_pack['keypoints'] cat_results = [] for img_kpts in keypoints: if len(img_kpts) == 0: continue _key_points = np.array( [img_kpt['keypoints'] for img_kpt in img_kpts]) key_points = _key_points.reshape(-1, self.ann_info['num_joints'] * 3) result = [{ 'image_id': img_kpt['image_id'], 'category_id': cat_id, 'keypoints': key_point.tolist(), 'score': float(img_kpt['score']), 'center': img_kpt['center'].tolist(), 'scale': img_kpt['scale'].tolist() } for img_kpt, key_point in zip(img_kpts, key_points)] cat_results.extend(result) return cat_results def _do_python_keypoint_eval(self, res_file): """Keypoint evaluation using COCOAPI.""" coco_det = self.coco.loadRes(res_file) coco_eval = COCOeval(self.coco, coco_det, 'keypoints', self.sigmas) coco_eval.params.useSegm = None coco_eval.evaluate() coco_eval.accumulate() coco_eval.summarize() stats_names = [ 'AP', 'AP .5', 'AP .75', 'AP (M)', 
'AP (L)', 'AR', 'AR .5', 'AR .75', 'AR (M)', 'AR (L)' ] info_str = list(zip(stats_names, coco_eval.stats)) return info_str def _sort_and_unique_bboxes(self, kpts, key='bbox_id'): """sort kpts and remove the repeated ones.""" for img_id, persons in kpts.items(): num = len(persons) kpts[img_id] = sorted(kpts[img_id], key=lambda x: x[key]) for i in range(num - 1, 0, -1): if kpts[img_id][i][key] == kpts[img_id][i - 1][key]: del kpts[img_id][i] return kpts
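# A hedged sketch of the `outputs` structure consumed by evaluate() above
# (shapes follow the docstring; values and the image path are dummies; `np`
# is the module-level numpy import):
example_animalpose_outputs = [
    dict(
        preds=np.zeros((1, 20, 3)),  # N=1 instance, K=20 Animal-Pose joints
        boxes=np.zeros((1, 6)),  # [center_x, center_y, scale_x, scale_y,
                                 #  area, score]
        image_paths=['data/animalpose/images/0001.jpg'],
        bbox_ids=[0],
    )
]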
class BottomUpCocoDataset(BottomUpBaseDataset): """CocoDataset dataset for bottom-up pose estimation. The dataset loads raw features and apply specified transforms to return a dict containing the image tensors and other information. Keypoint Order: .. code-block:: JSON { "keypoints": { "0": "nose", "1": "left_eye", "2": "right_eye", "3": "left_ear", "4": "right_ear", "5": "left_shoulder", "6": "right_shoulder", "7": "left_elbow", "8": "right_elbow", "9": "left_wrist", "10": "right_wrist", "11": "left_hip", "12": "right_hip", "13": "left_knee", "14": "right_knee", "15": "left_ankle", "16": "right_ankle" }, "skeleton": [ [16,14],[14,12],[17,15],[15,13],[12,13],[6,12], [7,13],[6,7],[6,8],[7,9],[8,10],[9,11],[2,3], [1,2],[1,3],[2,4],[3,5],[4,6],[5,7] ] } Args: ann_file (str): Path to the annotation file. img_prefix (str): Path to a directory where images are held. Default: None. data_cfg (dict): config pipeline (list[dict | callable]): A sequence of data transforms. test_mode (bool): Store True when building test or validation dataset. Default: False. """ def __init__(self, ann_file, img_prefix, data_cfg, pipeline, test_mode=False): super().__init__(ann_file, img_prefix, data_cfg, pipeline, test_mode) self.ann_info['flip_index'] = [ 0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15 ] self.ann_info['use_different_joint_weights'] = False self.ann_info['joint_weights'] = np.array( [ 1., 1., 1., 1., 1., 1., 1., 1.2, 1.2, 1.5, 1.5, 1., 1., 1.2, 1.2, 1.5, 1.5 ], dtype=np.float32).reshape((self.ann_info['num_joints'], 1)) self.coco = COCO(ann_file) cats = [ cat['name'] for cat in self.coco.loadCats(self.coco.getCatIds()) ] self.ids = list(self.coco.imgs.keys()) if not test_mode: self.ids = [ img_id for img_id in self.ids if len(self.coco.getAnnIds(imgIds=img_id, iscrowd=None)) > 0 ] self.classes = ['__background__'] + cats self.num_classes = len(self.classes) self._class_to_ind = dict(zip(self.classes, range(self.num_classes))) self._class_to_coco_ind = dict(zip(cats, self.coco.getCatIds())) self._coco_ind_to_class_ind = dict( (self._class_to_coco_ind[cls], self._class_to_ind[cls]) for cls in self.classes[1:]) self.num_images = len(self.ids) def __len__(self): """Get dataset length.""" return len(self.ids) def _get_single(self, idx): """Get anno for a single image. 
Args: idx (int): image idx Returns: dict: info for model training """ coco = self.coco img_id = self.ids[idx] ann_ids = coco.getAnnIds(imgIds=img_id) anno = coco.loadAnns(ann_ids) mask = self._get_mask(anno, idx) anno = [ obj for obj in anno if obj['iscrowd'] == 0 or obj['num_keypoints'] > 0 ] joints = self._get_joints(anno) mask_list = [mask.copy() for _ in range(self.ann_info['num_scales'])] joints_list = [ joints.copy() for _ in range(self.ann_info['num_scales']) ] db_rec = {} db_rec['dataset'] = 'coco' db_rec['image_file'] = os.path.join( self.img_prefix, coco.loadImgs(img_id)[0]['file_name']) db_rec['mask'] = mask_list db_rec['joints'] = joints_list return db_rec def _get_joints(self, anno): """Get joints for all people in an image.""" num_people = len(anno) if self.ann_info['scale_aware_sigma']: joints = np.zeros((num_people, self.ann_info['num_joints'], 4), dtype=np.float32) else: joints = np.zeros((num_people, self.ann_info['num_joints'], 3), dtype=np.float32) for i, obj in enumerate(anno): joints[i, :self.ann_info['num_joints'], :3] = \ np.array(obj['keypoints']).reshape([-1, 3]) if self.ann_info['scale_aware_sigma']: # get person box box = obj['bbox'] size = max(box[2], box[3]) sigma = size / self.base_size * self.base_sigma if self.int_sigma: sigma = int(np.ceil(sigma)) assert sigma > 0, sigma joints[i, :, 3] = sigma return joints def _get_mask(self, anno, idx): """Get ignore masks to mask out losses.""" coco = self.coco img_info = coco.loadImgs(self.ids[idx])[0] m = np.zeros((img_info['height'], img_info['width']), dtype=np.float32) for obj in anno: if obj['iscrowd']: rle = xtcocotools.mask.frPyObjects(obj['segmentation'], img_info['height'], img_info['width']) m += xtcocotools.mask.decode(rle) elif obj['num_keypoints'] == 0: rles = xtcocotools.mask.frPyObjects(obj['segmentation'], img_info['height'], img_info['width']) for rle in rles: m += xtcocotools.mask.decode(rle) return m < 0.5 def evaluate(self, outputs, res_folder, metric='mAP', **kwargs): """Evaluate coco keypoint results. The pose prediction results will be saved in `${res_folder}/result_keypoints.json`. Note: num_people: P num_keypoints: K Args: outputs (list(preds, scores, image_path)):Output results. * preds (list[images x np.ndarray(P, K, 3+tag_num)]): Pose predictions for all people in images. * scores (list[images x P]): * image_path (list[str]): For example, [ 'c','o','c','o', '/',i','m','a','g','e','s','/', 'v','a', 'l', '2', '0', '1', '7', '/', '0', '0', '0', '0', '0', '0', '3', '9', '7', '1', '3', '3', '.', 'j', 'p', 'g'] res_folder (str): Path of directory to save the results. metric (str | list[str]): Metric to be performed. Defaults: 'mAP'. Returns: dict: Evaluation results for evaluation metric. 
""" metrics = metric if isinstance(metric, list) else [metric] allowed_metrics = ['mAP'] for metric in metrics: if metric not in allowed_metrics: raise KeyError(f'metric {metric} is not supported') res_file = os.path.join(res_folder, 'result_keypoints.json') preds = [] scores = [] image_paths = [] for _preds, _scores, _image_path in outputs: preds.append(_preds) scores.append(_scores) image_paths.append(''.join(_image_path)) kpts = defaultdict(list) # iterate over images for idx, _preds in enumerate(preds): file_name = image_paths[idx] # iterate over people for idx_person, kpt in enumerate(_preds): # use bbox area area = (np.max(kpt[:, 0]) - np.min(kpt[:, 0])) * ( np.max(kpt[:, 1]) - np.min(kpt[:, 1])) kpts[int(file_name[-16:-4])].append({ 'keypoints': kpt[:, 0:3], 'score': scores[idx][idx_person], 'tags': kpt[:, 3], 'image_id': int(file_name[-16:-4]), 'area': area }) oks_nmsed_kpts = [] for img in kpts.keys(): img_kpts = kpts[img] keep = [] if len(keep) == 0: oks_nmsed_kpts.append(img_kpts) else: oks_nmsed_kpts.append([img_kpts[_keep] for _keep in keep]) self._write_coco_keypoint_results(oks_nmsed_kpts, res_file) info_str = self._do_python_keypoint_eval(res_file) name_value = OrderedDict(info_str) return name_value def _write_coco_keypoint_results(self, keypoints, res_file): """Write results into a json file.""" data_pack = [{ 'cat_id': self._class_to_coco_ind[cls], 'cls_ind': cls_ind, 'cls': cls, 'ann_type': 'keypoints', 'keypoints': keypoints } for cls_ind, cls in enumerate(self.classes) if not cls == '__background__'] results = self._coco_keypoint_results_one_category_kernel(data_pack[0]) with open(res_file, 'w') as f: json.dump(results, f, sort_keys=True, indent=4) def _coco_keypoint_results_one_category_kernel(self, data_pack): """Get coco keypoint results.""" cat_id = data_pack['cat_id'] keypoints = data_pack['keypoints'] cat_results = [] for img_kpts in keypoints: if len(img_kpts) == 0: continue _key_points = np.array( [img_kpt['keypoints'] for img_kpt in img_kpts]) key_points = _key_points.reshape(-1, self.ann_info['num_joints'] * 3) for img_kpt, key_point in zip(img_kpts, key_points): kpt = key_point.reshape((self.ann_info['num_joints'], 3)) left_top = np.amin(kpt, axis=0) right_bottom = np.amax(kpt, axis=0) w = right_bottom[0] - left_top[0] h = right_bottom[1] - left_top[1] cat_results.append({ 'image_id': img_kpt['image_id'], 'category_id': cat_id, 'keypoints': list(key_point), 'score': img_kpt['score'], 'bbox': list([left_top[0], left_top[1], w, h]) }) return cat_results def _do_python_keypoint_eval(self, res_file): """Keypoint evaluation using COCOAPI.""" coco_det = self.coco.loadRes(res_file) coco_eval = COCOeval(self.coco, coco_det, 'keypoints') coco_eval.params.useSegm = None coco_eval.evaluate() coco_eval.accumulate() coco_eval.summarize() stats_names = [ 'AP', 'AP .5', 'AP .75', 'AP (M)', 'AP (L)', 'AR', 'AR .5', 'AR .75', 'AR (M)', 'AR (L)' ] info_str = list(zip(stats_names, coco_eval.stats)) return info_str
class BottomUpAicDataset(BottomUpCocoDataset): """Aic dataset for bottom-up pose estimation. `AI Challenger : A Large-scale Dataset for Going Deeper in Image Understanding <https://arxiv.org/abs/1711.06475>`__ The dataset loads raw features and apply specified transforms to return a dict containing the image tensors and other information. AIC keypoint indexes:: 0: "right_shoulder", 1: "right_elbow", 2: "right_wrist", 3: "left_shoulder", 4: "left_elbow", 5: "left_wrist", 6: "right_hip", 7: "right_knee", 8: "right_ankle", 9: "left_hip", 10: "left_knee", 11: "left_ankle", 12: "head_top", 13: "neck" Args: ann_file (str): Path to the annotation file. img_prefix (str): Path to a directory where images are held. Default: None. data_cfg (dict): config pipeline (list[dict | callable]): A sequence of data transforms. test_mode (bool): Store True when building test or validation dataset. Default: False. """ def __init__(self, ann_file, img_prefix, data_cfg, pipeline, test_mode=False): super(BottomUpCocoDataset, self).__init__(ann_file, img_prefix, data_cfg, pipeline, test_mode=test_mode) self.ann_info['flip_index'] = [ 3, 4, 5, 0, 1, 2, 9, 10, 11, 6, 7, 8, 12, 13 ] self.ann_info['use_different_joint_weights'] = False self.ann_info['joint_weights'] = np.array( [1., 1.2, 1.5, 1., 1.2, 1.5, 1., 1.2, 1.5, 1., 1.2, 1.5, 1., 1.], dtype=np.float32).reshape((self.ann_info['num_joints'], 1)) self.sigmas = np.array([ 0.01388152, 0.01515228, 0.01057665, 0.01417709, 0.01497891, 0.01402144, 0.03909642, 0.03686941, 0.01981803, 0.03843971, 0.03412318, 0.02415081, 0.01291456, 0.01236173 ]) self.coco = COCO(ann_file) cats = [ cat['name'] for cat in self.coco.loadCats(self.coco.getCatIds()) ] self.classes = ['__background__'] + cats self.num_classes = len(self.classes) self._class_to_ind = dict(zip(self.classes, range(self.num_classes))) self._class_to_coco_ind = dict(zip(cats, self.coco.getCatIds())) self._coco_ind_to_class_ind = dict( (self._class_to_coco_ind[cls], self._class_to_ind[cls]) for cls in self.classes[1:]) self.img_ids = self.coco.getImgIds() if not test_mode: self.img_ids = [ img_id for img_id in self.img_ids if len(self.coco.getAnnIds(imgIds=img_id, iscrowd=None)) > 0 ] self.num_images = len(self.img_ids) self.id2name, self.name2id = self._get_mapping_id_name(self.coco.imgs) self.dataset_name = 'aic' print(f'=> num_images: {self.num_images}') def _do_python_keypoint_eval(self, res_file): """Keypoint evaluation using COCOAPI.""" stats_names = [ 'AP', 'AP .5', 'AP .75', 'AP (M)', 'AP (L)', 'AR', 'AR .5', 'AR .75', 'AR (M)', 'AR (L)' ] with open(res_file, 'r') as file: res_json = json.load(file) if not res_json: info_str = list(zip(stats_names, [ 0, ] * len(stats_names))) return info_str coco_det = self.coco.loadRes(res_file) coco_eval = COCOeval(self.coco, coco_det, 'keypoints', self.sigmas, use_area=False) coco_eval.params.useSegm = None coco_eval.evaluate() coco_eval.accumulate() coco_eval.summarize() info_str = list(zip(stats_names, coco_eval.stats)) return info_str
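# Note on the empty-result guard in _do_python_keypoint_eval above: if
# result_keypoints.json holds an empty list, the method short-circuits to
# all-zero stats, e.g. [('AP', 0), ('AP .5', 0), ..., ('AR (L)', 0)],
# instead of running COCOeval on an empty result set.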
class TopDownPanopticDataset(TopDownBaseDataset):
    """Panoptic dataset for top-down hand pose estimation.

    `Hand Keypoint Detection in Single Images using Multiview Bootstrapping'
    CVPR'2017
    More details can be found in the `paper
    <https://arxiv.org/abs/1704.07809>`__ .

    The dataset loads raw features and apply specified transforms
    to return a dict containing the image tensors and other information.

    Panoptic keypoint indexes::

        0: 'wrist',
        1: 'thumb1',
        2: 'thumb2',
        3: 'thumb3',
        4: 'thumb4',
        5: 'forefinger1',
        6: 'forefinger2',
        7: 'forefinger3',
        8: 'forefinger4',
        9: 'middle_finger1',
        10: 'middle_finger2',
        11: 'middle_finger3',
        12: 'middle_finger4',
        13: 'ring_finger1',
        14: 'ring_finger2',
        15: 'ring_finger3',
        16: 'ring_finger4',
        17: 'pinky_finger1',
        18: 'pinky_finger2',
        19: 'pinky_finger3',
        20: 'pinky_finger4'

    Args:
        ann_file (str): Path to the annotation file.
        img_prefix (str): Path to a directory where images are held.
            Default: None.
        data_cfg (dict): config
        pipeline (list[dict | callable]): A sequence of data transforms.
        test_mode (bool): Store True when building test or
            validation dataset. Default: False.
    """

    def __init__(self,
                 ann_file,
                 img_prefix,
                 data_cfg,
                 pipeline,
                 test_mode=False):
        super().__init__(
            ann_file, img_prefix, data_cfg, pipeline, test_mode=test_mode)

        self.ann_info['flip_pairs'] = []

        self.ann_info['use_different_joint_weights'] = False
        assert self.ann_info['num_joints'] == 21
        self.ann_info['joint_weights'] = \
            np.ones((self.ann_info['num_joints'], 1), dtype=np.float32)

        self.coco = COCO(ann_file)

        self.img_ids = self.coco.getImgIds()
        self.num_images = len(self.img_ids)
        self.id2name, self.name2id = self._get_mapping_id_name(self.coco.imgs)
        self.dataset_name = 'panoptic'

        self.db = self._get_db()

        print(f'=> num_images: {self.num_images}')
        print(f'=> load {len(self.db)} samples')

    def _get_mapping_id_name(self, imgs):
        """
        Args:
            imgs (dict): dict of image info.

        Returns:
            tuple: Image name & id mapping dicts.

            - id2name (dict): Mapping image id to name.
            - name2id (dict): Mapping image name to id.
        """
        id2name = {}
        name2id = {}
        for image_id, image in imgs.items():
            file_name = image['file_name']
            id2name[image_id] = file_name
            name2id[file_name] = image_id

        return id2name, name2id

    def _get_db(self):
        """Load dataset."""
        gt_db = []
        for img_id in self.img_ids:
            num_joints = self.ann_info['num_joints']

            ann_ids = self.coco.getAnnIds(imgIds=img_id, iscrowd=False)
            objs = self.coco.loadAnns(ann_ids)

            rec = []
            for obj in objs:
                if max(obj['keypoints']) == 0:
                    continue
                joints_3d = np.zeros((num_joints, 3), dtype=np.float32)
                joints_3d_visible = np.zeros((num_joints, 3),
                                             dtype=np.float32)

                keypoints = np.array(obj['keypoints']).reshape(-1, 3)
                joints_3d[:, :2] = keypoints[:, :2]
                joints_3d_visible[:, :2] = np.minimum(1, keypoints[:, 2:3])

                center, scale = self._xywh2cs(*obj['bbox'][:4])

                image_file = os.path.join(self.img_prefix,
                                          self.id2name[img_id])
                rec.append({
                    'image_file': image_file,
                    'center': center,
                    'scale': scale,
                    'rotation': 0,
                    'joints_3d': joints_3d,
                    'joints_3d_visible': joints_3d_visible,
                    'dataset': self.dataset_name,
                    'head_size': obj['head_size'],
                    'bbox_score': 1
                })
            gt_db.extend(rec)

        return gt_db

    def _xywh2cs(self, x, y, w, h):
        """This encodes bbox (x, y, w, h) into (center, scale)

        Args:
            x, y, w, h

        Returns:
            center (np.ndarray[float32](2,)): center of the bbox (x, y).
            scale (np.ndarray[float32](2,)): scale of the bbox w & h.
        """
        aspect_ratio = self.ann_info['image_size'][0] / self.ann_info[
            'image_size'][1]
        center = np.array([x + w * 0.5, y + h * 0.5], dtype=np.float32)

        if (not self.test_mode) and np.random.rand() < 0.3:
            center += 0.4 * (np.random.rand(2) - 0.5) * [w, h]

        if w > aspect_ratio * h:
            h = w * 1.0 / aspect_ratio
        elif w < aspect_ratio * h:
            w = h * aspect_ratio

        # pixel std is 200.0
        scale = np.array([w / 200.0, h / 200.0], dtype=np.float32)

        scale = scale * 1.76

        return center, scale

    def evaluate(self, outputs, res_folder, metric='PCKh', **kwargs):
        """Evaluate panoptic keypoint results.

        metric (str | list[str]): Metrics to be evaluated.
            Options are 'PCKh', 'AUC', 'EPE'.

            'PCKh': ||pre[i] - joints_3d[i]|| < 0.7 * head_size
            'AUC': area under curve
            'EPE': end-point error
        """
        metrics = metric if isinstance(metric, list) else [metric]
        allowed_metrics = ['PCKh', 'AUC', 'EPE']
        for metric in metrics:
            if metric not in allowed_metrics:
                raise KeyError(f'metric {metric} is not supported')

        res_file = os.path.join(res_folder, 'result_keypoints.json')

        kpts = []

        for preds, boxes, image_path, _ in outputs:
            str_image_path = ''.join(image_path)
            image_id = self.name2id[str_image_path[len(self.img_prefix):]]

            kpts.append({
                'keypoints': preds[0].tolist(),
                'center': boxes[0][0:2].tolist(),
                'scale': boxes[0][2:4].tolist(),
                'area': float(boxes[0][4]),
                'score': float(boxes[0][5]),
                'image_id': image_id,
            })

        self._write_keypoint_results(kpts, res_file)
        info_str = self._report_metric(res_file, metrics)
        name_value = OrderedDict(info_str)

        return name_value

    def _write_keypoint_results(self, keypoints, res_file):
        """Write results into a json file."""

        with open(res_file, 'w') as f:
            json.dump(keypoints, f, sort_keys=True, indent=4)

    def _report_metric(self, res_file, metrics):
        """Keypoint evaluation.

        Report PCK, AUC or EPE.
        """
        info_str = []

        with open(res_file, 'r') as fin:
            preds = json.load(fin)
        assert len(preds) == len(self.db)

        outputs = []
        gts = []

        for pred, item in zip(preds, self.db):
            outputs.append(pred['keypoints'])
            gts.append(item['joints_3d'])
        outputs = np.array(outputs)[:, :, :-1]
        gts = np.array(gts)[:, :, :-1]

        if 'PCKh' in metrics:
            hit = 0
            exist = 0

            for pred, item in zip(preds, self.db):
                threshold = item['head_size'] * 0.7
                h, _, e = keypoint_pck_accuracy(
                    np.array(pred['keypoints'])[None, :, :-1],
                    np.array(item['joints_3d'])[None, :, :-1], 1,
                    np.array([[threshold, threshold]]))
                hit += len(h[h > 0])
                exist += e
            pck = hit / exist

            info_str.append(('PCKh', pck))

        if 'AUC' in metrics:
            info_str.append(('AUC', keypoint_auc(outputs, gts, 30)))

        if 'EPE' in metrics:
            info_str.append(('EPE', keypoint_epe(outputs, gts)))

        return info_str
def main():
    """Visualize the demo images.

    Require the json_file containing boxes.
    """
    parser = ArgumentParser()
    parser.add_argument('pose_config', help='Config file for detection')
    parser.add_argument('pose_checkpoint', help='Checkpoint file')
    parser.add_argument('--img-root', type=str, default='', help='Image root')
    parser.add_argument(
        '--json-file',
        type=str,
        default='',
        help='Json file containing image info.')
    parser.add_argument(
        '--show',
        action='store_true',
        default=False,
        help='whether to show img')
    parser.add_argument(
        '--out-img-root',
        type=str,
        default='',
        help='Root of the output img file. '
        'Default not saving the visualization images.')
    parser.add_argument(
        '--device', default='cuda:0', help='Device used for inference')
    parser.add_argument(
        '--kpt-thr', type=float, default=0.3, help='Keypoint score threshold')

    args = parser.parse_args()

    assert args.show or (args.out_img_root != '')

    coco = COCO(args.json_file)
    # build the pose model from a config file and a checkpoint file
    pose_model = init_pose_model(
        args.pose_config, args.pose_checkpoint, device=args.device)

    dataset = pose_model.cfg.data['test']['type']

    img_keys = list(coco.imgs.keys())

    # optional
    return_heatmap = False

    # e.g. use ('backbone', ) to return backbone feature
    output_layer_names = None

    # process each image
    for i in range(len(img_keys)):
        # get bounding box annotations
        image_id = img_keys[i]
        image = coco.loadImgs(image_id)[0]
        image_name = os.path.join(args.img_root, image['file_name'])
        ann_ids = coco.getAnnIds(image_id)

        # make person bounding boxes
        person_bboxes = []
        for ann_id in ann_ids:
            ann = coco.anns[ann_id]
            # bbox format is 'xywh'
            bbox = ann['bbox']
            person_bboxes.append(bbox)

        # test a single image, with a list of bboxes
        # bbox_thr is None: the ground-truth boxes loaded from the json
        # file carry no detection scores to threshold on
        pose_results, returned_outputs = inference_top_down_pose_model(
            pose_model,
            image_name,
            person_bboxes,
            bbox_thr=None,
            format='xywh',
            dataset=dataset,
            return_heatmap=return_heatmap,
            outputs=output_layer_names)

        if args.out_img_root == '':
            out_file = None
        else:
            os.makedirs(args.out_img_root, exist_ok=True)
            out_file = os.path.join(args.out_img_root, f'vis_{i}.jpg')

        vis_pose_result(
            pose_model,
            image_name,
            pose_results,
            dataset=dataset,
            kpt_score_thr=args.kpt_thr,
            show=args.show,
            out_file=out_file)
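# Example invocation of the demo above (the script name and config/checkpoint
# paths are placeholders):
#   python demo_img.py \
#       configs/top_down/hrnet/coco/hrnet_w48_coco_256x192.py \
#       checkpoints/hrnet_w48_coco_256x192.pth \
#       --img-root tests/data/coco/ \
#       --json-file tests/data/coco/test_coco.json \
#       --out-img-root vis_results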
class TopDownH36MDataset(TopDownBaseDataset):
    """Human3.6M dataset for top-down 2D pose estimation.

    `Human3.6M: Large Scale Datasets and Predictive Methods for 3D Human
    Sensing in Natural Environments' TPAMI'2014
    More details can be found in the `paper
    <http://vision.imar.ro/human3.6m/pami-h36m.pdf>`__.

    Human3.6M keypoint indexes::

        0: 'root (pelvis)',
        1: 'right_hip',
        2: 'right_knee',
        3: 'right_foot',
        4: 'left_hip',
        5: 'left_knee',
        6: 'left_foot',
        7: 'spine',
        8: 'thorax',
        9: 'neck_base',
        10: 'head',
        11: 'left_shoulder',
        12: 'left_elbow',
        13: 'left_wrist',
        14: 'right_shoulder',
        15: 'right_elbow',
        16: 'right_wrist'

    Args:
        ann_file (str): Path to the annotation file.
        img_prefix (str): Path to a directory where images are held.
            Default: None.
        data_cfg (dict): config
        pipeline (list[dict | callable]): A sequence of data transforms.
        test_mode (bool): Store True when building test or
            validation dataset. Default: False.
    """

    def __init__(self,
                 ann_file,
                 img_prefix,
                 data_cfg,
                 pipeline,
                 test_mode=False):
        super(TopDownH36MDataset, self).__init__(
            ann_file, img_prefix, data_cfg, pipeline, test_mode=test_mode)

        assert self.ann_info['num_joints'] == 17
        self.ann_info['flip_pairs'] = [[1, 4], [2, 5], [3, 6], [11, 14],
                                       [12, 15], [13, 16]]
        self.ann_info['upper_body_ids'] = (0, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                           16)
        self.ann_info['lower_body_ids'] = (1, 2, 3, 4, 5, 6)
        self.ann_info['use_different_joint_weights'] = False
        self.ann_info['joint_weights'] = np.ones(
            (self.ann_info['num_joints'], 1), dtype=np.float32)

        self.coco = COCO(ann_file)

        self.img_ids = self.coco.getImgIds()
        self.num_images = len(self.img_ids)
        self.id2name, self.name2id = self._get_mapping_id_name(self.coco.imgs)
        self.dataset_name = 'h36m'

        self.db = self._get_db()

        print(f'=> num_images: {self.num_images}')
        print(f'=> load {len(self.db)} samples')

    @staticmethod
    def _get_mapping_id_name(imgs):
        """
        Args:
            imgs (dict): dict of image info.

        Returns:
            tuple: Image name & id mapping dicts.

            - id2name (dict): Mapping image id to name.
            - name2id (dict): Mapping image name to id.
        """
        id2name = {}
        name2id = {}
        for image_id, image in imgs.items():
            file_name = image['file_name']
            id2name[image_id] = file_name
            name2id[file_name] = image_id

        return id2name, name2id

    def _xywh2cs(self, x, y, w, h, padding=1.25):
        """This encodes bbox(x,y,w,h) into (center, scale)

        Args:
            x, y, w, h

        Returns:
            center (np.ndarray[float32](2,)): center of the bbox (x, y).
            scale (np.ndarray[float32](2,)): scale of the bbox w & h.
        """
        aspect_ratio = self.ann_info['image_size'][0] / self.ann_info[
            'image_size'][1]
        center = np.array([x + w * 0.5, y + h * 0.5], dtype=np.float32)

        if (not self.test_mode) and np.random.rand() < 0.3:
            center += 0.4 * (np.random.rand(2) - 0.5) * [w, h]

        if w > aspect_ratio * h:
            h = w * 1.0 / aspect_ratio
        elif w < aspect_ratio * h:
            w = h * aspect_ratio

        # pixel std is 200.0
        scale = np.array([w / 200.0, h / 200.0], dtype=np.float32)
        # padding to include proper amount of context
        scale = scale * padding

        return center, scale

    def _get_db(self):
        """Load dataset."""
        gt_db = []
        bbox_id = 0
        num_joints = self.ann_info['num_joints']
        for img_id in self.img_ids:

            ann_ids = self.coco.getAnnIds(imgIds=img_id, iscrowd=False)
            objs = self.coco.loadAnns(ann_ids)

            for obj in objs:
                if max(obj['keypoints']) == 0:
                    continue
                joints_3d = np.zeros((num_joints, 3), dtype=np.float32)
                joints_3d_visible = np.zeros((num_joints, 3),
                                             dtype=np.float32)

                keypoints = np.array(obj['keypoints']).reshape(-1, 3)
                joints_3d[:, :2] = keypoints[:, :2]
                joints_3d_visible[:, :2] = np.minimum(1, keypoints[:, 2:3])

                # use the default 1.25 padded bbox as input
                center, scale = self._xywh2cs(*obj['bbox'][:4])

                image_file = os.path.join(self.img_prefix,
                                          self.id2name[img_id])
                gt_db.append({
                    'image_file': image_file,
                    'center': center,
                    'scale': scale,
                    'rotation': 0,
                    'joints_3d': joints_3d,
                    'joints_3d_visible': joints_3d_visible,
                    'dataset': self.dataset_name,
                    'bbox': obj['bbox'],
                    'bbox_score': 1,
                    'bbox_id': bbox_id
                })
                bbox_id = bbox_id + 1
        gt_db = sorted(gt_db, key=lambda x: x['bbox_id'])

        return gt_db

    def evaluate(self, outputs, res_folder, metric, **kwargs):
        """Evaluate human3.6m 2d keypoint results.

        The pose prediction results will be saved in
        `${res_folder}/result_keypoints.json`.

        Note:
            batch_size: N
            num_keypoints: K
            heatmap height: H
            heatmap width: W

        Args:
            outputs (list(dict))
                :preds (np.ndarray[N,K,3]): The first two dimensions are
                    coordinates, score is the third dimension of the array.
                :boxes (np.ndarray[N,6]): [center[0], center[1], scale[0],
                    scale[1], area, score]
                :image_paths (list[str]): For example, ['data/coco/val2017
                    /000000393226.jpg']
                :heatmap (np.ndarray[N, K, H, W]): model output heatmap
                :bbox_id (list(int)).
            res_folder (str): Path of directory to save the results.
            metric (str | list[str]): Metric to be performed.
                Options: 'PCK', 'EPE'.

        Returns:
            dict: Evaluation results for evaluation metric.
        """
        metrics = metric if isinstance(metric, list) else [metric]
        allowed_metrics = ['PCK', 'EPE']
        for metric in metrics:
            if metric not in allowed_metrics:
                raise KeyError(f'metric {metric} is not supported')

        res_file = os.path.join(res_folder, 'result_keypoints.json')

        kpts = []
        for output in outputs:
            preds = output['preds']
            boxes = output['boxes']
            image_paths = output['image_paths']
            bbox_ids = output['bbox_ids']

            batch_size = len(image_paths)
            for i in range(batch_size):
                image_id = self.name2id[image_paths[i][len(self.img_prefix):]]
                kpts.append({
                    'keypoints': preds[i].tolist(),
                    'center': boxes[i][0:2].tolist(),
                    'scale': boxes[i][2:4].tolist(),
                    'area': float(boxes[i][4]),
                    'score': float(boxes[i][5]),
                    'image_id': image_id,
                    'bbox_id': bbox_ids[i]
                })
        kpts = self._sort_and_unique_bboxes(kpts)

        self._write_keypoint_results(kpts, res_file)
        info_str = self._report_metric(res_file, metrics)
        name_value = OrderedDict(info_str)

        return name_value

    def _report_metric(self, res_file, metrics, pck_thr=0.05):
        """Keypoint evaluation.

        Args:
            res_file (str): Json file stored prediction results.
            metrics (str | list[str]): Metric to be performed.
                Options: 'PCK', 'EPE'.
pck_thr (float): PCK threshold, default as 0.05. auc_nor (float): AUC normalization factor, default as 30 pixel. Returns: List: Evaluation results for evaluation metric. """ info_str = [] with open(res_file, 'r') as fin: preds = json.load(fin) assert len(preds) == len(self.db) outputs = [] gts = [] masks = [] threshold_bbox = [] for pred, item in zip(preds, self.db): outputs.append(np.array(pred['keypoints'])[:, :-1]) gts.append(np.array(item['joints_3d'])[:, :-1]) masks.append((np.array(item['joints_3d_visible'])[:, 0]) > 0) if 'PCK' in metrics: bbox = np.array(item['bbox']) bbox_thr = np.max(bbox[2:]) threshold_bbox.append(np.array([bbox_thr, bbox_thr])) outputs = np.array(outputs) gts = np.array(gts) masks = np.array(masks) threshold_bbox = np.array(threshold_bbox) if 'PCK' in metrics: _, pck, _ = keypoint_pck_accuracy(outputs, gts, masks, pck_thr, threshold_bbox) info_str.append(('PCK', pck)) if 'EPE' in metrics: info_str.append(('EPE', keypoint_epe(outputs, gts, masks))) return info_str def _sort_and_unique_bboxes(self, kpts, key='bbox_id'): """sort kpts and remove the repeated ones.""" kpts = sorted(kpts, key=lambda x: x[key]) num = len(kpts) for i in range(num - 1, 0, -1): if kpts[i][key] == kpts[i - 1][key]: del kpts[i] return kpts @staticmethod def _write_keypoint_results(keypoints, res_file): """Write results into a json file.""" with open(res_file, 'w') as f: json.dump(keypoints, f, sort_keys=True, indent=4)
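# Worked example of the bbox-normalized PCK threshold in _report_metric
# above: for bbox = [x, y, w, h] = [0, 0, 120, 80], bbox_thr =
# max(120, 80) = 120, so with pck_thr = 0.05 a joint counts as correct when
# its prediction error is within roughly 0.05 * 120 = 6 pixels (after
# normalizing each axis by threshold_bbox).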