class BottomUpCocoDataset(BottomUpBaseDataset):
    """COCO dataset for bottom-up pose estimation.

    The dataset loads raw features and applies the specified transforms
    to return a dict containing the image tensors and other information.

    COCO keypoint indexes::

        0: 'nose',
        1: 'left_eye',
        2: 'right_eye',
        3: 'left_ear',
        4: 'right_ear',
        5: 'left_shoulder',
        6: 'right_shoulder',
        7: 'left_elbow',
        8: 'right_elbow',
        9: 'left_wrist',
        10: 'right_wrist',
        11: 'left_hip',
        12: 'right_hip',
        13: 'left_knee',
        14: 'right_knee',
        15: 'left_ankle',
        16: 'right_ankle'

    Args:
        ann_file (str): Path to the annotation file.
        img_prefix (str): Path to a directory where images are held.
            Default: None.
        data_cfg (dict): config
        pipeline (list[dict | callable]): A sequence of data transforms.
        test_mode (bool): Store True when building test or
            validation dataset. Default: False.
    """

    def __init__(self,
                 ann_file,
                 img_prefix,
                 data_cfg,
                 pipeline,
                 test_mode=False):
        super().__init__(ann_file, img_prefix, data_cfg, pipeline, test_mode)

        # Index permutation that swaps left/right keypoints.
        self.ann_info['flip_index'] = [
            0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15
        ]

        self.ann_info['use_different_joint_weights'] = False
        self.ann_info['joint_weights'] = np.array(
            [
                1., 1., 1., 1., 1., 1., 1., 1.2, 1.2, 1.5, 1.5, 1., 1., 1.2,
                1.2, 1.5, 1.5
            ],
            dtype=np.float32).reshape((self.ann_info['num_joints'], 1))

        # Per-keypoint sigmas handed to COCOeval in
        # ``_do_python_keypoint_eval``.
        self.sigmas = np.array([
            .26, .25, .25, .35, .35, .79, .79, .72, .72, .62, .62, 1.07, 1.07,
            .87, .87, .89, .89
        ]) / 10.0

        self.coco = COCO(ann_file)

        cat_ids = self.coco.getCatIds()
        cats = [cat['name'] for cat in self.coco.loadCats(cat_ids)]
        self.classes = ['__background__'] + cats
        self.num_classes = len(self.classes)
        self._class_to_ind = dict(zip(self.classes, range(self.num_classes)))
        self._class_to_coco_ind = dict(zip(cats, self.coco.getCatIds()))
        self._coco_ind_to_class_ind = dict(
            (self._class_to_coco_ind[cls], self._class_to_ind[cls])
            for cls in self.classes[1:])

        self.img_ids = self.coco.getImgIds()
        if not test_mode:
            # For training, drop images that have no annotation at all.
            self.img_ids = [
                img_id for img_id in self.img_ids
                if len(self.coco.getAnnIds(imgIds=img_id, iscrowd=None)) > 0
            ]
        self.num_images = len(self.img_ids)
        self.id2name, self.name2id = self._get_mapping_id_name(self.coco.imgs)
        self.dataset_name = 'coco'

        print(f'=> num_images: {self.num_images}')

    @staticmethod
    def _get_mapping_id_name(imgs):
        """Build the two-way mapping between image ids and file names.

        Args:
            imgs (dict): dict of image info.

        Returns:
            tuple: Image name & id mapping dicts.

            - id2name (dict): Mapping image id to name.
            - name2id (dict): Mapping image name to id.
        """
        id2name = {}
        name2id = {}
        for image_id, image in imgs.items():
            file_name = image['file_name']
            id2name[image_id] = file_name
            name2id[file_name] = image_id

        return id2name, name2id

    def _get_single(self, idx):
        """Get anno for a single image.

        Args:
            idx (int): image idx

        Returns:
            dict: info for model training
        """
        coco = self.coco
        img_id = self.img_ids[idx]
        ann_ids = coco.getAnnIds(imgIds=img_id)
        anno = coco.loadAnns(ann_ids)

        # The mask is built from the unfiltered annotations.
        mask = self._get_mask(anno, idx)
        anno = [
            obj for obj in anno
            if obj['iscrowd'] == 0 or obj['num_keypoints'] > 0
        ]

        joints = self._get_joints(anno)

        # One independent copy per output scale.
        num_scales = self.ann_info['num_scales']
        mask_list = [mask.copy() for _ in range(num_scales)]
        joints_list = [joints.copy() for _ in range(num_scales)]

        db_rec = {}
        db_rec['dataset'] = self.dataset_name
        db_rec['image_file'] = os.path.join(self.img_prefix,
                                            self.id2name[img_id])
        db_rec['mask'] = mask_list
        db_rec['joints'] = joints_list

        return db_rec

    def _get_joints(self, anno):
        """Get joints for all people in an image.

        Args:
            anno (list[dict]): Annotations of one image; each entry
                provides 'keypoints' and, when scale-aware sigma is
                enabled, 'bbox'.

        Returns:
            np.ndarray: float32 array of shape (num_people, num_joints, 3),
            or (num_people, num_joints, 4) when
            ``ann_info['scale_aware_sigma']`` is set, in which case the
            last channel holds the per-person sigma.
        """
        num_people = len(anno)
        num_joints = self.ann_info['num_joints']
        scale_aware = self.ann_info['scale_aware_sigma']

        joints = np.zeros((num_people, num_joints, 4 if scale_aware else 3),
                          dtype=np.float32)

        for i, obj in enumerate(anno):
            joints[i, :num_joints, :3] = \
                np.array(obj['keypoints']).reshape([-1, 3])
            if scale_aware:
                # get person box
                box = obj['bbox']
                size = max(box[2], box[3])
                sigma = size / self.base_size * self.base_sigma
                if self.int_sigma:
                    sigma = int(np.ceil(sigma))
                assert sigma > 0, sigma
                joints[i, :, 3] = sigma

        return joints

    def _get_mask(self, anno, idx):
        """Get ignore masks to mask out losses.

        Args:
            anno (list[dict]): Annotations of the image.
            idx (int): Index into ``self.img_ids``.

        Returns:
            np.ndarray: Boolean array of the image size; False where a
            crowd region or a person without keypoints was rasterized.
        """
        coco = self.coco
        img_info = coco.loadImgs(self.img_ids[idx])[0]
        height = img_info['height']
        width = img_info['width']

        m = np.zeros((height, width), dtype=np.float32)

        for obj in anno:
            if 'segmentation' not in obj:
                continue
            if obj['iscrowd']:
                rle = xtcocotools.mask.frPyObjects(obj['segmentation'],
                                                   height, width)
                m += xtcocotools.mask.decode(rle)
            elif obj['num_keypoints'] == 0:
                rles = xtcocotools.mask.frPyObjects(obj['segmentation'],
                                                    height, width)
                for rle in rles:
                    m += xtcocotools.mask.decode(rle)

        return m < 0.5

    def evaluate(self, outputs, res_folder, metric='mAP', **kwargs):
        """Evaluate coco keypoint results.

        The pose prediction results will be saved in
        `${res_folder}/result_keypoints.json`.

        Note:
            num_people: P
            num_keypoints: K

        Args:
            outputs (list(preds, scores, image_path, heatmap)):

                * preds (list[np.ndarray(P, K, 3+tag_num)]):
                  Pose predictions for all people in images.
                * scores (list[P]):
                * image_path (list[str]): For example, ['c','o','c','o',
                  '/','i','m','a','g','e','s','/', 'v','a', 'l', '2', '0',
                  '1', '7', '/', '0', '0', '0', '0', '0', '0', '3', '9',
                  '7', '1', '3', '3', '.', 'j', 'p', 'g']
                * heatmap (np.ndarray[N, K, H, W]): model outputs.

            res_folder (str): Path of directory to save the results.
            metric (str | list[str]): Metric to be performed.
                Defaults: 'mAP'.

        Returns:
            dict: Evaluation results for evaluation metric.

        Raises:
            KeyError: If a requested metric is not supported.
        """
        metrics = metric if isinstance(metric, list) else [metric]
        allowed_metrics = ['mAP']
        for _metric in metrics:
            if _metric not in allowed_metrics:
                raise KeyError(f'metric {_metric} is not supported')

        res_file = os.path.join(res_folder, 'result_keypoints.json')

        kpts = defaultdict(list)
        # iterate over images
        for _preds, _scores, _image_path, _ in outputs:
            str_image_path = ''.join(_image_path)
            image_id = self.name2id[os.path.basename(str_image_path)]
            # iterate over people
            for idx_person, kpt in enumerate(_preds):
                # use bbox area
                area = (np.max(kpt[:, 0]) - np.min(kpt[:, 0])) * (
                    np.max(kpt[:, 1]) - np.min(kpt[:, 1]))

                kpts[image_id].append({
                    'keypoints': kpt[:, 0:3],
                    'score': _scores[idx_person],
                    'tags': kpt[:, 3],
                    'image_id': image_id,
                    'area': area,
                })

        # No NMS is applied here: every predicted person is kept,
        # grouped per image.
        oks_nmsed_kpts = list(kpts.values())

        self._write_coco_keypoint_results(oks_nmsed_kpts, res_file)

        info_str = self._do_python_keypoint_eval(res_file)
        name_value = OrderedDict(info_str)
        return name_value

    def _write_coco_keypoint_results(self, keypoints, res_file):
        """Write results into a json file.

        Args:
            keypoints (list[list[dict]]): Per-image lists of person
                predictions.
            res_file (str): Destination path of the json file.
        """
        data_pack = [{
            'cat_id': self._class_to_coco_ind[cls],
            'cls_ind': cls_ind,
            'cls': cls,
            'ann_type': 'keypoints',
            'keypoints': keypoints
        } for cls_ind, cls in enumerate(self.classes)
                     if not cls == '__background__']

        # Only the first non-background category is written.
        results = self._coco_keypoint_results_one_category_kernel(data_pack[0])

        with open(res_file, 'w') as f:
            json.dump(results, f, sort_keys=True, indent=4)

    def _coco_keypoint_results_one_category_kernel(self, data_pack):
        """Get coco keypoint results.

        Args:
            data_pack (dict): Must provide 'cat_id' and 'keypoints'
                (per-image lists of person predictions).

        Returns:
            list[dict]: One JSON-serializable result entry per person.
        """
        cat_id = data_pack['cat_id']
        keypoints = data_pack['keypoints']
        num_joints = self.ann_info['num_joints']
        cat_results = []

        for img_kpts in keypoints:
            if len(img_kpts) == 0:
                continue

            _key_points = np.array(
                [img_kpt['keypoints'] for img_kpt in img_kpts])
            key_points = _key_points.reshape(-1, num_joints * 3)

            for img_kpt, key_point in zip(img_kpts, key_points):
                kpt = key_point.reshape((num_joints, 3))
                left_top = np.amin(kpt, axis=0)
                right_bottom = np.amax(kpt, axis=0)

                w = right_bottom[0] - left_top[0]
                h = right_bottom[1] - left_top[1]

                # Cast NumPy scalars to Python floats: json.dump cannot
                # serialize e.g. np.float32 values.
                cat_results.append({
                    'image_id': img_kpt['image_id'],
                    'category_id': cat_id,
                    'keypoints': key_point.tolist(),
                    'score': float(img_kpt['score']),
                    'bbox': [
                        float(left_top[0]),
                        float(left_top[1]),
                        float(w),
                        float(h)
                    ]
                })

        return cat_results

    def _do_python_keypoint_eval(self, res_file):
        """Keypoint evaluation using COCOAPI.

        Args:
            res_file (str): Path of the json result file.

        Returns:
            list[tuple]: (stat name, value) pairs; all values are 0 when
            the result file contains no prediction.
        """
        stats_names = [
            'AP', 'AP .5', 'AP .75', 'AP (M)', 'AP (L)', 'AR', 'AR .5',
            'AR .75', 'AR (M)', 'AR (L)'
        ]

        with open(res_file, 'r') as file:
            res_json = json.load(file)
            if not res_json:
                # Nothing was predicted; skip COCOeval and report zeros.
                info_str = list(zip(stats_names, [0] * len(stats_names)))
                return info_str

        coco_det = self.coco.loadRes(res_file)
        coco_eval = COCOeval(self.coco, coco_det, 'keypoints', self.sigmas)
        coco_eval.params.useSegm = None
        coco_eval.evaluate()
        coco_eval.accumulate()
        coco_eval.summarize()

        info_str = list(zip(stats_names, coco_eval.stats))

        return info_str
class TopDownMhpDataset(TopDownCocoDataset):
    """MHPv2.0 dataset for top-down pose estimation.

    `The Multi-Human Parsing project of Learning and Vision (LV) Group,
    National University of Singapore (NUS) is proposed to push the frontiers
    of fine-grained visual understanding of humans in crowd scene.
    <https://lv-mhp.github.io/>`

    Note that, the evaluation metric used here is mAP (adapted from COCO),
    which may be different from the official evaluation codes.
    'https://github.com/ZhaoJ9014/Multi-Human-Parsing/tree/master/'
    'Evaluation/Multi-Human-Pose'
    Please be cautious if you use the results in papers.

    The dataset loads raw features and applies the specified transforms
    to return a dict containing the image tensors and other information.

    MHP keypoint indexes::

        0: "right ankle",
        1: "right knee",
        2: "right hip",
        3: "left hip",
        4: "left knee",
        5: "left ankle",
        6: "pelvis",
        7: "thorax",
        8: "upper neck",
        9: "head top",
        10: "right wrist",
        11: "right elbow",
        12: "right shoulder",
        13: "left shoulder",
        14: "left elbow",
        15: "left wrist",

    Args:
        ann_file (str): Path to the annotation file.
        img_prefix (str): Path to a directory where images are held.
            Default: None.
        data_cfg (dict): config
        pipeline (list[dict | callable]): A sequence of data transforms.
        test_mode (bool): Store True when building test or
            validation dataset. Default: False.
    """

    def __init__(self,
                 ann_file,
                 img_prefix,
                 data_cfg,
                 pipeline,
                 test_mode=False):
        # Deliberately skips TopDownCocoDataset.__init__ and runs the
        # initializer of the next class in the MRO instead; all COCO-style
        # setup is redone below with MHP-specific values.
        super(TopDownCocoDataset, self).__init__(
            ann_file, img_prefix, data_cfg, pipeline, test_mode=test_mode)

        self.use_gt_bbox = data_cfg['use_gt_bbox']
        self.bbox_file = data_cfg['bbox_file']
        self.image_thr = data_cfg['image_thr']
        self.use_nms = data_cfg.get('use_nms', True)

        self.soft_nms = data_cfg['soft_nms']
        self.nms_thr = data_cfg['nms_thr']
        self.oks_thr = data_cfg['oks_thr']
        self.vis_thr = data_cfg['vis_thr']
        self.bbox_thr = data_cfg['bbox_thr']

        self.ann_info['flip_pairs'] = [[0, 5], [1, 4], [2, 3], [10, 15],
                                       [11, 14], [12, 13]]

        self.ann_info['upper_body_ids'] = (7, 8, 9, 10, 11, 12, 13, 14, 15)
        self.ann_info['lower_body_ids'] = (0, 1, 2, 3, 4, 5, 6)

        self.ann_info['use_different_joint_weights'] = False
        self.ann_info['joint_weights'] = np.array(
            [
                1.5, 1.2, 1., 1., 1.2, 1.5, 1., 1., 1., 1., 1.5, 1.2, 1., 1.,
                1.2, 1.5
            ],
            dtype=np.float32).reshape((self.ann_info['num_joints'], 1))

        # Adapted from COCO dataset.
        self.sigmas = np.array([
            .89, .83, 1.07, 1.07, .83, .89, .26, .26, .26, .26, .62, .72, 1.79,
            1.79, .72, .62
        ]) / 10.0

        self.coco = COCO(ann_file)

        cat_ids = self.coco.getCatIds()
        cats = [cat['name'] for cat in self.coco.loadCats(cat_ids)]
        self.classes = ['__background__'] + cats
        self.num_classes = len(self.classes)
        self._class_to_ind = dict(zip(self.classes, range(self.num_classes)))
        self._class_to_coco_ind = dict(zip(cats, self.coco.getCatIds()))
        self._coco_ind_to_class_ind = dict(
            (self._class_to_coco_ind[cls], self._class_to_ind[cls])
            for cls in self.classes[1:])

        self.img_ids = self.coco.getImgIds()
        # The annotation loader inherited from TopDownCocoDataset iterates
        # ``image_set_index``; since the parent initializer is skipped, the
        # attribute has to be provided here or ``_get_db`` fails.
        self.image_set_index = self.img_ids
        self.num_images = len(self.img_ids)
        self.id2name, self.name2id = self._get_mapping_id_name(self.coco.imgs)
        self.dataset_name = 'mhp'

        self.db = self._get_db()

        print(f'=> num_images: {self.num_images}')
        print(f'=> load {len(self.db)} samples')

    def _get_db(self):
        """Load dataset.

        Only ground-truth bounding boxes are supported.

        Returns:
            list[dict]: Database entries built from the annotations.
        """
        assert self.use_gt_bbox
        gt_db = self._load_coco_keypoint_annotations()
        return gt_db

    def _do_python_keypoint_eval(self, res_file):
        """Keypoint evaluation using COCOAPI.

        Args:
            res_file (str): Path of the json result file.

        Returns:
            list[tuple]: (stat name, value) pairs.
        """
        coco_det = self.coco.loadRes(res_file)
        coco_eval = COCOeval(
            self.coco, coco_det, 'keypoints', self.sigmas, use_area=False)
        coco_eval.params.useSegm = None
        coco_eval.evaluate()
        coco_eval.accumulate()
        coco_eval.summarize()

        stats_names = [
            'AP', 'AP .5', 'AP .75', 'AP (M)', 'AP (L)', 'AR', 'AR .5',
            'AR .75', 'AR (M)', 'AR (L)'
        ]

        info_str = list(zip(stats_names, coco_eval.stats))

        return info_str
class TopDownCocoWholeBodyDataset(TopDownCocoDataset):
    """CocoWholeBodyDataset dataset for top-down pose estimation.

    `Whole-Body Human Pose Estimation in the Wild' ECCV'2020
    More details can be found in the `paper
    <https://arxiv.org/abs/2007.11858>`__ .

    The dataset loads raw features and applies the specified transforms
    to return a dict containing the image tensors and other information.

    In total, we have 133 keypoints for wholebody pose estimation.

    COCO-WholeBody keypoint indexes::

        0-16: 17 body keypoints
        17-22: 6 foot keypoints
        23-90: 68 face keypoints
        91-132: 42 hand keypoints

    Args:
        ann_file (str): Path to the annotation file.
        img_prefix (str): Path to a directory where images are held.
            Default: None.
        data_cfg (dict): config
        pipeline (list[dict | callable]): A sequence of data transforms.
        test_mode (bool): Store True when building test or
            validation dataset. Default: False.
    """

    def __init__(self,
                 ann_file,
                 img_prefix,
                 data_cfg,
                 pipeline,
                 test_mode=False):
        # Deliberately skips TopDownCocoDataset.__init__ and runs the
        # initializer of the next class in the MRO instead; all COCO-style
        # setup is redone below with whole-body specific values.
        super(TopDownCocoDataset, self).__init__(
            ann_file, img_prefix, data_cfg, pipeline, test_mode=test_mode)

        self.use_gt_bbox = data_cfg['use_gt_bbox']
        self.bbox_file = data_cfg['bbox_file']
        self.image_thr = data_cfg['image_thr']

        self.soft_nms = data_cfg['soft_nms']
        self.nms_thr = data_cfg['nms_thr']
        self.oks_thr = data_cfg['oks_thr']
        self.vis_thr = data_cfg['vis_thr']
        self.bbox_thr = data_cfg['bbox_thr']

        self.ann_info['flip_pairs'] = self._make_flip_pairs()

        self.ann_info['upper_body_ids'] = (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
        self.ann_info['lower_body_ids'] = (11, 12, 13, 14, 15, 16)

        self.ann_info['use_different_joint_weights'] = False
        self.ann_info['joint_weights'] = \
            np.ones((self.ann_info['num_joints'], 1), dtype=np.float32)

        # Number of keypoints per body part, in result-vector order.
        self.body_num = 17
        self.foot_num = 6
        self.face_num = 68
        self.left_hand_num = 21
        self.right_hand_num = 21

        # Per-part sigmas handed to COCOeval in
        # ``_do_python_keypoint_eval``.
        self.sigmas_body = [
            0.026, 0.025, 0.025, 0.035, 0.035, 0.079, 0.079, 0.072, 0.072,
            0.062, 0.062, 0.107, 0.107, 0.087, 0.087, 0.089, 0.089
        ]

        self.sigmas_foot = [0.068, 0.066, 0.066, 0.092, 0.094, 0.094]

        self.sigmas_face = [
            0.042, 0.043, 0.044, 0.043, 0.040, 0.035, 0.031, 0.025, 0.020,
            0.023, 0.029, 0.032, 0.037, 0.038, 0.043, 0.041, 0.045, 0.013,
            0.012, 0.011, 0.011, 0.012, 0.012, 0.011, 0.011, 0.013, 0.015,
            0.009, 0.007, 0.007, 0.007, 0.012, 0.009, 0.008, 0.016, 0.010,
            0.017, 0.011, 0.009, 0.011, 0.009, 0.007, 0.013, 0.008, 0.011,
            0.012, 0.010, 0.034, 0.008, 0.008, 0.009, 0.008, 0.008, 0.007,
            0.010, 0.008, 0.009, 0.009, 0.009, 0.007, 0.007, 0.008, 0.011,
            0.008, 0.008, 0.008, 0.01, 0.008
        ]

        self.sigmas_lefthand = [
            0.029, 0.022, 0.035, 0.037, 0.047, 0.026, 0.025, 0.024, 0.035,
            0.018, 0.024, 0.022, 0.026, 0.017, 0.021, 0.021, 0.032, 0.02,
            0.019, 0.022, 0.031
        ]

        self.sigmas_righthand = [
            0.029, 0.022, 0.035, 0.037, 0.047, 0.026, 0.025, 0.024, 0.035,
            0.018, 0.024, 0.022, 0.026, 0.017, 0.021, 0.021, 0.032, 0.02,
            0.019, 0.022, 0.031
        ]

        self.sigmas_wholebody = (
            self.sigmas_body + self.sigmas_foot + self.sigmas_face +
            self.sigmas_lefthand + self.sigmas_righthand)

        self.sigmas = np.array(self.sigmas_wholebody)

        self.coco = COCO(ann_file)

        cat_ids = self.coco.getCatIds()
        cats = [cat['name'] for cat in self.coco.loadCats(cat_ids)]
        self.classes = ['__background__'] + cats
        self.num_classes = len(self.classes)
        self._class_to_ind = dict(zip(self.classes, range(self.num_classes)))
        self._class_to_coco_ind = dict(zip(cats, self.coco.getCatIds()))
        self._coco_ind_to_class_ind = dict(
            (self._class_to_coco_ind[cls], self._class_to_ind[cls])
            for cls in self.classes[1:])

        self.img_ids = self.coco.getImgIds()
        # The annotation loader inherited from TopDownCocoDataset iterates
        # ``image_set_index``; since the parent initializer is skipped, the
        # attribute has to be provided here or ``_get_db`` fails.
        self.image_set_index = self.img_ids
        self.num_images = len(self.img_ids)
        self.id2name, self.name2id = self._get_mapping_id_name(self.coco.imgs)
        self.dataset_name = 'coco'

        self.db = self._get_db()

        print(f'=> num_images: {self.num_images}')
        print(f'=> load {len(self.db)} samples')

    def _make_flip_pairs(self):
        """Return the left/right keypoint index pairs of all body parts.

        Returns:
            list[list[int]]: Pairs for body, foot, face and hands,
            concatenated in that order.
        """
        body = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12], [13, 14],
                [15, 16]]
        foot = [[17, 20], [18, 21], [19, 22]]

        face = [[23, 39], [24, 38], [25, 37], [26, 36], [27, 35], [28, 34],
                [29, 33], [30, 32], [40, 49], [41, 48], [42, 47], [43, 46],
                [44, 45], [54, 58], [55, 57], [59, 68], [60, 67], [61, 66],
                [62, 65], [63, 70], [64, 69], [71, 77], [72, 76], [73, 75],
                [78, 82], [79, 81], [83, 87], [84, 86], [88, 90]]

        hand = [[91, 112], [92, 113], [93, 114], [94, 115], [95, 116],
                [96, 117], [97, 118], [98, 119], [99, 120], [100, 121],
                [101, 122], [102, 123], [103, 124], [104, 125], [105, 126],
                [106, 127], [107, 128], [108, 129], [109, 130], [110, 131],
                [111, 132]]

        return body + foot + face + hand

    def _load_coco_keypoint_annotation_kernel(self, img_id):
        """load annotation from COCOAPI.

        Note:
            bbox:[x1, y1, w, h]

        Args:
            img_id: coco image id

        Returns:
            list[dict]: db entries, one per valid annotated person
        """
        img_ann = self.coco.loadImgs(img_id)[0]
        width = img_ann['width']
        height = img_ann['height']
        num_joints = self.ann_info['num_joints']

        ann_ids = self.coco.getAnnIds(imgIds=img_id, iscrowd=False)
        objs = self.coco.loadAnns(ann_ids)

        # sanitize bboxes: clip to the image and drop degenerate boxes
        valid_objs = []
        for obj in objs:
            x, y, w, h = obj['bbox']
            x1 = max(0, x)
            y1 = max(0, y)
            x2 = min(width - 1, x1 + max(0, w - 1))
            y2 = min(height - 1, y1 + max(0, h - 1))
            if ('area' not in obj or obj['area'] > 0) and x2 > x1 and y2 > y1:
                obj['clean_bbox'] = [x1, y1, x2 - x1, y2 - y1]
                valid_objs.append(obj)
        objs = valid_objs

        image_file = os.path.join(self.img_prefix, self.id2name[img_id])

        rec = []
        for obj in objs:
            # Skip people whose body keypoints are all zero.
            if max(obj['keypoints']) == 0:
                continue
            joints_3d = np.zeros((num_joints, 3), dtype=np.float32)
            joints_3d_visible = np.zeros((num_joints, 3), dtype=np.float32)

            # Concatenate all parts in whole-body keypoint order.
            keypoints = np.array(obj['keypoints'] + obj['foot_kpts'] +
                                 obj['face_kpts'] + obj['lefthand_kpts'] +
                                 obj['righthand_kpts']).reshape(-1, 3)
            joints_3d[:, :2] = keypoints[:, :2]
            joints_3d_visible[:, :2] = np.minimum(1, keypoints[:, 2:3] > 0)

            center, scale = self._xywh2cs(*obj['clean_bbox'][:4])

            rec.append({
                'image_file': image_file,
                'center': center,
                'scale': scale,
                'rotation': 0,
                'joints_3d': joints_3d,
                'joints_3d_visible': joints_3d_visible,
                'dataset': self.dataset_name,
                'bbox_score': 1
            })

        return rec

    def _coco_keypoint_results_one_category_kernel(self, data_pack):
        """Get coco keypoint results.

        Args:
            data_pack (dict): Must provide 'cat_id' and 'keypoints'
                (per-image lists of person predictions).

        Returns:
            list[dict]: One result entry per person, with the flat
            keypoint vector split into its body parts.
        """
        cat_id = data_pack['cat_id']
        keypoints = data_pack['keypoints']
        cat_results = []

        # Offsets (in flat x, y, score triples) at which each body part
        # starts inside a person's keypoint vector.
        cuts = np.cumsum([
            0, self.body_num, self.foot_num, self.face_num,
            self.left_hand_num, self.right_hand_num
        ]) * 3

        for img_kpts in keypoints:
            if len(img_kpts) == 0:
                continue

            _key_points = np.array(
                [img_kpt['keypoints'] for img_kpt in img_kpts])
            key_points = _key_points.reshape(-1,
                                             self.ann_info['num_joints'] * 3)

            result = [{
                'image_id': img_kpt['image_id'],
                'category_id': cat_id,
                'keypoints': key_point[cuts[0]:cuts[1]].tolist(),
                'foot_kpts': key_point[cuts[1]:cuts[2]].tolist(),
                'face_kpts': key_point[cuts[2]:cuts[3]].tolist(),
                'lefthand_kpts': key_point[cuts[3]:cuts[4]].tolist(),
                'righthand_kpts': key_point[cuts[4]:cuts[5]].tolist(),
                'score': float(img_kpt['score']),
                'center': img_kpt['center'].tolist(),
                'scale': img_kpt['scale'].tolist()
            } for img_kpt, key_point in zip(img_kpts, key_points)]

            cat_results.extend(result)

        return cat_results

    def _do_python_keypoint_eval(self, res_file):
        """Keypoint evaluation using COCOAPI.

        Every body part is evaluated and summarized in turn; the returned
        statistics are those of the final whole-body evaluation.

        Args:
            res_file (str): Path of the json result file.

        Returns:
            list[tuple]: (stat name, value) pairs of the whole-body
            evaluation.
        """
        coco_det = self.coco.loadRes(res_file)

        # (iou type, sigmas) per evaluation; whole-body must stay last
        # because its stats are the ones returned.
        eval_parts = [
            ('keypoints_body', self.sigmas_body),
            ('keypoints_foot', self.sigmas_foot),
            ('keypoints_face', self.sigmas_face),
            ('keypoints_lefthand', self.sigmas_lefthand),
            ('keypoints_righthand', self.sigmas_righthand),
            ('keypoints_wholebody', self.sigmas_wholebody),
        ]

        coco_eval = None
        for iou_type, sigmas in eval_parts:
            coco_eval = COCOeval(
                self.coco,
                coco_det,
                iou_type,
                np.array(sigmas),
                use_area=True)
            coco_eval.params.useSegm = None
            coco_eval.evaluate()
            coco_eval.accumulate()
            coco_eval.summarize()

        stats_names = [
            'AP', 'AP .5', 'AP .75', 'AP (M)', 'AP (L)', 'AR', 'AR .5',
            'AR .75', 'AR (M)', 'AR (L)'
        ]

        info_str = list(zip(stats_names, coco_eval.stats))

        return info_str
class TopDownCocoDataset(TopDownBaseDataset):
    """CocoDataset dataset for top-down pose estimation.

    `Microsoft COCO: Common Objects in Context' ECCV'2014
    More details can be found in the `paper
    <https://arxiv.org/abs/1405.0312>`_ .

    The dataset loads raw features and applies the specified transforms
    to return a dict containing the image tensors and other information.

    COCO keypoint indexes::

        0: 'nose',
        1: 'left_eye',
        2: 'right_eye',
        3: 'left_ear',
        4: 'right_ear',
        5: 'left_shoulder',
        6: 'right_shoulder',
        7: 'left_elbow',
        8: 'right_elbow',
        9: 'left_wrist',
        10: 'right_wrist',
        11: 'left_hip',
        12: 'right_hip',
        13: 'left_knee',
        14: 'right_knee',
        15: 'left_ankle',
        16: 'right_ankle'

    Args:
        ann_file (str): Path to the annotation file.
        img_prefix (str): Path to a directory where images are held.
            Default: None.
        data_cfg (dict): config
        pipeline (list[dict | callable]): A sequence of data transforms.
        test_mode (bool): Store True when building test or
            validation dataset. Default: False.
    """

    def __init__(self,
                 ann_file,
                 img_prefix,
                 data_cfg,
                 pipeline,
                 test_mode=False):
        super().__init__(
            ann_file, img_prefix, data_cfg, pipeline, test_mode=test_mode)

        self.use_gt_bbox = data_cfg['use_gt_bbox']
        self.bbox_file = data_cfg['bbox_file']
        self.image_thr = data_cfg['image_thr']
        # Optional switch, defaults to the previous always-on behavior.
        self.use_nms = data_cfg.get('use_nms', True)

        self.soft_nms = data_cfg['soft_nms']
        self.nms_thr = data_cfg['nms_thr']
        self.oks_thr = data_cfg['oks_thr']
        self.vis_thr = data_cfg['vis_thr']
        self.bbox_thr = data_cfg['bbox_thr']

        self.ann_info['flip_pairs'] = [[1, 2], [3, 4], [5, 6], [7, 8],
                                       [9, 10], [11, 12], [13, 14], [15, 16]]

        self.ann_info['upper_body_ids'] = (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
        self.ann_info['lower_body_ids'] = (11, 12, 13, 14, 15, 16)

        self.ann_info['use_different_joint_weights'] = False
        self.ann_info['joint_weights'] = np.array(
            [
                1., 1., 1., 1., 1., 1., 1., 1.2, 1.2, 1.5, 1.5, 1., 1., 1.2,
                1.2, 1.5, 1.5
            ],
            dtype=np.float32).reshape((self.ann_info['num_joints'], 1))

        # Per-keypoint sigmas used by OKS NMS and handed to COCOeval.
        self.sigmas = np.array([
            .26, .25, .25, .35, .35, .79, .79, .72, .72, .62, .62, 1.07, 1.07,
            .87, .87, .89, .89
        ]) / 10.0

        self.coco = COCO(ann_file)

        cat_ids = self.coco.getCatIds()
        cats = [cat['name'] for cat in self.coco.loadCats(cat_ids)]
        self.classes = ['__background__'] + cats
        self.num_classes = len(self.classes)
        self._class_to_ind = dict(zip(self.classes, range(self.num_classes)))
        self._class_to_coco_ind = dict(zip(cats, self.coco.getCatIds()))
        self._coco_ind_to_class_ind = dict(
            (self._class_to_coco_ind[cls], self._class_to_ind[cls])
            for cls in self.classes[1:])

        # ``img_ids`` is the name used by the subclasses; ``image_set_index``
        # is kept as an alias for existing callers.
        self.img_ids = self.coco.getImgIds()
        self.image_set_index = self.img_ids
        self.num_images = len(self.img_ids)
        self.id2name, self.name2id = self._get_mapping_id_name(self.coco.imgs)
        self.dataset_name = 'coco'

        self.db = self._get_db()

        print(f'=> num_images: {self.num_images}')
        print(f'=> load {len(self.db)} samples')

    def _get_mapping_id_name(self, imgs):
        """Build the two-way mapping between image ids and file names.

        Args:
            imgs (dict): dict of image info.

        Returns:
            tuple: Image name & id mapping dicts.

            - id2name (dict): mapping image id to name.
            - name2id (dict): mapping image name to id.
        """
        id2name = {}
        name2id = {}
        for image_id, image in imgs.items():
            file_name = image['file_name']
            id2name[image_id] = file_name
            name2id[file_name] = image_id

        return id2name, name2id

    def _get_db(self):
        """Load dataset.

        Ground-truth boxes are used for training or when
        ``use_gt_bbox`` is set; otherwise detection results are loaded.

        Returns:
            list[dict]: Database entries.
        """
        if (not self.test_mode) or self.use_gt_bbox:
            # use ground truth bbox
            gt_db = self._load_coco_keypoint_annotations()
        else:
            # use bbox from detection
            gt_db = self._load_coco_person_detection_results()
        return gt_db

    def _load_coco_keypoint_annotations(self):
        """Ground truth bbox and keypoints.

        Returns:
            list[dict]: Database entries of all images.
        """
        # Subclasses that bypass this class's __init__ set either
        # ``img_ids`` or ``image_set_index``; accept both.
        img_ids = getattr(self, 'img_ids', None)
        if img_ids is None:
            img_ids = self.image_set_index

        gt_db = []
        for img_id in img_ids:
            gt_db.extend(self._load_coco_keypoint_annotation_kernel(img_id))
        return gt_db

    def _load_coco_keypoint_annotation_kernel(self, index):
        """load annotation from COCOAPI.

        Note:
            bbox:[x1, y1, w, h]

        Args:
            index: coco image id

        Returns:
            list[dict]: db entries, one per valid annotated person
        """
        img_ann = self.coco.loadImgs(index)[0]
        width = img_ann['width']
        height = img_ann['height']
        num_joints = self.ann_info['num_joints']

        ann_ids = self.coco.getAnnIds(imgIds=index, iscrowd=False)
        objs = self.coco.loadAnns(ann_ids)

        # sanitize bboxes: clip to the image and drop invalid boxes
        valid_objs = []
        for obj in objs:
            x, y, w, h = obj['bbox']
            x1 = max(0, x)
            y1 = max(0, y)
            x2 = min(width - 1, x1 + max(0, w - 1))
            y2 = min(height - 1, y1 + max(0, h - 1))
            if ('area' not in obj
                    or obj['area'] > 0) and x2 >= x1 and y2 >= y1:
                obj['clean_bbox'] = [x1, y1, x2 - x1, y2 - y1]
                valid_objs.append(obj)
        objs = valid_objs

        image_file = os.path.join(self.img_prefix, self.id2name[index])

        rec = []
        for obj in objs:
            # Skip people whose keypoints are all zero.
            if max(obj['keypoints']) == 0:
                continue
            joints_3d = np.zeros((num_joints, 3), dtype=np.float32)
            joints_3d_visible = np.zeros((num_joints, 3), dtype=np.float32)

            keypoints = np.array(obj['keypoints']).reshape(-1, 3)
            joints_3d[:, :2] = keypoints[:, :2]
            joints_3d_visible[:, :2] = np.minimum(1, keypoints[:, 2:3])

            center, scale = self._xywh2cs(*obj['clean_bbox'][:4])

            rec.append({
                'image_file': image_file,
                'center': center,
                'scale': scale,
                'rotation': 0,
                'joints_3d': joints_3d,
                'joints_3d_visible': joints_3d_visible,
                'dataset': self.dataset_name,
                'bbox_score': 1
            })

        return rec

    def _xywh2cs(self, x, y, w, h):
        """This encodes bbox(x,y,w,h) into (center, scale)

        Args:
            x, y, w, h

        Returns:
            tuple: A tuple containing center and scale.

            - center (np.ndarray[float32](2,)): center of the bbox (x, y).
            - scale (np.ndarray[float32](2,)): scale of the bbox w & h.
        """
        aspect_ratio = self.ann_info['image_size'][0] / self.ann_info[
            'image_size'][1]
        center = np.array([x + w * 0.5, y + h * 0.5], dtype=np.float32)

        # Outside test mode, randomly shift the center with probability 0.3.
        if (not self.test_mode) and np.random.rand() < 0.3:
            center += 0.4 * (np.random.rand(2) - 0.5) * [w, h]

        # Grow the shorter side so the box matches the input aspect ratio.
        if w > aspect_ratio * h:
            h = w * 1.0 / aspect_ratio
        elif w < aspect_ratio * h:
            w = h * aspect_ratio

        # pixel std is 200.0
        scale = np.array([w / 200.0, h / 200.0], dtype=np.float32)

        scale = scale * 1.25

        return center, scale

    def _load_coco_person_detection_results(self):
        """Load coco person detection results.

        Returns:
            list[dict]: Database entries built from the detected boxes
            of category 1 whose score is at least ``self.image_thr``.

        Raises:
            ValueError: If the bbox file contains no detection.
        """
        num_joints = self.ann_info['num_joints']
        with open(self.bbox_file, 'r') as f:
            all_boxes = json.load(f)

        if not all_boxes:
            raise ValueError('=> Load %s fail!' % self.bbox_file)

        print(f'=> Total boxes: {len(all_boxes)}')

        kpt_db = []
        num_boxes = 0
        for det_res in all_boxes:
            if det_res['category_id'] != 1:
                continue

            image_file = os.path.join(self.img_prefix,
                                      self.id2name[det_res['image_id']])
            box = det_res['bbox']
            score = det_res['score']

            if score < self.image_thr:
                continue

            num_boxes = num_boxes + 1

            center, scale = self._xywh2cs(*box[:4])
            # No ground truth here: zero joints, all marked visible.
            joints_3d = np.zeros((num_joints, 3), dtype=np.float32)
            joints_3d_visible = np.ones((num_joints, 3), dtype=np.float32)
            kpt_db.append({
                'image_file': image_file,
                'center': center,
                'scale': scale,
                'rotation': 0,
                'bbox_score': score,
                'dataset': self.dataset_name,
                'joints_3d': joints_3d,
                'joints_3d_visible': joints_3d_visible
            })
        print(f'=> Total boxes after filter '
              f'low score@{self.image_thr}: {num_boxes}')
        return kpt_db

    def evaluate(self, outputs, res_folder, metric='mAP', **kwargs):
        """Evaluate coco keypoint results.

        The pose prediction results will be saved in
        `${res_folder}/result_keypoints.json`.

        Note:
            num_keypoints: K

        Args:
            outputs (list(preds, boxes, image_path)):

                * preds (np.ndarray[1,K,3]): The first two dimensions are
                  coordinates, score is the third dimension of the array.
                * boxes (np.ndarray[1,6]): [center[0], center[1], scale[0],
                  scale[1], area, score]
                * image_path (list[str]): For example, ['/', 'v','a', 'l',
                  '2', '0', '1', '7', '/', '0', '0', '0', '0', '0', '0',
                  '3', '9', '7', '1', '3', '3', '.', 'j', 'p', 'g']

            res_folder (str): Path of directory to save the results.
            metric (str | list[str]): Metric to be performed.
                Defaults: 'mAP'.

        Returns:
            dict: Evaluation results for evaluation metric.

        Raises:
            KeyError: If a requested metric is not supported.
        """
        metrics = metric if isinstance(metric, list) else [metric]
        allowed_metrics = ['mAP']
        for _metric in metrics:
            if _metric not in allowed_metrics:
                raise KeyError(f'metric {_metric} is not supported')

        res_file = os.path.join(res_folder, 'result_keypoints.json')

        kpts = defaultdict(list)
        for preds, boxes, image_path in outputs:
            str_image_path = ''.join(image_path)
            image_id = self.name2id[os.path.basename(str_image_path)]

            kpts[image_id].append({
                'keypoints': preds[0],
                'center': boxes[0][0:2],
                'scale': boxes[0][2:4],
                'area': boxes[0][4],
                'score': boxes[0][5],
                'image_id': image_id,
            })

        # rescoring and oks nms
        num_joints = self.ann_info['num_joints']
        vis_thr = self.vis_thr
        oks_thr = self.oks_thr
        # Subclasses may bypass __init__ without setting ``use_nms``;
        # default to applying NMS as before.
        use_nms = getattr(self, 'use_nms', True)

        oks_nmsed_kpts = []
        for img_kpts in kpts.values():
            for n_p in img_kpts:
                box_score = n_p['score']
                kpt_score = 0
                valid_num = 0
                # Mean score of the keypoints above the visibility
                # threshold.
                for n_jt in range(0, num_joints):
                    t_s = n_p['keypoints'][n_jt][2]
                    if t_s > vis_thr:
                        kpt_score = kpt_score + t_s
                        valid_num = valid_num + 1
                if valid_num != 0:
                    kpt_score = kpt_score / valid_num
                # rescoring
                n_p['score'] = kpt_score * box_score

            if not use_nms:
                oks_nmsed_kpts.append(img_kpts)
                continue

            nms = soft_oks_nms if self.soft_nms else oks_nms
            keep = nms(list(img_kpts), oks_thr, sigmas=self.sigmas)

            if len(keep) == 0:
                oks_nmsed_kpts.append(img_kpts)
            else:
                oks_nmsed_kpts.append([img_kpts[_keep] for _keep in keep])

        self._write_coco_keypoint_results(oks_nmsed_kpts, res_file)

        info_str = self._do_python_keypoint_eval(res_file)
        name_value = OrderedDict(info_str)

        return name_value

    def _write_coco_keypoint_results(self, keypoints, res_file):
        """Write results into a json file.

        Args:
            keypoints (list[list[dict]]): Per-image lists of person
                predictions.
            res_file (str): Destination path of the json file.
        """
        data_pack = [{
            'cat_id': self._class_to_coco_ind[cls],
            'cls_ind': cls_ind,
            'cls': cls,
            'ann_type': 'keypoints',
            'keypoints': keypoints
        } for cls_ind, cls in enumerate(self.classes)
                     if not cls == '__background__']

        # Only the first non-background category is written.
        results = self._coco_keypoint_results_one_category_kernel(data_pack[0])

        with open(res_file, 'w') as f:
            json.dump(results, f, sort_keys=True, indent=4)

    def _coco_keypoint_results_one_category_kernel(self, data_pack):
        """Get coco keypoint results.

        Args:
            data_pack (dict): Must provide 'cat_id' and 'keypoints'
                (per-image lists of person predictions).

        Returns:
            list[dict]: One result entry per person.
        """
        cat_id = data_pack['cat_id']
        keypoints = data_pack['keypoints']
        cat_results = []

        for img_kpts in keypoints:
            if len(img_kpts) == 0:
                continue

            _key_points = np.array(
                [img_kpt['keypoints'] for img_kpt in img_kpts])
            key_points = _key_points.reshape(-1,
                                             self.ann_info['num_joints'] * 3)

            result = [{
                'image_id': img_kpt['image_id'],
                'category_id': cat_id,
                'keypoints': key_point.tolist(),
                'score': float(img_kpt['score']),
                'center': img_kpt['center'].tolist(),
                'scale': img_kpt['scale'].tolist()
            } for img_kpt, key_point in zip(img_kpts, key_points)]

            cat_results.extend(result)

        return cat_results

    def _do_python_keypoint_eval(self, res_file):
        """Keypoint evaluation using COCOAPI.

        Args:
            res_file (str): Path of the json result file.

        Returns:
            list[tuple]: (stat name, value) pairs.
        """
        coco_det = self.coco.loadRes(res_file)
        coco_eval = COCOeval(self.coco, coco_det, 'keypoints', self.sigmas)
        coco_eval.params.useSegm = None
        coco_eval.evaluate()
        coco_eval.accumulate()
        coco_eval.summarize()

        stats_names = [
            'AP', 'AP .5', 'AP .75', 'AP (M)', 'AP (L)', 'AR', 'AR .5',
            'AR .75', 'AR (M)', 'AR (L)'
        ]

        info_str = list(zip(stats_names, coco_eval.stats))

        return info_str
class TopDownAicDataset(TopDownCocoDataset):
    """AI Challenger (AIC) keypoint dataset for top-down pose estimation.

    `AI Challenger : A Large-scale Dataset for Going Deeper in Image
    Understanding <https://arxiv.org/abs/1711.06475>`__

    Raw annotations are loaded through the COCO API and passed through the
    configured pipeline, which yields a dict with the image tensors and the
    accompanying meta information.

    AIC keypoint indexes::

        0: "right_shoulder",
        1: "right_elbow",
        2: "right_wrist",
        3: "left_shoulder",
        4: "left_elbow",
        5: "left_wrist",
        6: "right_hip",
        7: "right_knee",
        8: "right_ankle",
        9: "left_hip",
        10: "left_knee",
        11: "left_ankle",
        12: "head_top",
        13: "neck"

    Args:
        ann_file (str): Path to the annotation file.
        img_prefix (str): Path to a directory where images are held.
            Default: None.
        data_cfg (dict): config
        pipeline (list[dict | callable]): A sequence of data transforms.
        test_mode (bool): Store True when building test or
            validation dataset. Default: False.
    """

    def __init__(self,
                 ann_file,
                 img_prefix,
                 data_cfg,
                 pipeline,
                 test_mode=False):
        # The two-argument super() starts the lookup *after*
        # TopDownCocoDataset, so that class's own __init__ is bypassed and
        # the AIC-specific setup below is used in its place.
        super(TopDownCocoDataset, self).__init__(
            ann_file, img_prefix, data_cfg, pipeline, test_mode=test_mode)

        self.use_gt_bbox = data_cfg['use_gt_bbox']
        self.bbox_file = data_cfg['bbox_file']

        # 'image_thr' is the legacy spelling of 'det_bbox_thr'; when it is
        # present it wins, after a deprecation warning.
        bbox_thr = data_cfg.get('det_bbox_thr', 0.0)
        if 'image_thr' in data_cfg:
            warnings.warn(
                'image_thr is deprecated, '
                'please use det_bbox_thr instead', DeprecationWarning)
            bbox_thr = data_cfg['image_thr']
        self.det_bbox_thr = bbox_thr

        self.use_nms = data_cfg.get('use_nms', True)
        self.soft_nms = data_cfg['soft_nms']
        self.nms_thr = data_cfg['nms_thr']
        self.oks_thr = data_cfg['oks_thr']
        self.vis_thr = data_cfg['vis_thr']

        # Right/left partners: each right-side joint r mirrors joint r + 3.
        self.ann_info['flip_pairs'] = [[r, r + 3] for r in (0, 1, 2, 6, 7, 8)]
        self.ann_info['upper_body_ids'] = (0, 1, 2, 3, 4, 5, 12, 13)
        self.ann_info['lower_body_ids'] = (6, 7, 8, 9, 10, 11)

        self.ann_info['use_different_joint_weights'] = False
        # Four limbs weighted (1.0, 1.2, 1.5) each, then head_top and neck.
        limb_weights = [1., 1.2, 1.5] * 4
        self.ann_info['joint_weights'] = np.array(
            limb_weights + [1., 1.],
            dtype=np.float32).reshape((self.ann_info['num_joints'], 1))

        # 'https://github.com/AIChallenger/AI_Challenger_2017/blob/master/'
        # 'Evaluation/keypoint_eval/keypoint_eval.py#L50'
        # delta = 2 x sigma
        self.sigmas = np.array([
            0.01388152, 0.01515228, 0.01057665, 0.01417709, 0.01497891,
            0.01402144, 0.03909642, 0.03686941, 0.01981803, 0.03843971,
            0.03412318, 0.02415081, 0.01291456, 0.01236173
        ])

        # Category bookkeeping; index 0 is reserved for the background.
        coco_api = COCO(ann_file)
        self.coco = coco_api
        cat_ids = coco_api.getCatIds()
        cat_names = [entry['name'] for entry in coco_api.loadCats(cat_ids)]

        self.classes = ['__background__'] + cat_names
        self.num_classes = len(self.classes)
        self._class_to_ind = {
            name: index
            for index, name in enumerate(self.classes)
        }
        self._class_to_coco_ind = dict(zip(cat_names, cat_ids))
        self._coco_ind_to_class_ind = {
            self._class_to_coco_ind[name]: self._class_to_ind[name]
            for name in self.classes[1:]
        }

        self.img_ids = coco_api.getImgIds()
        self.num_images = len(self.img_ids)
        self.id2name, self.name2id = self._get_mapping_id_name(coco_api.imgs)
        self.dataset_name = 'aic'

        self.db = self._get_db()

        print(f'=> num_images: {self.num_images}')
        print(f'=> load {len(self.db)} samples')

    def _get_db(self):
        """Build the sample database from ground-truth boxes.

        Only ground-truth boxes are supported here, hence the assertion on
        ``use_gt_bbox``.
        """
        assert self.use_gt_bbox
        return self._load_coco_keypoint_annotations()

    def _do_python_keypoint_eval(self, res_file):
        """Run the COCO-API keypoint evaluation on ``res_file``.

        Returns:
            list[tuple]: ``(metric name, value)`` pairs, in the order the
            evaluator reports its statistics.
        """
        detections = self.coco.loadRes(res_file)
        evaluator = COCOeval(
            self.coco, detections, 'keypoints', self.sigmas, use_area=False)
        evaluator.params.useSegm = None

        evaluator.evaluate()
        evaluator.accumulate()
        evaluator.summarize()

        metric_names = [
            'AP', 'AP .5', 'AP .75', 'AP (M)', 'AP (L)', 'AR', 'AR .5',
            'AR .75', 'AR (M)', 'AR (L)'
        ]
        return list(zip(metric_names, evaluator.stats))
class TopDownCrowdPoseDataset(TopDownCocoDataset):
    """CrowdPose keypoint dataset for top-down pose estimation.

    Raw annotations are loaded through the COCO API and passed through the
    configured pipeline, which yields a dict with the image tensors and the
    accompanying meta information.

    CrowdPose keypoint indexes::

        0: 'left_shoulder',
        1: 'right_shoulder',
        2: 'left_elbow',
        3: 'right_elbow',
        4: 'left_wrist',
        5: 'right_wrist',
        6: 'left_hip',
        7: 'right_hip',
        8: 'left_knee',
        9: 'right_knee',
        10: 'left_ankle',
        11: 'right_ankle',
        12: 'top_head',
        13: 'neck'

    Args:
        ann_file (str): Path to the annotation file.
        img_prefix (str): Path to a directory where images are held.
            Default: None.
        data_cfg (dict): config
        pipeline (list[dict | callable]): A sequence of data transforms.
        test_mode (bool): Store True when building test or
            validation dataset. Default: False.
    """

    def __init__(self,
                 ann_file,
                 img_prefix,
                 data_cfg,
                 pipeline,
                 test_mode=False):
        # The two-argument super() starts the lookup *after*
        # TopDownCocoDataset, so that class's own __init__ is bypassed and
        # the CrowdPose-specific setup below is used in its place.
        super(TopDownCocoDataset, self).__init__(
            ann_file, img_prefix, data_cfg, pipeline, test_mode=test_mode)

        self.use_gt_bbox = data_cfg['use_gt_bbox']
        self.bbox_file = data_cfg['bbox_file']

        # 'image_thr' is the legacy spelling of 'det_bbox_thr'; when it is
        # present it wins, after a deprecation warning.
        bbox_thr = data_cfg.get('det_bbox_thr', 0.0)
        if 'image_thr' in data_cfg:
            warnings.warn(
                'image_thr is deprecated, '
                'please use det_bbox_thr instead', DeprecationWarning)
            bbox_thr = data_cfg['image_thr']
        self.det_bbox_thr = bbox_thr

        self.use_nms = data_cfg.get('use_nms', True)
        self.soft_nms = data_cfg['soft_nms']
        self.nms_thr = data_cfg['nms_thr']
        self.oks_thr = data_cfg['oks_thr']
        self.vis_thr = data_cfg['vis_thr']

        # Joints 0..11 come in consecutive (left, right) pairs.
        flip_pairs = [[left, left + 1] for left in range(0, 12, 2)]
        self.ann_info['flip_pairs'] = flip_pairs

        # Symmetric lookup: every paired joint maps to its mirror partner.
        mirror_of = {}
        for left, right in flip_pairs:
            mirror_of[left] = right
            mirror_of[right] = left
        self.ann_info['joint_to_joint'] = mirror_of

        self.ann_info['upper_body_ids'] = (0, 1, 2, 3, 4, 5, 12, 13)
        self.ann_info['lower_body_ids'] = (6, 7, 8, 9, 10, 11)

        self.ann_info['use_different_joint_weights'] = False
        self.ann_info['joint_weights'] = np.array(
            [
                0.2, 0.2, 0.2, 1.3, 1.5, 0.2, 1.3, 1.5, 0.2, 0.2, 0.5, 0.2,
                0.2, 0.5
            ],
            dtype=np.float32).reshape((self.ann_info['num_joints'], 1))

        # 'https://github.com/Jeff-sjtu/CrowdPose/blob/master/crowdpose-api/'
        # 'PythonAPI/crowdposetools/cocoeval.py#L224'
        self.sigmas = np.array([
            .79, .79, .72, .72, .62, .62, 1.07, 1.07, .87, .87, .89, .89,
            .79, .79
        ]) / 10.0

        # Category bookkeeping; index 0 is reserved for the background.
        coco_api = COCO(ann_file)
        self.coco = coco_api
        cat_ids = coco_api.getCatIds()
        cat_names = [entry['name'] for entry in coco_api.loadCats(cat_ids)]

        self.classes = ['__background__'] + cat_names
        self.num_classes = len(self.classes)
        self._class_to_ind = {
            name: index
            for index, name in enumerate(self.classes)
        }
        self._class_to_coco_ind = dict(zip(cat_names, cat_ids))
        self._coco_ind_to_class_ind = {
            self._class_to_coco_ind[name]: self._class_to_ind[name]
            for name in self.classes[1:]
        }

        self.img_ids = coco_api.getImgIds()
        self.num_images = len(self.img_ids)
        self.id2name, self.name2id = self._get_mapping_id_name(coco_api.imgs)
        self.dataset_name = 'crowdpose'

        self.db = self._get_db()

        print(f'=> num_images: {self.num_images}')
        print(f'=> load {len(self.db)} samples')

    def _do_python_keypoint_eval(self, res_file):
        """Run the COCO-API evaluation in ``'keypoints_crowd'`` mode.

        Returns:
            list[tuple]: ``(metric name, value)`` pairs, in the order the
            evaluator reports its statistics.
        """
        detections = self.coco.loadRes(res_file)
        evaluator = COCOeval(
            self.coco,
            detections,
            'keypoints_crowd',
            self.sigmas,
            use_area=False)
        evaluator.params.useSegm = None

        evaluator.evaluate()
        evaluator.accumulate()
        evaluator.summarize()

        metric_names = [
            'AP', 'AP .5', 'AP .75', 'AR', 'AR .5', 'AR .75', 'AP(E)',
            'AP(M)', 'AP(H)'
        ]
        return list(zip(metric_names, evaluator.stats))
class AnimalPoseDataset(AnimalBaseDataset):
    """Animal-Pose dataset for animal pose estimation.

    `Cross-domain Adaptation For Animal Pose Estimation` ICCV'2019
    More details can be found in the `paper
    <https://arxiv.org/abs/1908.05806>`__ .

    The dataset loads raw features and apply specified transforms
    to return a dict containing the image tensors and other information.

    Animal-Pose keypoint indexes::

        0: 'L_Eye',
        1: 'R_Eye',
        2: 'L_EarBase',
        3: 'R_EarBase',
        4: 'Nose',
        5: 'Throat',
        6: 'TailBase',
        7: 'Withers',
        8: 'L_F_Elbow',
        9: 'R_F_Elbow',
        10: 'L_B_Elbow',
        11: 'R_B_Elbow',
        12: 'L_F_Knee',
        13: 'R_F_Knee',
        14: 'L_B_Knee',
        15: 'R_B_Knee',
        16: 'L_F_Paw',
        17: 'R_F_Paw',
        18: 'L_B_Paw',
        19: 'R_B_Paw'

    Args:
        ann_file (str): Path to the annotation file.
        img_prefix (str): Path to a directory where images are held.
            Default: None.
        data_cfg (dict): config
        pipeline (list[dict | callable]): A sequence of data transforms.
        test_mode (bool): Store True when building test or
            validation dataset. Default: False.
    """

    def __init__(self,
                 ann_file,
                 img_prefix,
                 data_cfg,
                 pipeline,
                 test_mode=False):
        super().__init__(
            ann_file, img_prefix, data_cfg, pipeline, test_mode=test_mode)

        self.use_gt_bbox = data_cfg['use_gt_bbox']
        self.bbox_file = data_cfg['bbox_file']
        self.det_bbox_thr = data_cfg.get('det_bbox_thr', 0.0)
        if 'image_thr' in data_cfg:
            # Legacy key: still honoured (it overrides det_bbox_thr).
            warnings.warn(
                'image_thr is deprecated, '
                'please use det_bbox_thr instead', DeprecationWarning)
            self.det_bbox_thr = data_cfg['image_thr']
        self.use_nms = data_cfg.get('use_nms', True)
        self.soft_nms = data_cfg['soft_nms']
        self.nms_thr = data_cfg['nms_thr']
        self.oks_thr = data_cfg['oks_thr']
        self.vis_thr = data_cfg['vis_thr']

        self.ann_info['flip_pairs'] = [[0, 1], [2, 3], [8, 9], [10, 11],
                                       [12, 13], [14, 15], [16, 17], [18, 19]]

        self.ann_info['upper_body_ids'] = (0, 1, 2, 3, 4, 5, 7, 8, 9, 12, 13,
                                           16, 17)
        self.ann_info['lower_body_ids'] = (6, 10, 11, 14, 15, 18, 19)

        self.ann_info['use_different_joint_weights'] = False
        self.ann_info['joint_weights'] = np.array(
            [
                1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.2, 1.2, 1.2,
                1.2, 1.5, 1.5, 1.5, 1.5
            ],
            dtype=np.float32).reshape((self.ann_info['num_joints'], 1))

        # Note: The original paper did not provide enough information about
        # the sigmas. We modified from 'https://github.com/cocodataset/'
        # 'cocoapi/blob/master/PythonAPI/pycocotools/cocoeval.py#L523'
        self.sigmas = np.array([
            .25, .25, .26, .35, .35, 1.0, 1.0, 1.0, 1.07, 1.07, 1.07, 1.07,
            .87, .87, .87, .87, .89, .89, .89, .89
        ]) / 10.0

        self.coco = COCO(ann_file)

        cats = [
            cat['name'] for cat in self.coco.loadCats(self.coco.getCatIds())
        ]
        # Index 0 is reserved for the background class.
        self.classes = ['__background__'] + cats
        self.num_classes = len(self.classes)
        self._class_to_ind = dict(zip(self.classes, range(self.num_classes)))
        self._class_to_coco_ind = dict(zip(cats, self.coco.getCatIds()))
        self._coco_ind_to_class_ind = dict(
            (self._class_to_coco_ind[cls], self._class_to_ind[cls])
            for cls in self.classes[1:])
        self.img_ids = self.coco.getImgIds()
        self.num_images = len(self.img_ids)
        self.id2name, self.name2id = self._get_mapping_id_name(self.coco.imgs)
        self.dataset_name = 'animalpose'

        self.db = self._get_db()

        print(f'=> num_images: {self.num_images}')
        print(f'=> load {len(self.db)} samples')

    def _get_db(self):
        """Load dataset (ground-truth boxes only)."""
        assert self.use_gt_bbox
        gt_db = self._load_coco_keypoint_annotations()
        return gt_db

    def _load_coco_keypoint_annotations(self):
        """Ground truth bbox and keypoints, concatenated over all images."""
        gt_db = []
        for img_id in self.img_ids:
            gt_db.extend(self._load_coco_keypoint_annotation_kernel(img_id))
        return gt_db

    def _load_coco_keypoint_annotation_kernel(self, img_id):
        """Load the annotations of one image from COCOAPI.

        Note:
            bbox:[x1, y1, w, h]

        Args:
            img_id: coco image id

        Returns:
            list[dict]: One db entry per usable annotated instance of the
            image (possibly empty).
        """
        img_ann = self.coco.loadImgs(img_id)[0]
        width = img_ann['width']
        height = img_ann['height']
        num_joints = self.ann_info['num_joints']

        ann_ids = self.coco.getAnnIds(imgIds=img_id, iscrowd=False)
        objs = self.coco.loadAnns(ann_ids)

        # sanitize bboxes: clip to the image and drop degenerate ones
        valid_objs = []
        for obj in objs:
            if 'bbox' not in obj:
                continue
            x, y, w, h = obj['bbox']
            x1 = max(0, x)
            y1 = max(0, y)
            x2 = min(width - 1, x1 + max(0, w - 1))
            y2 = min(height - 1, y1 + max(0, h - 1))
            if ('area' not in obj or obj['area'] > 0) and x2 > x1 and y2 > y1:
                obj['clean_bbox'] = [x1, y1, x2 - x1, y2 - y1]
                valid_objs.append(obj)
        objs = valid_objs

        bbox_id = 0
        rec = []
        for obj in objs:
            # Skip instances that carry no usable keypoint annotation.
            if 'keypoints' not in obj:
                continue
            if max(obj['keypoints']) == 0:
                continue
            if 'num_keypoints' in obj and obj['num_keypoints'] == 0:
                continue
            joints_3d = np.zeros((num_joints, 3), dtype=np.float32)
            joints_3d_visible = np.zeros((num_joints, 3), dtype=np.float32)

            keypoints = np.array(obj['keypoints']).reshape(-1, 3)
            joints_3d[:, :2] = keypoints[:, :2]
            # Visibility flag is clamped to at most 1 and broadcast to x/y.
            joints_3d_visible[:, :2] = np.minimum(1, keypoints[:, 2:3])

            center, scale = self._xywh2cs(*obj['clean_bbox'][:4])

            image_file = os.path.join(self.img_prefix, self.id2name[img_id])
            rec.append({
                'image_file': image_file,
                'center': center,
                'scale': scale,
                'bbox': obj['clean_bbox'][:4],
                'rotation': 0,
                'joints_3d': joints_3d,
                'joints_3d_visible': joints_3d_visible,
                'dataset': self.dataset_name,
                'bbox_score': 1,
                'bbox_id': bbox_id
            })
            bbox_id = bbox_id + 1

        return rec

    def _image_id_from_path(self, image_path):
        """Map an image path produced by this dataset back to its image id.

        The path is expected to start with ``self.img_prefix``. Image files
        are built with ``os.path.join(img_prefix, file_name)``, so when the
        prefix has no trailing separator the remainder starts with one; in
        that case the leading separators are stripped before the lookup.

        Raises:
            KeyError: If the resulting name is not a known file name.
        """
        rel_name = image_path[len(self.img_prefix):]
        if rel_name not in self.name2id:
            rel_name = rel_name.lstrip('/\\')
        return self.name2id[rel_name]

    def evaluate(self, outputs, res_folder, metric='mAP', **kwargs):
        """Evaluate coco keypoint results. The pose prediction results will
        be saved in `${res_folder}/result_keypoints.json`.

        Note:
            batch_size: N
            num_keypoints: K
            heatmap height: H
            heatmap width: W

        Args:
            outputs (list(dict))
                :preds (np.ndarray[N,K,3]): The first two dimensions are
                    coordinates, score is the third dimension of the array.
                :boxes (np.ndarray[N,6]): [center[0], center[1], scale[0]
                    , scale[1],area, score]
                :image_paths (list[str]): For example, ['data/coco/val2017
                    /000000393226.jpg']
                :heatmap (np.ndarray[N, K, H, W]): model output heatmap
                :bbox_ids (list(int)).
            res_folder (str): Path of directory to save the results.
            metric (str | list[str]): Metric to be performed. Defaults: 'mAP'.

        Returns:
            dict: Evaluation results for evaluation metric.

        Raises:
            KeyError: If a requested metric is not supported, or an image
                path cannot be mapped back to an image id.
        """
        metrics = metric if isinstance(metric, list) else [metric]
        allowed_metrics = ['mAP']
        for name in metrics:
            if name not in allowed_metrics:
                raise KeyError(f'metric {name} is not supported')

        res_file = os.path.join(res_folder, 'result_keypoints.json')

        kpts = defaultdict(list)

        for output in outputs:
            preds = output['preds']
            boxes = output['boxes']
            image_paths = output['image_paths']
            bbox_ids = output['bbox_ids']

            for i, image_path in enumerate(image_paths):
                image_id = self._image_id_from_path(image_path)
                kpts[image_id].append({
                    'keypoints': preds[i],
                    'center': boxes[i][0:2],
                    'scale': boxes[i][2:4],
                    'area': boxes[i][4],
                    'score': boxes[i][5],
                    'image_id': image_id,
                    'bbox_id': bbox_ids[i]
                })
        kpts = self._sort_and_unique_bboxes(kpts)

        # rescoring and oks nms
        num_joints = self.ann_info['num_joints']
        vis_thr = self.vis_thr
        oks_thr = self.oks_thr
        valid_kpts = []
        for img_kpts in kpts.values():
            for n_p in img_kpts:
                box_score = n_p['score']
                kpt_score = 0
                valid_num = 0
                for n_jt in range(num_joints):
                    t_s = n_p['keypoints'][n_jt][2]
                    if t_s > vis_thr:
                        kpt_score = kpt_score + t_s
                        valid_num = valid_num + 1
                if valid_num != 0:
                    kpt_score = kpt_score / valid_num
                # rescoring: mean confident-keypoint score times box score
                n_p['score'] = kpt_score * box_score

            if self.use_nms:
                nms = soft_oks_nms if self.soft_nms else oks_nms
                keep = nms(list(img_kpts), oks_thr, sigmas=self.sigmas)
                valid_kpts.append([img_kpts[_keep] for _keep in keep])
            else:
                valid_kpts.append(img_kpts)

        self._write_coco_keypoint_results(valid_kpts, res_file)

        info_str = self._do_python_keypoint_eval(res_file)
        name_value = OrderedDict(info_str)

        return name_value

    def _write_coco_keypoint_results(self, keypoints, res_file):
        """Write results into a json file.

        Only the first non-background category is exported.
        """
        data_pack = [{
            'cat_id': self._class_to_coco_ind[cls],
            'cls_ind': cls_ind,
            'cls': cls,
            'ann_type': 'keypoints',
            'keypoints': keypoints
        } for cls_ind, cls in enumerate(self.classes)
                     if not cls == '__background__']

        results = self._coco_keypoint_results_one_category_kernel(data_pack[0])

        with open(res_file, 'w') as f:
            json.dump(results, f, sort_keys=True, indent=4)

    def _coco_keypoint_results_one_category_kernel(self, data_pack):
        """Get coco keypoint results as a flat list of result dicts."""
        cat_id = data_pack['cat_id']
        keypoints = data_pack['keypoints']
        cat_results = []

        for img_kpts in keypoints:
            if len(img_kpts) == 0:
                continue

            _key_points = np.array(
                [img_kpt['keypoints'] for img_kpt in img_kpts])
            # One flat row of (x, y, score) triples per instance.
            key_points = _key_points.reshape(-1,
                                             self.ann_info['num_joints'] * 3)

            result = [{
                'image_id': img_kpt['image_id'],
                'category_id': cat_id,
                'keypoints': key_point.tolist(),
                'score': float(img_kpt['score']),
                'center': img_kpt['center'].tolist(),
                'scale': img_kpt['scale'].tolist()
            } for img_kpt, key_point in zip(img_kpts, key_points)]

            cat_results.extend(result)

        return cat_results

    def _do_python_keypoint_eval(self, res_file):
        """Keypoint evaluation using COCOAPI.

        Args:
            res_file (str): Path of the json file holding the results.

        Returns:
            list[tuple]: ``(metric name, value)`` pairs. When the result file
            holds no detections every metric is reported as 0 instead of
            handing an empty list to ``loadRes``.
        """
        stats_names = [
            'AP', 'AP .5', 'AP .75', 'AP (M)', 'AP (L)', 'AR', 'AR .5',
            'AR .75', 'AR (M)', 'AR (L)'
        ]

        # An empty result list cannot be loaded by the COCO API, so it is
        # short-circuited here.
        with open(res_file, 'r') as f:
            if not json.load(f):
                return [(name, 0) for name in stats_names]

        coco_det = self.coco.loadRes(res_file)
        coco_eval = COCOeval(self.coco, coco_det, 'keypoints', self.sigmas)
        coco_eval.params.useSegm = None
        coco_eval.evaluate()
        coco_eval.accumulate()
        coco_eval.summarize()

        info_str = list(zip(stats_names, coco_eval.stats))

        return info_str

    def _sort_and_unique_bboxes(self, kpts, key='bbox_id'):
        """Sort kpts by ``key`` and remove the repeated ones.

        For every image the entries are sorted by ``key``; of several entries
        sharing the same value only the first (in sorted order) is kept.
        ``kpts`` is modified in place and also returned.
        """
        for img_id, persons in kpts.items():
            num = len(persons)
            kpts[img_id] = sorted(kpts[img_id], key=lambda x: x[key])
            # Walk backwards so deletions do not shift unvisited indices.
            for i in range(num - 1, 0, -1):
                if kpts[img_id][i][key] == kpts[img_id][i - 1][key]:
                    del kpts[img_id][i]

        return kpts
class BottomUpCrowdPoseDataset(BottomUpCocoDataset):
    """CrowdPose keypoint dataset for bottom-up pose estimation.

    Raw annotations are loaded through the COCO API and passed through the
    configured pipeline, which yields a dict with the image tensors and the
    accompanying meta information.

    CrowdPose keypoint indexes::

        0: 'left_shoulder',
        1: 'right_shoulder',
        2: 'left_elbow',
        3: 'right_elbow',
        4: 'left_wrist',
        5: 'right_wrist',
        6: 'left_hip',
        7: 'right_hip',
        8: 'left_knee',
        9: 'right_knee',
        10: 'left_ankle',
        11: 'right_ankle',
        12: 'top_head',
        13: 'neck'

    Args:
        ann_file (str): Path to the annotation file.
        img_prefix (str): Path to a directory where images are held.
            Default: None.
        data_cfg (dict): config
        pipeline (list[dict | callable]): A sequence of data transforms.
        test_mode (bool): Store True when building test or
            validation dataset. Default: False.
    """

    def __init__(self,
                 ann_file,
                 img_prefix,
                 data_cfg,
                 pipeline,
                 test_mode=False):
        # The two-argument super() starts the lookup *after*
        # BottomUpCocoDataset, so that class's own __init__ is bypassed and
        # the CrowdPose-specific setup below is used in its place.
        super(BottomUpCocoDataset, self).__init__(
            ann_file, img_prefix, data_cfg, pipeline, test_mode=test_mode)

        self.ann_info['flip_index'] = [
            1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 12, 13
        ]

        self.ann_info['use_different_joint_weights'] = False
        self.ann_info['joint_weights'] = np.array(
            [
                0.2, 0.2, 0.2, 1.3, 1.5, 0.2, 1.3, 1.5, 0.2, 0.2, 0.5, 0.2,
                0.2, 0.5
            ],
            dtype=np.float32).reshape((self.ann_info['num_joints'], 1))

        self.sigmas = np.array([
            .79, .79, .72, .72, .62, .62, 1.07, 1.07, .87, .87, .89, .89,
            .79, .79
        ]) / 10.0

        # Category bookkeeping; index 0 is reserved for the background.
        coco_api = COCO(ann_file)
        self.coco = coco_api
        cat_ids = coco_api.getCatIds()
        cat_names = [entry['name'] for entry in coco_api.loadCats(cat_ids)]

        self.classes = ['__background__'] + cat_names
        self.num_classes = len(self.classes)
        self._class_to_ind = {
            name: index
            for index, name in enumerate(self.classes)
        }
        self._class_to_coco_ind = dict(zip(cat_names, cat_ids))
        self._coco_ind_to_class_ind = {
            self._class_to_coco_ind[name]: self._class_to_ind[name]
            for name in self.classes[1:]
        }

        self.img_ids = coco_api.getImgIds()
        if not test_mode:
            # Outside test mode, images without any annotation are dropped.
            self.img_ids = [
                image_id for image_id in self.img_ids
                if coco_api.getAnnIds(imgIds=image_id, iscrowd=None)
            ]
        self.num_images = len(self.img_ids)
        self.id2name, self.name2id = self._get_mapping_id_name(coco_api.imgs)
        self.dataset_name = 'crowdpose'

        print(f'=> num_images: {self.num_images}')

    def _do_python_keypoint_eval(self, res_file):
        """Run the COCO-API evaluation in ``'keypoints_crowd'`` mode.

        Returns:
            list[tuple]: ``(metric name, value)`` pairs. If the result file
            contains no detections, every metric is reported as 0 and the
            COCO API is not invoked.
        """
        metric_names = [
            'AP', 'AP .5', 'AP .75', 'AR', 'AR .5', 'AR .75', 'AP(E)',
            'AP(M)', 'AP(H)'
        ]

        with open(res_file, 'r') as handle:
            has_results = bool(json.load(handle))
        if not has_results:
            return [(name, 0) for name in metric_names]

        detections = self.coco.loadRes(res_file)
        evaluator = COCOeval(
            self.coco,
            detections,
            'keypoints_crowd',
            self.sigmas,
            use_area=False)
        evaluator.params.useSegm = None

        evaluator.evaluate()
        evaluator.accumulate()
        evaluator.summarize()

        return list(zip(metric_names, evaluator.stats))
class BottomUpMhpDataset(BottomUpCocoDataset):
    """MHPv2.0 keypoint dataset for bottom-up pose estimation.

    `The Multi-Human Parsing project of Learning and Vision (LV) Group,
    National University of Singapore (NUS) is proposed to push the frontiers
    of fine-grained visual understanding of humans in crowd scene.
    <https://lv-mhp.github.io/>`__

    Raw annotations are loaded through the COCO API and passed through the
    configured pipeline, which yields a dict with the image tensors and the
    accompanying meta information.

    MHP keypoint indexes::

        0: "right ankle",
        1: "right knee",
        2: "right hip",
        3: "left hip",
        4: "left knee",
        5: "left ankle",
        6: "pelvis",
        7: "thorax",
        8: "upper neck",
        9: "head top",
        10: "right wrist",
        11: "right elbow",
        12: "right shoulder",
        13: "left shoulder",
        14: "left elbow",
        15: "left wrist",

    Args:
        ann_file (str): Path to the annotation file.
        img_prefix (str): Path to a directory where images are held.
            Default: None.
        data_cfg (dict): config
        pipeline (list[dict | callable]): A sequence of data transforms.
        test_mode (bool): Store True when building test or
            validation dataset. Default: False.
    """

    def __init__(self,
                 ann_file,
                 img_prefix,
                 data_cfg,
                 pipeline,
                 test_mode=False):
        # The two-argument super() starts the lookup *after*
        # BottomUpCocoDataset, so that class's own __init__ is bypassed and
        # the MHP-specific setup below is used in its place.
        super(BottomUpCocoDataset, self).__init__(
            ann_file, img_prefix, data_cfg, pipeline, test_mode=test_mode)

        self.ann_info['flip_index'] = [
            5, 4, 3, 2, 1, 0, 6, 7, 8, 9, 15, 14, 13, 12, 11, 10
        ]

        self.ann_info['use_different_joint_weights'] = False
        self.ann_info['joint_weights'] = np.array(
            [
                1.5, 1.2, 1., 1., 1.2, 1.5, 1., 1., 1., 1., 1.5, 1.2, 1., 1.,
                1.2, 1.5
            ],
            dtype=np.float32).reshape((self.ann_info['num_joints'], 1))

        # Adapted from COCO dataset
        # NOTE(review): the shoulder entries (1.79) and knee entries (.83)
        # differ from the COCO values used elsewhere (.79 / .87) - confirm
        # these are intentional before relying on absolute metric values.
        self.sigmas = np.array([
            .89, .83, 1.07, 1.07, .83, .89, .26, .26, .26, .26, .62, .72,
            1.79, 1.79, .72, .62
        ]) / 10.0

        # Category bookkeeping; index 0 is reserved for the background.
        coco_api = COCO(ann_file)
        self.coco = coco_api
        cat_ids = coco_api.getCatIds()
        cat_names = [entry['name'] for entry in coco_api.loadCats(cat_ids)]

        self.classes = ['__background__'] + cat_names
        self.num_classes = len(self.classes)
        self._class_to_ind = {
            name: index
            for index, name in enumerate(self.classes)
        }
        self._class_to_coco_ind = dict(zip(cat_names, cat_ids))
        self._coco_ind_to_class_ind = {
            self._class_to_coco_ind[name]: self._class_to_ind[name]
            for name in self.classes[1:]
        }

        self.img_ids = coco_api.getImgIds()
        if not test_mode:
            # Outside test mode, images without any annotation are dropped.
            self.img_ids = [
                image_id for image_id in self.img_ids
                if coco_api.getAnnIds(imgIds=image_id, iscrowd=None)
            ]
        self.num_images = len(self.img_ids)
        self.id2name, self.name2id = self._get_mapping_id_name(coco_api.imgs)
        self.dataset_name = 'mhp'

        print(f'=> num_images: {self.num_images}')

    def _do_python_keypoint_eval(self, res_file):
        """Run the COCO-API keypoint evaluation on ``res_file``.

        Returns:
            list[tuple]: ``(metric name, value)`` pairs. If the result file
            contains no detections, every metric is reported as 0 and the
            COCO API is not invoked.
        """
        metric_names = [
            'AP', 'AP .5', 'AP .75', 'AP (M)', 'AP (L)', 'AR', 'AR .5',
            'AR .75', 'AR (M)', 'AR (L)'
        ]

        with open(res_file, 'r') as handle:
            has_results = bool(json.load(handle))
        if not has_results:
            return [(name, 0) for name in metric_names]

        detections = self.coco.loadRes(res_file)
        evaluator = COCOeval(
            self.coco, detections, 'keypoints', self.sigmas, use_area=False)
        evaluator.params.useSegm = None

        evaluator.evaluate()
        evaluator.accumulate()
        evaluator.summarize()

        return list(zip(metric_names, evaluator.stats))
class BottomUpAicDataset(BottomUpCocoDataset):
    """AI Challenger (AIC) keypoint dataset for bottom-up pose estimation.

    `AI Challenger : A Large-scale Dataset for Going Deeper in Image
    Understanding <https://arxiv.org/abs/1711.06475>`__

    Raw annotations are loaded through the COCO API and passed through the
    configured pipeline, which yields a dict with the image tensors and the
    accompanying meta information.

    AIC keypoint indexes::

        0: "right_shoulder",
        1: "right_elbow",
        2: "right_wrist",
        3: "left_shoulder",
        4: "left_elbow",
        5: "left_wrist",
        6: "right_hip",
        7: "right_knee",
        8: "right_ankle",
        9: "left_hip",
        10: "left_knee",
        11: "left_ankle",
        12: "head_top",
        13: "neck"

    Args:
        ann_file (str): Path to the annotation file.
        img_prefix (str): Path to a directory where images are held.
            Default: None.
        data_cfg (dict): config
        pipeline (list[dict | callable]): A sequence of data transforms.
        test_mode (bool): Store True when building test or
            validation dataset. Default: False.
    """

    def __init__(self,
                 ann_file,
                 img_prefix,
                 data_cfg,
                 pipeline,
                 test_mode=False):
        # The two-argument super() starts the lookup *after*
        # BottomUpCocoDataset, so that class's own __init__ is bypassed and
        # the AIC-specific setup below is used in its place.
        super(BottomUpCocoDataset, self).__init__(
            ann_file, img_prefix, data_cfg, pipeline, test_mode=test_mode)

        self.ann_info['flip_index'] = [
            3, 4, 5, 0, 1, 2, 9, 10, 11, 6, 7, 8, 12, 13
        ]

        self.ann_info['use_different_joint_weights'] = False
        # Four limbs weighted (1.0, 1.2, 1.5) each, then head_top and neck.
        limb_weights = [1., 1.2, 1.5] * 4
        self.ann_info['joint_weights'] = np.array(
            limb_weights + [1., 1.],
            dtype=np.float32).reshape((self.ann_info['num_joints'], 1))

        self.sigmas = np.array([
            0.01388152, 0.01515228, 0.01057665, 0.01417709, 0.01497891,
            0.01402144, 0.03909642, 0.03686941, 0.01981803, 0.03843971,
            0.03412318, 0.02415081, 0.01291456, 0.01236173
        ])

        # Category bookkeeping; index 0 is reserved for the background.
        coco_api = COCO(ann_file)
        self.coco = coco_api
        cat_ids = coco_api.getCatIds()
        cat_names = [entry['name'] for entry in coco_api.loadCats(cat_ids)]

        self.classes = ['__background__'] + cat_names
        self.num_classes = len(self.classes)
        self._class_to_ind = {
            name: index
            for index, name in enumerate(self.classes)
        }
        self._class_to_coco_ind = dict(zip(cat_names, cat_ids))
        self._coco_ind_to_class_ind = {
            self._class_to_coco_ind[name]: self._class_to_ind[name]
            for name in self.classes[1:]
        }

        self.img_ids = coco_api.getImgIds()
        if not test_mode:
            # Outside test mode, images without any annotation are dropped.
            self.img_ids = [
                image_id for image_id in self.img_ids
                if coco_api.getAnnIds(imgIds=image_id, iscrowd=None)
            ]
        self.num_images = len(self.img_ids)
        self.id2name, self.name2id = self._get_mapping_id_name(coco_api.imgs)
        self.dataset_name = 'aic'

        print(f'=> num_images: {self.num_images}')

    def _do_python_keypoint_eval(self, res_file):
        """Run the COCO-API keypoint evaluation on ``res_file``.

        Returns:
            list[tuple]: ``(metric name, value)`` pairs. If the result file
            contains no detections, every metric is reported as 0 and the
            COCO API is not invoked.
        """
        metric_names = [
            'AP', 'AP .5', 'AP .75', 'AP (M)', 'AP (L)', 'AR', 'AR .5',
            'AR .75', 'AR (M)', 'AR (L)'
        ]

        with open(res_file, 'r') as handle:
            has_results = bool(json.load(handle))
        if not has_results:
            return [(name, 0) for name in metric_names]

        detections = self.coco.loadRes(res_file)
        evaluator = COCOeval(
            self.coco, detections, 'keypoints', self.sigmas, use_area=False)
        evaluator.params.useSegm = None

        evaluator.evaluate()
        evaluator.accumulate()
        evaluator.summarize()

        return list(zip(metric_names, evaluator.stats))
class BottomUpCocoDataset(BottomUpBaseDataset):
    """CocoDataset dataset for bottom-up pose estimation.

    The dataset loads raw features and apply specified transforms
    to return a dict containing the image tensors and other information.

    Keypoint Order::

        "keypoints": {
            0: "nose",
            1: "left_eye",
            2: "right_eye",
            3: "left_ear",
            4: "right_ear",
            5: "left_shoulder",
            6: "right_shoulder",
            7: "left_elbow",
            8: "right_elbow",
            9: "left_wrist",
            10: "right_wrist",
            11: "left_hip",
            12: "right_hip",
            13: "left_knee",
            14: "right_knee",
            15: "left_ankle",
            16: "right_ankle"
        },
        "skeleton": [
            [16,14],[14,12],[17,15],[15,13],[12,13],[6,12],
            [7,13],[6,7],[6,8],[7,9],[8,10],[9,11],[2,3],
            [1,2],[1,3],[2,4],[3,5],[4,6],[5,7]]

    Args:
        ann_file (str): Path to the annotation file.
        img_prefix (str): Path to a directory where images are held.
            Default: None.
        data_cfg (dict): config
        pipeline (list[dict | callable]): A sequence of data transforms.
        test_mode (bool): Store True when building test or
            validation dataset. Default: False.
    """

    def __init__(self,
                 ann_file,
                 img_prefix,
                 data_cfg,
                 pipeline,
                 test_mode=False):
        super().__init__(ann_file, img_prefix, data_cfg, pipeline, test_mode)

        self.ann_info['flip_index'] = [
            0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15
        ]

        self.ann_info['use_different_joint_weights'] = False
        self.ann_info['joint_weights'] = np.array(
            [
                1., 1., 1., 1., 1., 1., 1., 1.2, 1.2, 1.5, 1.5, 1., 1., 1.2,
                1.2, 1.5, 1.5
            ],
            dtype=np.float32).reshape((self.ann_info['num_joints'], 1))

        self.coco = COCO(ann_file)

        cats = [
            cat['name'] for cat in self.coco.loadCats(self.coco.getCatIds())
        ]
        self.ids = list(self.coco.imgs.keys())
        if not test_mode:
            # Outside test mode, images without any annotation are dropped.
            self.ids = [
                img_id for img_id in self.ids
                if len(self.coco.getAnnIds(imgIds=img_id, iscrowd=None)) > 0
            ]
        # Index 0 is reserved for the background class.
        self.classes = ['__background__'] + cats
        self.num_classes = len(self.classes)
        self._class_to_ind = dict(zip(self.classes, range(self.num_classes)))
        self._class_to_coco_ind = dict(zip(cats, self.coco.getCatIds()))
        self._coco_ind_to_class_ind = dict(
            (self._class_to_coco_ind[cls], self._class_to_ind[cls])
            for cls in self.classes[1:])
        self.num_images = len(self.ids)

    def __len__(self):
        """Get dataset length."""
        return len(self.ids)

    def _get_single(self, idx):
        """Get anno for a single image.

        Args:
            idx (int): image idx

        Returns:
            db_rec (dict): info for model training; the mask and the joints
            are replicated once per entry of ``ann_info['num_scales']``.
        """
        coco = self.coco
        img_id = self.ids[idx]
        ann_ids = coco.getAnnIds(imgIds=img_id)
        anno = coco.loadAnns(ann_ids)

        # The mask is computed from the full annotation list, before the
        # filtering below.
        mask = self._get_mask(anno, idx)
        anno = [
            obj for obj in anno
            if obj['iscrowd'] == 0 or obj['num_keypoints'] > 0
        ]

        joints = self._get_joints(anno)
        mask_list = [mask.copy() for _ in range(self.ann_info['num_scales'])]
        joints_list = [
            joints.copy() for _ in range(self.ann_info['num_scales'])
        ]

        db_rec = {}
        db_rec['dataset'] = 'coco'
        db_rec['image_file'] = os.path.join(
            self.img_prefix,
            coco.loadImgs(img_id)[0]['file_name'])
        db_rec['mask'] = mask_list
        db_rec['joints'] = joints_list

        return db_rec

    def _get_joints(self, anno):
        """Get joints for all people in an image.

        Returns:
            np.ndarray: Array of shape (num_people, num_joints, 3), or
            (num_people, num_joints, 4) when ``scale_aware_sigma`` is set,
            in which case the last channel holds the per-person sigma.
        """
        num_people = len(anno)

        if self.ann_info['scale_aware_sigma']:
            joints = np.zeros((num_people, self.ann_info['num_joints'], 4))
        else:
            joints = np.zeros((num_people, self.ann_info['num_joints'], 3))

        for i, obj in enumerate(anno):
            joints[i, :self.ann_info['num_joints'], :3] = \
                np.array(obj['keypoints']).reshape([-1, 3])
            if self.ann_info['scale_aware_sigma']:
                # get person box
                box = obj['bbox']
                size = max(box[2], box[3])
                sigma = size / self.base_size * self.base_sigma
                if self.int_sigma:
                    sigma = int(np.ceil(sigma))
                assert sigma > 0, sigma
                joints[i, :, 3] = sigma

        return joints

    def _get_mask(self, anno, idx):
        """Get ignore masks to mask out losses.

        Returns:
            np.ndarray: Boolean (height, width) array that is False wherever
            a crowd region or an instance without keypoints was rasterised.
        """
        coco = self.coco
        img_info = coco.loadImgs(self.ids[idx])[0]

        m = np.zeros((img_info['height'], img_info['width']), dtype=np.float32)

        for obj in anno:
            if obj['iscrowd']:
                rle = xtcocotools.mask.frPyObjects(obj['segmentation'],
                                                   img_info['height'],
                                                   img_info['width'])
                m += xtcocotools.mask.decode(rle)
            elif obj['num_keypoints'] == 0:
                rles = xtcocotools.mask.frPyObjects(obj['segmentation'],
                                                    img_info['height'],
                                                    img_info['width'])
                for rle in rles:
                    m += xtcocotools.mask.decode(rle)

        return m < 0.5

    def evaluate(self, outputs, res_folder, metric='mAP', **kwargs):
        """Evaluate coco keypoint results. The pose prediction results will
        be saved in `${res_folder}/result_keypoints.json`.

        Note:
            num_people: P
            num_keypoints: K

        Args:
            outputs (list(preds, scores, image_path)):Output results.
                preds (list[images x np.ndarray(P, K, 3+tag_num)]):
                    Pose predictions for all people in images.
                scores (list[images x P]):
                image_path (list[str]): For example, [ 'c','o','c','o',
                    '/','i','m','a','g','e','s','/', 'v','a', 'l', '2', '0',
                    '1', '7', '/', '0', '0', '0', '0', '0', '0', '3', '9',
                    '7', '1', '3', '3', '.', 'j', 'p', 'g']
            res_folder (str): Path of directory to save the results.
            metric (str): Metric to be performed. Defaults: 'mAP'.

        Returns:
            name_value (dict): Evaluation results for evaluation metric.
        """
        assert metric == 'mAP'

        res_file = os.path.join(res_folder, 'result_keypoints.json')

        preds = []
        scores = []
        image_paths = []

        for _preds, _scores, _image_path in outputs:
            preds.append(_preds)
            scores.append(_scores)
            image_paths.append(''.join(_image_path))

        kpts = defaultdict(list)
        # iterate over images
        for idx, _preds in enumerate(preds):
            file_name = image_paths[idx]
            # iterate over people
            for idx_person, kpt in enumerate(_preds):
                # The image id is the 12 characters in front of the 4-char
                # extension, i.e. COCO-style names ('000000397133.jpg').
                image_id = int(file_name[-16:-4])
                # use bbox area
                area = (np.max(kpt[:, 0]) - np.min(kpt[:, 0])) * (
                    np.max(kpt[:, 1]) - np.min(kpt[:, 1]))

                kpts[image_id].append({
                    'keypoints': kpt[:, 0:3],
                    'score': scores[idx][idx_person],
                    'tags': kpt[:, 3],
                    'image_id': image_id,
                    'area': area
                })

        # No NMS is applied: every image keeps all of its predictions.
        oks_nmsed_kpts = list(kpts.values())

        self._write_coco_keypoint_results(oks_nmsed_kpts, res_file)

        info_str = self._do_python_keypoint_eval(res_file)
        name_value = OrderedDict(info_str)
        return name_value

    def _write_coco_keypoint_results(self, keypoints, res_file):
        """Write results into a json file.

        Only the first non-background category is exported.
        """
        data_pack = [{
            'cat_id': self._class_to_coco_ind[cls],
            'cls_ind': cls_ind,
            'cls': cls,
            'ann_type': 'keypoints',
            'keypoints': keypoints
        } for cls_ind, cls in enumerate(self.classes)
                     if not cls == '__background__']

        results = self._coco_keypoint_results_one_category_kernel(data_pack[0])

        with open(res_file, 'w') as f:
            json.dump(results, f, sort_keys=True, indent=4)

    def _coco_keypoint_results_one_category_kernel(self, data_pack):
        """Get coco keypoint results.

        All numeric values are converted to built-in Python numbers so the
        result can be serialised with ``json`` whatever the dtype of the
        predictions (numpy float32 scalars are not JSON serialisable).
        """
        cat_id = data_pack['cat_id']
        keypoints = data_pack['keypoints']
        cat_results = []

        for img_kpts in keypoints:
            if len(img_kpts) == 0:
                continue

            _key_points = np.array(
                [img_kpt['keypoints'] for img_kpt in img_kpts])
            key_points = _key_points.reshape(-1,
                                             self.ann_info['num_joints'] * 3)

            for img_kpt, key_point in zip(img_kpts, key_points):
                kpt = key_point.reshape((self.ann_info['num_joints'], 3))
                # Tight box around the keypoints: column-wise min / max.
                left_top = np.amin(kpt, axis=0)
                right_bottom = np.amax(kpt, axis=0)

                w = right_bottom[0] - left_top[0]
                h = right_bottom[1] - left_top[1]

                cat_results.append({
                    'image_id': img_kpt['image_id'],
                    'category_id': cat_id,
                    'keypoints': key_point.tolist(),
                    'score': float(img_kpt['score']),
                    'bbox': [
                        float(left_top[0]),
                        float(left_top[1]),
                        float(w),
                        float(h)
                    ]
                })

        return cat_results

    def _do_python_keypoint_eval(self, res_file):
        """Keypoint evaluation using COCOAPI.

        Args:
            res_file (str): Path of the json file holding the results.

        Returns:
            list[tuple]: ``(metric name, value)`` pairs. When the result file
            holds no detections every metric is reported as 0 instead of
            handing an empty list to ``loadRes``.
        """
        stats_names = [
            'AP', 'AP .5', 'AP .75', 'AP (M)', 'AP (L)', 'AR', 'AR .5',
            'AR .75', 'AR (M)', 'AR (L)'
        ]

        # An empty result list cannot be loaded by the COCO API, so it is
        # short-circuited here.
        with open(res_file, 'r') as f:
            if not json.load(f):
                return [(name, 0) for name in stats_names]

        coco_dt = self.coco.loadRes(res_file)
        coco_eval = COCOeval(self.coco, coco_dt, 'keypoints')
        coco_eval.params.useSegm = None
        coco_eval.evaluate()
        coco_eval.accumulate()
        coco_eval.summarize()

        info_str = []
        for ind, name in enumerate(stats_names):
            info_str.append((name, coco_eval.stats[ind]))

        return info_str
# Stand-alone example: score keypoint predictions against a COCO-format
# ground-truth file with the xtcocotools evaluator.
import numpy as np
from xtcocotools.coco import COCO
from xtcocotools.cocoeval import COCOeval

# Input files (paths are relative to the working directory).
gt_file = '../annotations/example_coco_val.json'
preds = '../annotations/example_coco_preds.json'

# One OKS sigma per keypoint (17 entries), handed to COCOeval below.
sigmas = np.array([
    .26, .25, .25, .35, .35, .79, .79, .72, .72, .62, .62,
    1.07, 1.07, .87, .87, .89, .89,
]) / 10.0

cocoGt = COCO(gt_file)
cocoDt = cocoGt.loadRes(preds)

# Run the three evaluation stages; summarize() prints the metric table.
cocoEval = COCOeval(cocoGt, cocoDt, 'keypoints', sigmas, use_area=True)
cocoEval.evaluate()
cocoEval.accumulate()
cocoEval.summarize()
class BottomUpCowaCarDataset(BottomUpCocoDataset):
    """CowaCar dataset for bottom-up vehicle grounding point estimation.

    Annotations are read through the COCO API. The dataset loads raw
    features and applies the configured transforms to return a dict
    containing the image tensors and other information.

    CowaCar keypoint indexes::

        0: 'left_back',
        1: 'right_back',
        2: 'left_front',
        3: 'right_front'

    Args:
        ann_file (str): Path to the annotation file.
        img_prefix (str): Path to a directory where images are held.
            Default: None.
        data_cfg (dict): config
        pipeline (list[dict | callable]): A sequence of data transforms.
        test_mode (bool): Store True when building test or
            validation dataset. Default: False.
    """

    def __init__(self,
                 ann_file,
                 img_prefix,
                 data_cfg,
                 pipeline,
                 test_mode=False):
        # The lookup starts *after* BottomUpCocoDataset in the MRO, so that
        # class's own __init__ is skipped and every attribute is set here.
        super(BottomUpCocoDataset, self).__init__(
            ann_file, img_prefix, data_cfg, pipeline, test_mode=test_mode)

        # Left/right swap: back pair (0 <-> 1) and front pair (2 <-> 3).
        self.ann_info['flip_index'] = [1, 0, 3, 2]

        self.ann_info['use_different_joint_weights'] = False
        uniform_weights = np.array([1., 1., 1., 1.], dtype=np.float32)
        self.ann_info['joint_weights'] = uniform_weights.reshape(
            (self.ann_info['num_joints'], 1))

        # Same sigma for each of the four keypoints.
        self.sigmas = np.array([1., 1., 1., 1.]) / 10.0

        self.coco = COCO(ann_file)

        # Category bookkeeping: class index 0 is reserved for background.
        coco_cat_ids = self.coco.getCatIds()
        cat_names = [
            cat['name'] for cat in self.coco.loadCats(coco_cat_ids)
        ]
        self.classes = ['__background__'] + cat_names
        self.num_classes = len(self.classes)
        self._class_to_ind = {
            cls_name: cls_ind
            for cls_ind, cls_name in enumerate(self.classes)
        }
        self._class_to_coco_ind = dict(zip(cat_names, coco_cat_ids))
        self._coco_ind_to_class_ind = {
            self._class_to_coco_ind[cls_name]: self._class_to_ind[cls_name]
            for cls_name in self.classes[1:]
        }

        # Outside test mode, images without any annotation are dropped.
        img_ids = self.coco.getImgIds()
        if not test_mode:
            img_ids = [
                img_id for img_id in img_ids
                if len(self.coco.getAnnIds(imgIds=img_id, iscrowd=None)) > 0
            ]
        self.img_ids = img_ids
        self.num_images = len(self.img_ids)

        self.id2name, self.name2id = self._get_mapping_id_name(
            self.coco.imgs)
        self.dataset_name = 'cowacar'

        print(f'=> num_images: {self.num_images}')

    def _do_python_keypoint_eval(self, res_file):
        """Keypoint evaluation using COCOAPI.

        Args:
            res_file (str): Path of the json file holding the predictions.

        Returns:
            list[tuple]: ``(metric name, value)`` pairs built by zipping the
            nine metric names below with ``COCOeval.stats``.
        """
        detections = self.coco.loadRes(res_file)
        evaluator = COCOeval(
            self.coco,
            detections,
            'keypoints_crowd',
            self.sigmas,
            use_area=False)
        evaluator.params.useSegm = None

        evaluator.evaluate()
        evaluator.accumulate()
        evaluator.summarize()

        metric_names = [
            'AP', 'AP .5', 'AP .75', 'AR', 'AR .5', 'AR .75', 'AP(E)',
            'AP(M)', 'AP(H)'
        ]
        return [
            (metric, value)
            for metric, value in zip(metric_names, evaluator.stats)
        ]
def evaluate_mAP(res_file,
                 ann_type='bbox',
                 ann_file='./data/coco/annotations/'
                          'person_keypoints_val2017.json',
                 silence=True):
    """Evaluate mAP result for coco dataset.

    Parameters
    ----------
    res_file: str
        Path to result json file.
    ann_type: str
        annotation type, including: `bbox`, `segm`, `keypoints`.
    ann_file: str
        Path to groundtruth file.
    silence: bool
        True: disable running log.

    Returns
    -------
    dict or float
        If the first entry of ``COCOeval.stats`` is a dict, a dict with the
        keys ``body``, ``face``, ``hand`` and ``fullbody`` where
        ``result[part] = stats[i][part][0]`` (``i`` being the position of
        ``part`` in that list). Otherwise ``stats[0]``, i.e. the value
        reported under the name ``AP``.
    """

    class NullWriter(object):
        """File-like sink that discards everything written to it."""

        def write(self, arg):
            pass

        def flush(self):
            # Present so that callers flushing sys.stdout while it is
            # replaced do not fail with AttributeError.
            pass

    oldstdout = sys.stdout
    if silence:
        sys.stdout = NullWriter()  # disable output
    try:
        cocoGt = COCO(ann_file)
        cocoDt = cocoGt.loadRes(res_file)

        cocoEval = COCOeval(cocoGt, cocoDt, ann_type)
        cocoEval.evaluate()
        cocoEval.accumulate()
        cocoEval.summarize()
    finally:
        # Restore stdout even when loading or evaluation raises; otherwise
        # the whole process would stay silenced after a failure.
        if silence:
            sys.stdout = oldstdout  # enable output

    stats = cocoEval.stats
    if isinstance(stats[0], dict):
        parts = ['body', 'face', 'hand', 'fullbody']
        return {part: stats[i][part][0] for i, part in enumerate(parts)}

    # Position 0 of the stats vector is the entry named 'AP'.
    return stats[0]
class BottomUpCocoWholeBodyDataset(BottomUpCocoDataset):
    """CocoWholeBodyDataset dataset for bottom-up pose estimation.

    `Whole-Body Human Pose Estimation in the Wild' ECCV'2020
    More details can be found in the `paper
    <https://arxiv.org/abs/2007.11858>`__ .

    The dataset loads raw features and applies the configured transforms
    to return a dict containing the image tensors and other information.

    In total, we have 133 keypoints for wholebody pose estimation.

    COCO-WholeBody keypoint indexes::

        0-16: 17 body keypoints
        17-22: 6 foot keypoints
        23-90: 68 face keypoints
        91-132: 42 hand keypoints

    Args:
        ann_file (str): Path to the annotation file.
        img_prefix (str): Path to a directory where images are held.
            Default: None.
        data_cfg (dict): config
        pipeline (list[dict | callable]): A sequence of data transforms.
        test_mode (bool): Store True when building test or
            validation dataset. Default: False.
    """

    def __init__(self,
                 ann_file,
                 img_prefix,
                 data_cfg,
                 pipeline,
                 test_mode=False):
        # The lookup starts *after* BottomUpCocoDataset in the MRO, so that
        # class's own __init__ is skipped and every attribute is set here.
        super(BottomUpCocoDataset, self).__init__(ann_file, img_prefix,
                                                  data_cfg, pipeline,
                                                  test_mode)

        flip_pairs = self._make_flip_pairs()
        self.ann_info['flip_pairs'] = flip_pairs
        self.ann_info['flip_index'] = self.get_flip_index_from_flip_pairs(
            self.ann_info['flip_pairs'])

        self.ann_info['use_different_joint_weights'] = False
        self.ann_info['joint_weights'] = np.ones(
            (self.ann_info['num_joints'], 1), dtype=np.float32)

        # Number of keypoints in each body part, in annotation order.
        self.body_num = 17
        self.foot_num = 6
        self.face_num = 68
        self.left_hand_num = 21
        self.right_hand_num = 21

        # Per-part OKS sigmas, see
        # 'https://github.com/jin-s13/COCO-WholeBody/blob/master/'
        # 'evaluation/myeval_wholebody.py#L170'
        self.sigmas_body = [
            0.026, 0.025, 0.025, 0.035, 0.035, 0.079, 0.079, 0.072, 0.072,
            0.062, 0.062, 0.107, 0.107, 0.087, 0.087, 0.089, 0.089
        ]
        self.sigmas_foot = [0.068, 0.066, 0.066, 0.092, 0.094, 0.094]
        self.sigmas_face = [
            0.042, 0.043, 0.044, 0.043, 0.040, 0.035, 0.031, 0.025, 0.020,
            0.023, 0.029, 0.032, 0.037, 0.038, 0.043, 0.041, 0.045, 0.013,
            0.012, 0.011, 0.011, 0.012, 0.012, 0.011, 0.011, 0.013, 0.015,
            0.009, 0.007, 0.007, 0.007, 0.012, 0.009, 0.008, 0.016, 0.010,
            0.017, 0.011, 0.009, 0.011, 0.009, 0.007, 0.013, 0.008, 0.011,
            0.012, 0.010, 0.034, 0.008, 0.008, 0.009, 0.008, 0.008, 0.007,
            0.010, 0.008, 0.009, 0.009, 0.009, 0.007, 0.007, 0.008, 0.011,
            0.008, 0.008, 0.008, 0.01, 0.008
        ]
        self.sigmas_lefthand = [
            0.029, 0.022, 0.035, 0.037, 0.047, 0.026, 0.025, 0.024, 0.035,
            0.018, 0.024, 0.022, 0.026, 0.017, 0.021, 0.021, 0.032, 0.02,
            0.019, 0.022, 0.031
        ]
        # The right hand uses the same values as the left hand; a separate
        # list object is kept so the two attributes stay independent.
        self.sigmas_righthand = list(self.sigmas_lefthand)

        self.sigmas_wholebody = (
            self.sigmas_body + self.sigmas_foot + self.sigmas_face +
            self.sigmas_lefthand + self.sigmas_righthand)
        self.sigmas = np.array(self.sigmas_wholebody)

        self.coco = COCO(ann_file)

        # Category bookkeeping: class index 0 is reserved for background.
        coco_cat_ids = self.coco.getCatIds()
        cat_names = [
            cat['name'] for cat in self.coco.loadCats(coco_cat_ids)
        ]
        self.classes = ['__background__'] + cat_names
        self.num_classes = len(self.classes)
        self._class_to_ind = {
            cls_name: cls_ind
            for cls_ind, cls_name in enumerate(self.classes)
        }
        self._class_to_coco_ind = dict(zip(cat_names, coco_cat_ids))
        self._coco_ind_to_class_ind = {
            self._class_to_coco_ind[cls_name]: self._class_to_ind[cls_name]
            for cls_name in self.classes[1:]
        }

        # Outside test mode, images without any annotation are dropped.
        img_ids = self.coco.getImgIds()
        if not test_mode:
            img_ids = [
                img_id for img_id in img_ids
                if len(self.coco.getAnnIds(imgIds=img_id, iscrowd=None)) > 0
            ]
        self.img_ids = img_ids
        self.num_images = len(self.img_ids)

        self.id2name, self.name2id = self._get_mapping_id_name(
            self.coco.imgs)
        self.dataset_name = 'coco_wholebody'

        print(f'=> num_images: {self.num_images}')

    @staticmethod
    def _make_flip_pairs():
        """Build the list of ``[index_a, index_b]`` keypoint flip pairs.

        Returns:
            list[list[int]]: body, foot, face and hand pairs, concatenated
            in that order.
        """
        # Body: consecutive pairs [1, 2], [3, 4], ..., [15, 16].
        body = [[idx, idx + 1] for idx in range(1, 17, 2)]
        # Foot: [17, 20], [18, 21], [19, 22].
        foot = [[idx, idx + 3] for idx in range(17, 20)]
        # Face pairs follow no single arithmetic rule, so they are listed.
        face = [[23, 39], [24, 38], [25, 37], [26, 36], [27, 35], [28, 34],
                [29, 33], [30, 32], [40, 49], [41, 48], [42, 47], [43, 46],
                [44, 45], [54, 58], [55, 57], [59, 68], [60, 67], [61, 66],
                [62, 65], [63, 70], [64, 69], [71, 77], [72, 76], [73, 75],
                [78, 82], [79, 81], [83, 87], [84, 86], [88, 90]]
        # Hands: [91, 112], [92, 113], ..., [111, 132].
        hand = [[idx, idx + 21] for idx in range(91, 112)]

        return body + foot + face + hand

    def _get_joints(self, anno):
        """Get joints for all people in an image.

        Args:
            anno (list[dict]): Annotation dicts; each one must provide
                'keypoints', 'foot_kpts', 'face_kpts', 'lefthand_kpts' and
                'righthand_kpts' (and 'bbox' when scale-aware sigma is on).

        Returns:
            np.ndarray: float32 array of shape ``(len(anno), num_joints, C)``
            with ``C = 4`` when ``ann_info['scale_aware_sigma']`` is truthy
            (last channel holds the per-person sigma) and ``C = 3``
            otherwise.
        """
        num_joints = self.ann_info['num_joints']
        scale_aware = self.ann_info['scale_aware_sigma']
        channels = 4 if scale_aware else 3
        joints = np.zeros((len(anno), num_joints, channels),
                          dtype=np.float32)

        for person_idx, obj in enumerate(anno):
            # Concatenate the per-part flat lists, then view as (-1, 3).
            flat = (
                obj['keypoints'] + obj['foot_kpts'] + obj['face_kpts'] +
                obj['lefthand_kpts'] + obj['righthand_kpts'])
            joints[person_idx, :num_joints, :3] = \
                np.array(flat).reshape(-1, 3)

            if scale_aware:
                # get person box
                box = obj['bbox']
                longest_side = max(box[2], box[3])
                sigma = longest_side / self.base_size * self.base_sigma
                if self.int_sigma:
                    sigma = int(np.ceil(sigma))
                assert sigma > 0, sigma
                joints[person_idx, :, 3] = sigma

        return joints

    def _coco_keypoint_results_one_category_kernel(self, data_pack):
        """Get coco keypoint results.

        Args:
            data_pack (dict): Must provide 'cat_id' and 'keypoints' (a list
                holding, per image, a list of per-person dicts).

        Returns:
            list[dict]: One result dict per person, with the flat keypoint
            vector split into body / foot / face / left-hand / right-hand
            fields.
        """
        cat_id = data_pack['cat_id']
        keypoints = data_pack['keypoints']
        num_joints = self.ann_info['num_joints']

        # Offsets into the flat (x, y, score) vector where each part starts.
        cuts = np.cumsum([
            0, self.body_num, self.foot_num, self.face_num,
            self.left_hand_num, self.right_hand_num
        ]) * 3
        part_fields = ('keypoints', 'foot_kpts', 'face_kpts',
                       'lefthand_kpts', 'righthand_kpts')

        cat_results = []
        for img_kpts in keypoints:
            if len(img_kpts) == 0:
                continue

            stacked = np.array(
                [person['keypoints'] for person in img_kpts])
            flat_rows = stacked.reshape(-1, num_joints * 3)

            for person, flat in zip(img_kpts, flat_rows):
                per_joint = flat.reshape((num_joints, 3))
                left_top = np.amin(per_joint, axis=0)
                right_bottom = np.amax(per_joint, axis=0)
                w = right_bottom[0] - left_top[0]
                h = right_bottom[1] - left_top[1]

                entry = {
                    'image_id': person['image_id'],
                    'category_id': cat_id,
                }
                for field, start, stop in zip(part_fields, cuts[:-1],
                                              cuts[1:]):
                    entry[field] = flat[start:stop].tolist()
                entry['score'] = person['score']
                entry['bbox'] = [left_top[0], left_top[1], w, h]
                cat_results.append(entry)

        return cat_results

    def _do_python_keypoint_eval(self, res_file):
        """Keypoint evaluation using COCOAPI.

        Runs one evaluation per part (body, foot, face, left hand, right
        hand) and finally one for the whole body; each run prints its own
        summary.

        Args:
            res_file (str): Path of the json file holding the predictions.

        Returns:
            list[tuple]: ``(metric name, value)`` pairs taken from the last
            (whole-body) evaluation only.
        """
        coco_det = self.coco.loadRes(res_file)

        evaluations = (
            ('keypoints_body', self.sigmas_body),
            ('keypoints_foot', self.sigmas_foot),
            ('keypoints_face', self.sigmas_face),
            ('keypoints_lefthand', self.sigmas_lefthand),
            ('keypoints_righthand', self.sigmas_righthand),
            ('keypoints_wholebody', self.sigmas_wholebody),
        )
        for iou_type, part_sigmas in evaluations:
            coco_eval = COCOeval(
                self.coco,
                coco_det,
                iou_type,
                np.array(part_sigmas),
                use_area=True)
            coco_eval.params.useSegm = None
            coco_eval.evaluate()
            coco_eval.accumulate()
            coco_eval.summarize()

        stats_names = [
            'AP', 'AP .5', 'AP .75', 'AP (M)', 'AP (L)', 'AR', 'AR .5',
            'AR .75', 'AR (M)', 'AR (L)'
        ]

        # coco_eval still refers to the whole-body evaluator here.
        return list(zip(stats_names, coco_eval.stats))
def cocoapi_eval(jsonfile,
                 style,
                 coco_gt=None,
                 anno_file=None,
                 max_dets=(100, 300, 1000),
                 classwise=False,
                 sigmas=None,
                 use_area=True):
    """Run COCO-API evaluation on a result file and return the stats.

    Args:
        jsonfile (str): Evaluation json file, eg: bbox.json, mask.json.
        style (str): COCOeval style, can be `bbox` , `segm` , `proposal`,
            `keypoints` and `keypoints_crowd`.
        coco_gt (COCO | None): Already-loaded ground-truth COCO API object.
            When None, it is built from `anno_file`,
            eg: coco_gt = COCO(anno_file)
        anno_file (str | None): COCO annotations file; only read when
            `coco_gt` is None.
        max_dets (tuple): COCO evaluation maxDets; only applied when
            `style` is `proposal`.
        classwise (bool): Whether per-category AP and draw P-R Curve or not.
        sigmas (nparray): keypoint labelling sigmas; only used when `style`
            is `keypoints_crowd`.
        use_area (bool): If gt annotations (eg. CrowdPose, AIC)
            do not have 'area', please set use_area=False. Only used when
            `style` is `keypoints_crowd`.

    Returns:
        The `stats` attribute of the COCOeval object after `summarize()`.

    Raises:
        AssertionError: If both `coco_gt` and `anno_file` are None.
        ImportError: If `classwise` is set and `terminaltables` is missing.
    """
    assert coco_gt is not None or anno_file is not None

    if style == 'keypoints_crowd':
        # please install xtcocotools==1.6
        from xtcocotools.coco import COCO
        from xtcocotools.cocoeval import COCOeval
    else:
        from pycocotools.coco import COCO
        from pycocotools.cocoeval import COCOeval

    if coco_gt is None:
        coco_gt = COCO(anno_file)
    logger.info("Start evaluate...")
    coco_dt = coco_gt.loadRes(jsonfile)
    if style == 'proposal':
        # Proposals are evaluated as class-agnostic boxes.
        coco_eval = COCOeval(coco_gt, coco_dt, 'bbox')
        coco_eval.params.useCats = 0
        coco_eval.params.maxDets = list(max_dets)
    elif style == 'keypoints_crowd':
        coco_eval = COCOeval(coco_gt, coco_dt, style, sigmas, use_area)
    else:
        coco_eval = COCOeval(coco_gt, coco_dt, style)
    coco_eval.evaluate()
    coco_eval.accumulate()
    coco_eval.summarize()

    if classwise:
        # Compute per-category AP and PR curve
        try:
            from terminaltables import AsciiTable
        except ImportError:
            logger.error(
                'terminaltables not found, plaese install terminaltables. '
                'for example: `pip install terminaltables`.')
            raise  # bare raise keeps the original traceback
        precisions = coco_eval.eval['precision']
        cat_ids = coco_gt.getCatIds()
        # precision: (iou, recall, cls, area range, max dets)
        assert len(cat_ids) == precisions.shape[2]

        # The PR curve reads max-dets slot 2. Evaluators configured with
        # fewer than three maxDets entries (e.g. keypoint evaluation) have
        # no such slot, so clamp to the last available one instead of
        # raising IndexError. With three or more slots this is still 2.
        pr_max_det_idx = min(2, precisions.shape[4] - 1)

        results_per_category = []
        for idx, catId in enumerate(cat_ids):
            # area range index 0: all area ranges
            # max dets index -1: typically 100 per image
            nm = coco_gt.loadCats(catId)[0]
            precision = precisions[:, :, idx, 0, -1]
            # -1 marks entries for which no precision was computed.
            precision = precision[precision > -1]
            if precision.size:
                ap = np.mean(precision)
            else:
                ap = float('nan')
            results_per_category.append(
                (str(nm["name"]), '{:0.3f}'.format(float(ap))))

            # PR curve at the first IoU threshold, all area ranges.
            pr_array = precisions[0, :, idx, 0, pr_max_det_idx]
            recall_array = np.arange(0.0, 1.01, 0.01)
            draw_pr_curve(
                pr_array,
                recall_array,
                out_dir=style + '_pr_curve',
                file_name='{}_precision_recall_curve.jpg'.format(
                    nm["name"]))

        # Lay the (name, AP) pairs out in at most three column pairs.
        num_columns = min(6, len(results_per_category) * 2)
        results_flatten = list(itertools.chain(*results_per_category))
        headers = ['category', 'AP'] * (num_columns // 2)
        results_2d = itertools.zip_longest(
            *[results_flatten[i::num_columns] for i in range(num_columns)])
        table_data = [headers]
        table_data.extend(results_2d)
        table = AsciiTable(table_data)
        logger.info('Per-category of {} AP: \n{}'.format(style, table.table))
        logger.info("per-category PR curve has output to {} folder.".format(
            style + '_pr_curve'))
    # flush coco evaluation result
    sys.stdout.flush()
    return coco_eval.stats
class TopDownForkliftDataset(TopDownBaseDataset):
    """Forklift keypoint dataset (COCO format) for top-down pose estimation.

    Annotations are read through the COCO API. The dataset loads raw
    features and applies the configured transforms to return a dict
    containing the image tensors and other information.

    This class is configured for six keypoints (indexes 0-5): the flip
    pairs are ``[0, 1]``, ``[2, 3]`` and ``[4, 5]``, and all six share the
    same joint weight and the same OKS sigma.

    Args:
        ann_file (str): Path to the annotation file.
        img_prefix (str): Path to a directory where images are held.
            Default: None.
        data_cfg (dict): config
        pipeline (list[dict | callable]): A sequence of data transforms.
        test_mode (bool): Store True when building test or
            validation dataset. Default: False.
    """

    def __init__(self,
                 ann_file,
                 img_prefix,
                 data_cfg,
                 pipeline,
                 test_mode=False):
        super().__init__(
            ann_file, img_prefix, data_cfg, pipeline, test_mode=test_mode)

        # Detection / post-processing options taken from the data config.
        self.use_gt_bbox = data_cfg['use_gt_bbox']
        self.bbox_file = data_cfg['bbox_file']
        self.det_bbox_thr = data_cfg.get('det_bbox_thr', 0.0)
        if 'image_thr' in data_cfg:
            # Legacy key: still honoured, and it overrides det_bbox_thr.
            warnings.warn(
                'image_thr is deprecated, please use det_bbox_thr instead',
                DeprecationWarning)
            self.det_bbox_thr = data_cfg['image_thr']
        self.use_nms = data_cfg.get('use_nms', True)
        self.soft_nms = data_cfg['soft_nms']
        self.nms_thr = data_cfg['nms_thr']
        self.oks_thr = data_cfg['oks_thr']
        self.vis_thr = data_cfg['vis_thr']

        # Keypoint layout: six points, all assigned to the "upper body".
        self.ann_info['flip_pairs'] = [[0, 1], [2, 3], [4, 5]]
        self.ann_info['upper_body_ids'] = (0, 1, 2, 3, 4, 5)
        self.ann_info['lower_body_ids'] = ()

        self.ann_info['use_different_joint_weights'] = False
        unit_weights = np.array([1., 1., 1., 1., 1., 1.], dtype=np.float32)
        self.ann_info['joint_weights'] = unit_weights.reshape(
            (self.ann_info['num_joints'], 1))

        # Same sigma (0.1) for each of the six keypoints.
        self.sigmas = np.array([.1, .1, .1, .1, .1, .1]) / 1.0

        self.coco = COCO(ann_file)

        # Category bookkeeping: class index 0 is reserved for background.
        coco_cat_ids = self.coco.getCatIds()
        cat_names = [
            cat['name'] for cat in self.coco.loadCats(coco_cat_ids)
        ]
        self.classes = ['__background__'] + cat_names
        self.num_classes = len(self.classes)
        self._class_to_ind = {
            cls_name: cls_ind
            for cls_ind, cls_name in enumerate(self.classes)
        }
        self._class_to_coco_ind = dict(zip(cat_names, coco_cat_ids))
        self._coco_ind_to_class_ind = {
            self._class_to_coco_ind[cls_name]: self._class_to_ind[cls_name]
            for cls_name in self.classes[1:]
        }

        self.img_ids = self.coco.getImgIds()
        self.num_images = len(self.img_ids)
        self.id2name, self.name2id = self._get_mapping_id_name(
            self.coco.imgs)
        self.dataset_name = 'coco'

        self.db = self._get_db()

        print(f'=> num_images: {self.num_images}')
        print(f'=> load {len(self.db)} samples')

    @staticmethod
    def _get_mapping_id_name(imgs):
        """Build both directions of the image id / file name mapping.

        Args:
            imgs (dict): dict of image info, keyed by image id; every value
                must provide 'file_name'.

        Returns:
            tuple: Image name & id mapping dicts.

            - id2name (dict): Mapping image id to name.
            - name2id (dict): Mapping image name to id.
        """
        id2name = {
            image_id: image['file_name']
            for image_id, image in imgs.items()
        }
        name2id = {
            file_name: image_id
            for image_id, file_name in id2name.items()
        }
        return id2name, name2id

    def _get_db(self):
        """Load dataset.

        Detection results are only used in test mode when ground-truth
        boxes are not requested; every other case uses the annotations.
        """
        if self.test_mode and not self.use_gt_bbox:
            # use bbox from detection
            return self._load_coco_person_detection_results()
        # use ground truth bbox
        return self._load_coco_keypoint_annotations()

    def _load_coco_keypoint_annotations(self):
        """Ground truth bbox and keypoints, for all images, as a flat list."""
        return [
            entry for img_id in self.img_ids
            for entry in self._load_coco_keypoint_annotation_kernel(img_id)
        ]

    def _load_coco_keypoint_annotation_kernel(self, img_id):
        """load annotation from COCOAPI.

        Note:
            bbox:[x1, y1, w, h]
        Args:
            img_id: coco image id
        Returns:
            list[dict]: one db entry per usable annotation of the image
        """
        img_info = self.coco.loadImgs(img_id)[0]
        img_w = img_info['width']
        img_h = img_info['height']
        num_joints = self.ann_info['num_joints']

        ann_ids = self.coco.getAnnIds(imgIds=img_id, iscrowd=False)
        annotations = self.coco.loadAnns(ann_ids)

        # sanitize bboxes: clip to the image and drop degenerate boxes
        usable = []
        for obj in annotations:
            if 'bbox' not in obj:
                continue
            x, y, w, h = obj['bbox']
            x1 = max(0, x)
            y1 = max(0, y)
            x2 = min(img_w - 1, x1 + max(0, w - 1))
            y2 = min(img_h - 1, y1 + max(0, h - 1))
            area_ok = 'area' not in obj or obj['area'] > 0
            if area_ok and x2 > x1 and y2 > y1:
                obj['clean_bbox'] = [x1, y1, x2 - x1, y2 - y1]
                usable.append(obj)

        rec = []
        for obj in usable:
            # Skip annotations that carry no keypoint information.
            if 'keypoints' not in obj:
                continue
            if max(obj['keypoints']) == 0:
                continue
            if 'num_keypoints' in obj and obj['num_keypoints'] == 0:
                continue

            joints_3d = np.zeros((num_joints, 3), dtype=np.float32)
            joints_3d_visible = np.zeros((num_joints, 3), dtype=np.float32)

            kpt_rows = np.array(obj['keypoints']).reshape(-1, 3)
            joints_3d[:, :2] = kpt_rows[:, :2]
            # Third value of each keypoint is clipped to at most 1 and
            # copied into both the x and y visibility channels.
            joints_3d_visible[:, :2] = np.minimum(1, kpt_rows[:, 2:3])

            clean_bbox = obj['clean_bbox'][:4]
            center, scale = self._xywh2cs(*clean_bbox)

            image_file = os.path.join(self.img_prefix, self.id2name[img_id])
            rec.append({
                'image_file': image_file,
                'center': center,
                'scale': scale,
                'bbox': obj['clean_bbox'][:4],
                'rotation': 0,
                'joints_3d': joints_3d,
                'joints_3d_visible': joints_3d_visible,
                'dataset': self.dataset_name,
                'bbox_score': 1,
                # Running index within this image: entries appended so far.
                'bbox_id': len(rec)
            })

        return rec

    def _xywh2cs(self, x, y, w, h):
        """This encodes bbox(x,y,w,h) into (center, scale)

        Outside test mode the center is randomly shifted with probability
        0.3.

        Args:
            x, y, w, h

        Returns:
            tuple: A tuple containing center and scale.

            - center (np.ndarray[float32](2,)): center of the bbox (x, y).
            - scale (np.ndarray[float32](2,)): scale of the bbox w & h.
        """
        image_size = self.ann_info['image_size']
        aspect_ratio = image_size[0] / image_size[1]

        center = np.array([x + w * 0.5, y + h * 0.5], dtype=np.float32)

        if (not self.test_mode) and np.random.rand() < 0.3:
            center += 0.4 * (np.random.rand(2) - 0.5) * [w, h]

        # Grow the shorter side so the box matches the input aspect ratio.
        if w > aspect_ratio * h:
            h = w * 1.0 / aspect_ratio
        elif w < aspect_ratio * h:
            w = h * aspect_ratio

        # pixel std is 200.0
        pixel_std = 200.0
        # padding (factor 1.25) to include proper amount of context
        scale = np.array([w / pixel_std, h / pixel_std],
                         dtype=np.float32) * 1.25

        return center, scale

    def _load_coco_person_detection_results(self):
        """Load detection results from ``self.bbox_file``.

        Only detections with ``category_id == 1`` and a score of at least
        ``self.det_bbox_thr`` are kept.

        Raises:
            ValueError: If the loaded file content is empty.
        """
        num_joints = self.ann_info['num_joints']

        with open(self.bbox_file, 'r') as f:
            all_boxes = json.load(f)

        if not all_boxes:
            raise ValueError('=> Load %s fail!' % self.bbox_file)

        print(f'=> Total boxes: {len(all_boxes)}')

        kpt_db = []
        for det_res in all_boxes:
            if det_res['category_id'] != 1:
                continue

            image_file = os.path.join(self.img_prefix,
                                      self.id2name[det_res['image_id']])
            box = det_res['bbox']
            score = det_res['score']

            if score < self.det_bbox_thr:
                continue

            center, scale = self._xywh2cs(*box[:4])
            # No ground truth here: zero joints, all marked visible.
            joints_3d = np.zeros((num_joints, 3), dtype=np.float32)
            joints_3d_visible = np.ones((num_joints, 3), dtype=np.float32)
            kpt_db.append({
                'image_file': image_file,
                'center': center,
                'scale': scale,
                'rotation': 0,
                'bbox': box[:4],
                'bbox_score': score,
                'dataset': self.dataset_name,
                'joints_3d': joints_3d,
                'joints_3d_visible': joints_3d_visible,
                # Running index over all kept detections.
                'bbox_id': len(kpt_db)
            })

        print(f'=> Total boxes after filter '
              f'low score@{self.det_bbox_thr}: {len(kpt_db)}')
        return kpt_db

    def evaluate(self, outputs, res_folder, metric='mAP', **kwargs):
        """Evaluate coco keypoint results. The pose prediction results will
        be saved in `${res_folder}/result_keypoints.json`.

        Note:
            batch_size: N
            num_keypoints: K
            heatmap height: H
            heatmap width: W

        Args:
            outputs (list(dict)): each dict is read for the keys below.
                :preds (np.ndarray[N,K,3]): The first two dimensions are
                    coordinates, score is the third dimension of the array.
                :boxes (np.ndarray[N,6]): [center[0], center[1], scale[0]
                    , scale[1],area, score]
                :image_paths (list[str]): For example, ['data/coco/val2017
                    /000000393226.jpg']
                :bbox_ids (list(int)).
            res_folder (str): Path of directory to save the results.
            metric (str | list[str]): Metric to be performed. Defaults: 'mAP'.

        Returns:
            dict: Evaluation results for evaluation metric.

        Raises:
            KeyError: If a requested metric is not 'mAP'.
        """
        requested = metric if isinstance(metric, list) else [metric]
        allowed_metrics = ['mAP']
        for name in requested:
            if name not in allowed_metrics:
                raise KeyError(f'metric {name} is not supported')

        res_file = os.path.join(res_folder, 'result_keypoints.json')

        # Group every prediction under the id of the image it belongs to.
        kpts = defaultdict(list)
        for output in outputs:
            preds = output['preds']
            boxes = output['boxes']
            image_paths = output['image_paths']
            bbox_ids = output['bbox_ids']

            for i in range(len(image_paths)):
                # Strip the image prefix to recover the annotated file name.
                rel_path = image_paths[i][len(self.img_prefix):]
                image_id = self.name2id[rel_path]
                kpts[image_id].append({
                    'keypoints': preds[i],
                    'center': boxes[i][0:2],
                    'scale': boxes[i][2:4],
                    'area': boxes[i][4],
                    'score': boxes[i][5],
                    'image_id': image_id,
                    'bbox_id': bbox_ids[i]
                })
        kpts = self._sort_and_unique_bboxes(kpts)

        # rescoring and oks nms
        num_joints = self.ann_info['num_joints']
        vis_thr = self.vis_thr
        oks_thr = self.oks_thr
        valid_kpts = []
        for image_id in kpts:
            img_kpts = kpts[image_id]
            for person in img_kpts:
                box_score = person['score']
                # Mean score of the keypoints above the visibility threshold.
                kpt_score = 0
                valid_num = 0
                for joint_idx in range(num_joints):
                    joint_score = person['keypoints'][joint_idx][2]
                    if joint_score > vis_thr:
                        kpt_score = kpt_score + joint_score
                        valid_num = valid_num + 1
                if valid_num != 0:
                    kpt_score = kpt_score / valid_num
                # rescoring
                person['score'] = kpt_score * box_score

            if not self.use_nms:
                valid_kpts.append(img_kpts)
                continue

            nms = soft_oks_nms if self.soft_nms else oks_nms
            keep = nms(list(img_kpts), oks_thr, sigmas=self.sigmas)
            valid_kpts.append([img_kpts[kept_idx] for kept_idx in keep])

        self._write_coco_keypoint_results(valid_kpts, res_file)

        info_str = self._do_python_keypoint_eval(res_file)
        return OrderedDict(info_str)

    def _write_coco_keypoint_results(self, keypoints, res_file):
        """Write results into a json file.

        Only the first non-background class is written.
        """
        data_pack = []
        for cls_ind, cls in enumerate(self.classes):
            if cls == '__background__':
                continue
            data_pack.append({
                'cat_id': self._class_to_coco_ind[cls],
                'cls_ind': cls_ind,
                'cls': cls,
                'ann_type': 'keypoints',
                'keypoints': keypoints
            })

        results = self._coco_keypoint_results_one_category_kernel(
            data_pack[0])

        with open(res_file, 'w') as f:
            json.dump(results, f, sort_keys=True, indent=4)

    def _coco_keypoint_results_one_category_kernel(self, data_pack):
        """Get coco keypoint results.

        Args:
            data_pack (dict): Must provide 'cat_id' and 'keypoints' (a list
                holding, per image, a list of per-person dicts).

        Returns:
            list[dict]: One json-serialisable result dict per person.
        """
        cat_id = data_pack['cat_id']
        keypoints = data_pack['keypoints']
        flat_len = self.ann_info['num_joints'] * 3

        cat_results = []
        for img_kpts in keypoints:
            if len(img_kpts) == 0:
                continue

            stacked = np.array(
                [person['keypoints'] for person in img_kpts])
            flat_rows = stacked.reshape(-1, flat_len)

            for person, flat in zip(img_kpts, flat_rows):
                cat_results.append({
                    'image_id': person['image_id'],
                    'category_id': cat_id,
                    'keypoints': flat.tolist(),
                    'score': float(person['score']),
                    'center': person['center'].tolist(),
                    'scale': person['scale'].tolist()
                })

        return cat_results

    def _do_python_keypoint_eval(self, res_file):
        """Keypoint evaluation using COCOAPI.

        Args:
            res_file (str): Path of the json file holding the predictions.

        Returns:
            list[tuple]: ``(metric name, value)`` pairs built by zipping the
            ten metric names below with ``COCOeval.stats``.
        """
        detections = self.coco.loadRes(res_file)
        evaluator = COCOeval(self.coco, detections, 'keypoints', self.sigmas)
        evaluator.params.useSegm = None
        evaluator.evaluate()
        evaluator.accumulate()
        evaluator.summarize()

        metric_names = [
            'AP', 'AP .5', 'AP .75', 'AP (M)', 'AP (L)', 'AR', 'AR .5',
            'AR .75', 'AR (M)', 'AR (L)'
        ]

        return [
            (metric, value)
            for metric, value in zip(metric_names, evaluator.stats)
        ]

    def _sort_and_unique_bboxes(self, kpts, key='bbox_id'):
        """sort kpts and remove the repeated ones.

        Args:
            kpts (dict): image id -> list of per-person dicts; updated in
                place and also returned.
            key (str): Field used for ordering and duplicate detection.
        """
        for img_id in kpts:
            ordered = sorted(kpts[img_id], key=lambda item: item[key])
            # Walk backwards so deleting an entry never shifts the indexes
            # that are still to be visited.
            for i in range(len(ordered) - 1, 0, -1):
                if ordered[i][key] == ordered[i - 1][key]:
                    del ordered[i]
            kpts[img_id] = ordered

        return kpts
class COCO_WHOLEBODYDataset(JointsDataset):
    """CocoWholeBodyDataset dataset for top-down pose estimation.

    `Whole-Body Human Pose Estimation in the Wild' ECCV'2020
    More details can be found in the `paper
    <https://arxiv.org/abs/2007.11858>`__ .

    The dataset loads raw features and apply specified transforms
    to return a dict containing the image tensors and other information.

    In total, we have 133 keypoints for wholebody pose estimation.

    COCO-WholeBody keypoint indexes::
        0-16: 17 body keypoints
        17-22: 6 foot keypoints
        23-90: 68 face keypoints
        91-132: 42 hand keypoints

    Args:
        cfg: Config object. This class reads ``cfg.TEST.NMS_THRE``,
            ``IMAGE_THRE``, ``SOFT_NMS``, ``OKS_THRE``, ``IN_VIS_THRE``,
            ``COCO_BBOX_FILE`` and ``USE_GT_BBOX``; ``cfg.MODEL.IMAGE_SIZE``;
            ``cfg.DATASET.CACHE_ROOT``, ``DATASET`` and ``SELECT_DATA``;
            and ``cfg.RANK`` (in ``evaluate``).
        root: Forwarded to ``JointsDataset.__init__``.
        image_set (str): Name of the split; forwarded to the base class and
            used to build annotation, cache and image paths.
        is_train (bool): Forwarded to the base class; also enables
            ``select_data`` when ``cfg.DATASET.SELECT_DATA`` is set.
        transform: Forwarded to the base class. Default: None.

    Note:
        ``self.root``, ``self.image_set``, ``self.is_train``,
        ``self.data_format`` and ``self.select_data`` are used here but not
        defined in this class; presumably ``JointsDataset`` provides them.
    """

    def __init__(self, cfg, root, image_set, is_train, transform=None):
        super().__init__(cfg, root, image_set, is_train, transform)
        self.nms_thre = cfg.TEST.NMS_THRE
        self.image_thre = cfg.TEST.IMAGE_THRE
        self.soft_nms = cfg.TEST.SOFT_NMS
        self.oks_thre = cfg.TEST.OKS_THRE
        self.in_vis_thre = cfg.TEST.IN_VIS_THRE
        self.bbox_file = cfg.TEST.COCO_BBOX_FILE
        self.use_gt_bbox = cfg.TEST.USE_GT_BBOX
        self.image_width = cfg.MODEL.IMAGE_SIZE[0]
        self.image_height = cfg.MODEL.IMAGE_SIZE[1]
        self.aspect_ratio = self.image_width * 1.0 / self.image_height
        self.pixel_std = 200

        self.coco = COCO(self._get_ann_file_keypoint())

        # deal with class names
        cat_ids = self.coco.getCatIds()
        cats = [cat['name'] for cat in self.coco.loadCats(cat_ids)]
        self.classes = ['__background__'] + cats
        logger.info('=> classes: %s', self.classes)
        self.num_classes = len(self.classes)
        self._class_to_ind = dict(zip(self.classes, range(self.num_classes)))
        self._class_to_coco_ind = dict(zip(cats, self.coco.getCatIds()))
        self._coco_ind_to_class_ind = {
            self._class_to_coco_ind[cls]: self._class_to_ind[cls]
            for cls in self.classes[1:]
        }

        # load image file names
        self.image_set_index = self._load_image_set_index()
        self.num_images = len(self.image_set_index)
        self.dataset_name = 'coco_wholebody'
        logger.info('=> num_images: %s', self.num_images)

        self.num_joints = 133
        self.body_num = 17
        self.foot_num = 6
        self.face_num = 68
        self.left_hand_num = 21
        self.right_hand_num = 21

        self.sigmas_body = [
            0.026, 0.025, 0.025, 0.035, 0.035, 0.079, 0.079, 0.072, 0.072,
            0.062, 0.062, 0.107, 0.107, 0.087, 0.087, 0.089, 0.089
        ]
        self.sigmas_foot = [0.068, 0.066, 0.066, 0.092, 0.094, 0.094]
        # Fixed: this attribute used to be misspelled ``igmas_face``, which
        # made every later read of ``self.sigmas_face`` fail.
        self.sigmas_face = [
            0.042, 0.043, 0.044, 0.043, 0.040, 0.035, 0.031, 0.025, 0.020,
            0.023, 0.029, 0.032, 0.037, 0.038, 0.043, 0.041, 0.045, 0.013,
            0.012, 0.011, 0.011, 0.012, 0.012, 0.011, 0.011, 0.013, 0.015,
            0.009, 0.007, 0.007, 0.007, 0.012, 0.009, 0.008, 0.016, 0.010,
            0.017, 0.011, 0.009, 0.011, 0.009, 0.007, 0.013, 0.008, 0.011,
            0.012, 0.010, 0.034, 0.008, 0.008, 0.009, 0.008, 0.008, 0.007,
            0.010, 0.008, 0.009, 0.009, 0.009, 0.007, 0.007, 0.008, 0.011,
            0.008, 0.008, 0.008, 0.01, 0.008
        ]
        self.sigmas_lefthand = [
            0.029, 0.022, 0.035, 0.037, 0.047, 0.026, 0.025, 0.024, 0.035,
            0.018, 0.024, 0.022, 0.026, 0.017, 0.021, 0.021, 0.032, 0.02,
            0.019, 0.022, 0.031
        ]
        self.sigmas_righthand = [
            0.029, 0.022, 0.035, 0.037, 0.047, 0.026, 0.025, 0.024, 0.035,
            0.018, 0.024, 0.022, 0.026, 0.017, 0.021, 0.021, 0.032, 0.02,
            0.019, 0.022, 0.031
        ]
        self.sigmas_wholebody = (
            self.sigmas_body + self.sigmas_foot + self.sigmas_face +
            self.sigmas_lefthand + self.sigmas_righthand)
        self.sigmas = np.array(self.sigmas_wholebody)

        self.flip_pairs = self._make_flip_pairs()
        self.parent_ids = None
        self.upper_body_ids = (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
        self.lower_body_ids = (11, 12, 13, 14, 15, 16)

        self.use_different_joints_weight = False
        self.joints_weight = np.ones(self.num_joints, dtype=np.float32)

        # [Cache Point] Reuse a pickled db when one exists, otherwise build
        # it once and store it for the next run.
        self.cache_root = cfg.DATASET.CACHE_ROOT
        db_file = os.path.join(
            self.cache_root,
            '{}_cached_{}_db.pkl'.format(cfg.DATASET.DATASET,
                                         self.image_set))
        if os.path.exists(db_file):
            # NOTE(security): unpickling can execute arbitrary code, so the
            # cache directory must only contain trusted files.
            with open(db_file, 'rb') as fd:
                self.db = pickle.load(fd)
        else:
            self.db = self._get_db()
            os.makedirs(self.cache_root, exist_ok=True)
            with open(db_file, 'wb') as fd:
                pickle.dump(self.db, fd)

        if is_train and cfg.DATASET.SELECT_DATA:
            self.db = self.select_data(self.db)

        logger.info('=> load %s samples', len(self.db))

    def _make_flip_pairs(self):
        """Return the joint index pairs listed for body, foot, face, hand.

        Returns:
            list[list[int]]: Two-element index pairs, concatenated in the
            order body, foot, face, hand.
        """
        body = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12], [13, 14],
                [15, 16]]
        foot = [[17, 20], [18, 21], [19, 22]]

        face = [[23, 39], [24, 38], [25, 37], [26, 36], [27, 35], [28, 34],
                [29, 33], [30, 32], [40, 49], [41, 48], [42, 47], [43, 46],
                [44, 45], [54, 58], [55, 57], [59, 68], [60, 67], [61, 66],
                [62, 65], [63, 70], [64, 69], [71, 77], [72, 76], [73, 75],
                [78, 82], [79, 81], [83, 87], [84, 86], [88, 90]]

        hand = [[91, 112], [92, 113], [93, 114], [94, 115], [95, 116],
                [96, 117], [97, 118], [98, 119], [99, 120], [100, 121],
                [101, 122], [102, 123], [103, 124], [104, 125], [105, 126],
                [106, 127], [107, 128], [108, 129], [109, 130], [110, 131],
                [111, 132]]

        return body + foot + face + hand

    def _get_ann_file_keypoint(self):
        """Build the annotation file path for the current image set.

        Returns:
            str: ``data/coco_wholebody/<prefix>_<image_set>_v1.0.json`` where
            ``<prefix>`` is ``image_info`` when ``'test'`` occurs in
            ``self.image_set`` and ``coco_wholebody`` otherwise.
        """
        prefix = 'coco_wholebody' \
            if 'test' not in self.image_set else 'image_info'
        return os.path.join('data/coco_wholebody',
                            prefix + '_' + self.image_set + '_v1.0.json')

    def _load_image_set_index(self):
        """Return all image ids known to the loaded COCO annotation."""
        image_ids = self.coco.getImgIds()
        return image_ids

    def _get_db(self):
        """Build the sample db from GT boxes or from detection results.

        Ground-truth annotations are used when training or when
        ``self.use_gt_bbox`` is set; otherwise the detection file is used.
        """
        if self.is_train or self.use_gt_bbox:
            # use ground truth bbox
            gt_db = self._load_coco_keypoint_annotations()
        else:
            # use bbox from detection
            gt_db = self._load_coco_person_detection_results()
        return gt_db

    def _load_coco_keypoint_annotations(self):
        """Collect the ground-truth db entries of every image."""
        gt_db = []
        for index in self.image_set_index:
            gt_db.extend(self._load_coco_keypoint_annotation_kernal(index))
        return gt_db

    def _load_coco_keypoint_annotation_kernal(self, index):
        """Load the annotations of one image from COCOAPI.

        Note:
            bbox:[x1, y1, w, h]

        Args:
            index: coco image id

        Returns:
            list[dict]: One db entry per kept object. Objects are dropped
            when the clipped box is degenerate or ``area`` is not positive,
            when the mapped class index is not 1, or when all values in
            ``obj['keypoints']`` are zero.
        """
        im_ann = self.coco.loadImgs(index)[0]
        width = im_ann['width']
        height = im_ann['height']

        ann_ids = self.coco.getAnnIds(imgIds=index, iscrowd=False)
        objs = self.coco.loadAnns(ann_ids)

        # sanitize bboxes: clip to the image and keep only non-empty boxes
        valid_objs = []
        for obj in objs:
            x, y, w, h = obj['bbox']
            x1 = np.max((0, x))
            y1 = np.max((0, y))
            x2 = np.min((width - 1, x1 + np.max((0, w - 1))))
            y2 = np.min((height - 1, y1 + np.max((0, h - 1))))
            if obj['area'] > 0 and x2 >= x1 and y2 >= y1:
                obj['clean_bbox'] = [x1, y1, x2 - x1, y2 - y1]
                valid_objs.append(obj)

        rec = []
        bbox_id = 0
        for obj in valid_objs:
            cls = self._coco_ind_to_class_ind[obj['category_id']]
            if cls != 1:
                continue

            # ignore objs without keypoints annotation
            if max(obj['keypoints']) == 0:
                continue

            # ``float`` is what the removed ``np.float`` alias stood for.
            joints_3d = np.zeros((self.num_joints, 3), dtype=float)
            joints_3d_vis = np.zeros((self.num_joints, 3), dtype=float)

            keypoints = np.array(obj['keypoints'] + obj['foot_kpts'] +
                                 obj['face_kpts'] + obj['lefthand_kpts'] +
                                 obj['righthand_kpts']).reshape(-1, 3)
            joints_3d[:, :2] = keypoints[:, :2]
            # The third value of each triplet becomes a 0/1 flag written to
            # the first two columns.
            joints_3d_vis[:, :2] = np.minimum(1, keypoints[:, 2:3] > 0)

            center, scale = self._box2cs(obj['clean_bbox'][:4])
            rec.append({
                'image': self.image_path_from_index(index),
                'center': center,
                'scale': scale,
                'rotation': 0,
                'joints_3d': joints_3d,
                'joints_3d_vis': joints_3d_vis,
                'dataset': self.dataset_name,
                'bbox_score': 1,
                'bbox_id': bbox_id
            })
            bbox_id = bbox_id + 1

        return rec

    def _box2cs(self, box):
        """Convert an ``[x, y, w, h]`` box to ``(center, scale)``."""
        x, y, w, h = box[:4]
        return self._xywh2cs(x, y, w, h)

    def _xywh2cs(self, x, y, w, h):
        """Convert a box to its center and an aspect-corrected scale.

        The shorter side is enlarged so that ``w / h`` equals
        ``self.aspect_ratio``; both sides are divided by ``self.pixel_std``
        and multiplied by 1.25 unless ``center[0]`` equals -1.

        Returns:
            tuple: ``(center, scale)``, each a float32 array of length 2.
        """
        center = np.zeros((2), dtype=np.float32)
        center[0] = x + w * 0.5
        center[1] = y + h * 0.5

        if w > self.aspect_ratio * h:
            h = w * 1.0 / self.aspect_ratio
        elif w < self.aspect_ratio * h:
            w = h * self.aspect_ratio
        scale = np.array([w * 1.0 / self.pixel_std, h * 1.0 / self.pixel_std],
                         dtype=np.float32)
        if center[0] != -1:
            scale = scale * 1.25

        return center, scale

    def image_path_from_index(self, index):
        """Build the image path for an image id.

        example: images / train2017 / 000000119993.jpg

        The file name is the zero-padded 12-digit id; it gets a
        ``COCO_<image_set>_`` prefix when ``'2014'`` is in the image set
        name. The directory is ``test2017`` for test sets, otherwise the
        image set name, with ``.zip@`` appended when ``self.data_format`` is
        ``'zip'``.
        """
        file_name = '%012d.jpg' % index
        if '2014' in self.image_set:
            file_name = 'COCO_%s_' % self.image_set + file_name

        prefix = 'test2017' if 'test' in self.image_set else self.image_set

        data_name = prefix + '.zip@' if self.data_format == 'zip' else prefix

        image_path = os.path.join(self.root, 'images', data_name, file_name)

        return image_path

    def _load_coco_person_detection_results(self):
        """Build db entries from the detection results in ``self.bbox_file``.

        Only detections with ``category_id == 1`` and a score of at least
        ``self.image_thre`` are kept.

        Returns:
            list[dict] | None: The db entries, or None when the file holds
            no detections (an error is logged in that case).
        """
        with open(self.bbox_file, 'r') as f:
            all_boxes = json.load(f)

        if not all_boxes:
            logger.error('=> Load %s fail!', self.bbox_file)
            return None

        logger.info('=> Total boxes: %s', len(all_boxes))

        kpt_db = []
        num_boxes = 0
        for det_res in all_boxes:
            if det_res['category_id'] != 1:
                continue
            img_name = self.image_path_from_index(det_res['image_id'])
            box = det_res['bbox']
            score = det_res['score']

            if score < self.image_thre:
                continue

            num_boxes = num_boxes + 1

            center, scale = self._box2cs(box)
            # Detections carry no keypoints: positions are zero-filled and
            # the visibility array is filled with ones.
            joints_3d = np.zeros((self.num_joints, 3), dtype=float)
            joints_3d_vis = np.ones((self.num_joints, 3), dtype=float)
            kpt_db.append({
                'image': img_name,
                'center': center,
                'scale': scale,
                'score': score,
                'joints_3d': joints_3d,
                'joints_3d_vis': joints_3d_vis,
            })

        logger.info('=> Total boxes after fliter low score@%s: %s',
                    self.image_thre, num_boxes)
        return kpt_db

    def evaluate(self, cfg, preds, output_dir, all_boxes, img_path, *args,
                 **kwargs):
        """Rescore predictions, apply OKS NMS, dump and evaluate them.

        Args:
            cfg: Config object; only ``cfg.RANK`` is read (used in the
                result file name).
            preds: Per-person keypoint predictions; ``preds[i][j][2]`` is
                read as the score of joint ``j``.
            output_dir (str): Results go to ``<output_dir>/results``.
            all_boxes: Per-person rows where ``[0:2]`` is stored as center,
                ``[2:4]`` as scale, ``[4]`` as area and ``[5]`` as score.
            img_path: Per-person image paths; characters ``[-16:-4]`` are
                parsed as the integer image id.

        Returns:
            tuple: ``(name_value, name_value['AP'])`` where ``name_value``
            is an OrderedDict of metrics, or ``({'Null': 0}, 0)`` when
            ``'test'`` is in ``self.image_set``.
        """
        rank = cfg.RANK

        res_folder = os.path.join(output_dir, 'results')
        if not os.path.exists(res_folder):
            try:
                os.makedirs(res_folder)
            except OSError:
                # Best effort, as before: log and continue.
                logger.error('Fail to make %s', res_folder)

        res_file = os.path.join(
            res_folder,
            'keypoints_{}_results_{}.json'.format(self.image_set, rank))

        # image x person x (keypoints)
        kpts = defaultdict(list)
        for idx, kpt in enumerate(preds):
            person = {
                'keypoints': kpt,
                'center': all_boxes[idx][0:2],
                'scale': all_boxes[idx][2:4],
                'area': all_boxes[idx][4],
                'score': all_boxes[idx][5],
                'image': int(img_path[idx][-16:-4])
            }
            kpts[person['image']].append(person)

        # rescoring and oks nms
        num_joints = self.num_joints
        in_vis_thre = self.in_vis_thre
        oks_thre = self.oks_thre
        oks_nmsed_kpts = []
        for img_kpts in kpts.values():
            for n_p in img_kpts:
                box_score = n_p['score']
                kpt_score = 0
                valid_num = 0
                # Average only the joint scores above the threshold.
                for n_jt in range(num_joints):
                    t_s = n_p['keypoints'][n_jt][2]
                    if t_s > in_vis_thre:
                        kpt_score = kpt_score + t_s
                        valid_num = valid_num + 1
                if valid_num != 0:
                    kpt_score = kpt_score / valid_num
                # rescoring
                n_p['score'] = kpt_score * box_score

            nms = soft_oks_nms if self.soft_nms else oks_nms
            keep = nms(list(img_kpts), oks_thre)

            # An empty keep list means "keep everything" here.
            if len(keep) == 0:
                oks_nmsed_kpts.append(img_kpts)
            else:
                oks_nmsed_kpts.append([img_kpts[_keep] for _keep in keep])

        self._write_coco_keypoint_results(oks_nmsed_kpts, res_file)

        if 'test' not in self.image_set:
            info_str = self._do_python_keypoint_eval(res_file, res_folder)
            name_value = OrderedDict(info_str)
            return name_value, name_value['AP']
        return {'Null': 0}, 0

    def _write_coco_keypoint_results(self, keypoints, res_file):
        """Write results into a json file and verify it can be read back.

        Args:
            keypoints (list): Per-image lists of person results.
            res_file (str): Path of the json file to (over)write.
        """
        # One pack per non-background class; only the first pack is used.
        data_pack = [{
            'cat_id': self._class_to_coco_ind[cls],
            'cls_ind': cls_ind,
            'cls': cls,
            'ann_type': 'keypoints',
            'keypoints': keypoints
        } for cls_ind, cls in enumerate(self.classes)
                     if cls != '__background__']

        results = self._coco_keypoint_results_one_category_kernel(data_pack[0])
        logger.info('=> writing results json to %s', res_file)
        with open(res_file, 'w') as f:
            json.dump(results, f, sort_keys=True, indent=4)

        try:
            # Use a context manager so the handle is closed; it used to be
            # leaked by ``json.load(open(res_file))``.
            with open(res_file, 'r') as f:
                json.load(f)
        except ValueError:
            # The file does not parse: replace its last line with a closing
            # bracket and rewrite it.
            with open(res_file, 'r') as f:
                content = f.readlines()
            content[-1] = ']'
            with open(res_file, 'w') as f:
                f.writelines(content)

    def _coco_keypoint_results_one_category_kernel(self, data_pack):
        """Get coco keypoint results.

        Args:
            data_pack (dict): Must provide ``'cat_id'`` and ``'keypoints'``
                (an iterable of per-image person lists).

        Returns:
            list[dict]: One entry per person. The flattened
            ``(x, y, score)`` triplets are split into ``keypoints``,
            ``foot_kpts``, ``face_kpts``, ``lefthand_kpts`` and
            ``righthand_kpts``.
        """
        cat_id = data_pack['cat_id']
        keypoints = data_pack['keypoints']
        cat_results = []

        # Offsets of each part inside a flattened row; the same for every
        # image, so computed once.
        cuts = np.cumsum([
            0, self.body_num, self.foot_num, self.face_num,
            self.left_hand_num, self.right_hand_num
        ]) * 3

        for img_kpts in keypoints:
            if len(img_kpts) == 0:
                continue

            # Fixed: the buffer used to be created with the non-existent
            # ``_key_points.zeros``. float64 keeps the values JSON
            # serializable.
            stacked = np.array([person['keypoints'] for person in img_kpts],
                               dtype=float)
            key_points = stacked[:, :self.num_joints, :3].reshape(
                len(img_kpts), self.num_joints * 3)

            result = [{
                'image_id': person['image'],
                'category_id': cat_id,
                'keypoints': row[cuts[0]:cuts[1]].tolist(),
                'foot_kpts': row[cuts[1]:cuts[2]].tolist(),
                'face_kpts': row[cuts[2]:cuts[3]].tolist(),
                'lefthand_kpts': row[cuts[3]:cuts[4]].tolist(),
                'righthand_kpts': row[cuts[4]:cuts[5]].tolist(),
                'score': person['score'],
                'center': list(person['center']),
                'scale': list(person['scale'])
            } for person, row in zip(img_kpts, key_points)]

            cat_results.extend(result)

        return cat_results

    def _do_python_keypoint_eval(self, res_file, res_folder):
        """Keypoint evaluation using COCOAPI.

        Runs and prints one evaluation per part (body, foot, face, left
        hand, right hand) and finally for the whole body; the returned
        numbers come from the whole-body run.

        Args:
            res_file (str): Result json file handed to ``self.coco.loadRes``.
            res_folder (str): Unused; kept for interface compatibility.

        Returns:
            list[tuple]: ``(metric name, value)`` pairs.
        """
        coco_dt = self.coco.loadRes(res_file)

        # Whole body must stay last: its stats are the ones reported.
        eval_parts = (
            ('keypoints_body', self.sigmas_body),
            ('keypoints_foot', self.sigmas_foot),
            ('keypoints_face', self.sigmas_face),
            ('keypoints_lefthand', self.sigmas_lefthand),
            ('keypoints_righthand', self.sigmas_righthand),
            ('keypoints_wholebody', self.sigmas_wholebody),
        )
        coco_eval = None
        for iou_type, sigmas in eval_parts:
            coco_eval = COCOeval(
                self.coco, coco_dt, iou_type, np.array(sigmas), use_area=True)
            coco_eval.params.useSegm = None
            coco_eval.evaluate()
            coco_eval.accumulate()
            coco_eval.summarize()

        # NOTE: the 'Ap .5' spelling is kept unchanged because callers may
        # look the metric up by this exact key.
        stats_names = [
            'AP', 'Ap .5', 'AP .75', 'AP (M)', 'AP (L)', 'AR', 'AR .5',
            'AR .75', 'AR (M)', 'AR (L)'
        ]

        info_str = list(zip(stats_names, coco_eval.stats))

        return info_str