    def load_test_data(self, score=False):
        coco = COCO(self.test_annot_path)
        test_data = []
        for aid in coco.anns.keys():
            ann = coco.anns[aid]
            # skip annotations whose image is missing, crowd regions, and
            # instances without any labeled keypoint (the membership check
            # must run before coco.imgs is indexed)
            if (ann['image_id'] not in coco.imgs) or ann['iscrowd'] or \
                    (np.sum(ann['keypoints'][2::3]) == 0) or \
                    (ann['num_keypoints'] == 0):
                continue
            imgname = coco.imgs[ann['image_id']]['file_name']
            joints = ann['keypoints']

            # sanitize bboxes: clip to image bounds
            x, y, w, h = ann['bbox']
            img = coco.loadImgs(ann['image_id'])[0]
            width, height = img['width'], img['height']
            x1 = np.max((0, x))
            y1 = np.max((0, y))
            x2 = np.min((width - 1, x1 + np.max((0, w - 1))))
            y2 = np.min((height - 1, y1 + np.max((0, h - 1))))
            if ann['area'] > 0 and x2 >= x1 and y2 >= y1:
                bbox = [x1, y1, x2 - x1, y2 - y1]
            else:
                continue

            if score:
                data = dict(image_id=ann['image_id'],
                            imgpath=imgname[7:],
                            id=aid,
                            bbox=bbox,
                            joints=joints,
                            score=1)
            else:
                data = dict(image_id=ann['image_id'],
                            imgpath=imgname[7:],
                            id=aid,
                            bbox=bbox,
                            joints=joints)
            test_data.append(data)

        return test_data
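# The bbox sanitization above recurs in several loaders in this code base; a
# minimal standalone sketch of the same clipping arithmetic (the helper name
# is illustrative, not from the source):
def clip_xywh(box, width, height):
    """Clip an [x, y, w, h] box to the image bounds; None if nothing is left."""
    x, y, w, h = box
    x1 = max(0, x)
    y1 = max(0, y)
    x2 = min(width - 1, x1 + max(0, w - 1))
    y2 = min(height - 1, y1 + max(0, h - 1))
    if x2 >= x1 and y2 >= y1:
        return [x1, y1, x2 - x1, y2 - y1]
    return None

# e.g. a box hanging off the right edge of a 640x480 image:
# clip_xywh([600, 100, 100, 50], 640, 480) -> [600, 100, 39, 49]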
class CrowdPoseDataset(Dataset):
    """`CrowdPose`_ Dataset.

    Args:
        root (string): Root directory where the dataset is located.
        dataset (string): Dataset name ('train2017', 'val2017', 'test2017').
        data_format (string): Data format for reading ('jpg', 'zip').
        transform (callable, optional): A function/transform that takes in an
            opencv image and returns a transformed version.
            E.g, ``transforms.ToTensor``
        target_transform (callable, optional): A function/transform that
            takes in the target and transforms it.
    """

    def __init__(self, root, dataset, data_format,
                 transform=None, target_transform=None):
        from crowdposetools.coco import COCO
        self.name = 'CROWDPOSE'
        self.root = root
        self.dataset = dataset
        self.data_format = data_format
        self.coco = COCO(self._get_anno_file_name())
        self.ids = list(self.coco.imgs.keys())
        self.transform = transform
        self.target_transform = target_transform

        cats = [cat['name']
                for cat in self.coco.loadCats(self.coco.getCatIds())]
        self.classes = ['__background__'] + cats
        logger.info('=> classes: {}'.format(self.classes))
        self.num_classes = len(self.classes)
        self._class_to_ind = dict(zip(self.classes, range(self.num_classes)))
        self._class_to_coco_ind = dict(zip(cats, self.coco.getCatIds()))
        self._coco_ind_to_class_ind = dict(
            [(self._class_to_coco_ind[cls], self._class_to_ind[cls])
             for cls in self.classes[1:]])

    def _get_anno_file_name(self):
        # example: root/annotations/crowdpose_{train,val,test}.json
        return os.path.join(self.root, 'annotations',
                            'crowdpose_{}.json'.format(self.dataset))

    def _get_image_path(self, file_name):
        images_dir = os.path.join(self.root, 'images')
        if self.data_format == 'zip':
            return images_dir + '.zip@' + file_name
        else:
            return os.path.join(images_dir, file_name)

    def __getitem__(self, index):
        """
        Args:
            index (int): Index

        Returns:
            tuple: Tuple (image, target). target is the object returned by
                ``coco.loadAnns``.
        """
        coco = self.coco
        img_id = self.ids[index]
        ann_ids = coco.getAnnIds(imgIds=img_id)
        target = coco.loadAnns(ann_ids)

        file_name = coco.loadImgs(img_id)[0]['file_name']

        if self.data_format == 'zip':
            img = zipreader.imread(
                self._get_image_path(file_name),
                cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        else:
            img = cv2.imread(
                self._get_image_path(file_name),
                cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        if self.transform is not None:
            img = self.transform(img)

        if self.target_transform is not None:
            target = self.target_transform(target)

        return img, target

    def __len__(self):
        return len(self.ids)

    def __repr__(self):
        fmt_str = 'Dataset ' + self.__class__.__name__ + '\n'
        fmt_str += '    Number of datapoints: {}\n'.format(self.__len__())
        fmt_str += '    Root Location: {}\n'.format(self.root)
        tmp = '    Transforms (if any): '
        fmt_str += '{0}{1}\n'.format(
            tmp,
            self.transform.__repr__().replace('\n', '\n' + ' ' * len(tmp)))
        tmp = '    Target Transforms (if any): '
        fmt_str += '{0}{1}'.format(
            tmp,
            self.target_transform.__repr__().replace(
                '\n', '\n' + ' ' * len(tmp)))
        return fmt_str

    def processKeypoints(self, keypoints):
        # cast each (x, y, score) triplet to float
        tmp = keypoints.copy()
        if keypoints[:, 2].max() > 0:
            num_keypoints = keypoints.shape[0]
            for i in range(num_keypoints):
                tmp[i][0:3] = [
                    float(keypoints[i][0]),
                    float(keypoints[i][1]),
                    float(keypoints[i][2])
                ]
        return tmp

    def evaluate(self, cfg, preds, scores, output_dir, *args, **kwargs):
        """Perform evaluation on the CrowdPose keypoint task.

        :param cfg: cfg dictionary
        :param preds: predictions
        :param scores: per-person scores
        :param output_dir: output directory
        """
        res_folder = os.path.join(output_dir, 'results')
        if not os.path.exists(res_folder):
            os.makedirs(res_folder)
        res_file = os.path.join(
            res_folder, 'keypoints_%s_results.json' % self.dataset)

        # preds is a list of: image x person x (keypoints)
        # keypoints: num_joints * 4 (x, y, score, tag)
        kpts = defaultdict(list)
        for idx, _kpts in enumerate(preds):
            img_id = self.ids[idx]
            file_name = self.coco.loadImgs(img_id)[0]['file_name']
            for idx_kpt, kpt in enumerate(_kpts):
                area = (np.max(kpt[:, 0]) - np.min(kpt[:, 0])) * \
                       (np.max(kpt[:, 1]) - np.min(kpt[:, 1]))
                kpt = self.processKeypoints(kpt)
                if cfg.DATASET.WITH_CENTER and not cfg.TEST.IGNORE_CENTER:
                    # drop the auxiliary center joint
                    kpt = kpt[:-1]

                kpts[int(file_name.split('.')[0])].append({
                    'keypoints': kpt[:, 0:3],
                    'score': scores[idx][idx_kpt],
                    'tags': kpt[:, 3],
                    'image': int(file_name.split('.')[0]),
                    'area': area
                })

        # rescoring and oks nms
        oks_nmsed_kpts = []
        # image x person x (keypoints)
        for img in kpts.keys():
            # person x (keypoints)
            img_kpts = kpts[img]
            # do not use nms, keep all detections
            keep = []
            if len(keep) == 0:
                oks_nmsed_kpts.append(img_kpts)
            else:
                oks_nmsed_kpts.append([img_kpts[_keep] for _keep in keep])

        self._write_coco_keypoint_results(oks_nmsed_kpts, res_file)

        # the CrowdPose test set also has annotations, so evaluation
        # always runs
        info_str = self._do_python_keypoint_eval(res_file, res_folder)
        name_value = OrderedDict(info_str)
        return name_value, name_value['AP']

    def _write_coco_keypoint_results(self, keypoints, res_file):
        data_pack = [{
            'cat_id': self._class_to_coco_ind[cls],
            'cls_ind': cls_ind,
            'cls': cls,
            'ann_type': 'keypoints',
            'keypoints': keypoints
        } for cls_ind, cls in enumerate(self.classes)
          if not cls == '__background__']

        results = self._coco_keypoint_results_one_category_kernel(
            data_pack[0])
        logger.info('=> writing results json to %s' % res_file)
        with open(res_file, 'w') as f:
            json.dump(results, f, sort_keys=True, indent=4)
        try:
            json.load(open(res_file))
        except Exception:
            # repair a truncated results file by closing the top-level list
            content = []
            with open(res_file, 'r') as f:
                for line in f:
                    content.append(line)
            content[-1] = ']'
            with open(res_file, 'w') as f:
                for c in content:
                    f.write(c)

    def _coco_keypoint_results_one_category_kernel(self, data_pack):
        cat_id = data_pack['cat_id']
        keypoints = data_pack['keypoints']
        cat_results = []
        num_joints = 14

        for img_kpts in keypoints:
            if len(img_kpts) == 0:
                continue

            _key_points = np.array(
                [img_kpts[k]['keypoints'] for k in range(len(img_kpts))])
            key_points = np.zeros((_key_points.shape[0], num_joints * 3),
                                  dtype=float)

            for ipt in range(num_joints):
                key_points[:, ipt * 3 + 0] = _key_points[:, ipt, 0]
                key_points[:, ipt * 3 + 1] = _key_points[:, ipt, 1]
                # keypoint score
                key_points[:, ipt * 3 + 2] = _key_points[:, ipt, 2]

            for k in range(len(img_kpts)):
                kpt = key_points[k].reshape((num_joints, 3))
                left_top = np.amin(kpt, axis=0)
                right_bottom = np.amax(kpt, axis=0)

                w = right_bottom[0] - left_top[0]
                h = right_bottom[1] - left_top[1]

                cat_results.append({
                    'image_id': img_kpts[k]['image'],
                    'category_id': cat_id,
                    'keypoints': list(key_points[k]),
                    'score': img_kpts[k]['score'],
                    'bbox': list([left_top[0], left_top[1], w, h])
                })

        return cat_results

    def _do_python_keypoint_eval(self, res_file, res_folder):
        coco_dt = self.coco.loadRes(res_file)
        coco_eval = COCOeval(self.coco, coco_dt, 'keypoints')
        coco_eval.params.useSegm = None
        coco_eval.evaluate()
        coco_eval.accumulate()
        coco_eval.summarize()
        stats_names = [
            'AP', 'AP .5', 'AP .75', 'AR', 'AR .5', 'AR .75',
            'AP (easy)', 'AP (medium)', 'AP (hard)'
        ]
        stats_index = [0, 1, 2, 5, 6, 7, 8, 9, 10]

        info_str = []
        for ind, name in enumerate(stats_names):
            info_str.append((name, coco_eval.stats[stats_index[ind]]))

        return info_str
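# Hedged usage sketch for CrowdPoseDataset (illustrative only: the root path
# and split name below are assumptions, not taken from the source above).
# Note that __getitem__ returns (image, list of annotation dicts), so
# batching with a torch DataLoader would need a custom collate_fn.
if __name__ == '__main__':
    dataset = CrowdPoseDataset(
        root='data/crowdpose',   # assumed layout: root/annotations, root/images
        dataset='train2017',
        data_format='jpg')
    img, target = dataset[0]
    print(len(dataset), img.shape, len(target))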
class HIECPDataset(CPJointsDataset):
    '''
    keypoint order:
    ['nose', 'chest',
     'right_shoulder', 'right_elbow', 'right_wrist',
     'left_shoulder', 'left_elbow', 'left_wrist',
     'right_hip', 'right_knee', 'right_ankle',
     'left_hip', 'left_knee', 'left_ankle']
    '''

    def __init__(self, cfg, root, image_set, is_train, transform=None):
        super().__init__(cfg, root, image_set, is_train, transform)
        self.nms_thre = cfg.TEST.NMS_THRE
        self.image_thre = cfg.TEST.IMAGE_THRE
        self.soft_nms = cfg.TEST.SOFT_NMS
        self.oks_thre = cfg.TEST.OKS_THRE
        self.in_vis_thre = cfg.TEST.IN_VIS_THRE
        self.bbox_file = cfg.TEST.COCO_BBOX_FILE
        self.use_gt_bbox = cfg.TEST.USE_GT_BBOX
        self.image_width = cfg.MODEL.IMAGE_SIZE[0]
        self.image_height = cfg.MODEL.IMAGE_SIZE[1]
        self.aspect_ratio = self.image_width * 1.0 / self.image_height
        self.pixel_std = 200

        self.coco = COCO(self._get_ann_file_keypoint())

        # deal with class names
        cats = [cat['name']
                for cat in self.coco.loadCats(self.coco.getCatIds())]
        self.classes = ['__background__'] + cats
        logger.info('=> classes: {}'.format(self.classes))
        self.num_classes = len(self.classes)
        self._class_to_ind = dict(zip(self.classes, range(self.num_classes)))
        self._class_to_coco_ind = dict(zip(cats, self.coco.getCatIds()))
        self._coco_ind_to_class_ind = dict(
            [(self._class_to_coco_ind[cls], self._class_to_ind[cls])
             for cls in self.classes[1:]])

        # load image file names
        self.image_set_index = self._load_image_set_index()
        self.num_images = len(self.image_set_index)
        logger.info('=> num_images: {}'.format(self.num_images))

        self.num_joints = 14
        self.flip_pairs = [[2, 5], [3, 6], [4, 7], [8, 11], [9, 12], [10, 13]]
        self.parent_ids = None
        self.upper_body_ids = (0, 1, 2, 3, 4, 5, 6, 7)
        self.lower_body_ids = (8, 9, 10, 11, 12, 13)

        self.joints_weight = np.array(
            [1., 1., 1.2, 1.5, 1.5, 1.2, 1.5, 1.5,
             1.2, 1.5, 1.5, 1.2, 1.5, 1.5],
            dtype=np.float32).reshape((self.num_joints, 1))

        self.nms_sigmas = np.array(
            [.79, .79, .72, .72, .62, .62, 1.07, 1.07,
             .87, .87, .89, .89, .35, .35]) / 10.0

        self.db = self._get_db()

        if is_train and cfg.DATASET.SELECT_DATA:
            self.db = self.select_data(self.db)

        logger.info('=> load {} samples'.format(len(self.db)))

    def _get_ann_file_keypoint(self):
        """ example: self.root / annotations / hie_person_keypoints_train2020.json """
        prefix = 'hie_person_keypoints'
        return os.path.join(self.root, 'annotations',
                            prefix + '_' + self.image_set + '.json')

    def _load_image_set_index(self):
        """ image id: int """
        image_ids = self.coco.getImgIds()
        return image_ids

    def _get_db(self):
        if self.is_train or self.use_gt_bbox:
            # use ground truth bbox
            gt_db = self._load_coco_keypoint_annotations()
        else:
            # use bbox from detection
            gt_db = self._load_coco_person_detection_results()
        return gt_db

    def _load_coco_keypoint_annotations(self):
        """ ground truth bbox and keypoints """
        gt_db = []
        for index in self.image_set_index:
            gt_db.extend(self._load_coco_keypoint_annotation_kernal(index))
        return gt_db

    def _load_coco_keypoint_annotation_kernal(self, index):
        """
        coco ann: [u'segmentation', u'area', u'iscrowd', u'image_id',
                   u'bbox', u'category_id', u'id']
        iscrowd:
            crowd instances are handled by marking their overlaps with all
            categories to -1 and later excluded in training
        bbox:
            [x1, y1, w, h]
        :param index: coco image id
        :return: db entry
        """
        im_ann = self.coco.loadImgs([index])[0]
        width = im_ann['width']
        height = im_ann['height']

        annIds = self.coco.getAnnIds(imgIds=[index], iscrowd=False)
        objs = self.coco.loadAnns(annIds)

        # sanitize bboxes: clip to image bounds
        valid_objs = []
        for obj in objs:
            x, y, w, h = obj['bbox']
            x1 = np.max((0, x))
            y1 = np.max((0, y))
            x2 = np.min((width - 1, x1 + np.max((0, w - 1))))
            y2 = np.min((height - 1, y1 + np.max((0, h - 1))))
            if x2 >= x1 and y2 >= y1:
                obj['clean_bbox'] = [x1, y1, x2 - x1, y2 - y1]
                valid_objs.append(obj)
        objs = valid_objs

        rec = []
        for obj in objs:
            cls = self._coco_ind_to_class_ind[obj['category_id']]
            if cls != 1:
                continue

            # ignore objs without keypoints annotation
            if max(obj['keypoints']) == 0:
                continue

            joints_3d = np.zeros((self.num_joints, 3), dtype=float)
            joints_3d_vis = np.zeros((self.num_joints, 3), dtype=float)
            for ipt in range(self.num_joints):
                joints_3d[ipt, 0] = obj['keypoints'][ipt * 3 + 0]
                joints_3d[ipt, 1] = obj['keypoints'][ipt * 3 + 1]
                joints_3d[ipt, 2] = 0
                t_vis = obj['keypoints'][ipt * 3 + 2]
                if t_vis > 1:
                    t_vis = 1
                joints_3d_vis[ipt, 0] = t_vis
                joints_3d_vis[ipt, 1] = t_vis
                joints_3d_vis[ipt, 2] = 0

            center, scale = self._box2cs(obj['clean_bbox'][:4])
            rec.append({
                'image': self.image_path_from_index(index),
                'center': center,
                'scale': scale,
                'joints_3d': joints_3d,
                'joints_3d_vis': joints_3d_vis,
                'filename': '',
                'imgnum': 0,
            })

        return rec

    def _box2cs(self, box):
        x, y, w, h = box[:4]
        return self._xywh2cs(x, y, w, h)

    def _xywh2cs(self, x, y, w, h):
        center = np.zeros((2), dtype=np.float32)
        center[0] = x + w * 0.5
        center[1] = y + h * 0.5

        # pad the box to the target aspect ratio
        if w > self.aspect_ratio * h:
            h = w * 1.0 / self.aspect_ratio
        elif w < self.aspect_ratio * h:
            w = h * self.aspect_ratio
        scale = np.array(
            [w * 1.0 / self.pixel_std, h * 1.0 / self.pixel_std],
            dtype=np.float32)
        if center[0] != -1:
            scale = scale * 1.25

        return center, scale

    def image_path_from_index(self, index):
        """ example: images / train / 000000119993.jpg """
        file_name = index
        prefix = 'train' if ('train' in self.image_set
                             or 'val' in self.image_set) else self.image_set
        image_path = os.path.join(self.root, 'images', prefix, file_name)
        return image_path

    def _load_coco_person_detection_results(self):
        all_boxes = None
        with open(self.bbox_file, 'r') as f:
            all_boxes = json.load(f)

        if not all_boxes:
            logger.error('=> Load %s fail!' % self.bbox_file)
            return None

        logger.info('=> Total boxes: {}'.format(len(all_boxes)))

        kpt_db = []
        num_boxes = 0
        for n_img in range(0, len(all_boxes)):
            det_res = all_boxes[n_img]
            if det_res['category_id'] != 1:
                continue
            img_name = self.image_path_from_index(det_res['image_id'])
            box = det_res['bbox']
            score = det_res['score']
            if score < self.image_thre:
                continue

            num_boxes = num_boxes + 1

            center, scale = self._box2cs(box)
            joints_3d = np.zeros((self.num_joints, 3), dtype=float)
            joints_3d_vis = np.ones((self.num_joints, 3), dtype=float)
            kpt_db.append({
                'image': img_name,
                'center': center,
                'scale': scale,
                'score': score,
                'joints_3d': joints_3d,
                'joints_3d_vis': joints_3d_vis,
            })

        logger.info('=> Total boxes after filtering low score@{}: {}'.format(
            self.image_thre, num_boxes))
        return kpt_db

    def evaluate(self, cfg, preds, output_dir, all_boxes, img_path,
                 *args, **kwargs):
        rank = cfg.RANK
        res_folder = os.path.join(output_dir, 'results')
        if not os.path.exists(res_folder):
            try:
                os.makedirs(res_folder)
            except Exception:
                logger.error('Fail to make {}'.format(res_folder))
        res_file = os.path.join(
            res_folder,
            'keypoints_{}_results_{}.json'.format(self.image_set, rank))

        # person x (keypoints)
        _kpts = []
        for idx, kpt in enumerate(preds):
            _kpts.append({
                'keypoints': kpt,
                'center': all_boxes[idx][0:2],
                'scale': all_boxes[idx][2:4],
                'area': all_boxes[idx][4],
                'score': all_boxes[idx][5],
                'image': img_path[idx][-13:-4]
            })
        # image x person x (keypoints)
        kpts = defaultdict(list)
        for kpt in _kpts:
            kpts[kpt['image']].append(kpt)

        # rescoring and oks nms
        num_joints = self.num_joints
        in_vis_thre = self.in_vis_thre
        oks_thre = self.oks_thre
        oks_nmsed_kpts = []
        for img in kpts.keys():
            img_kpts = kpts[img]
            for n_p in img_kpts:
                box_score = n_p['score']
                kpt_score = 0
                valid_num = 0
                for n_jt in range(0, num_joints):
                    t_s = n_p['keypoints'][n_jt][2]
                    if t_s > in_vis_thre:
                        kpt_score = kpt_score + t_s
                        valid_num = valid_num + 1
                if valid_num != 0:
                    kpt_score = kpt_score / valid_num
                # rescoring: box score times mean confidence of the joints
                # above in_vis_thre
                n_p['score'] = kpt_score * box_score

            if self.soft_nms:
                keep = soft_oks_nms(
                    [img_kpts[i] for i in range(len(img_kpts))], oks_thre)
            else:
                keep = oks_nms([img_kpts[i] for i in range(len(img_kpts))],
                               oks_thre, self.nms_sigmas)

            if len(keep) == 0:
                oks_nmsed_kpts.append(img_kpts)
            else:
                oks_nmsed_kpts.append([img_kpts[_keep] for _keep in keep])

        self._write_coco_keypoint_results(oks_nmsed_kpts, res_file)
        if 'test' not in self.image_set:
            info_str = self._do_python_keypoint_eval(res_file, res_folder)
            name_value = OrderedDict(info_str)
            return name_value, name_value['AP']
        else:
            return {'Null': 0}, 0

    def _write_coco_keypoint_results(self, keypoints, res_file):
        data_pack = [{
            'cat_id': self._class_to_coco_ind[cls],
            'cls_ind': cls_ind,
            'cls': cls,
            'ann_type': 'keypoints',
            'keypoints': keypoints
        } for cls_ind, cls in enumerate(self.classes)
          if not cls == '__background__']

        results = self._coco_keypoint_results_one_category_kernel(
            data_pack[0])
        logger.info('=> writing results json to %s' % res_file)
        with open(res_file, 'w') as f:
            json.dump(results, f, sort_keys=True, indent=4)
        try:
            json.load(open(res_file))
        except Exception:
            # repair a truncated results file by closing the top-level list
            content = []
            with open(res_file, 'r') as f:
                for line in f:
                    content.append(line)
            content[-1] = ']'
            with open(res_file, 'w') as f:
                for c in content:
                    f.write(c)

    def _coco_keypoint_results_one_category_kernel(self, data_pack):
        cat_id = data_pack['cat_id']
        keypoints = data_pack['keypoints']
        cat_results = []

        for img_kpts in keypoints:
            if len(img_kpts) == 0:
                continue

            _key_points = np.array(
                [img_kpts[k]['keypoints'] for k in range(len(img_kpts))])
            key_points = np.zeros(
                (_key_points.shape[0], self.num_joints * 3), dtype=float)

            for ipt in range(self.num_joints):
                key_points[:, ipt * 3 + 0] = _key_points[:, ipt, 0]
                key_points[:, ipt * 3 + 1] = _key_points[:, ipt, 1]
                # keypoint score
                key_points[:, ipt * 3 + 2] = _key_points[:, ipt, 2]

            result = [{
                'image_id': img_kpts[k]['image'],
                'category_id': cat_id,
                'keypoints': list(key_points[k]),
                'score': img_kpts[k]['score'],
                'center': list(img_kpts[k]['center']),
                'scale': list(img_kpts[k]['scale'])
            } for k in range(len(img_kpts))]
            cat_results.extend(result)

        return cat_results

    def _do_python_keypoint_eval(self, res_file, res_folder):
        coco_dt = self.coco.loadRes(res_file)
        coco_eval = COCOeval(self.coco, coco_dt, 'keypoints')
        coco_eval.params.useSegm = None
        coco_eval.evaluate()
        coco_eval.accumulate()
        coco_eval.summarize()
        stats_names = [
            'AP', 'AP .5', 'AP .75', 'AP (M)', 'AP (L)',
            'AR', 'AR .5', 'AR .75', 'AR (M)', 'AR (L)'
        ]

        info_str = []
        for ind, name in enumerate(stats_names):
            info_str.append((name, coco_eval.stats[ind]))

        return info_str
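# Hedged numeric sketch of the _box2cs/_xywh2cs convention used above, as a
# standalone re-implementation for illustration. pixel_std=200 and the 1.25
# padding mirror the class defaults; the 192/256 aspect ratio is an assumed
# input size, the real value comes from cfg.MODEL.IMAGE_SIZE.
import numpy as np

def xywh2cs(x, y, w, h, aspect_ratio=192 / 256, pixel_std=200, padding=1.25):
    """Return (center, scale): the box center plus its size normalized by
    pixel_std, after growing the box to match the target aspect ratio."""
    center = np.array([x + w * 0.5, y + h * 0.5], dtype=np.float32)
    if w > aspect_ratio * h:
        h = w / aspect_ratio      # too wide: grow height
    elif w < aspect_ratio * h:
        w = h * aspect_ratio      # too tall: grow width
    scale = np.array([w / pixel_std, h / pixel_std],
                     dtype=np.float32) * padding
    return center, scale

# a 100x300 person box is padded to 225x300 before normalization:
# xywh2cs(50, 40, 100, 300) -> center [100., 190.], scale ~[1.406, 1.875]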
    def __init__(self, train=True):
        dataset_name = 'JTA'
        additional_name = 'SyMPose_IOSB_CrowdPose'
        self.num_kps = 14
        self.kps_names = [
            "left_shoulder", "right_shoulder", "left_elbow", "right_elbow",
            "left_wrist", "right_wrist", "left_hip", "right_hip",
            "left_knee", "right_knee", "left_ankle", "right_ankle",
            "head_top", "neck",
        ]
        self.kps_symmetry = [(0, 1), (2, 3), (4, 5), (6, 7), (8, 9), (10, 11)]
        self.kps_lines = [
            (0, 1), (0, 2), (1, 3), (2, 4), (3, 5), (6, 7),
            (6, 8), (7, 9), (8, 10), (9, 11), (12, 13),
        ]
        self.sigmas = np.array(
            [.79, .79, .72, .72, .62, .62, 1.07, 1.07,
             .87, .87, .89, .89, .79, .79]) / 10.0

        # human detection result
        # self.human_det_path = osp.join('../data', dataset_name, 'dets',
        #                                'human_detection.json')
        self.human_det_path = osp.join('../../crowdPE', 'dets',
                                       'human_detection_test.json')
        self.img_path = osp.join('../data', dataset_name, additional_name,
                                 'images')
        self.train_annot_path = osp.join('../data', dataset_name,
                                         additional_name, 'annotations',
                                         'train_jta.json')
        self.num_val_split = 5
        self.val_annot_path = osp.join('../data', dataset_name,
                                       additional_name, 'annotations',
                                       'train_jta.json')
        self.test_annot_path = osp.join('../data', dataset_name,
                                        additional_name, 'annotations',
                                        'iosb_crowdpose_test.json')

        self.train_data = []
        if train:
            coco = COCO(self.train_annot_path)
            for aid in coco.anns.keys():
                ann = coco.anns[aid]
                joints = ann['keypoints']
                # skip annotations whose image is missing, crowd regions,
                # and instances without any labeled keypoint (membership
                # check before coco.imgs is indexed)
                if (ann['image_id'] not in coco.imgs) or ann['iscrowd'] or \
                        (np.sum(joints[2::3]) == 0) or \
                        (ann['num_keypoints'] == 0):
                    continue
                imgname = coco.imgs[ann['image_id']]['file_name']

                # sanitize bboxes: clip to image bounds
                x, y, w, h = ann['bbox']
                img = coco.loadImgs(ann['image_id'])[0]
                width, height = img['width'], img['height']
                x1 = np.max((0, x))
                y1 = np.max((0, y))
                x2 = np.min((width - 1, x1 + np.max((0, w - 1))))
                y2 = np.min((height - 1, y1 + np.max((0, h - 1))))
                if ann['area'] > 0 and x2 >= x1 and y2 >= y1:
                    bbox = [x1, y1, x2 - x1, y2 - y1]
                else:
                    continue

                data = dict(image_id=ann['image_id'],
                            imgpath=imgname,
                            id=aid,
                            bbox=bbox,
                            joints=joints,
                            score=1)
                self.train_data.append(data)
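# Hedged sketch: the `joints` field stored in train_data above is the flat
# COCO triplet list [x1, y1, v1, x2, y2, v2, ...]; downstream code typically
# reshapes it to (num_kps, 3). The helper below is illustrative, not from
# the source. The visibility convention is COCO's: 0 = not labeled,
# 1 = labeled but occluded, 2 = labeled and visible, which is why
# np.sum(joints[2::3]) == 0 in the filter above means "no labeled keypoint".
import numpy as np

def joints_to_array(joints, num_kps=14):
    """(num_kps, 3) array of (x, y, visibility) from a flat COCO list."""
    return np.asarray(joints, dtype=np.float32).reshape(num_kps, 3)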
from crowdposetools.coco import COCO  # pycocotools-style API
import numpy as np
import skimage.io as io
import matplotlib.pyplot as plt
import pylab

pylab.rcParams['figure.figsize'] = (8.0, 10.0)

# dataDir = '/home/andrew/datasets/MSCOCO/coco2017'
# dataType = 'train2017'
# annFile = '{}/annotations/instances_{}.json'.format(dataDir, dataType)
annFile = 'data/crowdpose/person_keypoints_train2017.json'

# initialize the COCO api for the annotation data
coco = COCO(annFile)

imgIds = coco.getImgIds(imgIds=[100000])
img = coco.loadImgs(imgIds[0])[0]

I = io.imread('data/crowdpose/%s' % (img['file_name']))
plt.figure()
plt.imshow(I)
plt.axis('off')
ax = plt.gca()
annIds = coco.getAnnIds(imgIds=img['id'], iscrowd=None)
anns = coco.loadAnns(annIds)
coco.showAnns(anns)
plt.show()
# plt.savefig('coco4.png')
class CrowdPoseRLDataset(CPRelationJointsDataset):
    """`CrowdPose`_ Dataset with per-person interference (relation) targets.

    Args:
        cfg: experiment config.
        root (string): Root directory where the dataset is located.
        image_set (string): Dataset split ('train', 'val', 'test').
        is_train (bool): Whether to build the training db.
        transform (callable, optional): A function/transform that takes in an
            opencv image and returns a transformed version.
            E.g, ``transforms.ToTensor``

    keypoint order:
        ['left_shoulder', 'right_shoulder', 'left_elbow', 'right_elbow',
         'left_wrist', 'right_wrist', 'left_hip', 'right_hip',
         'left_knee', 'right_knee', 'left_ankle', 'right_ankle',
         'head', 'neck']
    """

    def __init__(self, cfg, root, image_set, is_train, transform=None):
        self.name = 'CROWDPOSE'
        super().__init__(cfg, root, image_set, is_train, transform)
        self.nms_thre = cfg.TEST.NMS_THRE
        self.image_thre = cfg.TEST.IMAGE_THRE
        self.soft_nms = cfg.TEST.SOFT_NMS
        self.oks_thre = cfg.TEST.OKS_THRE
        self.in_vis_thre = cfg.TEST.IN_VIS_THRE
        self.bbox_file = cfg.TEST.COCO_BBOX_FILE
        self.use_gt_bbox = cfg.TEST.USE_GT_BBOX
        self.image_width = cfg.MODEL.IMAGE_SIZE[0]
        self.image_height = cfg.MODEL.IMAGE_SIZE[1]
        self.aspect_ratio = self.image_width * 1.0 / self.image_height
        self.pixel_std = 200
        self.nms_sigmas = np.array(
            [.79, .79, .72, .72, .62, .62, 1.07, 1.07,
             .87, .87, .89, .89, .35, .35]) / 10.0

        self.coco = COCO(self._get_anno_file_name())

        # deal with class names
        cats = [cat['name']
                for cat in self.coco.loadCats(self.coco.getCatIds())]
        self.classes = ['__background__'] + cats
        logger.info('=> classes: {}'.format(self.classes))
        self.num_classes = len(self.classes)
        self._class_to_ind = dict(zip(self.classes, range(self.num_classes)))
        self._class_to_coco_ind = dict(zip(cats, self.coco.getCatIds()))
        self._coco_ind_to_class_ind = dict(
            [(self._class_to_coco_ind[cls], self._class_to_ind[cls])
             for cls in self.classes[1:]])

        # load image file names
        self.image_set_index = list(self.coco.imgs.keys())
        self.num_images = len(self.image_set_index)
        logger.info('=> num_images: {}'.format(self.num_images))

        self.num_joints = 14
        self.flip_pairs = [[0, 1], [2, 3], [4, 5], [6, 7], [8, 9], [10, 11]]
        self.parent_ids = None
        self.upper_body_ids = (0, 1, 2, 3, 4, 5, 12, 13)
        self.lower_body_ids = (6, 7, 8, 9, 10, 11)

        self.joints_weight = np.array(
            [1., 1., 1.2, 1.2, 1.5, 1.5, 1., 1., 1.2, 1.2, 1.5, 1.5, 1., 1.],
            dtype=np.float32).reshape((self.num_joints, 1))

        self.db = self._get_db()

        if is_train and cfg.DATASET.SELECT_DATA:
            self.db = self.select_data(self.db)

    def _get_anno_file_name(self):
        # example: root/json/crowdpose_{train,val,test}.json
        return os.path.join(self.root, 'json',
                            'crowdpose_{}.json'.format(self.image_set))

    def _get_image_path(self, file_name):
        images_dir = os.path.join(self.root, 'images')
        if self.data_format == 'zip':
            return images_dir + '.zip@' + file_name
        else:
            return os.path.join(images_dir, file_name)

    def _load_image_set_index(self):
        """ image id: int """
        image_ids = self.coco.getImgIds()
        return image_ids

    def _get_db(self):
        if self.is_train or self.use_gt_bbox:
            # use ground truth bbox
            gt_db = self._load_coco_keypoint_annotations()
        else:
            # use bbox from detection
            gt_db = self._load_coco_person_detection_results()
        return gt_db

    def _load_coco_keypoint_annotations(self):
        """ ground truth bbox and keypoints """
        gt_db = []
        for index in self.image_set_index:
            gt_db.extend(self._load_coco_keypoint_annotation_kernal(index))
        return gt_db

    def _load_coco_keypoint_annotation_kernal(self, index):
        """
        coco ann: [u'segmentation', u'area', u'iscrowd', u'image_id',
                   u'bbox', u'category_id', u'id']
        iscrowd:
            crowd instances are handled by marking their overlaps with all
            categories to -1 and later excluded in training
        bbox:
            [x1, y1, w, h]
        :param index: coco image id
        :return: db entry
        """
        im_ann = self.coco.loadImgs(index)[0]
        data_numpy = cv2.imread(
            self.image_path_from_index(index),
            cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)

        # some CrowdPose annotations store width/height swapped; fall back
        # to the actual image size when the two disagree
        height = im_ann['height']
        width = im_ann['width']
        if data_numpy.shape[1] == height and data_numpy.shape[0] == width \
                and height != width:
            height = data_numpy.shape[0]
            width = data_numpy.shape[1]

        annIds = self.coco.getAnnIds(imgIds=index, iscrowd=False)
        objs = self.coco.loadAnns(annIds)

        # sanitize bboxes: clip to image bounds
        valid_objs = []
        for obj in objs:
            x, y, w, h = obj['bbox']
            x1 = np.max((0, x))
            y1 = np.max((0, y))
            x2 = np.min((width - 1, x1 + np.max((0, w - 1))))
            y2 = np.min((height - 1, y1 + np.max((0, h - 1))))
            if x2 >= x1 and y2 >= y1:
                obj['clean_bbox'] = [x1, y1, x2 - x1, y2 - y1]
                valid_objs.append(obj)
        objs = valid_objs

        rec = []
        for obj in objs:
            cls = self._coco_ind_to_class_ind[obj['category_id']]
            if cls != 1:
                continue

            # ignore objs without keypoints annotation
            if max(obj['keypoints']) == 0:
                continue

            joints_3d = np.zeros((self.num_joints, 3), dtype=float)
            joints_3d_vis = np.zeros((self.num_joints, 3), dtype=float)
            for ipt in range(self.num_joints):
                joints_3d[ipt, 0] = obj['keypoints'][ipt * 3 + 0]
                joints_3d[ipt, 1] = obj['keypoints'][ipt * 3 + 1]
                joints_3d[ipt, 2] = 0
                t_vis = obj['keypoints'][ipt * 3 + 2]
                if t_vis > 1:
                    t_vis = 1
                joints_3d_vis[ipt, 0] = t_vis
                joints_3d_vis[ipt, 1] = t_vis
                joints_3d_vis[ipt, 2] = 0

            center, scale = self._box2cs(obj['clean_bbox'][:4])
            obj_size = obj['clean_bbox'][2:4]
            rec.append({
                'image': self.image_path_from_index(index),
                'center': center,
                'scale': scale,
                'obj_size': obj_size,
                'joints_3d': joints_3d,
                'joints_3d_vis': joints_3d_vis,
                'filename': '',
                'imgnum': 0,
            })

        # for each person, collect the joints of every other person in the
        # same image as interference targets
        for i in range(len(rec)):
            interference = []
            interference_vis = []
            for j in range(len(rec)):
                if i == j:
                    continue
                interference.append(rec[j]['joints_3d'])
                interference_vis.append(rec[j]['joints_3d_vis'])
            rec[i]['interference'] = interference
            rec[i]['interference_vis'] = interference_vis

        return rec

    def _box2cs(self, box):
        x, y, w, h = box[:4]
        return self._xywh2cs(x, y, w, h)

    def _xywh2cs(self, x, y, w, h):
        center = np.zeros((2), dtype=np.float32)
        center[0] = x + w * 0.5
        center[1] = y + h * 0.5

        # pad the box to the target aspect ratio
        if w > self.aspect_ratio * h:
            h = w * 1.0 / self.aspect_ratio
        elif w < self.aspect_ratio * h:
            w = h * self.aspect_ratio
        scale = np.array(
            [w * 1.0 / self.pixel_std, h * 1.0 / self.pixel_std],
            dtype=np.float32)
        if center[0] != -1:
            scale = scale * 1  # extra padding (e.g. 1.25) disabled here

        return center, scale

    def image_path_from_index(self, index, prefix=''):
        """ example: images / 109979.jpg """
        file_name = '%d.jpg' % index
        image_path = os.path.join(self.root, 'images', file_name)
        return image_path

    def _load_coco_person_detection_results(self):
        all_boxes = None
        with open(self.bbox_file, 'r') as f:
            all_boxes = json.load(f)

        if not all_boxes:
            logger.error('=> Load %s fail!' % self.bbox_file)
            return None

        logger.info('=> Total boxes: {}'.format(len(all_boxes)))

        kpt_db = []
        num_boxes = 0
        for n_img in range(0, len(all_boxes)):
            det_res = all_boxes[n_img]
            if det_res['category_id'] != 1:
                continue
            img_name = self.image_path_from_index(det_res['image_id'])
            box = det_res['bbox']
            obj_size = det_res['bbox'][2:4]
            score = det_res['score']
            if score < self.image_thre:
                continue

            num_boxes = num_boxes + 1

            center, scale = self._box2cs(box)
            joints_3d = np.zeros((self.num_joints, 3), dtype=float)
            joints_3d_vis = np.ones((self.num_joints, 3), dtype=float)
            kpt_db.append({
                'image': img_name,
                'center': center,
                'scale': scale,
                'score': score,
                'joints_3d': joints_3d,
                'joints_3d_vis': joints_3d_vis,
                'obj_size': obj_size,
            })

        logger.info('=> Total boxes after filtering low score@{}: {}'.format(
            self.image_thre, num_boxes))
        return kpt_db

    def evaluate(self, cfg, preds, output_dir, all_boxes, img_path,
                 *args, **kwargs):
        rank = cfg.RANK
        res_folder = os.path.join(output_dir, 'results')
        if not os.path.exists(res_folder):
            try:
                os.makedirs(res_folder)
            except Exception:
                logger.error('Fail to make {}'.format(res_folder))
        res_file = os.path.join(
            res_folder,
            'keypoints_{}_results_{}.json'.format(self.image_set, rank))

        # person x (keypoints)
        _kpts = []
        for idx, kpt in enumerate(preds):
            _kpts.append({
                'keypoints': kpt,
                'center': all_boxes[idx][0:2],
                'scale': all_boxes[idx][2:4],
                'area': all_boxes[idx][4],
                'score': all_boxes[idx][5],
                'image': int(img_path[idx].split('/')[-1][:-4])
            })
        # image x person x (keypoints)
        kpts = defaultdict(list)
        for kpt in _kpts:
            kpts[kpt['image']].append(kpt)

        # rescoring and oks nms
        num_joints = self.num_joints
        in_vis_thre = self.in_vis_thre
        oks_thre = self.oks_thre
        oks_nmsed_kpts = []
        for img in kpts.keys():
            img_kpts = kpts[img]
            for n_p in img_kpts:
                box_score = n_p['score']
                kpt_score = 0
                valid_num = 0
                for n_jt in range(0, num_joints):
                    t_s = n_p['keypoints'][n_jt][2]
                    if t_s > in_vis_thre:
                        kpt_score = kpt_score + t_s
                        valid_num = valid_num + 1
                if valid_num != 0:
                    kpt_score = kpt_score / valid_num
                # rescoring: box score times mean confidence of the joints
                # above in_vis_thre
                n_p['score'] = kpt_score * box_score

            if self.soft_nms:
                keep = soft_oks_nms(
                    [img_kpts[i] for i in range(len(img_kpts))],
                    oks_thre, self.nms_sigmas)
            else:
                keep = oks_nms([img_kpts[i] for i in range(len(img_kpts))],
                               oks_thre, self.nms_sigmas)

            if len(keep) == 0:
                oks_nmsed_kpts.append(img_kpts)
            else:
                oks_nmsed_kpts.append([img_kpts[_keep] for _keep in keep])

        self._write_coco_keypoint_results(oks_nmsed_kpts, res_file)
        # the CrowdPose test split also has annotations, so evaluation runs
        # for every image_set
        info_str = self._do_python_keypoint_eval(res_file, res_folder)
        name_value = OrderedDict(info_str)
        return name_value, name_value['AP']

    def _write_coco_keypoint_results(self, keypoints, res_file):
        data_pack = [{
            'cat_id': self._class_to_coco_ind[cls],
            'cls_ind': cls_ind,
            'cls': cls,
            'ann_type': 'keypoints',
            'keypoints': keypoints
        } for cls_ind, cls in enumerate(self.classes)
          if not cls == '__background__']

        results = self._coco_keypoint_results_one_category_kernel(
            data_pack[0])
        logger.info('=> writing results json to %s' % res_file)
        with open(res_file, 'w') as f:
            json.dump(results, f, sort_keys=True, indent=4)
        try:
            json.load(open(res_file))
        except Exception:
            # repair a truncated results file by closing the top-level list
            content = []
            with open(res_file, 'r') as f:
                for line in f:
                    content.append(line)
            content[-1] = ']'
            with open(res_file, 'w') as f:
                for c in content:
                    f.write(c)

    def _coco_keypoint_results_one_category_kernel(self, data_pack):
        cat_id = data_pack['cat_id']
        keypoints = data_pack['keypoints']
        cat_results = []

        for img_kpts in keypoints:
            if len(img_kpts) == 0:
                continue

            _key_points = np.array(
                [img_kpts[k]['keypoints'] for k in range(len(img_kpts))])
            key_points = np.zeros(
                (_key_points.shape[0], self.num_joints * 3), dtype=float)

            for ipt in range(self.num_joints):
                key_points[:, ipt * 3 + 0] = _key_points[:, ipt, 0]
                key_points[:, ipt * 3 + 1] = _key_points[:, ipt, 1]
                # keypoint score
                key_points[:, ipt * 3 + 2] = _key_points[:, ipt, 2]

            result = [{
                'image_id': img_kpts[k]['image'],
                'category_id': cat_id,
                'keypoints': list(key_points[k]),
                'score': img_kpts[k]['score'],
                'center': list(img_kpts[k]['center']),
                'scale': list(img_kpts[k]['scale'])
            } for k in range(len(img_kpts))]
            cat_results.extend(result)

        return cat_results

    def _do_python_keypoint_eval(self, res_file, res_folder):
        coco_dt = self.coco.loadRes(res_file)
        coco_eval = COCOeval(self.coco, coco_dt, 'keypoints')
        coco_eval.params.useSegm = None
        coco_eval.evaluate()
        coco_eval.accumulate()
        coco_eval.summarize()
        stats_names = [
            'AP', 'AP .5', 'AP .75', 'AR', 'AR .5', 'AR .75',
            'AP (easy)', 'AP (medium)', 'AP (hard)'
        ]
        stats_index = [0, 1, 2, 5, 6, 7, 8, 9, 10]

        info_str = []
        for ind, name in enumerate(stats_names):
            info_str.append((name, coco_eval.stats[stats_index[ind]]))

        return info_str
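# Hedged standalone sketch of the rescoring rule inside evaluate() above: the
# detector box score is multiplied by the mean confidence of the joints that
# clear in_vis_thre. The 0.2 default is an assumption for illustration; the
# real value comes from cfg.TEST.IN_VIS_THRE.
def rescore(box_score, joint_scores, in_vis_thre=0.2):
    """box_score times the mean of the joint scores above in_vis_thre."""
    valid = [s for s in joint_scores if s > in_vis_thre]
    kpt_score = sum(valid) / len(valid) if valid else 0.0
    return kpt_score * box_score

# e.g. rescore(0.9, [0.8, 0.7, 0.1]) -> 0.9 * 0.75 = 0.675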