def _load_all(self, anno_file, shuffle):
    """
    Build the image index and the per-image label arrays from a COCO
    annotation json file and store them on the instance.

    Each label row is ``[category_id, xmin, ymin, xmax, ymax, 0]`` with the
    box corners divided by the image width/height. Images without any
    annotation are left out of both lists.

    Parameters:
    ----------
    anno_file: str
        annotation json file
    shuffle: bool
        whether to shuffle image list
    """
    collected_paths = []
    collected_labels = []

    coco = COCO(anno_file)
    for img_id in coco.getImgIds():
        # filename
        info = coco.loadImgs(img_id)[0]
        filename = info["file_name"]
        # the second '_'-separated token of the file name is used as folder
        subdir = filename.split('_')[1]
        height = info["height"]
        width = info["width"]

        def to_row(anno):
            """Turn one annotation into a normalized label row."""
            cat_id = int(anno["category_id"])
            bbox = anno["bbox"]
            assert len(bbox) == 4
            xmin = float(bbox[0]) / width
            ymin = float(bbox[1]) / height
            xmax = xmin + float(bbox[2]) / width
            ymax = ymin + float(bbox[3]) / height
            return [cat_id, xmin, ymin, xmax, ymax, 0]

        # label
        annos = coco.loadAnns(coco.getAnnIds(imgIds=img_id))
        rows = [to_row(anno) for anno in annos]
        if not rows:
            continue
        collected_labels.append(np.array(rows))
        collected_paths.append(os.path.join(subdir, filename))

    if shuffle:
        import random
        # shuffle one permutation and apply it to both lists so that paths
        # and labels stay aligned
        order = list(range(len(collected_paths)))
        random.shuffle(order)
        collected_paths = [collected_paths[pos] for pos in order]
        collected_labels = [collected_labels[pos] for pos in order]

    # store the results
    self.image_set_index = collected_paths
    self.labels = collected_labels
class CocoDataset(Dataset):
    """COCO caption dataset usable with torch.utils.data.DataLoader.

    One sample corresponds to one caption annotation, so an image that has
    several captions appears several times.
    """

    def __init__(self, root, json, vocab, transform=None):
        """Store the image location, the annotations and the vocabulary.

        Args:
            root: image directory.
            json: coco annotation file path.
            vocab: vocabulary wrapper; called with a token, it returns an id.
            transform: optional image transformer applied in __getitem__.
        """
        self.root = root
        self.vocab = vocab
        self.transform = transform
        self.coco = COCO(json)
        # one dataset entry per annotation id
        self.ids = list(self.coco.anns.keys())

    def __getitem__(self, index):
        """Return the (image, caption-tensor) pair for the given index."""
        annotation = self.coco.anns[self.ids[index]]
        img_id = annotation['image_id']
        caption_text = annotation['caption']
        file_name = self.coco.loadImgs(img_id)[0]['file_name']

        # Read the picture and apply the optional transformation.
        image = Image.open(os.path.join(self.root, file_name)).convert('RGB')
        if self.transform is not None:
            image = self.transform(image)

        # Numericalize the caption: <BOS>, one id per token, <EOS>.
        tokens = tokenizer(str(caption_text))
        token_ids = [
            self.vocab('<BOS>'),
            *(self.vocab(token) for token in tokens),
            self.vocab('<EOS>'),
        ]
        return image, torch.Tensor(token_ids)

    def __len__(self):
        """Number of caption annotations."""
        return len(self.ids)
print('There are %d images in the training set' % len(imgIds))

annotations = cocoGt.getAnnIds()
print('There are %d annotations in the training set' % len(annotations))

# Select random annotation.
# random.choice picks an existing element; random.randint(0, len(...)) is
# inclusive of its upper bound and could index one past the end of the list.
anno_id = random.choice(annotations)
anno = cocoGt.loadAnns(ids=anno_id)[0]
print('Annotation (id=%d): %s' % (anno_id, anno))

cat = cocoGt.loadCats(ids=anno['category_id'])[0]
category_name = cat['name']
print('Object type %s' % category_name)

# Show the annotation in its image
image = cocoGt.loadImgs(ids=anno['image_id'])[0]
file_path = path.join(image_directory, image['file_name'])

fig, ax = plt.subplots(1)
img = mpimg.imread(file_path)
ax.imshow(img)

# COCO boxes are [x, y, width, height]; Rectangle takes the same layout
bbox = anno['bbox']
rect = patches.Rectangle((bbox[0], bbox[1]), bbox[2], bbox[3], linewidth=2,
                         edgecolor='g', facecolor='none')
ax.add_patch(rect)
def plot(data, gt_file, img_path, save_path, link_pairs, ring_color, save=True,
         max_images=3):
    """Draw predicted keypoint skeletons on images and print keypoint errors.

    For every category and every processed image, the detections are matched
    to the ground-truth boxes by an overlap ratio of enlarged boxes; matching
    detections get their limbs (``link_pairs``) and joints drawn on top of the
    image, and the figure is optionally written to disk.

    Args:
        data: detection results, passed as-is to ``COCO.loadRes``.
        gt_file: ground-truth annotation file, passed as-is to ``COCO``.
        img_path: directory that holds the image files.
        save_path: prefix for the written images. It is concatenated with the
            file name without a separator, so a directory must end with one.
        link_pairs: iterable of ``(joint_a, joint_b, color)`` entries; the
            joint ids are looked up in the dict returned by
            ``map_joint_dict`` and used 1-based to index joint arrays.
        ring_color: face colors for the joint circles, indexed by joint index.
        save: if True, write one png per processed image.
        max_images: number of images (from the start of the image list) to
            process; ``None`` processes every image. Default: 3.

    Raises:
        FileNotFoundError: if an image cannot be read from ``img_path``.
    """
    # joints
    coco = COCO(gt_file)
    coco_dt = coco.loadRes(data)
    coco_eval = COCOeval(coco, coco_dt, 'keypoints')
    coco_eval._prepare()
    gts_ = coco_eval._gts
    dts_ = coco_eval._dts

    p = coco_eval.params
    p.imgIds = list(np.unique(p.imgIds))
    if p.useCats:
        p.catIds = list(np.unique(p.catIds))
    p.maxDets = sorted(p.maxDets)

    # loop through images, area range, max detection number
    catIds = p.catIds if p.useCats else [-1]
    threshold = 0
    joint_thres = 0.1
    imgs = coco.loadImgs(p.imgIds)
    mean_rmse_list = []
    mean_rmse_mask_list = []

    for catId in catIds:
        for img_info in imgs[:max_images]:
            # dimension here should be Nxm
            gts = gts_[img_info['id'], catId]
            dts = dts_[img_info['id'], catId]
            if len(gts) == 0 or len(dts) == 0:
                continue

            # Per-keypoint distance between the first ground truth and the
            # first detection (taken before the detections are sorted).
            npgt = np.array(gts[0]["keypoints"])
            npdt = np.array(dts[0]["keypoints"])
            mask = npdt[2::3] >= joint_thres
            RMSE = np.sqrt((npgt[0::3] - npdt[0::3]) ** 2
                           + (npgt[1::3] - npdt[1::3]) ** 2)
            RMSE_mask = RMSE[mask]
            mean_rmse = np.round(np.nanmean(RMSE.flatten()), 2)
            mean_rmse_mask = np.round(np.nanmean(RMSE_mask.flatten()), 2)
            print(f"mean rmse: {mean_rmse}")
            print(f"mean rmse mask: {mean_rmse_mask}")
            mean_rmse_list.append(mean_rmse)
            mean_rmse_mask_list.append(mean_rmse_mask)

            # highest score first, capped at the largest maxDets value
            inds = np.argsort([-d['score'] for d in dts], kind='mergesort')
            dts = [dts[i] for i in inds]
            if len(dts) > p.maxDets[-1]:
                dts = dts[0:p.maxDets[-1]]

            sum_score = 0
            num_box = 0

            # Read Images
            img_file = os.path.join(img_path, img_info["file_name"])
            data_numpy = cv2.imread(
                img_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
            if data_numpy is None:
                # cv2.imread signals failure by returning None
                raise FileNotFoundError(f"Failed to read image {img_file}")
            h = data_numpy.shape[0]
            w = data_numpy.shape[1]

            # Plot
            plt.figure(figsize=(w / 100, h / 100), dpi=100)
            ax = plt.subplot(1, 1, 1)
            # reverse the channel axis of the cv2 image before showing it
            bk = plt.imshow(data_numpy[:, :, ::-1])
            bk.set_zorder(-1)

            for gt in gts:
                # matching dt_box and gt_box
                bb = gt['bbox']
                x0 = bb[0] - bb[2]
                x1 = bb[0] + bb[2] * 2
                y0 = bb[1] - bb[3]
                y1 = bb[1] + bb[3] * 2

                # create bounds for ignore regions(double the gt bbox)
                g = np.array(gt['keypoints'])
                vg = g[2::3]  # ground-truth visibility flags

                for dt in dts:
                    # Calculate Bbox IoU
                    dt_bb = dt['bbox']
                    dt_x0 = dt_bb[0] - dt_bb[2]
                    dt_x1 = dt_bb[0] + dt_bb[2] * 2
                    dt_y0 = dt_bb[1] - dt_bb[3]
                    dt_y1 = dt_bb[1] + dt_bb[3] * 2

                    # Clamp at zero: two negative extents (boxes disjoint on
                    # both axes) would otherwise multiply to a positive area.
                    ol_x = max(0, min(x1, dt_x1) - max(x0, dt_x0))
                    ol_y = max(0, min(y1, dt_y1) - max(y0, dt_y0))
                    ol_area = ol_x * ol_y
                    # area of the box enclosing both boxes
                    s_x = max(x1, dt_x1) - min(x0, dt_x0)
                    s_y = max(y1, dt_y1) - min(y0, dt_y0)
                    sum_area = s_x * s_y
                    iou = np.round(ol_area / (sum_area + np.spacing(1)), 2)
                    score = np.round(dt['score'], 2)
                    print(f"score: {dt['score']}")
                    if iou < 0.1 or score < threshold:
                        continue
                    print(f'iou: {iou}')

                    dt_w = dt_x1 - dt_x0
                    dt_h = dt_y1 - dt_y0
                    ref = min(dt_w, dt_h)  # drives line width and ring radius
                    num_box += 1
                    sum_score += dt['score']
                    dt_joints = np.array(dt['keypoints']).reshape(20, -1)
                    joints_dict = map_joint_dict(dt_joints)

                    # stick
                    for link_pair in link_pairs:
                        if link_pair[0] not in joints_dict \
                                or link_pair[1] not in joints_dict:
                            continue
                        # link ids are 1-based, the arrays are 0-based
                        if dt_joints[link_pair[0] - 1, 2] < joint_thres \
                                or dt_joints[link_pair[1] - 1, 2] < joint_thres \
                                or vg[link_pair[0] - 1] == 0 \
                                or vg[link_pair[1] - 1] == 0:
                            continue
                        lw = ref / 100.
                        line = mlines.Line2D(
                            np.array([joints_dict[link_pair[0]][0],
                                      joints_dict[link_pair[1]][0]]),
                            np.array([joints_dict[link_pair[0]][1],
                                      joints_dict[link_pair[1]][1]]),
                            ls='-',
                            lw=lw,
                            alpha=1,
                            color=link_pair[2],
                        )
                        line.set_zorder(0)
                        ax.add_line(line)

                    # black ring
                    for k in range(dt_joints.shape[0]):
                        # Test the visibility of joint k itself; the previous
                        # code reused the last link_pair of the loop above.
                        if dt_joints[k, 2] < joint_thres or vg[k] == 0:
                            continue
                        if dt_joints[k, 0] > w or dt_joints[k, 1] > h:
                            continue
                        radius = ref / 100
                        circle = mpatches.Circle(tuple(dt_joints[k, :2]),
                                                 radius=radius,
                                                 ec='black',
                                                 fc=ring_color[k],
                                                 alpha=1,
                                                 linewidth=1)
                        circle.set_zorder(1)
                        ax.add_patch(circle)

            avg_score = (sum_score / (num_box + np.spacing(1))) * 1000

            # strip ticks, axes and margins so only the image is left
            plt.gca().xaxis.set_major_locator(plt.NullLocator())
            plt.gca().yaxis.set_major_locator(plt.NullLocator())
            plt.axis('off')
            plt.subplots_adjust(top=1, bottom=0, left=0, right=1,
                                hspace=0, wspace=0)
            plt.margins(0, 0)
            if save:
                # int() replaces the removed np.int alias; bbox_inches is the
                # actual savefig keyword (it was misspelled as bbox_inckes).
                # pad_inches=0 keeps the tight crop free of a border.
                plt.savefig(save_path +
                            'score_' + str(int(avg_score)) + "_" +
                            img_info["file_name"].split(".")[0] + '.png',
                            format='png', bbox_inches='tight', pad_inches=0,
                            dpi=100)
            plt.close()

    print(f"total mean rmse: {np.mean(mean_rmse_list)}")
    print(f"total mean rmse mask: {np.mean(mean_rmse_mask_list)}")
class GOLFDataset(HumanPoseEstimationDataset):
    """
    Golf keypoint dataset (18 joints) stored in COCO annotation format.
    """

    def __init__(
            self,
            root_path="/mldisk/nfs_shared_/dh/golfKeypointDB/data/golfKeypointDB",
            data_version="train",
            is_train=True,
            use_gt_bboxes=True,
            bbox_path="",
            image_width=288,
            image_height=384,
            color_rgb=True,
            scale=True,
            scale_factor=0.35,
            flip_prob=0.5,
            rotate_prob=0.5,
            rotation_factor=45.,
            half_body_prob=0.3,
            use_different_joints_weight=False,
            heatmap_sigma=3,
            soft_nms=False,
    ):
        """
        Initializes a new GOLFDataset object.

        Image and annotation indexes are loaded and stored in memory.
        Annotations are preprocessed to have a simple list of annotations to
        iterate over.

        Bounding boxes can be loaded from the ground truth or from a pickle
        file (in this case, no annotations are provided).

        Args:
            root_path (str): dataset root path; it must contain an
                ``annotations`` folder and one image folder per data version.
            data_version (str): desired version/folder of the dataset.
                Possible options are "train", "val".
                Default: "train"
            is_train (bool): train or eval mode. If true, train mode is used.
                Default: True
            use_gt_bboxes (bool): use ground truth bounding boxes. If False,
                bbox_path is required.
                Default: True
            bbox_path (str): bounding boxes pickle file path.
                Default: ""
            image_width (int): image width.
                Default: 288
            image_height (int): image height.
                Default: 384
            color_rgb (bool): rgb or bgr color mode. If True, rgb color mode
                is used.
                Default: True
            scale (bool): scale mode.
                Default: True
            scale_factor (float): scale factor.
                Default: 0.35
            flip_prob (float): flip probability.
                Default: 0.5
            rotate_prob (float): rotate probability.
                Default: 0.5
            rotation_factor (float): rotation factor.
                Default: 45.
            half_body_prob (float): half body probability.
                Default: 0.3
            use_different_joints_weight (bool): use different joints weights.
                If true, the following joints weights will be used:
                [1., 1., 1., 1., 1., 1., 1., 1.2, 1.2, 1.5, 1.5, 1., 1., 1.2,
                 1.2, 1.5, 1.5, 1.8]
                Default: False
            heatmap_sigma (float): sigma of the gaussian used to create the
                heatmap.
                Default: 3
            soft_nms (bool): enable soft non-maximum suppression.
                Default: False

        Raises:
            ValueError: if ``data_version`` has no known annotation file.
        """
        super(GOLFDataset, self).__init__()

        self.root_path = root_path
        self.data_version = data_version
        self.is_train = is_train
        self.use_gt_bboxes = use_gt_bboxes
        self.bbox_path = bbox_path
        self.image_width = image_width
        self.image_height = image_height
        self.color_rgb = color_rgb
        self.scale = scale  # ToDo Check
        self.scale_factor = scale_factor
        self.flip_prob = flip_prob
        self.rotate_prob = rotate_prob
        self.rotation_factor = rotation_factor
        self.half_body_prob = half_body_prob
        self.use_different_joints_weight = use_different_joints_weight  # ToDo Check
        self.heatmap_sigma = heatmap_sigma
        self.soft_nms = soft_nms

        self.annotation_dir = os.path.join(self.root_path, 'annotations')
        self.data_path = os.path.join(self.root_path, self.data_version)
        if self.data_version == 'train':
            self.annotation_path = os.path.join(
                self.annotation_dir, 'golfDB_18pts_train_200_2_conf50.json')
        elif self.data_version == 'val':
            self.annotation_path = os.path.join(self.annotation_dir,
                                                'golfDB_18pts_val_20.json')
        else:
            # Fail here with a clear message instead of an AttributeError on
            # the missing annotation_path a few lines below.
            raise ValueError(
                "Unsupported data_version %r (expected 'train' or 'val')" %
                (self.data_version, ))

        self.image_size = (self.image_width, self.image_height)
        self.aspect_ratio = self.image_width * 1.0 / self.image_height
        # heatmaps are a quarter of the input resolution
        self.heatmap_size = (int(self.image_width / 4),
                             int(self.image_height / 4))
        self.heatmap_type = 'gaussian'
        self.pixel_std = 200  # I don't understand the meaning of pixel_std (=200) in the original implementation

        self.nof_joints = 18
        self.nof_joints_half_body = 8
        self.flip_pairs = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12],
                           [13, 14], [15, 16]]
        self.upper_body_ids = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
        self.lower_body_ids = [11, 12, 13, 14, 15, 16]
        self.joints_weight = np.asarray([
            1., 1., 1., 1., 1., 1., 1., 1.2, 1.2, 1.5, 1.5, 1., 1., 1.2, 1.2,
            1.5, 1.5, 1.8
        ], dtype=np.float32).reshape((self.nof_joints, 1))

        self.transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225]),
        ])

        # Load COCO dataset - Create COCO object then load images and annotations
        self.coco = COCO(self.annotation_path)
        self.imgIds = self.coco.getImgIds()

        # Create a list of annotations and the corresponding image (each image can contain more than one detection)

        # Load bboxes and joints
        #  if self.use_gt_bboxes -> Load GT bboxes and joints
        #  else -> Load pre-predicted bboxes by a detector (as YOLOv3) and null joints
        bboxes = None
        if not self.use_gt_bboxes:
            # bboxes must be saved as the original COCO annotations
            # i.e. the format must be:
            #   bboxes = {
            #       '<imgId>': [
            #           {
            #               'id': <annId>,  # progressive id for debugging
            #               'clean_bbox': np.array([<x>, <y>, <w>, <h>])}
            #           },
            #           ...
            #       ],
            #       ...
            #   }
            # NOTE(review): pickle.load executes arbitrary code from the
            # file; only point bbox_path at trusted files.
            with open(self.bbox_path, 'rb') as fd:
                bboxes = pickle.load(fd)

        self.data = []
        # load annotations for each image of COCO
        for imgId in tqdm(self.imgIds):
            if self.use_gt_bboxes:
                objs = self._load_valid_gt_objs(imgId)
            else:
                objs = bboxes[imgId]

            # for each annotation of this image, add the formatted annotation to self.data
            for obj in objs:
                # np.float was removed from NumPy; np.float64 is the same dtype
                joints = np.zeros((self.nof_joints, 2), dtype=np.float64)
                joints_visibility = np.ones((self.nof_joints, 2),
                                            dtype=np.float64)

                if self.use_gt_bboxes:
                    # keypoints are stored flat as x, y, visibility triplets
                    kps = np.asarray(
                        obj['keypoints'][:self.nof_joints * 3],
                        dtype=np.float64).reshape(self.nof_joints, 3)
                    joints[:] = kps[:, :2]
                    # ToDo check correctness
                    # COCO:
                    #   if visibility == 0 -> keypoint is not in the image.
                    #   if visibility == 1 -> keypoint is in the image BUT not visible (e.g. behind an object).
                    #   if visibility == 2 -> keypoint looks clearly (i.e. it is not hidden).
                    # Both 1 and 2 are collapsed to 1 here.
                    t_vis = np.clip(kps[:, 2], 0, 1).astype(int)
                    joints_visibility[:, 0] = t_vis
                    joints_visibility[:, 1] = t_vis

                center, scale = self._box2cs(obj['clean_bbox'][:4])

                self.data.append({
                    'imgId': imgId,
                    'annId': obj['id'],
                    'imgPath': os.path.join(self.root_path, self.data_version,
                                            '%06d.jpg' % imgId),
                    'center': center,
                    'scale': scale,
                    'joints': joints,
                    'joints_visibility': joints_visibility,
                })

        # Default values
        self.bbox_thre = 1.0
        self.image_thre = 0.0
        self.in_vis_thre = 0.2
        self.nms_thre = 1.0
        self.oks_thre = 0.9

    def _load_valid_gt_objs(self, imgId):
        """Return the ground-truth annotations of an image that have a usable bbox."""
        ann_ids = self.coco.getAnnIds(imgIds=imgId, iscrowd=False)
        img = self.coco.loadImgs(imgId)[0]

        valid_objs = []
        for obj in self.coco.loadAnns(ann_ids):
            # Skip non-person objects (it should never happen)
            if obj['category_id'] != 1:
                continue
            # ignore objs without keypoints annotation
            if max(obj['keypoints']) == 0:
                continue

            # sanitize bboxes: clamp the box to the image
            x, y, w, h = obj['bbox']
            x1 = np.max((0, x))
            y1 = np.max((0, y))
            x2 = np.min((img['width'] - 1, x1 + np.max((0, w - 1))))
            y2 = np.min((img['height'] - 1, y1 + np.max((0, h - 1))))

            # Use only valid bounding boxes
            if obj['area'] > 0 and x2 >= x1 and y2 >= y1:
                obj['clean_bbox'] = [x1, y1, x2 - x1, y2 - y1]
                valid_objs.append(obj)
        return valid_objs

    def __len__(self):
        """Number of annotated instances (not images)."""
        return len(self.data)

    def __getitem__(self, index):
        """Load, augment and encode one sample.

        Returns:
            tuple: ``(image, target, target_weight, joints_data)`` where
            ``image`` is the transformed crop, ``target`` the float32
            heatmaps, ``target_weight`` the float32 per-joint weights and
            ``joints_data`` the metadata dict updated with the values used
            for this sample.

        Raises:
            ValueError: if the image file cannot be read.
        """
        joints_data = self.data[index].copy()

        # Read the image from disk
        image = cv2.imread(joints_data['imgPath'],
                           cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        # Check before any further cv2 call, and report the path that failed.
        if image is None:
            raise ValueError('Fail to read %s' % joints_data['imgPath'])
        if self.color_rgb:
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # dict.copy() is shallow: copy the arrays so the in-place edits below
        # (center flip, affine transform of the joints) do not alter the
        # annotations cached in self.data.
        joints = joints_data['joints'].copy()
        joints_vis = joints_data['joints_visibility'].copy()
        c = joints_data['center'].copy()
        s = joints_data['scale'].copy()
        score = joints_data['score'] if 'score' in joints_data else 1
        r = 0

        # Apply data augmentation
        if self.is_train:
            if self.half_body_prob and \
                    random.random() < self.half_body_prob and \
                    np.sum(joints_vis[:, 0]) > self.nof_joints_half_body:
                c_half_body, s_half_body = self._half_body_transform(
                    joints, joints_vis)
                if c_half_body is not None and s_half_body is not None:
                    c, s = c_half_body, s_half_body

            sf = self.scale_factor
            rf = self.rotation_factor

            # A zero-centred draw is needed for the clipping ranges to have
            # any effect; random.random() is in [0, 1) and only ever produced
            # factors >= 1 and rotations >= 0.
            if self.scale:
                # A random scale factor in [1 - sf, 1 + sf]
                s = s * np.clip(random.gauss(0.0, 1.0) * sf + 1,
                                1 - sf, 1 + sf)

            if self.rotate_prob and random.random() < self.rotate_prob:
                # A random rotation factor in [-2 * rf, 2 * rf]
                r = np.clip(random.gauss(0.0, 1.0) * rf, -rf * 2, rf * 2)
            else:
                r = 0

            if self.flip_prob and random.random() < self.flip_prob:
                image = image[:, ::-1, :]
                joints, joints_vis = fliplr_joints(joints, joints_vis,
                                                   image.shape[1],
                                                   self.flip_pairs)
                c[0] = image.shape[1] - c[0] - 1

        # Apply affine transform on joints and image
        trans = get_affine_transform(c, s, self.pixel_std, r, self.image_size)
        image = cv2.warpAffine(
            image,
            trans,
            (int(self.image_size[0]), int(self.image_size[1])),
            flags=cv2.INTER_LINEAR)

        for i in range(self.nof_joints):
            if joints_vis[i, 0] > 0.:
                joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)

        # Convert image to tensor and normalize
        if self.transform is not None:  # I could remove this check
            image = self.transform(image)

        target, target_weight = self._generate_target(joints, joints_vis)

        # Update metadata
        joints_data['joints'] = joints
        joints_data['joints_visibility'] = joints_vis
        joints_data['center'] = c
        joints_data['scale'] = s
        joints_data['rotation'] = r
        joints_data['score'] = score

        return image, target.astype(np.float32), target_weight.astype(
            np.float32), joints_data

    def evaluate_accuracy(self, output, target, params=None):
        """Forward to ``evaluate_pck_accuracy``.

        Args:
            output: network output, passed through unchanged.
            target: ground-truth target, passed through unchanged.
            params (dict, optional): when given, must provide ``'hm_type'``
                and ``'thr'``, which are forwarded positionally.

        Returns:
            tuple: ``(accs, avg_acc, cnt, joints_preds, joints_target)``.
        """
        if params is not None:
            hm_type = params['hm_type']
            thr = params['thr']
            accs, avg_acc, cnt, joints_preds, joints_target = \
                evaluate_pck_accuracy(output, target, hm_type, thr)
        else:
            accs, avg_acc, cnt, joints_preds, joints_target = \
                evaluate_pck_accuracy(output, target)

        return accs, avg_acc, cnt, joints_preds, joints_target

    def evaluate_overall_accuracy(self, predictions, bounding_boxes,
                                  image_paths, output_dir, rank=0.):
        """Rescore predictions, apply OKS NMS, write them and run the COCO eval.

        Args:
            predictions: per-person keypoints; each entry is indexed as
                ``[joint][2]`` to read the joint confidence.
            bounding_boxes: per-person rows read as center (0:2), scale
                (2:4), area (4) and score (5).
            image_paths: per-person image path whose file stem is the integer
                image id.
            output_dir (str): folder in which ``results`` is created.
            rank: used, multiplied by 100, in the result file name.

        Returns:
            tuple: ``(name_value, AP)``, or ``({'Null': 0}, 0)`` when
            ``data_version`` contains ``'test'``.
        """
        res_folder = os.path.join(output_dir, 'results')
        os.makedirs(res_folder, exist_ok=True)

        res_file = os.path.join(
            res_folder,
            f'golf_keypoints_{self.data_version}_results_{int(rank*100)}.json')

        # person x (keypoints)
        _kpts = []
        for idx, kpt in enumerate(predictions):
            # The file stem is the image id; parsing the stem instead of a
            # fixed character window also works for ids longer than 6 digits.
            image_stem = os.path.splitext(
                os.path.basename(image_paths[idx]))[0]
            _kpts.append({
                'keypoints': kpt,
                'center': bounding_boxes[idx][0:2],
                'scale': bounding_boxes[idx][2:4],
                'area': bounding_boxes[idx][4],
                'score': bounding_boxes[idx][5],
                'image': int(image_stem)
            })

        # image x person x (keypoints)
        kpts = defaultdict(list)
        for kpt in _kpts:
            kpts[kpt['image']].append(kpt)

        # rescoring and oks nms
        num_joints = self.nof_joints
        in_vis_thre = self.in_vis_thre
        oks_thre = self.oks_thre
        oks_nmsed_kpts = []
        for img_kpts in kpts.values():
            for n_p in img_kpts:
                box_score = n_p['score']
                # mean confidence of the joints above the visibility threshold
                kpt_score = 0
                valid_num = 0
                for n_jt in range(num_joints):
                    t_s = n_p['keypoints'][n_jt][2]
                    if t_s > in_vis_thre:
                        kpt_score = kpt_score + t_s
                        valid_num = valid_num + 1
                if valid_num != 0:
                    kpt_score = kpt_score / valid_num
                # rescoring
                n_p['score'] = kpt_score * box_score

            if self.soft_nms:
                keep = soft_oks_nms(list(img_kpts), oks_thre)
            else:
                keep = oks_nms(list(img_kpts), oks_thre)

            if len(keep) == 0:
                oks_nmsed_kpts.append(img_kpts)
            else:
                oks_nmsed_kpts.append([img_kpts[_keep] for _keep in keep])

        self._write_coco_keypoint_results(oks_nmsed_kpts, res_file)
        if 'test' not in self.data_version:
            info_str = self._do_python_keypoint_eval(res_file)
            name_value = OrderedDict(info_str)
            return name_value, name_value['AP']
        else:
            return {'Null': 0}, 0

    # Private methods

    def _box2cs(self, box):
        """Convert an ``[x, y, w, h]`` box into a (center, scale) pair."""
        x, y, w, h = box[:4]
        return self._xywh2cs(x, y, w, h)

    def _xywh2cs(self, x, y, w, h):
        """Return box center and scale, padded to the network aspect ratio.

        The shorter side is enlarged to match ``self.aspect_ratio``; the
        scale is expressed in units of ``self.pixel_std`` and enlarged by 25%.
        """
        center = np.zeros((2, ), dtype=np.float32)
        center[0] = x + w * 0.5
        center[1] = y + h * 0.5

        if w > self.aspect_ratio * h:
            h = w * 1.0 / self.aspect_ratio
        elif w < self.aspect_ratio * h:
            w = h * self.aspect_ratio
        scale = np.array([w * 1.0 / self.pixel_std, h * 1.0 / self.pixel_std],
                         dtype=np.float32)
        if center[0] != -1:
            scale = scale * 1.25

        return center, scale

    def _half_body_transform(self, joints, joints_vis):
        """Pick the visible upper- or lower-body joints and fit a box on them.

        Returns:
            tuple: ``(center, scale)`` of the selected joints, or
            ``(None, None)`` when fewer than two joints are selected.
        """
        upper_joints = []
        lower_joints = []
        for joint_id in range(self.nof_joints):
            if joints_vis[joint_id][0] > 0:
                # every joint not listed as upper body counts as lower body
                if joint_id in self.upper_body_ids:
                    upper_joints.append(joints[joint_id])
                else:
                    lower_joints.append(joints[joint_id])

        if random.random() < 0.5 and len(upper_joints) > 2:
            selected_joints = upper_joints
        else:
            selected_joints = lower_joints \
                if len(lower_joints) > 2 else upper_joints

        if len(selected_joints) < 2:
            return None, None

        selected_joints = np.array(selected_joints, dtype=np.float32)
        center = selected_joints.mean(axis=0)[:2]

        left_top = np.amin(selected_joints, axis=0)
        right_bottom = np.amax(selected_joints, axis=0)

        w = right_bottom[0] - left_top[0]
        h = right_bottom[1] - left_top[1]

        if w > self.aspect_ratio * h:
            h = w * 1.0 / self.aspect_ratio
        elif w < self.aspect_ratio * h:
            w = h * self.aspect_ratio

        scale = np.array([w * 1.0 / self.pixel_std, h * 1.0 / self.pixel_std],
                         dtype=np.float32)
        scale = scale * 1.5

        return center, scale

    def _generate_target(self, joints, joints_vis):
        """Build one gaussian heatmap per joint.

        :param joints: array indexed as [joint, (x, y)], in input-image pixels
        :param joints_vis: array indexed as [joint, ...]; column 0 is the
            visibility flag
        :return: target, target_weight(1: visible, 0: invisible)
        :raises NotImplementedError: if ``self.heatmap_type`` is not 'gaussian'
        """
        target_weight = np.ones((self.nof_joints, 1), dtype=np.float32)
        target_weight[:, 0] = joints_vis[:, 0]

        if self.heatmap_type != 'gaussian':
            raise NotImplementedError

        heat_w, heat_h = self.heatmap_size
        target = np.zeros((self.nof_joints, heat_h, heat_w), dtype=np.float32)

        tmp_size = self.heatmap_sigma * 3
        feat_stride = np.asarray(self.image_size) / np.asarray(
            self.heatmap_size)

        # The gaussian patch is identical for every joint: build it once.
        size = 2 * tmp_size + 1
        x = np.arange(0, size, 1, np.float32)
        y = x[:, np.newaxis]
        x0 = y0 = size // 2
        # The gaussian is not normalized, we want the center value to equal 1
        g = np.exp(-((x - x0)**2 + (y - y0)**2) /
                   (2 * self.heatmap_sigma**2))

        for joint_id in range(self.nof_joints):
            mu_x = int(joints[joint_id][0] / feat_stride[0] + 0.5)
            mu_y = int(joints[joint_id][1] / feat_stride[1] + 0.5)

            # Check that any part of the gaussian is in-bounds
            ul = [int(mu_x - tmp_size), int(mu_y - tmp_size)]
            br = [int(mu_x + tmp_size + 1), int(mu_y + tmp_size + 1)]
            if ul[0] >= heat_w or ul[1] >= heat_h \
                    or br[0] < 0 or br[1] < 0:
                # If not, leave the heatmap empty and give the joint no weight
                target_weight[joint_id] = 0
                continue

            # Usable gaussian range
            g_x = max(0, -ul[0]), min(br[0], heat_w) - ul[0]
            g_y = max(0, -ul[1]), min(br[1], heat_h) - ul[1]
            # Image range
            img_x = max(0, ul[0]), min(br[0], heat_w)
            img_y = max(0, ul[1]), min(br[1], heat_h)

            if target_weight[joint_id, 0] > 0.5:
                target[joint_id][img_y[0]:img_y[1], img_x[0]:img_x[1]] = \
                    g[g_y[0]:g_y[1], g_x[0]:g_x[1]]

        if self.use_different_joints_weight:
            target_weight = np.multiply(target_weight, self.joints_weight)

        return target, target_weight

    def _write_coco_keypoint_results(self, keypoints, res_file):
        """Serialize the per-image keypoints to ``res_file`` as JSON."""
        data_pack = [{
            'cat_id': 1,  # 1 == 'person'
            'cls': 'person',
            'ann_type': 'keypoints',
            'keypoints': keypoints
        }]

        results = self._coco_keypoint_results_one_category_kernel(data_pack[0])
        with open(res_file, 'w') as f:
            json.dump(results, f, sort_keys=True, indent=4)

        # Sanity check: the file must parse; if it does not, replace its last
        # line with a closing bracket.
        try:
            with open(res_file) as f:
                json.load(f)
        except ValueError:  # json.JSONDecodeError is a ValueError
            with open(res_file, 'r') as f:
                content = f.readlines()
            content[-1] = ']'
            with open(res_file, 'w') as f:
                f.writelines(content)

    def _coco_keypoint_results_one_category_kernel(self, data_pack):
        """Flatten per-image keypoints into COCO result records.

        All numeric values are converted to built-in Python numbers because
        ``json`` cannot serialize ``np.float32`` scalars.
        """
        cat_id = data_pack['cat_id']
        keypoints = data_pack['keypoints']
        cat_results = []

        for img_kpts in keypoints:
            if len(img_kpts) == 0:
                continue

            _key_points = np.array([kpt['keypoints'] for kpt in img_kpts],
                                   dtype=np.float32)
            # (person, joint, (x, y, score)) -> (person, joint * 3)
            key_points = _key_points[:, :self.nof_joints, :3].reshape(
                _key_points.shape[0], self.nof_joints * 3)

            result = [{
                'image_id': kpt['image'],
                'category_id': cat_id,
                'keypoints': key_points[k].tolist(),
                'score': float(kpt['score']),
                'center': np.asarray(kpt['center']).tolist(),
                'scale': np.asarray(kpt['scale']).tolist()
            } for k, kpt in enumerate(img_kpts)]

            cat_results.extend(result)

        return cat_results

    def _do_python_keypoint_eval(self, res_file):
        """Run the COCO keypoint evaluation on ``res_file``.

        Returns:
            list: ``(metric name, value)`` tuples in the order of
            ``coco_eval.stats``.
        """
        coco_dt = self.coco.loadRes(res_file)
        coco_eval = COCOeval(self.coco, coco_dt, 'keypoints')
        coco_eval.params.useSegm = None
        coco_eval.params.imgIds = self.coco.getImgIds()
        coco_eval.evaluate()
        coco_eval.accumulate()
        coco_eval.summarize()

        stats_names = [
            'AP', 'Ap .5', 'AP .75', 'AP (M)', 'AP (L)', 'AR', 'AR .5',
            'AR .75', 'AR (M)', 'AR (L)'
        ]

        info_str = []
        for ind, name in enumerate(stats_names):
            info_str.append((name, coco_eval.stats[ind]))

        return info_str
labelnames[89] = "hair drier" labelnames[90] = "toothbrush" neededlabels = ["person", "dog", "car", "bicycle"] coco = COCO(annFile) cats = coco.loadCats(coco.getCatIds()) nms = [cat['name'] for cat in cats] imgIds = coco.getImgIds() takeXml = 0 directory = './annotations_pascalformat/' if not os.path.exists(directory): os.makedirs(directory) for n in range(len(imgIds)): img = coco.loadImgs(imgIds[n])[0] annIds = coco.getAnnIds(imgIds=img['id'], iscrowd=None) anns = coco.loadAnns(annIds) xml = '<annotation>\n<folder>\nCOCO2014pascalformat\n</folder>\n<filename>' xml += img[ 'file_name'] + '</filename>\n<source>\n<database>\nCOCO2014pascalformat\n</database>\n</source>\n<size>\n' xml += '<width>\n' + str( img['width']) + '\n</width>\n' + '<height>\n' + str( img['height']) + '\n</height>\n' xml += '<depth>\n3\n</depth>\n</size>\n<segmented>\n0\n</segmented>\n' for i in range(len(anns)): if (labelnames[int(anns[i]['category_id'])] in neededlabels): bbox = anns[i]['bbox'] xml += '<object>\n<name>' + str(labelnames[int(
class COCOTest(): def __init__(self, image_set, year): #imdb.__init__(self, 'coco_' + year + '_' + image_set) # COCO specific config options self.config = { 'top_k': 2000, 'use_salt': True, 'cleanup': True, 'crowd_thresh': 0.7, 'min_size': 2 } # name, paths self._year = year self._image_set = image_set self._data_path = osp.join(cfg.DATA_DIR, 'coco') # load COCO API, classes, class <-> id mappings self._COCO = COCO(self._get_ann_file()) cats = self._COCO.loadCats(self._COCO.getCatIds()) #print self._COCO.anns[185487] # anns = [self._COCO.anns[185487]] #self._COCO.showAnns(anns) #image_ids = self._COCO.getImgIds() #print image_ids self.test() def _get_ann_file(self): # prefix = 'instances' if self._image_set.find('test') == -1 \ # else 'image_info' # return osp.join(self._data_path, 'annotations', # prefix + '_' + self._image_set + self._year + '.json') return osp.join(self._data_path, 'annotations', 'person_keypoints_train2014.json') def get_img_file(self, im_ann): return osp.join(self._data_path, 'train2014', im_ann['file_name']) def test(self): image_ids = self._COCO.getImgIds() # print image_ids,'\n,len:',len(image_ids) for i in xrange(len(image_ids)): im_ann = self._COCO.loadImgs(image_ids[i])[0] print '\n:', i width = im_ann['width'] height = im_ann['height'] # print im_ann # print self.get_img_file(im_ann) annIds = self._COCO.getAnnIds(imgIds=image_ids[i], iscrowd=None) objs = self._COCO.loadAnns(annIds) # print annIds,objs im = cv2.imread(self.get_img_file(im_ann)) # Sanitize bboxes -- some are invalid valid_objs = [] im = im[:, :, (2, 1, 0)] im[:, :, :] = (0, 0, 0) #im = cv2.cvtColor(im,cv2.COLOR_BGR2GRAY) fig, ax = plt.subplots(figsize=(12, 12)) for obj in objs: print obj mask = self._COCO.annToMask(obj) #im[mask==0,:]=(0,0,0); im[mask == 1, :] = (255, 255, 255) # for i in range(width): # for j in range(height): # if(mask[i][j] == 0) x1 = np.max((0, obj['bbox'][0])) y1 = np.max((0, obj['bbox'][1])) x2 = np.min((width - 1, x1 + np.max((0, obj['bbox'][2] - 
1)))) y2 = np.min((height - 1, y1 + np.max((0, obj['bbox'][3] - 1)))) # print mask.shape,x1,y1,x2,y2,width,height # print 'mask.shape[0]:mask_shape[1]',mask.shape[0],mask.shape[1] start_h = np.round(np.max((1, y1))).astype(np.int) end_h = np.round(np.min((height, y2))).astype(np.int) start_w = np.round(np.max((1, x1))).astype(np.int) end_w = np.round(np.min((width, x2))).astype(np.int) cropped_mask = mask[start_h:end_h, start_w:end_w] # print cropped_mask.shape # resize_mask = cv2.resize(cropped_mask, (28, 28), interpolation=cv2.INTER_NEAREST) # print resize_mask.shape,resize_mask if 'keypoints' in obj: print 'category_id', obj['category_id'] print '\nkeypoints', obj['keypoints'] print '\nlens', obj['num_keypoints']