def _load_all(self, anno_file, shuffle):
    """
    Build the image index and the per-image label arrays from a COCO-style
    annotation json file and store them on the instance.

    Each label row is ``[category_id, xmin, ymin, xmax, ymax, 0]`` where the
    box corners are divided by the image width/height. Images without any
    annotation are left out of both lists.

    Parameters:
    ----------
    anno_file: str
        annotation json file
    shuffle: bool
        whether to shuffle image list
    """
    coco = COCO(anno_file)
    image_paths = []
    label_arrays = []

    for img_id in coco.getImgIds():
        # image record: file name, sub-directory (second '_' token) and size
        record = coco.loadImgs(img_id)[0]
        filename = record["file_name"]
        subdir = filename.split('_')[1]
        height = record["height"]
        width = record["width"]

        # one row per annotation of this image
        rows = []
        for anno in coco.loadAnns(coco.getAnnIds(imgIds=img_id)):
            cat_id = int(anno["category_id"])
            bbox = anno["bbox"]
            assert len(bbox) == 4
            # bbox is [x, y, w, h] in pixels; convert to relative corners
            xmin = float(bbox[0]) / width
            ymin = float(bbox[1]) / height
            xmax = xmin + float(bbox[2]) / width
            ymax = ymin + float(bbox[3]) / height
            rows.append([cat_id, xmin, ymin, xmax, ymax, 0])

        if not rows:
            continue
        label_arrays.append(np.array(rows))
        image_paths.append(os.path.join(subdir, filename))

    if shuffle:
        import random
        # shuffle a list of positions so both lists get the same permutation
        order = list(range(len(image_paths)))
        random.shuffle(order)
        image_paths = [image_paths[pos] for pos in order]
        label_arrays = [label_arrays[pos] for pos in order]

    # store the results
    self.image_set_index = image_paths
    self.labels = label_arrays
from coco import COCO

# Ground truth
annFile = '/nightowls/annotations/nightowls_training.json'
image_directory = '/nightowls/images'

cocoGt = COCO(annFile)
imgIds = sorted(cocoGt.getImgIds())
print('There are %d images in the training set' % len(imgIds))

annotations = cocoGt.getAnnIds()
print('There are %d annotations in the training set' % len(annotations))

# Select random annotation.
# random.randint(0, n) can return n itself, which is one past the last valid
# index; random.choice only ever returns an existing element.
anno_id = random.choice(annotations)
anno = cocoGt.loadAnns(ids=anno_id)[0]
print('Annotation (id=%d): %s' % (anno_id, anno))

cat = cocoGt.loadCats(ids=anno['category_id'])[0]
category_name = cat['name']
print('Object type %s' % category_name)

# Show the annotation in its image
image = cocoGt.loadImgs(ids=anno['image_id'])[0]
file_path = path.join(image_directory, image['file_name'])
fig, ax = plt.subplots(1)
img = mpimg.imread(file_path)
ax.imshow(img)
def _select_persons(img_anns):
    """Pick the usable person annotations of one image.

    An annotation is skipped when it has fewer than 5 keypoints, an area
    below 32 * 32, or a bbox center closer to an already accepted person than
    0.3 times that person's larger bbox side.

    Returns a list of dicts with keys ``objpos`` (bbox center), ``keypoints``
    (17 rows of ``[x, y, visibility]``) and ``scale`` (bbox height / 368).
    """
    persons = []
    accepted = []  # (center_x, center_y, larger bbox side) of kept persons
    for ann in img_anns:
        if ann['num_keypoints'] < 5 or ann['area'] < 32 * 32:
            continue
        kpt = ann['keypoints']
        bbox = ann['bbox']

        # person center
        person_center = [bbox[0] + bbox[2] / 2.0, bbox[1] + bbox[3] / 2.0]
        scale = bbox[3] / 368.0

        # skip this person if the distance to an existing person is too small
        too_close = False
        for cx, cy, extent in accepted:
            dx = person_center[0] - cx
            dy = person_center[1] - cy
            if math.sqrt(dx * dx + dy * dy) < extent * 0.3:
                too_close = True
                break
        if too_close:
            continue

        keypoints = []
        for part in range(17):
            # remap the COCO flag: 2 -> 1 (visible), 1 -> 0 (not visible),
            # anything else -> 2 (not labeled)
            coco_flag = kpt[part * 3 + 2]
            if coco_flag == 2:
                visibility = 1
            elif coco_flag == 1:
                visibility = 0
            else:
                visibility = 2
            keypoints.append([kpt[part * 3], kpt[part * 3 + 1], visibility])

        persons.append({
            'objpos': person_center,
            'keypoints': keypoints,
            'scale': scale,
        })
        accepted.append(
            (person_center[0], person_center[1], max(bbox[2], bbox[3])))
    return persons


def _to_output_person(person):
    """Re-order one person's 17 keypoints into the 18-slot output layout.

    Slots are assigned through ``COCO_TO_OURS``; slot 1 is then filled with
    the midpoint of source keypoints 5 and 6 (presumably the neck, derived
    from the two shoulders - confirm against ``COCO_TO_OURS``).
    """
    src = person['keypoints']
    keypoints = np.zeros((18, 3)).tolist()
    for coco_idx in range(17):
        dst = keypoints[COCO_TO_OURS[coco_idx]]
        dst[0] = src[coco_idx][0]
        dst[1] = src[coco_idx][1]
        dst[2] = src[coco_idx][2]

    keypoints[1][0] = (src[5][0] + src[6][0]) * 0.5
    keypoints[1][1] = (src[5][1] + src[6][1]) * 0.5
    if src[5][2] == src[6][2]:
        keypoints[1][2] = src[5][2]
    elif src[5][2] == 2 or src[6][2] == 2:
        # one of the two sources is unlabeled -> the midpoint is unlabeled
        keypoints[1][2] = 2
    else:
        keypoints[1][2] = 0

    out = dict()
    out['pos'] = person['objpos']
    out['keypoints'] = keypoints
    out['scale'] = person['scale']
    return out


def _build_miss_mask(coco, img_anns, height, width):
    """Return the boolean mask that is False where people lack keypoints.

    The masked-out region is the union of all non-crowd segments with
    ``num_keypoints <= 0`` plus, when the image has exactly one crowd
    annotation, the crowd segment minus the person segments seen before it.

    Raises:
        Exception: if the image has more than one crowd annotation.
    """
    mask_all = np.zeros((height, width), dtype=np.uint8)
    mask_miss = np.zeros((height, width), dtype=np.uint8)
    mask_crowd = None
    crowd_count = 0
    for ann in img_anns:
        if ann['iscrowd'] == 1:
            mask_crowd = coco.annToMask(ann)
            # remove the part of the crowd already covered by single persons
            mask_crowd = mask_crowd - np.bitwise_and(mask_all, mask_crowd)
            crowd_count += 1
            continue

        mask = coco.annToMask(ann)
        mask_all = np.bitwise_or(mask, mask_all)
        if ann['num_keypoints'] <= 0:
            mask_miss = np.bitwise_or(mask, mask_miss)

    if crowd_count < 1:
        return np.logical_not(mask_miss)
    if crowd_count == 1:
        return np.logical_not(np.bitwise_or(mask_miss, mask_crowd))
    raise Exception('crowd segments > 1')


def processing(args):
    """Convert COCO keypoint annotations to the project's json + mask files.

    For every image with at least one usable person this writes the image
    name to ``args.filelist_path``, saves a miss-mask ``.npy`` under
    ``args.mask_dir`` and records its path in ``args.masklist_path``. The
    converted annotations of all such images are finally dumped as json to
    ``args.json_path``.

    Args:
        args: object with attributes ``ann_path``, ``json_path``,
            ``mask_dir``, ``filelist_path`` and ``masklist_path``.

    Raises:
        Exception: if an image has more than one crowd annotation.
    """
    ann_path = args.ann_path
    json_path = args.json_path
    mask_dir = args.mask_dir

    coco = COCO(ann_path)
    ids = list(coco.imgs.keys())
    lists = []

    # 'with' closes both list files even if an image raises half-way through
    with open(args.filelist_path, 'w') as filelist_fp, \
            open(args.masklist_path, 'w') as masklist_fp:
        for i, img_id in enumerate(ids):
            img_anns = coco.loadAnns(coco.getAnnIds(imgIds=img_id))
            img_info = coco.imgs[img_id]
            name = img_info['file_name']
            height = img_info['height']
            width = img_info['width']

            persons = _select_persons(img_anns)
            if persons:
                filelist_fp.write(name + '\n')

                info = dict()
                info['filename'] = name
                info['info'] = [_to_output_person(p) for p in persons]
                lists.append(info)

                mask_miss = _build_miss_mask(coco, img_anns, height, width)
                mask_path = os.path.join(mask_dir, name.split('.')[0] + '.npy')
                np.save(mask_path, mask_miss)
                masklist_fp.write(mask_path + '\n')

            # 'i' is the image index only: no inner loop reuses the name
            if i % 1000 == 0:
                print("Processed {} of {}".format(i, len(ids)))

    print('write json file')
    with open(json_path, 'w') as fp:
        fp.write(json.dumps(lists))

    print('done!')
class GOLFDataset(HumanPoseEstimationDataset):
    """
    Golf keypoint dataset (18 joints) read through the COCO annotation API.
    """

    def __init__(
            self,
            root_path="/mldisk/nfs_shared_/dh/golfKeypointDB/data/golfKeypointDB",
            data_version="train",
            is_train=True,
            use_gt_bboxes=True,
            bbox_path="",
            image_width=288,
            image_height=384,
            color_rgb=True,
            scale=True,
            scale_factor=0.35,
            flip_prob=0.5,
            rotate_prob=0.5,
            rotation_factor=45.,
            half_body_prob=0.3,
            use_different_joints_weight=False,
            heatmap_sigma=3,
            soft_nms=False,
    ):
        """
        Initializes a new GOLFDataset object.

        Image and annotation indexes are loaded and stored in memory.
        Annotations are preprocessed to have a simple list of annotations to
        iterate over.

        Bounding boxes can be loaded from the ground truth or from a pickle
        file (in this case, no annotations are provided).

        Args:
            root_path (str): dataset root path.
                Default: "/mldisk/nfs_shared_/dh/golfKeypointDB/data/golfKeypointDB"
            data_version (str): dataset split/folder. Supported values are
                "train" and "val".
                Default: "train"
            is_train (bool): train or eval mode. If true, train mode is used.
                Default: True
            use_gt_bboxes (bool): use ground truth bounding boxes. If False,
                bbox_path is required.
                Default: True
            bbox_path (str): bounding boxes pickle file path.
                Default: ""
            image_width (int): image width.
                Default: 288
            image_height (int): image height.
                Default: 384
            color_rgb (bool): rgb or bgr color mode. If True, rgb color mode
                is used.
                Default: True
            scale (bool): scale mode.
                Default: True
            scale_factor (float): scale factor.
                Default: 0.35
            flip_prob (float): flip probability.
                Default: 0.5
            rotate_prob (float): rotate probability.
                Default: 0.5
            rotation_factor (float): rotation factor.
                Default: 45.
            half_body_prob (float): half body probability.
                Default: 0.3
            use_different_joints_weight (bool): use different joints weights.
                If true, the following joints weights will be used:
                [1., 1., 1., 1., 1., 1., 1., 1.2, 1.2, 1.5, 1.5, 1., 1., 1.2,
                1.2, 1.5, 1.5, 1.8]
                Default: False
            heatmap_sigma (float): sigma of the gaussian used to create the
                heatmap.
                Default: 3
            soft_nms (bool): enable soft non-maximum suppression.
                Default: False

        Raises:
            ValueError: if data_version is neither "train" nor "val".
        """
        super(GOLFDataset, self).__init__()

        self.root_path = root_path
        self.data_version = data_version
        self.is_train = is_train
        self.use_gt_bboxes = use_gt_bboxes
        self.bbox_path = bbox_path
        self.image_width = image_width
        self.image_height = image_height
        self.color_rgb = color_rgb
        self.scale = scale  # ToDo Check
        self.scale_factor = scale_factor
        self.flip_prob = flip_prob
        self.rotate_prob = rotate_prob
        self.rotation_factor = rotation_factor
        self.half_body_prob = half_body_prob
        self.use_different_joints_weight = use_different_joints_weight  # ToDo Check
        self.heatmap_sigma = heatmap_sigma
        self.soft_nms = soft_nms

        self.annotation_dir = os.path.join(self.root_path, 'annotations')
        self.data_path = os.path.join(self.root_path, self.data_version)
        if self.data_version == 'train':
            self.annotation_path = os.path.join(
                self.annotation_dir, 'golfDB_18pts_train_200_2_conf50.json')
        elif self.data_version == 'val':
            self.annotation_path = os.path.join(
                self.annotation_dir, 'golfDB_18pts_val_20.json')
        else:
            # Fail here with a clear message instead of an AttributeError on
            # self.annotation_path a few lines below.
            raise ValueError(
                "Unsupported data_version %r: expected 'train' or 'val'" %
                (self.data_version,))

        self.image_size = (self.image_width, self.image_height)
        self.aspect_ratio = self.image_width * 1.0 / self.image_height
        self.heatmap_size = (int(self.image_width / 4),
                             int(self.image_height / 4))
        self.heatmap_type = 'gaussian'
        # Divisor applied to box width/height when computing 'scale'
        # (see _xywh2cs); the value 200 is inherited from the original
        # implementation.
        self.pixel_std = 200

        self.nof_joints = 18
        self.nof_joints_half_body = 8
        self.flip_pairs = [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12],
                           [13, 14], [15, 16]]
        self.upper_body_ids = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
        self.lower_body_ids = [11, 12, 13, 14, 15, 16]
        self.joints_weight = np.asarray([
            1., 1., 1., 1., 1., 1., 1., 1.2, 1.2, 1.5, 1.5, 1., 1., 1.2, 1.2,
            1.5, 1.5, 1.8
        ], dtype=np.float32).reshape((self.nof_joints, 1))

        self.transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225]),
        ])

        # Load COCO dataset - Create COCO object then load images and annotations
        self.coco = COCO(self.annotation_path)
        self.imgIds = self.coco.getImgIds()

        # Create a list of annotations and the corresponding image (each
        # image can contain more than one detection)

        # Load bboxes and joints
        #  if self.use_gt_bboxes -> Load GT bboxes and joints
        #  else -> Load pre-predicted bboxes by a detector and null joints
        if not self.use_gt_bboxes:
            # bboxes must be saved as the original COCO annotations,
            # i.e. a dict mapping each image id to a list of dicts with keys
            #   'id'         : annotation id (progressive id for debugging)
            #   'clean_bbox' : np.array([x, y, w, h])
            # NOTE(review): pickle.load executes arbitrary code from the
            # file; only use bbox files from a trusted source.
            with open(self.bbox_path, 'rb') as fd:
                bboxes = pickle.load(fd)

        self.data = []
        # load annotations for each image of COCO
        for imgId in tqdm(self.imgIds):
            ann_ids = self.coco.getAnnIds(imgIds=imgId, iscrowd=False)
            img = self.coco.loadImgs(imgId)[0]

            if self.use_gt_bboxes:
                objs = self.coco.loadAnns(ann_ids)

                # sanitize bboxes
                valid_objs = []
                for obj in objs:
                    # Skip non-person objects (it should never happen)
                    if obj['category_id'] != 1:
                        continue

                    # ignore objs without keypoints annotation
                    if max(obj['keypoints']) == 0:
                        continue

                    # clip the box to the image
                    x, y, w, h = obj['bbox']
                    x1 = np.max((0, x))
                    y1 = np.max((0, y))
                    x2 = np.min((img['width'] - 1, x1 + np.max((0, w - 1))))
                    y2 = np.min((img['height'] - 1, y1 + np.max((0, h - 1))))

                    # Use only valid bounding boxes
                    if obj['area'] > 0 and x2 >= x1 and y2 >= y1:
                        obj['clean_bbox'] = [x1, y1, x2 - x1, y2 - y1]
                        valid_objs.append(obj)

                objs = valid_objs
            else:
                objs = bboxes[imgId]

            # for each annotation of this image, add the formatted annotation
            # to self.data
            for obj in objs:
                # builtin float instead of the np.float alias, which recent
                # NumPy releases no longer provide
                joints = np.zeros((self.nof_joints, 2), dtype=float)
                joints_visibility = np.ones((self.nof_joints, 2), dtype=float)

                if self.use_gt_bboxes:
                    # COCO pre-processing (filtering was done above)
                    for pt in range(self.nof_joints):
                        joints[pt, 0] = obj['keypoints'][pt * 3 + 0]
                        joints[pt, 1] = obj['keypoints'][pt * 3 + 1]
                        # ToDo check correctness
                        # COCO:
                        #   if visibility == 0 -> keypoint is not in the image.
                        #   if visibility == 1 -> keypoint is in the image BUT
                        #       not visible (e.g. behind an object).
                        #   if visibility == 2 -> keypoint looks clearly
                        #       (i.e. it is not hidden).
                        # Both 1 and 2 are mapped to 1 here.
                        t_vis = int(np.clip(obj['keypoints'][pt * 3 + 2], 0, 1))
                        joints_visibility[pt, 0] = t_vis
                        joints_visibility[pt, 1] = t_vis

                center, scale = self._box2cs(obj['clean_bbox'][:4])

                self.data.append({
                    'imgId': imgId,
                    'annId': obj['id'],
                    'imgPath': os.path.join(self.root_path, self.data_version,
                                            '%06d.jpg' % imgId),
                    'center': center,
                    'scale': scale,
                    'joints': joints,
                    'joints_visibility': joints_visibility,
                })

        # Default values
        self.bbox_thre = 1.0
        self.image_thre = 0.0
        self.in_vis_thre = 0.2
        self.nms_thre = 1.0
        self.oks_thre = 0.9

    def __len__(self):
        """Return the number of annotations (not images) in the dataset."""
        return len(self.data)

    def __getitem__(self, index):
        """
        Load, augment (train mode only) and encode one annotation.

        Returns:
            tuple: ``(image, target, target_weight, joints_data)`` where
            ``image`` is the warped (and, if a transform is set, transformed)
            image, ``target`` and ``target_weight`` are float32 arrays from
            ``_generate_target`` and ``joints_data`` is the annotation dict
            updated with the transformed joints, center, scale, rotation and
            score.

        Raises:
            ValueError: if the image file cannot be read.
        """
        joints_data = self.data[index].copy()

        # Read the image from disk
        image = cv2.imread(joints_data['imgPath'],
                           cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        # Check before any further cv2 call, and report the path that failed.
        if image is None:
            raise ValueError('Fail to read %s' % joints_data['imgPath'])

        if self.color_rgb:
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # The dict copy above is shallow: the arrays are still the ones held
        # by self.data. They are modified in place below (flip, affine
        # transform), so work on copies to keep the stored annotation intact.
        joints = joints_data['joints'].copy()
        joints_vis = joints_data['joints_visibility'].copy()

        c = joints_data['center'].copy()
        s = joints_data['scale'].copy()
        score = joints_data['score'] if 'score' in joints_data else 1
        r = 0

        # Apply data augmentation
        if self.is_train:
            if self.half_body_prob and \
                    random.random() < self.half_body_prob and \
                    np.sum(joints_vis[:, 0]) > self.nof_joints_half_body:
                c_half_body, s_half_body = self._half_body_transform(
                    joints, joints_vis)

                if c_half_body is not None and s_half_body is not None:
                    c, s = c_half_body, s_half_body

            sf = self.scale_factor
            rf = self.rotation_factor

            # NOTE(review): random.random() is in [0, 1), so the scale
            # multiplier below lies in [1, 1 + sf) and the rotation in
            # [0, rf); the clip bounds are never reached. Confirm whether a
            # symmetric distribution was intended.
            if self.scale:
                s = s * np.clip(random.random() * sf + 1, 1 - sf, 1 + sf)

            if self.rotate_prob and random.random() < self.rotate_prob:
                r = np.clip(random.random() * rf, -rf * 2, rf * 2)
            else:
                r = 0

            if self.flip_prob and random.random() < self.flip_prob:
                image = image[:, ::-1, :]
                joints, joints_vis = fliplr_joints(joints, joints_vis,
                                                   image.shape[1],
                                                   self.flip_pairs)
                c[0] = image.shape[1] - c[0] - 1

        # Apply affine transform on joints and image
        trans = get_affine_transform(c, s, self.pixel_std, r, self.image_size)
        image = cv2.warpAffine(
            image,
            trans,
            (int(self.image_size[0]), int(self.image_size[1])),
            flags=cv2.INTER_LINEAR)

        for i in range(self.nof_joints):
            if joints_vis[i, 0] > 0.:
                joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)

        # Convert image to tensor and normalize
        if self.transform is not None:
            image = self.transform(image)

        target, target_weight = self._generate_target(joints, joints_vis)

        # Update metadata
        joints_data['joints'] = joints
        joints_data['joints_visibility'] = joints_vis
        joints_data['center'] = c
        joints_data['scale'] = s
        joints_data['rotation'] = r
        joints_data['score'] = score

        return image, target.astype(np.float32), target_weight.astype(
            np.float32), joints_data

    def evaluate_accuracy(self, output, target, params=None):
        """
        Compute PCK accuracy through ``evaluate_pck_accuracy``.

        Args:
            output: predictions passed through to ``evaluate_pck_accuracy``.
            target: ground truth passed through to ``evaluate_pck_accuracy``.
            params (dict, optional): if given, must hold ``'hm_type'`` and
                ``'thr'``, which are forwarded as well.

        Returns:
            tuple: ``(accs, avg_acc, cnt, joints_preds, joints_target)``.
        """
        if params is not None:
            hm_type = params['hm_type']
            thr = params['thr']
            accs, avg_acc, cnt, joints_preds, joints_target = evaluate_pck_accuracy(
                output, target, hm_type, thr)
        else:
            accs, avg_acc, cnt, joints_preds, joints_target = evaluate_pck_accuracy(
                output, target)

        return accs, avg_acc, cnt, joints_preds, joints_target

    def evaluate_overall_accuracy(self,
                                  predictions,
                                  bounding_boxes,
                                  image_paths,
                                  output_dir,
                                  rank=0.):
        """
        Rescore predictions, apply OKS NMS per image, write the results file
        and run the COCO keypoint evaluation.

        Args:
            predictions: per-person keypoints; column 2 of each joint is used
                as its confidence.
            bounding_boxes: per-person rows read as ``[0:2]`` center,
                ``[2:4]`` scale, ``[4]`` area, ``[5]`` score.
            image_paths: per-person image path; characters ``[-10:-4]`` are
                parsed as the integer image id.
            output_dir (str): the json is written to ``<output_dir>/results``.
            rank (float): used only in the results file name.

        Returns:
            tuple: ``(name_value, AP)`` or ``({'Null': 0}, 0)`` when
            ``'test'`` is part of ``data_version``.
        """
        res_folder = os.path.join(output_dir, 'results')
        # exist_ok avoids the race between an existence check and creation
        os.makedirs(res_folder, exist_ok=True)

        res_file = os.path.join(
            res_folder,
            f'golf_keypoints_{self.data_version}_results_{int(rank*100)}.json')

        # person x (keypoints)
        _kpts = []
        for idx, kpt in enumerate(predictions):
            _kpts.append({
                'keypoints': kpt,
                'center': bounding_boxes[idx][0:2],
                'scale': bounding_boxes[idx][2:4],
                'area': bounding_boxes[idx][4],
                'score': bounding_boxes[idx][5],
                'image': int(image_paths[idx][-10:-4])
            })

        # image x person x (keypoints)
        kpts = defaultdict(list)
        for kpt in _kpts:
            kpts[kpt['image']].append(kpt)

        # rescoring and oks nms
        num_joints = self.nof_joints
        in_vis_thre = self.in_vis_thre
        oks_thre = self.oks_thre
        oks_nmsed_kpts = []
        for img_kpts in kpts.values():
            for n_p in img_kpts:
                box_score = n_p['score']
                kpt_score = 0
                valid_num = 0
                for n_jt in range(0, num_joints):
                    t_s = n_p['keypoints'][n_jt][2]
                    if t_s > in_vis_thre:
                        kpt_score = kpt_score + t_s
                        valid_num = valid_num + 1
                if valid_num != 0:
                    kpt_score = kpt_score / valid_num
                # rescoring: mean confident-joint score times box score
                n_p['score'] = kpt_score * box_score

            if self.soft_nms:
                keep = soft_oks_nms(list(img_kpts), oks_thre)
            else:
                keep = oks_nms(list(img_kpts), oks_thre)

            if len(keep) == 0:
                oks_nmsed_kpts.append(img_kpts)
            else:
                oks_nmsed_kpts.append([img_kpts[_keep] for _keep in keep])

        self._write_coco_keypoint_results(oks_nmsed_kpts, res_file)
        if 'test' not in self.data_version:
            info_str = self._do_python_keypoint_eval(res_file)
            name_value = OrderedDict(info_str)
            return name_value, name_value['AP']
        else:
            return {'Null': 0}, 0

    # Private methods

    def _box2cs(self, box):
        """Convert an ``[x, y, w, h]`` box to ``(center, scale)``."""
        x, y, w, h = box[:4]
        return self._xywh2cs(x, y, w, h)

    def _xywh2cs(self, x, y, w, h):
        """
        Convert a box to its center and an aspect-ratio corrected scale.

        The shorter side is enlarged so that w / h equals
        ``self.aspect_ratio``; the sides are then divided by
        ``self.pixel_std`` and enlarged by a factor 1.25.

        Returns:
            tuple: ``(center, scale)``, two float32 arrays of shape (2,).
        """
        center = np.zeros((2, ), dtype=np.float32)
        center[0] = x + w * 0.5
        center[1] = y + h * 0.5

        if w > self.aspect_ratio * h:
            h = w * 1.0 / self.aspect_ratio
        elif w < self.aspect_ratio * h:
            w = h * self.aspect_ratio
        scale = np.array([w * 1.0 / self.pixel_std, h * 1.0 / self.pixel_std],
                         dtype=np.float32)
        if center[0] != -1:
            scale = scale * 1.25

        return center, scale

    def _half_body_transform(self, joints, joints_vis):
        """
        Pick the visible upper- or lower-body joints and return the center
        and scale of their bounding box.

        Joints whose id is not in ``self.upper_body_ids`` count as lower
        body.

        Returns:
            tuple: ``(center, scale)`` or ``(None, None)`` when fewer than two
            joints are selected.
        """
        upper_joints = []
        lower_joints = []
        for joint_id in range(self.nof_joints):
            if joints_vis[joint_id][0] > 0:
                if joint_id in self.upper_body_ids:
                    upper_joints.append(joints[joint_id])
                else:
                    lower_joints.append(joints[joint_id])

        if random.random() < 0.5 and len(upper_joints) > 2:
            selected_joints = upper_joints
        else:
            selected_joints = lower_joints \
                if len(lower_joints) > 2 else upper_joints

        if len(selected_joints) < 2:
            return None, None

        selected_joints = np.array(selected_joints, dtype=np.float32)
        center = selected_joints.mean(axis=0)[:2]

        left_top = np.amin(selected_joints, axis=0)
        right_bottom = np.amax(selected_joints, axis=0)

        w = right_bottom[0] - left_top[0]
        h = right_bottom[1] - left_top[1]

        if w > self.aspect_ratio * h:
            h = w * 1.0 / self.aspect_ratio
        elif w < self.aspect_ratio * h:
            w = h * self.aspect_ratio

        scale = np.array([w * 1.0 / self.pixel_std, h * 1.0 / self.pixel_std],
                         dtype=np.float32)

        scale = scale * 1.5

        return center, scale

    def _generate_target(self, joints, joints_vis):
        """
        Build one gaussian heatmap per joint.

        :param joints: joint coordinates in network-input pixels; columns 0
            and 1 are x and y
        :param joints_vis: joint visibility; column 0 is used as the weight
        :return: target of shape (nof_joints, heatmap_h, heatmap_w) and
            target_weight of shape (nof_joints, 1) (1: visible, 0: invisible
            or gaussian completely outside the heatmap)
        :raises NotImplementedError: if heatmap_type is not 'gaussian'
        """
        target_weight = np.ones((self.nof_joints, 1), dtype=np.float32)
        target_weight[:, 0] = joints_vis[:, 0]

        if self.heatmap_type == 'gaussian':
            target = np.zeros(
                (self.nof_joints, self.heatmap_size[1], self.heatmap_size[0]),
                dtype=np.float32)

            tmp_size = self.heatmap_sigma * 3

            # Stride and gaussian patch are the same for every joint, so they
            # are computed once outside the loop.
            feat_stride = np.asarray(self.image_size) / np.asarray(
                self.heatmap_size)
            size = 2 * tmp_size + 1
            x = np.arange(0, size, 1, np.float32)
            y = x[:, np.newaxis]
            x0 = y0 = size // 2
            # The gaussian is not normalized, we want the center value to
            # equal 1
            g = np.exp(-((x - x0)**2 + (y - y0)**2) /
                       (2 * self.heatmap_sigma**2))

            for joint_id in range(self.nof_joints):
                mu_x = int(joints[joint_id][0] / feat_stride[0] + 0.5)
                mu_y = int(joints[joint_id][1] / feat_stride[1] + 0.5)
                # Check that any part of the gaussian is in-bounds
                ul = [int(mu_x - tmp_size), int(mu_y - tmp_size)]
                br = [int(mu_x + tmp_size + 1), int(mu_y + tmp_size + 1)]
                if ul[0] >= self.heatmap_size[0] or ul[1] >= self.heatmap_size[1] \
                        or br[0] < 0 or br[1] < 0:
                    # Fully outside: leave the heatmap empty and mask the joint
                    target_weight[joint_id] = 0
                    continue

                # Usable gaussian range
                g_x = max(0, -ul[0]), min(br[0], self.heatmap_size[0]) - ul[0]
                g_y = max(0, -ul[1]), min(br[1], self.heatmap_size[1]) - ul[1]
                # Image range
                img_x = max(0, ul[0]), min(br[0], self.heatmap_size[0])
                img_y = max(0, ul[1]), min(br[1], self.heatmap_size[1])

                if target_weight[joint_id, 0] > 0.5:
                    target[joint_id][img_y[0]:img_y[1], img_x[0]:img_x[1]] = \
                        g[g_y[0]:g_y[1], g_x[0]:g_x[1]]
        else:
            raise NotImplementedError

        if self.use_different_joints_weight:
            target_weight = np.multiply(target_weight, self.joints_weight)

        return target, target_weight

    def _write_coco_keypoint_results(self, keypoints, res_file):
        """
        Dump the per-image keypoint results to ``res_file`` as json.

        After writing, the file is read back; if that fails its last line is
        replaced by ``]`` and the file is rewritten.
        """
        data_pack = [{
            'cat_id': 1,  # 1 == 'person'
            'cls': 'person',
            'ann_type': 'keypoints',
            'keypoints': keypoints
        }]

        results = self._coco_keypoint_results_one_category_kernel(data_pack[0])
        with open(res_file, 'w') as f:
            json.dump(results, f, sort_keys=True, indent=4)
        try:
            # 'with' so the read-back handle is closed again
            with open(res_file) as f:
                json.load(f)
        except Exception:
            content = []
            with open(res_file, 'r') as f:
                for line in f:
                    content.append(line)
            content[-1] = ']'
            with open(res_file, 'w') as f:
                for c in content:
                    f.write(c)

    def _coco_keypoint_results_one_category_kernel(self, data_pack):
        """
        Flatten the per-image person lists into COCO result dicts.

        Args:
            data_pack (dict): must hold ``'cat_id'`` and ``'keypoints'`` (a
                list, per image, of person dicts with keys ``'keypoints'``,
                ``'image'``, ``'score'``, ``'center'`` and ``'scale'``).

        Returns:
            list: one dict per person with keys ``image_id``,
            ``category_id``, ``keypoints`` (flat x, y, score triplets),
            ``score``, ``center`` and ``scale``.
        """
        cat_id = data_pack['cat_id']
        keypoints = data_pack['keypoints']
        cat_results = []

        for img_kpts in keypoints:
            if len(img_kpts) == 0:
                continue

            _key_points = np.array(
                [person['keypoints'] for person in img_kpts],
                dtype=np.float32)
            key_points = np.zeros((_key_points.shape[0], self.nof_joints * 3),
                                  dtype=np.float32)

            for ipt in range(self.nof_joints):
                key_points[:, ipt * 3 + 0] = _key_points[:, ipt, 0]
                key_points[:, ipt * 3 + 1] = _key_points[:, ipt, 1]
                key_points[:, ipt * 3 + 2] = _key_points[:, ipt, 2]  # keypoints score.

            result = [{
                'image_id': person['image'],
                'category_id': cat_id,
                'keypoints': list(key_points[k]),
                'score': person['score'].astype(np.float32),
                'center': list(person['center']),
                'scale': list(person['scale'])
            } for k, person in enumerate(img_kpts)]
            cat_results.extend(result)

        return cat_results

    def _do_python_keypoint_eval(self, res_file):
        """
        Run the COCO keypoint evaluation on ``res_file``.

        Returns:
            list: ``(name, value)`` pairs for the ten summary statistics, in
            the order of ``coco_eval.stats``.
        """
        coco_dt = self.coco.loadRes(res_file)
        coco_eval = COCOeval(self.coco, coco_dt, 'keypoints')
        coco_eval.params.useSegm = None
        coco_eval.params.imgIds = self.coco.getImgIds()
        coco_eval.evaluate()
        coco_eval.accumulate()
        coco_eval.summarize()

        stats_names = [
            'AP', 'Ap .5', 'AP .75', 'AP (M)', 'AP (L)', 'AR', 'AR .5',
            'AR .75', 'AR (M)', 'AR (L)'
        ]

        info_str = []
        for ind, name in enumerate(stats_names):
            info_str.append((name, coco_eval.stats[ind]))

        return info_str
neededlabels = ["person", "dog", "car", "bicycle"] coco = COCO(annFile) cats = coco.loadCats(coco.getCatIds()) nms = [cat['name'] for cat in cats] imgIds = coco.getImgIds() takeXml = 0 directory = './annotations_pascalformat/' if not os.path.exists(directory): os.makedirs(directory) for n in range(len(imgIds)): img = coco.loadImgs(imgIds[n])[0] annIds = coco.getAnnIds(imgIds=img['id'], iscrowd=None) anns = coco.loadAnns(annIds) xml = '<annotation>\n<folder>\nCOCO2014pascalformat\n</folder>\n<filename>' xml += img[ 'file_name'] + '</filename>\n<source>\n<database>\nCOCO2014pascalformat\n</database>\n</source>\n<size>\n' xml += '<width>\n' + str( img['width']) + '\n</width>\n' + '<height>\n' + str( img['height']) + '\n</height>\n' xml += '<depth>\n3\n</depth>\n</size>\n<segmented>\n0\n</segmented>\n' for i in range(len(anns)): if (labelnames[int(anns[i]['category_id'])] in neededlabels): bbox = anns[i]['bbox'] xml += '<object>\n<name>' + str(labelnames[int( anns[i]['category_id'])]) + '</name>\n' xml += '<bndbox>\n<xmin>\n' + str(int(round(
class COCOTest():
    """
    Debug helper that loads the COCO person-keypoints annotations and, on
    construction, walks over every image printing its annotations.
    """

    def __init__(self, image_set, year):
        """
        Load the annotation file and immediately run ``test()``.

        Args:
            image_set: stored as ``self._image_set``; not used to pick the
                annotation file in the visible code.
            year: stored as ``self._year``; likewise unused for file lookup.
        """
        # COCO specific config options
        self.config = {
            'top_k': 2000,
            'use_salt': True,
            'cleanup': True,
            'crowd_thresh': 0.7,
            'min_size': 2
        }
        # name, paths
        self._year = year
        self._image_set = image_set
        self._data_path = osp.join(cfg.DATA_DIR, 'coco')
        # load COCO API, classes, class <-> id mappings
        self._COCO = COCO(self._get_ann_file())
        cats = self._COCO.loadCats(self._COCO.getCatIds())
        self.test()

    def _get_ann_file(self):
        """Return the (hard-coded) train2014 person-keypoints json path."""
        return osp.join(self._data_path, 'annotations',
                        'person_keypoints_train2014.json')

    def get_img_file(self, im_ann):
        """Return the train2014 image path for an image annotation dict."""
        return osp.join(self._data_path, 'train2014', im_ann['file_name'])

    def test(self):
        """
        For every image: paint all annotation masks white on a black canvas,
        crop each mask to its clipped bbox and print keypoint information.

        Raises:
            IOError: if an image file cannot be read.
        """
        image_ids = self._COCO.getImgIds()
        # enumerate instead of xrange(len(...)): works on Python 2 and 3
        for i, image_id in enumerate(image_ids):
            im_ann = self._COCO.loadImgs(image_id)[0]
            # single-argument print() behaves the same on Python 2 and 3
            print('\n: {}'.format(i))
            width = im_ann['width']
            height = im_ann['height']

            annIds = self._COCO.getAnnIds(imgIds=image_id, iscrowd=None)
            objs = self._COCO.loadAnns(annIds)

            img_file = self.get_img_file(im_ann)
            im = cv2.imread(img_file)
            if im is None:
                # cv2.imread signals failure by returning None
                raise IOError('Fail to read %s' % img_file)

            # BGR -> RGB (this indexing makes a copy), then blank the canvas
            im = im[:, :, (2, 1, 0)]
            im[:, :, :] = (0, 0, 0)

            # NOTE(review): a new figure is opened per image and never shown
            # or closed in the visible code - confirm whether plotting code
            # is missing.
            fig, ax = plt.subplots(figsize=(12, 12))
            for obj in objs:
                print(obj)
                mask = self._COCO.annToMask(obj)
                im[mask == 1, :] = (255, 255, 255)

                # Sanitize bboxes -- some are invalid
                x1 = np.max((0, obj['bbox'][0]))
                y1 = np.max((0, obj['bbox'][1]))
                x2 = np.min((width - 1, x1 + np.max((0, obj['bbox'][2] - 1))))
                y2 = np.min((height - 1, y1 + np.max((0, obj['bbox'][3] - 1))))

                # builtin int instead of the np.int alias, which recent
                # NumPy releases no longer provide
                start_h = np.round(np.max((1, y1))).astype(int)
                end_h = np.round(np.min((height, y2))).astype(int)
                start_w = np.round(np.max((1, x1))).astype(int)
                end_w = np.round(np.min((width, x2))).astype(int)
                # NOTE(review): not used further in the visible code
                cropped_mask = mask[start_h:end_h, start_w:end_w]

                if 'keypoints' in obj:
                    print('category_id {}'.format(obj['category_id']))
                    print('\nkeypoints {}'.format(obj['keypoints']))
                    print('\nlens {}'.format(obj['num_keypoints']))
# initialize COCO api for instance annotations
coco = COCO(annFile)

# get the image Ids:
imgIds = coco.getImgIds()
s = len(imgIds)

# five identical target rows are written per image
# (presumably one per caption of that image - confirm)
out = np.zeros(shape=(5 * s, 201))
count = 0

for imgId in imgIds:
    # get the Id of the annotation corresponding to the image Id:
    annIds = coco.getAnnIds(imgIds=imgId, iscrowd=None)
    # get the annotation:
    anotacao = coco.loadAnns(ids=annIds)
    for k in range(5):
        out[count, :] = assemble_output_vector2(anotacao)
        count += 1

# Saving the objects.
# pickle produces bytes, so the file has to be opened in binary mode.
with open('training_target', 'wb') as f:
    pickle.dump(out, f)

# initialize COCO api for caption annotations
annFile = 'captions_%s.json' % (dataType)
caps = COCO(annFile)

# NOTE(review): 'wb' is the Python 2 convention for csv files; on Python 3
# csv.writer needs a text-mode file opened with newline='' - confirm the
# target interpreter before changing.
with open('training_sentences.csv', 'wb') as f:
    wri = csv.writer(f, skipinitialspace=True)