def gt_db(self, is_train):
    """Load the ground-truth window database from cache, or build and cache it.

    Each annotated window is read as 8 numbers, i.e. four (x, y) corner
    points in the order left-top, left-bottom, right-bottom, right-top.

    :param is_train: when true the annotations are parsed from
        ``<dataset_path>/TRAIN/<benchmark_name>``, otherwise from
        ``<dataset_path>/TEST``.
    :return: list of dicts, one per kept image, with keys ``image``,
        ``left_top``, ``left_bottom``, ``right_bottom``, ``right_top``,
        ``center``, ``windows``, ``im_width`` and ``im_height``.

    NOTE(review): the cache file name does not depend on ``is_train``; this
    presumably relies on ``self.name`` already encoding the split -- confirm.
    """
    cache_file = os.path.join(self.cache_path, self.name + '_gt_db.pkl')
    if os.path.exists(cache_file):
        # NOTE: pickle executes arbitrary code on load; the cache directory
        # must only contain files written by this project.
        with open(cache_file, 'rb') as fid:
            db = pickle.load(fid)
        print('{} gt db loaded from {}, {} samples are loaded'.format(
            self.name, cache_file, len(db)))
        return db

    if is_train:
        gt_root = os.path.join(self.dataset_path, 'TRAIN', self.benchmark_name)
    else:
        gt_root = os.path.join(self.dataset_path, 'TEST')
    img_list, window_anno_list, shape_list = self.parse_gt_file(gt_root)

    samples = []
    for n_img, image_path in enumerate(img_list):
        # np.float was removed in NumPy 1.24; np.float64 is the same dtype.
        the_sample_window = np.array(window_anno_list[n_img], dtype=np.float64)
        im_height, im_width, _ = shape_list[n_img]

        # Skip images that carry more windows than the configured maximum.
        if len(the_sample_window) > max_num_windows:
            continue

        # Columns are (x, y) pairs for the four corners, in this order.
        left_top = the_sample_window[:, 0:2].copy()
        left_bottom = the_sample_window[:, 2:4].copy()
        right_bottom = the_sample_window[:, 4:6].copy()
        right_top = the_sample_window[:, 6:8].copy()

        # Window center = mean of its four corner points.
        center = the_sample_window.reshape((-1, 4, 2)).mean(axis=1)
        the_sample_window = the_sample_window.reshape(
            (the_sample_window.shape[0], 4, 2))

        samples.append({
            'image': image_path,
            'left_top': left_top,
            'left_bottom': left_bottom,
            'right_bottom': right_bottom,
            'right_top': right_top,
            'center': center,
            'windows': np.array(the_sample_window),
            'im_width': im_width,
            'im_height': im_height,
        })

        DEBUG = False
        if DEBUG:
            debug_vis(image_path,
                      (left_top, left_bottom, right_bottom, right_top),
                      'Dataset Parsing')

    with open(cache_file, 'wb') as fid:
        pickle.dump(samples, fid, pickle.HIGHEST_PROTOCOL)
    print('{} samples ared wrote {}'.format(len(samples), cache_file))
    return samples
def get_single_patch_sample(img_path, windows, flip_pairs, patch_width, patch_height, mean, std,
                            do_augment, aug_config, label_func, label_config):
    """Load one image, cut an (optionally augmented) patch and build its label.

    :param img_path: path of the image to read with OpenCV.
    :param windows: window corner annotations; indexed as ``windows[i][j, :]``
        (presumably a sequence of 2-D point arrays -- confirm with the caller).
    :param flip_pairs: passed to ``fliplr_label`` when the patch is flipped.
    :param patch_width: width of the output patch.
    :param patch_height: height of the output patch.
    :param mean: per-channel mean, or ``None`` to skip normalization.
    :param std: per-channel std, or ``None`` to skip normalization.
    :param do_augment: draw augmentation parameters from ``aug_config``.
    :param aug_config: augmentation config; ``use_color_normalize`` is read.
    :param label_func: called as ``label_func(label_config, patch_width,
        patch_height, windows)`` to produce the label.
    :param label_config: label config; ``useAE`` and ``feat_stride`` are read.
    :return: ``(img_patch_tensor, label, gt_loc)``.
    :raises IOError: if the image cannot be read.

    NOTE(review): when no flip is applied, the affine transform is written
    into the ``windows`` argument in place -- confirm the caller passes a copy.
    """
    # Read the image; anything other than an ndarray means the read failed.
    read_flags = cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION
    cvimg = cv2.imread(img_path, read_flags)
    if not isinstance(cvimg, np.ndarray):
        raise IOError("Fail to read %s" % img_path)
    img_height, img_width, img_channels = cvimg.shape  # original shape

    # Augmentation parameters (identity when augmentation is off).
    if do_augment:
        scale, rot, center, do_flip, color_scale = do_augmentation(aug_config)
    else:
        scale, rot, center, do_flip = 1.0, 0, np.zeros(2), False
        color_scale = [1.0, 1.0, 1.0]

    # Cut the patch and keep the affine transform that produced it.
    img_patch_cv, trans = generate_patch_image_cv(
        cvimg.copy(), img_width, img_height, patch_width, patch_height,
        do_flip, [scale, rot, center])
    img_patch_tensor = convert_cvimg_to_tensor(img_patch_cv)

    # Per-channel color scaling, then optional mean/std normalization.
    normalize = aug_config.use_color_normalize and mean is not None and std is not None
    for channel in range(img_channels):
        img_patch_tensor[channel] = np.clip(
            img_patch_tensor[channel] * color_scale[channel], 0, 255)
        if normalize:
            img_patch_tensor[channel] = (img_patch_tensor[channel] - mean[channel]) / std[channel]

    # Mirror the annotations when the patch was flipped.
    if do_flip:
        windows = fliplr_label(windows, img_width, flip_pairs)

    # Map every annotated point into patch coordinates.
    for window in windows:
        for point_idx in range(len(window)):
            window[point_idx, :] = trans_point2d(window[point_idx, :], trans)

    # Task-specific label.
    label = label_func(label_config, patch_width, patch_height, windows)

    # Ground-truth locations for the AE method; zeros placeholder otherwise.
    gt_loc = np.zeros((10, 4, 2))
    if label_config.useAE:
        stride = label_config.feat_stride
        gt_loc = np.array(windows[0:4]).transpose((1, 0, 2))
        gt_loc = (gt_loc / stride + 0.5).astype(int)
        hm_size = patch_height // stride
        gt_loc = KeypointsRef(gt_loc, hm_size)

    VIS = False
    if VIS:
        debug_vis(img_patch_cv, windows, label=label, raw_img=cvimg)

    return img_patch_tensor, label, gt_loc
def dt_db(self, det_bbox_src):
    """Return the keypoint database with boxes replaced by detector output.

    If a detection cache file for the current sample count exists it is
    returned as is. Otherwise the ground-truth keypoint cache is loaded and
    every entry's ``center_x``/``center_y``/``width``/``height`` is
    overwritten from ``<cache_path>/detection/<det_bbox_src>/kpts_bbox.pkl``.
    Boxes are read as ``[x0, y0, x1, y1, ...]``, padded to
    ``self.aspect_ratio`` and enlarged by 1.25.

    Side effects: sets ``self.detector`` and ``self.num_sample_single``; on
    the rebuild path also ``self.mean_bone_length``.

    :param det_bbox_src: detector name, used as a sub-directory name.
    :return: list of sample dicts.
    :raises AssertionError: if the ground-truth cache is missing or the
        number of boxes differs from the number of samples.

    NOTE(review): this function never writes the detection cache file it
    looks for -- presumably it is produced elsewhere; confirm.
    """
    print("Using Detector:", det_bbox_src)
    self.detector = det_bbox_src

    # Only the sample count takes part in the cache file names.
    _folders, sample_num, _step, _folder_start, _folder_end = \
        self._sample_dataset(self.image_set_name)

    dt_cache_file = os.path.join(
        self.cache_path,
        self.name + '_keypoint_dt_db_sample' + str(sample_num) + '.pkl')
    if os.path.exists(dt_cache_file):
        # NOTE: pickle executes arbitrary code on load; trusted files only.
        with open(dt_cache_file, 'rb') as fid:
            cached = pk.load(fid)
        print('{} gt db loaded from {}, {} samples are loaded'.format(
            self.name, dt_cache_file, len(cached)))
        # Keep the sample count in sync on the cache-hit path as well.
        self.num_sample_single = len(cached)
        return cached

    gt_cache_file = os.path.join(
        self.cache_path,
        self.name + '_keypoint_db_sample' + str(sample_num) + '.pkl')
    if not os.path.exists(gt_cache_file):
        # Explicit raise instead of `assert 0`: the check must survive
        # `python -O`, where asserts are stripped.
        raise AssertionError(gt_cache_file + ' not exist...')
    with open(gt_cache_file, 'rb') as fid:
        gt_db = pk.load(fid)
    print('{} gt db loaded from {}, {} samples are loaded'.format(
        self.name, gt_cache_file, len(gt_db)))

    self.num_sample_single = len(gt_db)
    self.mean_bone_length = np.asarray(
        [item['bone_len'] for item in gt_db]).mean()

    # update bbox using detection result
    print("Updating BBox from detector")
    bbox_file = os.path.join(self.cache_path, 'detection', det_bbox_src,
                             'kpts_bbox.pkl')
    with open(bbox_file, 'rb') as fid:
        bbox_list = pk.load(fid)
    if len(bbox_list) != len(gt_db):
        raise AssertionError(
            '{} boxes for {} samples'.format(len(bbox_list), len(gt_db)))

    for idx, sample in enumerate(gt_db):
        box = bbox_list[idx]
        center_x = (box[0] + box[2]) * 0.5
        center_y = (box[1] + box[3]) * 0.5
        width = box[2] - box[0]
        height = box[3] - box[1]

        # Grow the shorter side so the box matches the target aspect ratio.
        if width > self.aspect_ratio * height:
            height = width * 1.0 / self.aspect_ratio
        elif width < self.aspect_ratio * height:
            width = height * self.aspect_ratio

        # Enlarge the box by 25%.
        width = width * 1.25
        height = height * 1.25

        sample['center_x'] = center_x
        sample['center_y'] = center_y
        sample['width'] = width
        sample['height'] = height

        DEBUG = False
        if DEBUG:
            box = [center_x, center_y, width, height]
            pose = []
            debug_vis(os.path.join(sample['image']), box, pose)

    return gt_db
def dt_db(self, det_bbox_src):
    '''
    Organize image paths and detector bounding boxes into the database
    structure consumed by a 2d pose estimator. Only the image and bbox
    fields carry information; every pose-related field is a zero placeholder.

    The result is cached as ``<cache_path>/<name>_bbox_dt_<detector>_db.pkl``.
    Each per-image detection file must hold exactly one box, read as
    ``[x0, y0, x1, y1, score]``; the box is padded to ``self.aspect_ratio``
    and enlarged by 1.1.

    Side effects: sets ``self.detector`` and ``self.num_sample_single``.

    :param det_bbox_src: detector name, used in the cache file name and as
        the detection sub-directory.
    :return: list of sample dicts
    :raises AssertionError: if a detection file does not hold exactly one box
    '''
    self.detector = det_bbox_src

    cache_file = '{}_bbox_dt_{}_db.pkl'.format(self.name, self.detector)
    cache_file = os.path.join(self.cache_path, cache_file)
    if os.path.exists(cache_file):
        # NOTE: pickle executes arbitrary code on load; trusted files only.
        with open(cache_file, 'rb') as fid:
            db = pk.load(fid)
        print('{} gt db loaded from {}, {} samples are loaded'.format(
            self.name, cache_file, len(db)))
        # Identity check: `!= None` is ambiguous for array-like objects.
        if db is not None:
            self.num_sample_single = len(db)
            return db

    set_root = os.path.join(self.dataset_path, self.image_set_name)
    dt_db = []
    # Image and detection files are numbered from 1.
    for img_no in range(1, len(self.image_names) + 1):
        img_path = os.path.join(set_root, 'IMG', '%05d.jpg' % img_no)
        bbox_file = os.path.join(set_root, 'detection', self.detector,
                                 '%05d.pkl' % img_no)

        # process bbox
        with open(bbox_file, 'rb') as fid:
            bbox = pk.load(fid)
        if len(bbox) != 1:
            # Explicit raise so the check survives `python -O`.
            raise AssertionError(
                "Cannot be %d bbox for image %s" % (len(bbox), img_path))
        box = bbox[0]

        center_x = (box[0] + box[2]) * 0.5
        center_y = (box[1] + box[3]) * 0.5
        score = box[4]
        width = box[2] - box[0]
        height = box[3] - box[1]

        # Grow the shorter side so the box matches the target aspect ratio.
        if width > self.aspect_ratio * height:
            height = width * 1.0 / self.aspect_ratio
        elif width < self.aspect_ratio * height:
            width = height * self.aspect_ratio

        # Enlarge the box by 10%.
        width = width * 1.1
        height = height * 1.1

        dt_db.append({
            'image': img_path,
            'flip_pairs': self.flip_pairs,
            'parent_ids': self.parent_ids,

            # joint, useless
            'joints_3d': np.zeros((18, 3)),  # [org_img_x, org_img_y, depth - root_depth]
            'joints_3d_vis': np.zeros((18, 3)),
            'joints_3d_relative': np.zeros((18, 3)),  # [X-root, Y-root, Z-root] in camera coordinate, subtracted by root
            'bone_len': 0,

            # bbox
            'center_x': center_x,
            'center_y': center_y,
            'width': width,
            'height': height,
            'score': score,
        })

        DEBUG = False
        if DEBUG:
            bbox = [center_x, center_y, width, height]
            pose = []
            debug_vis(img_path, bbox, pose)

    with open(cache_file, 'wb') as fid:
        pk.dump(dt_db, fid, pk.HIGHEST_PROTOCOL)
    print('{} samples ared wrote {}'.format(len(dt_db), cache_file))

    self.num_sample_single = len(dt_db)
    return dt_db
def jnt_bbox_db_core(name, cache_path, dataset_path, image_set_name, image_names, joint_num, aspect_ratio,
                     flip_pairs, parent_ids):
    '''
    Build (or load from cache) the joint/bbox database:
      1) get aligned 2d pose; 2) record align params;
      3) generate bbox around aligned 2d pose; 4) calc skeleton length.
    It's the very db used to train and val.

    For ``image_set_name == 'Test'`` only the box around the visible
    predicted keypoints is computed; pose fields are zero placeholders.
    For ``'Train'``/``'Val'`` the ground-truth pose is read from
    ``POSE/<index>.csv`` and aligned to the predicted 2d keypoints with a
    similarity transform fitted on the visible joints.

    :param name: database name, used in the cache file name
    :param cache_path: directory of the cache file
    :param dataset_path: dataset root directory
    :param image_set_name: 'Test', 'Train' or 'Val'
    :param image_names: image list; only its length is used
    :param joint_num: joint count of the raw pose; a thorax joint is appended
        when it equals ``s_36_jt_num``
    :param aspect_ratio: target width/height ratio of the boxes
    :param flip_pairs: stored unchanged in every sample
    :param parent_ids: stored unchanged in every sample
    :return: list of sample dicts
    :raises ValueError: if ``image_set_name`` is not one of the three above
    '''
    cache_file = '{}_keypoint_jnt_bbox_db.pkl'.format(name)
    cache_file = os.path.join(cache_path, cache_file)
    if os.path.exists(cache_file):
        # NOTE: pickle executes arbitrary code on load; trusted files only.
        with open(cache_file, 'rb') as fid:
            db = pk.load(fid)
        print('{} gt db loaded from {}, {} samples are loaded'.format(name, cache_file, len(db)))
        # Identity check: `!= None` is ambiguous for array-like objects.
        if db is not None:
            return db

    img_pred_file = '{}_{}samples_img_res.pkl'.format('HM36_eccv_challenge_' + image_set_name, len(image_names))
    img_pred_file = os.path.join(dataset_path, image_set_name, img_pred_file)
    with open(img_pred_file, 'rb') as fid:
        img_pred = pk.load(fid)

    dt_db = []
    for idx in range(len(image_names)):
        img_path = os.path.join(dataset_path, image_set_name, 'IMG', '%05d.jpg' % (idx + 1))
        pred_pose_in_img_wz_score = img_pred[idx]["kpts"]  # 18x3, already in hm36 skeleton structure
        pred_pose_vis = img_pred[idx]["vis"]

        # Indices of the joints marked visible in the prediction.
        mask = np.where(pred_pose_vis[:, 0] > 0)

        if image_set_name == 'Test':
            # only thing need to do: generate bbox around kpts
            u, d, l, r = calc_kpt_bound(pred_pose_in_img_wz_score[mask[0], 0:2], pred_pose_vis[mask[0], 0:2])
            # Separate arrays: a chained assignment would make all three
            # database fields share one buffer.
            align_joints_2d_wz = np.zeros((18, 3))
            joints_2d_vis = np.zeros((18, 3))
            gtPose = np.zeros((18, 3))
            skeleton_length = s = rot = t = 0
        elif image_set_name in ['Train', 'Val']:
            # process pose
            gt_file = os.path.join(dataset_path, image_set_name, 'POSE', '%05d.csv' % (idx + 1))
            gtPose = genfromtxt(gt_file, delimiter=',')

            # add thorax (midpoint of the two shoulder joints)
            if joint_num == s_36_jt_num:
                thorax = (gtPose[s_36_lsh_jt_idx] + gtPose[s_36_rsh_jt_idx]) * 0.5
                thorax = thorax.reshape((1, 3))
                gtPose = np.concatenate((gtPose, thorax), axis=0)
            assert len(gtPose) == s_36_jt_num, "#Joint Must be 18, Now #Joint %d" % len(gtPose)

            # align: only visible joints take part in the fit
            target_pose = pred_pose_in_img_wz_score[mask[0], 0:2]
            from_pose = gtPose[mask[0], 0:2]
            _, Z, rot, s, t = compute_similarity_transform(target_pose, from_pose, compute_optimal_scale=True)

            align_joints_2d_wz = s * gtPose[:, 0:2].dot(rot) + t
            align_joints_2d_wz = np.concatenate((align_joints_2d_wz, gtPose[:, 2:3] * s), axis=1)
            # np.float was removed in NumPy 1.24; np.float64 is the same dtype.
            joints_2d_vis = np.ones(align_joints_2d_wz.shape, dtype=np.float64)

            # other
            skeleton_length = calc_total_skeleton_length(gtPose, s_36_parent_ids)

            # generate bbox
            u, d, l, r = calc_kpt_bound(align_joints_2d_wz, joints_2d_vis)
        else:
            # Previously this fell through to a NameError on the bbox bounds.
            raise ValueError('Unknown image_set_name: {}'.format(image_set_name))

        center_x = (l + r) * 0.5
        center_y = (u + d) * 0.5
        assert center_x >= 1

        w = r - l
        h = d - u
        assert w > 0
        assert h > 0

        # Grow the shorter side so the box matches the target aspect ratio.
        if w > aspect_ratio * h:
            h = w * 1.0 / aspect_ratio
        elif w < aspect_ratio * h:
            w = h * aspect_ratio

        # Enlarge the box by 25%.
        w *= 1.25
        h *= 1.25

        dt_db.append({
            'image': img_path,
            'flip_pairs': flip_pairs,
            'parent_ids': parent_ids,

            # pose
            'joints_3d': align_joints_2d_wz,  # [org_img_x, org_img_y, depth - root_depth]
            'joints_3d_vis': joints_2d_vis,
            'joints_3d_relative': gtPose,  # [X-root, Y-root, Z-root] in camera coordinate, subtracted by root
            'bone_len': skeleton_length,

            # bbox
            'center_x': center_x,
            'center_y': center_y,
            'width': w,
            'height': h,

            # align
            's': s,
            'rot': rot,
            't': t,
        })

        DEBUG = False
        if DEBUG:
            bbox = [center_x, center_y, w, h]
            pose = [align_joints_2d_wz, joints_2d_vis]
            debug_vis(img_path, bbox, pose)

    with open(cache_file, 'wb') as fid:
        pk.dump(dt_db, fid, pk.HIGHEST_PROTOCOL)
    print('{} samples ared wrote {}'.format(len(dt_db), cache_file))

    return dt_db
def gt_db(self):
    """Load the ground-truth keypoint database from cache, or build it.

    Annotations are read from ``<dataset_path>/annot/<image_set_name>.json``.
    For every entry the 1-based center is shifted to 0-based, the box is
    derived from ``scale`` (times 1.25 and ``self.pixel_std``) and the width
    is then set to ``height * patch_width / patch_height``. Joints are only
    filled in when ``self.image_set_name != 'test'``.

    :return: list of dicts with keys ``image``, ``center_x``, ``center_y``,
        ``width``, ``height``, ``flip_pairs``, ``parent_ids``, ``joints_3d``
        and ``joints_3d_vis``.
    :raises AssertionError: if a center x is below 1, the joint count differs
        from ``self.joint_num``, or the patch is wider than the square box.
    """
    cache_file = os.path.join(self.cache_path,
                              self.name + '_keypoint_db_v3' + '.pkl')
    if os.path.exists(cache_file):
        # NOTE: pickle executes arbitrary code on load; trusted files only.
        with open(cache_file, 'rb') as fid:
            db = pk.load(fid)
        print('{} gt db loaded from {}, {} samples are loaded'.format(
            self.name, cache_file, len(db)))
        return db

    # create train/val split
    anno_path = os.path.join(self.dataset_path, 'annot',
                             self.image_set_name + '.json')
    with open(anno_path) as anno_file:
        anno = json.load(anno_file)

    samples = []
    for a in anno:
        # center and size (np.float was removed in NumPy 1.24)
        c = np.array(a['center'], dtype=np.float64)
        assert c[0] >= 1
        # Annotations are 1-based; shift to 0-based pixel coordinates.
        c_x = c[0] - 1
        c_y = c[1] - 1

        s = np.array([a['scale'], a['scale']], dtype=np.float64)
        width = s[0]
        height = s[1]

        # Adjust center/scale slightly to avoid cropping limbs, this is the
        # common practice on mpii dataset
        c_y = c_y + 15 * height
        width = width * 1.25 * self.pixel_std
        height = height * 1.25 * self.pixel_std

        if width / height >= 1.0 * self.patch_width / self.patch_height:
            width = 1.0 * height * self.patch_width / self.patch_height
        else:
            # Explicit raise instead of `assert 0` so the check survives
            # `python -O`.
            raise AssertionError("Error. Invalid patch width and height")

        # joints and vis
        jts_3d = np.zeros((self.joint_num, 3), dtype=np.float64)
        jts_3d_vis = np.zeros((self.joint_num, 3), dtype=np.float64)
        if self.image_set_name != 'test':
            jts = np.array(a['joints'])
            jts[:, 0:2] = jts[:, 0:2] - 1  # 1-based -> 0-based
            jts_vis = np.array(a['joints_vis'])
            assert len(jts) == self.joint_num, \
                'joint num diff: {} vs {}'.format(len(jts), self.joint_num)
            jts_3d[:, 0:2] = jts[:, 0:2]
            # The same visibility flag is used for x and y.
            jts_3d_vis[:, 0] = jts_vis[:]
            jts_3d_vis[:, 1] = jts_vis[:]

        img_path = os.path.join(self.dataset_path, '', 'images', a['image'])
        samples.append({
            'image': img_path,
            'center_x': c_x,
            'center_y': c_y,
            'width': width,
            'height': height,
            'flip_pairs': self.flip_pairs,
            'parent_ids': self.parent_ids,
            'joints_3d': jts_3d,
            'joints_3d_vis': jts_3d_vis,
        })

        DEBUG = False
        if DEBUG:
            box = [c_x, c_y, width, height]
            pose = [jts_3d, jts_3d_vis]
            debug_vis(img_path, box, pose)

    with open(cache_file, 'wb') as fid:
        pk.dump(samples, fid, pk.HIGHEST_PROTOCOL)
    print('{} samples ared wrote {}'.format(len(samples), cache_file))

    return samples
def jnt_bbox_db(self):
    """Load the joint-bounded box database from cache, or build it.

    Annotations are read from ``<dataset_path>/annot/<image_set_name>.json``.
    For every entry with at least two visible joints the box is taken from
    ``calc_kpt_bound`` over the joints, padded to ``self.aspect_ratio`` and
    enlarged by 1.25. Entries with fewer visible joints are skipped.

    :return: list of dicts with keys ``image``, ``center_x``, ``center_y``,
        ``width``, ``height``, ``flip_pairs``, ``parent_ids``, ``joints_3d``
        and ``joints_3d_vis``.

    NOTE(review): when ``self.image_set_name == 'test'`` no joints are
    filled in, so every entry is skipped and the result is empty --
    presumably this method is only meant for the annotated splits; confirm.
    """
    cache_file = os.path.join(self.cache_path,
                              self.name + '_keypoint_jntBBox_db.pkl')
    if os.path.exists(cache_file):
        # NOTE: pickle executes arbitrary code on load; trusted files only.
        with open(cache_file, 'rb') as fid:
            db = pk.load(fid)
        print('{} gt db loaded from {}, {} samples are loaded'.format(
            self.name, cache_file, len(db)))
        return db

    # create train/val split
    anno_path = os.path.join(self.dataset_path, 'annot',
                             self.image_set_name + '.json')
    with open(anno_path) as anno_file:
        anno = json.load(anno_file)

    samples = []
    for a in anno:
        # joints and vis (np.float was removed in NumPy 1.24)
        jts_3d = np.zeros((self.joint_num, 3), dtype=np.float64)
        jts_3d_vis = np.zeros((self.joint_num, 3), dtype=np.float64)
        if self.image_set_name != 'test':
            jts = np.array(a['joints'])
            jts[:, 0:2] = jts[:, 0:2] - 1  # 1-based -> 0-based
            jts_vis = np.array(a['joints_vis'])
            assert len(jts) == self.joint_num, \
                'joint num diff: {} vs {}'.format(len(jts), self.joint_num)
            jts_3d[:, 0:2] = jts[:, 0:2]
            # The same visibility flag is used for x and y.
            jts_3d_vis[:, 0] = jts_vis[:]
            jts_3d_vis[:, 1] = jts_vis[:]

        # A box needs at least two visible joints; skip the entry otherwise.
        if np.sum(jts_3d_vis[:, 0]) < 2:
            continue

        u, d, l, r = calc_kpt_bound(jts_3d, jts_3d_vis)
        center = np.array([(l + r) * 0.5, (u + d) * 0.5], dtype=np.float32)
        c_x = center[0]
        c_y = center[1]
        assert c_x >= 1

        w = r - l
        h = d - u
        assert w > 0
        assert h > 0

        # Grow the shorter side so the box matches the target aspect ratio.
        if w > self.aspect_ratio * h:
            h = w * 1.0 / self.aspect_ratio
        elif w < self.aspect_ratio * h:
            w = h * self.aspect_ratio

        # Enlarge the box by 25%.
        width = w * 1.25
        height = h * 1.25

        img_path = os.path.join(self.dataset_path, '', 'images', a['image'])
        samples.append({
            'image': img_path,
            'center_x': c_x,
            'center_y': c_y,
            'width': width,
            'height': height,
            'flip_pairs': self.flip_pairs,
            'parent_ids': self.parent_ids,
            'joints_3d': jts_3d,
            'joints_3d_vis': jts_3d_vis,
        })

        DEBUG = False
        if DEBUG:
            box = [c_x, c_y, width, height]
            pose = [jts_3d, jts_3d_vis]
            debug_vis(img_path, box, pose)

    with open(cache_file, 'wb') as fid:
        pk.dump(samples, fid, pk.HIGHEST_PROTOCOL)
    print('{} samples ared wrote {}'.format(len(samples), cache_file))

    return samples