def rescale_pose_from_patch_to_camera(preds_in_patch, target_bone_len, parent_ids):
    """Root-centre a predicted pose and rescale it by a skeleton-length ratio.

    The pose is first expressed relative to its joint at index 0, then
    multiplied by ``target_bone_len / current_length`` where
    ``current_length`` is what ``calc_total_skeleton_length`` reports for the
    root-relative pose.

    :param preds_in_patch: joint coordinates; the entry at index 0 is
        subtracted from every joint
    :param target_bone_len: total skeleton length used as the numerator of
        the rescale factor
    :param parent_ids: forwarded unchanged to ``calc_total_skeleton_length``
    :return: the root-relative pose multiplied by the rescale factor
    """
    root_relative = preds_in_patch - preds_in_patch[0]
    current_length = calc_total_skeleton_length(root_relative, parent_ids)
    # Leading 1.0 keeps the division in floating point for integer inputs.
    scale = 1.0 * target_bone_len / current_length
    return scale * root_relative
def jnt_bbox_db(self):
    """Load the joint/bbox database from cache, or build and cache it.

    The cache file lives in ``self.cache_path`` and is keyed by ``self.name``
    and the sample count returned by ``self._sample_dataset``. On a cache hit
    the pickled database is returned as is. Otherwise every selected folder's
    ``matlab_meta.txt`` annotation is parsed, one record per selected frame is
    assembled, and the resulting list is pickled to the cache file.

    Side effects: sets ``self.num_sample_single`` (number of records) and
    ``self.mean_bone_length`` (mean of the records' ``'bone_len'``), and may
    write the cache file.

    :return: list of per-frame dicts (image path, bbox, joints, camera
        parameters, skeleton length)
    """
    folders, sample_num, step, folder_start, folder_end = self._sample_dataset(
        self.image_set_name)

    cache_file = os.path.join(
        self.cache_path,
        self.name + '_keypoint_jntBBox_db_sample' + str(sample_num) + '.pkl')

    db = None
    if os.path.exists(cache_file):
        # NOTE: unpickling can execute arbitrary code; the cache directory
        # must only contain files written by this code.
        with open(cache_file, 'rb') as fid:
            db = pk.load(fid)
        print('{} gt db loaded from {}, {} samples are loaded'.format(
            self.name, cache_file, len(db)))

    # Identity check: "!= None" is evaluated element-wise on array-like
    # objects and then has no unambiguous truth value.
    if db is not None:
        self.num_sample_single = len(db)
        self.mean_bone_length = np.asarray(
            [item['bone_len'] for item in db]).mean()
        return db

    samples = []
    for n_folder in range(folder_start, folder_end):
        print('Loading folder ', n_folder, ' in ', len(folders))

        # load ground truth
        keypoints, trans, jt_list, rot, fl, c_p, img_width, img_height = \
            parsing_hm36_gt_file(
                os.path.join(self.dataset_path, "annot", folders[n_folder],
                             'matlab_meta.txt'))

        # random sample redundant video sequence
        num_frames = keypoints.shape[0]
        if sample_num > 0:
            img_index = np.random.choice(num_frames, sample_num, replace=False)
        else:
            img_index = np.arange(num_frames)
            img_index = img_index[0:num_frames:step]

        for n_img in img_index:
            image_name = os.path.join(
                folders[n_folder],
                self._H36ImageName(folders[n_folder], n_img))
            assert keypoints.shape[1] == self.joint_num

            _, _, _, _, pt_2d, pt_3d, vis, pelvis3d = from_worldjt_to_imagejt(
                n_img, self.joint_num, rot, keypoints, trans, fl, c_p,
                self.rect_3d_width, self.rect_3d_height)

            c_x, c_y, w, h = calc_kpt_bound_pad(pt_2d, vis, self.aspect_ratio)

            # Root-relative joints: every joint minus the joint at index 0.
            pt_3d_relative = pt_3d - pt_3d[0]
            skeleton_length = calc_total_skeleton_length(
                pt_3d_relative, s_36_parent_ids)

            samples.append({
                'image': os.path.join(self.dataset_path, '', 'images', image_name),
                'center_x': c_x,
                'center_y': c_y,
                'width': w,
                'height': h,
                'flip_pairs': self.flip_pairs,
                'parent_ids': self.parent_ids,
                'joints_3d': pt_2d,  # [org_img_x, org_img_y, depth - root_depth]
                'joints_3d_vis': vis,
                'joints_3d_cam': pt_3d,  # [X, Y, Z] in camera coordinate
                'pelvis': pelvis3d,
                'fl': fl,
                'c_p': c_p,
                'joints_3d_relative': pt_3d_relative,  # [X-root, Y-root, Z-root] in camera coordinate
                'bone_len': skeleton_length
            })

    self.mean_bone_length = np.asarray(
        [item['bone_len'] for item in samples]).mean()

    with open(cache_file, 'wb') as fid:
        pk.dump(samples, fid, pk.HIGHEST_PROTOCOL)
    print('{} samples ared wrote {}'.format(len(samples), cache_file))

    self.num_sample_single = len(samples)
    return samples
def jnt_bbox_db_core(name, cache_path, dataset_path, image_set_name, image_names,
                     joint_num, aspect_ratio, flip_pairs, parent_ids):
    '''
    Build (or load from cache) the db used to train and val.

    For every image this function
        1) gets the aligned 2d pose;
        2) records the align params;
        3) generates a bbox around the aligned 2d pose;
        4) calcs the skeleton length.

    :param name: dataset name, used as prefix of the cache file name
    :param cache_path: directory holding ``<name>_keypoint_jnt_bbox_db.pkl``
    :param dataset_path: dataset root containing one sub-folder per image set
    :param image_set_name: 'Test', 'Train' or 'Val'
    :param image_names: image list; only its length is used (samples are
        addressed as ``%05d`` with a 1-based index)
    :param joint_num: number of joints; a thorax joint is appended to the
        ground truth when it equals ``s_36_jt_num``
    :param aspect_ratio: width / height ratio the bbox is expanded to
    :param flip_pairs: stored unchanged in every record
    :param parent_ids: stored unchanged in every record
    :return: list of per-image dicts (pose, bbox and alignment parameters)
    :raises ValueError: if ``image_set_name`` is not one of the known sets
    '''
    cache_file = os.path.join(
        cache_path, '{}_keypoint_jnt_bbox_db.pkl'.format(name))

    db = None
    if os.path.exists(cache_file):
        # NOTE: unpickling can execute arbitrary code; only load cache files
        # produced by this code.
        with open(cache_file, 'rb') as fid:
            db = pk.load(fid)
        print('{} gt db loaded from {}, {} samples are loaded'.format(name, cache_file, len(db)))

    # Identity check: "!= None" is element-wise on array-like objects.
    if db is not None:
        return db

    img_pred_file = '{}_{}samples_img_res.pkl'.format(
        'HM36_eccv_challenge_' + image_set_name, len(image_names))
    img_pred_file = os.path.join(dataset_path, image_set_name, img_pred_file)
    with open(img_pred_file, 'rb') as fid:
        img_pred = pk.load(fid)

    DEBUG = False
    dt_db = []
    for idx in range(len(image_names)):
        img_path = os.path.join(dataset_path, image_set_name, 'IMG',
                                '%05d.jpg' % (idx + 1))

        pred_pose_in_img_wz_score = img_pred[idx]["kpts"]  # 18x3, already in hm36 skeleton structure
        pred_pose_vis = img_pred[idx]["vis"]

        if image_set_name == 'Test':
            # only thing need to do: generate bbox around kpts
            mask = np.where(pred_pose_vis[:, 0] > 0)  # only align visible joints
            u, d, l, r = calc_kpt_bound(pred_pose_in_img_wz_score[mask[0], 0:2],
                                        pred_pose_vis[mask[0], 0:2])

            # Separate arrays per field: a single shared array would let an
            # in-place edit of one record field silently change the others.
            align_joints_2d_wz = np.zeros((18, 3))
            joints_2d_vis = np.zeros((18, 3))
            gtPose = np.zeros((18, 3))
            skeleton_length = s = rot = t = 0
        elif image_set_name in ['Train', 'Val']:
            # process pose
            gt_file = os.path.join(dataset_path, image_set_name, 'POSE',
                                   '%05d.csv' % (idx + 1))
            gtPose = genfromtxt(gt_file, delimiter=',')

            # add thorax (midpoint of the two shoulder joints)
            if joint_num == s_36_jt_num:
                thorax = (gtPose[s_36_lsh_jt_idx] + gtPose[s_36_rsh_jt_idx]) * 0.5
                thorax = thorax.reshape((1, 3))
                gtPose = np.concatenate((gtPose, thorax), axis=0)
            assert len(gtPose) == s_36_jt_num, "#Joint Must be 18, Now #Joint %d" % len(gtPose)

            # align
            mask = np.where(pred_pose_vis[:, 0] > 0)  # only align visible joints
            target_pose = pred_pose_in_img_wz_score[mask[0], 0:2]
            from_pose = gtPose[mask[0], 0:2]
            _, _, rot, s, t = compute_similarity_transform(
                target_pose, from_pose, compute_optimal_scale=True)

            align_joints_2d_wz = s * gtPose[:, 0:2].dot(rot) + t
            # Depth is only scaled, not rotated or translated.
            align_joints_2d_wz = np.concatenate(
                (align_joints_2d_wz, gtPose[:, 2:3] * s), axis=1)
            # Builtin float: the np.float alias was removed in NumPy 1.24.
            joints_2d_vis = np.ones(align_joints_2d_wz.shape, dtype=float)

            # other
            skeleton_length = calc_total_skeleton_length(gtPose, s_36_parent_ids)

            # generate bbox
            u, d, l, r = calc_kpt_bound(align_joints_2d_wz, joints_2d_vis)
        else:
            # Previously this fell through and failed later with a NameError
            # on the unbound bbox variables.
            raise ValueError(
                "Unknown image_set_name %r, expected 'Test', 'Train' or 'Val'"
                % (image_set_name,))

        center_x = (l + r) * 0.5
        center_y = (u + d) * 0.5
        assert center_x >= 1

        w = r - l
        h = d - u
        assert w > 0
        assert h > 0

        # Grow the shorter side so that w / h == aspect_ratio, then pad by 25%.
        if w > aspect_ratio * h:
            h = w * 1.0 / aspect_ratio
        elif w < aspect_ratio * h:
            w = h * aspect_ratio

        w *= 1.25
        h *= 1.25

        dt_db.append({
            'image': img_path,
            'flip_pairs': flip_pairs,
            'parent_ids': parent_ids,

            # pose
            'joints_3d': align_joints_2d_wz,  # [org_img_x, org_img_y, depth - root_depth]
            'joints_3d_vis': joints_2d_vis,
            'joints_3d_relative': gtPose,  # [X-root, Y-root, Z-root] in camera coordinate, subtracted by root
            'bone_len': skeleton_length,

            # bbox
            'center_x': center_x,
            'center_y': center_y,
            'width': w,
            'height': h,

            # align
            's': s,
            'rot': rot,
            't': t
        })

        if DEBUG:
            bbox = [center_x, center_y, w, h]
            pose = [align_joints_2d_wz, joints_2d_vis]
            debug_vis(img_path, bbox, pose)

    with open(cache_file, 'wb') as fid:
        pk.dump(dt_db, fid, pk.HIGHEST_PROTOCOL)
    print('{} samples ared wrote {}'.format(len(dt_db), cache_file))

    return dt_db