def get_fitting_error(self, coco_joint, smpl_mesh, cam_param, img2bb_trans, coco_joint_valid):
    # get coco joint from smpl mesh
    coco_from_smpl = np.dot(self.coco_joint_regressor, smpl_mesh)
    coco_from_smpl = self.add_pelvis(coco_from_smpl)  # z-axis component will be removed
    coco_from_smpl = cam2pixel(coco_from_smpl, cam_param['focal'], cam_param['princpt'])
    coco_from_smpl_xy1 = np.concatenate((coco_from_smpl[:, :2], np.ones_like(coco_from_smpl[:, 0:1])), 1)
    coco_from_smpl[:, :2] = np.dot(img2bb_trans, coco_from_smpl_xy1.transpose(1, 0)).transpose(1, 0)
    coco_from_smpl[:, 0] = coco_from_smpl[:, 0] / cfg.input_img_shape[1] * cfg.output_hm_shape[2]
    coco_from_smpl[:, 1] = coco_from_smpl[:, 1] / cfg.input_img_shape[0] * cfg.output_hm_shape[1]

    # mask joint coordinates
    coco_joint = coco_joint[:, :2][np.tile(coco_joint_valid, (1, 2)) == 1].reshape(-1, 2)
    coco_from_smpl = coco_from_smpl[:, :2][np.tile(coco_joint_valid, (1, 2)) == 1].reshape(-1, 2)

    error = np.sqrt(np.sum((coco_joint - coco_from_smpl)**2, 1)).mean()
    return error
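# The core of this check is a masked mean per-joint 2D distance. Below is a minimal,
# self-contained sketch of that metric (hypothetical arrays, outside the class and
# without the heatmap-space rescaling), not the repo's exact implementation.
import numpy as np

def masked_joint_error(joints_gt, joints_pred, valid):
    """Mean Euclidean distance over valid 2D joints.

    joints_gt, joints_pred: (J, 2) arrays of x,y coordinates.
    valid: (J, 1) array of 0/1 validity flags.
    """
    mask = np.tile(valid, (1, 2)) == 1       # broadcast validity to both coordinates
    gt = joints_gt[mask].reshape(-1, 2)      # keep only valid joints
    pred = joints_pred[mask].reshape(-1, 2)
    return np.sqrt(np.sum((gt - pred) ** 2, axis=1)).mean()

# toy usage with 3 joints, the last one invalid
gt = np.array([[10., 20.], [30., 40.], [0., 0.]])
pred = np.array([[11., 20.], [30., 43.], [99., 99.]])
valid = np.array([[1.], [1.], [0.]])
print(masked_joint_error(gt, pred, valid))  # averages over the 2 valid joints only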
def get_bbox(joint_world, joint_valid, camrot, campos, focal, princpt):
    joint_cam = []
    for i in range(len(joint_world)):
        joint_cam.append(world2cam(joint_world[i], camrot, campos))
    joint_cam = np.array(joint_cam).reshape(-1, 3)
    x_img, y_img, z_img = cam2pixel(joint_cam, focal, princpt)
    x_img = x_img[joint_valid[:, 0] == 1]
    y_img = y_img[joint_valid[:, 0] == 1]
    xmin = min(x_img)
    ymin = min(y_img)
    xmax = max(x_img)
    ymax = max(y_img)

    x_center = (xmin + xmax) / 2.
    width = xmax - xmin
    xmin = x_center - 0.5 * width * 1.5
    xmax = x_center + 0.5 * width * 1.5

    y_center = (ymin + ymax) / 2.
    height = ymax - ymin
    ymin = y_center - 0.5 * height * 1.5
    ymax = y_center + 0.5 * height * 1.5

    bbox = np.array([xmin, ymin, xmax, ymax]).astype(np.float32)
    return bbox
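# The second half of get_bbox simply enlarges the tight keypoint bounds by a factor of
# 1.5 about their center. A standalone sketch of just that expansion step (hypothetical
# helper, independent of the camera utilities):
import numpy as np

def expand_bbox(xmin, ymin, xmax, ymax, scale=1.5):
    """Scale a tight box about its center; returns [xmin, ymin, xmax, ymax]."""
    x_center, y_center = (xmin + xmax) / 2., (ymin + ymax) / 2.
    width, height = xmax - xmin, ymax - ymin
    return np.array([x_center - 0.5 * width * scale,
                     y_center - 0.5 * height * scale,
                     x_center + 0.5 * width * scale,
                     y_center + 0.5 * height * scale], dtype=np.float32)

print(expand_bbox(100, 50, 200, 150))  # [ 75.  25. 225. 175.]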
def __init__(self, transform, mode, annot_subset):
    self.mode = mode  # train, test, val
    self.annot_subset = annot_subset  # all, human_annot, machine_annot
    self.img_path = '../data/InterHand2.6M/images'
    self.annot_path = '../data/InterHand2.6M/annotations'
    if self.annot_subset == 'machine_annot' and self.mode == 'val':
        self.rootnet_output_path = '../data/InterHand2.6M/rootnet_output/rootnet_interhand2.6m_output_machine_annot_val.json'
    else:
        self.rootnet_output_path = '../data/InterHand2.6M/rootnet_output/rootnet_interhand2.6m_output_all_test.json'
    self.transform = transform
    self.joint_num = 21  # single hand
    self.root_joint_idx = {'right': 20, 'left': 41}
    self.joint_type = {'right': np.arange(0, self.joint_num), 'left': np.arange(self.joint_num, self.joint_num*2)}
    self.skeleton = load_skeleton(osp.join(self.annot_path, 'skeleton.txt'), self.joint_num*2)

    self.datalist = []
    self.datalist_sh = []
    self.datalist_ih = []
    self.sequence_names = []

    # load annotation
    print("Load annotation from " + osp.join(self.annot_path, self.annot_subset))
    db = COCO(osp.join(self.annot_path, self.annot_subset, 'InterHand2.6M_' + self.mode + '_data.json'))
    with open(osp.join(self.annot_path, self.annot_subset, 'InterHand2.6M_' + self.mode + '_camera.json')) as f:
        cameras = json.load(f)
    with open(osp.join(self.annot_path, self.annot_subset, 'InterHand2.6M_' + self.mode + '_joint_3d.json')) as f:
        joints = json.load(f)

    if (self.mode == 'val' or self.mode == 'test') and cfg.trans_test == 'rootnet':
        print("Get bbox and root depth from " + self.rootnet_output_path)
        rootnet_result = {}
        with open(self.rootnet_output_path) as f:
            annot = json.load(f)
        for i in range(len(annot)):
            rootnet_result[str(annot[i]['annot_id'])] = annot[i]
    else:
        print("Get bbox and root depth from groundtruth annotation")

    for aid in db.anns.keys():
        ann = db.anns[aid]
        image_id = ann['image_id']
        img = db.loadImgs(image_id)[0]

        capture_id = img['capture']
        seq_name = img['seq_name']
        cam = img['camera']
        frame_idx = img['frame_idx']
        img_path = osp.join(self.img_path, self.mode, img['file_name'])

        campos, camrot = np.array(cameras[str(capture_id)]['campos'][str(cam)], dtype=np.float32), np.array(cameras[str(capture_id)]['camrot'][str(cam)], dtype=np.float32)
        focal, princpt = np.array(cameras[str(capture_id)]['focal'][str(cam)], dtype=np.float32), np.array(cameras[str(capture_id)]['princpt'][str(cam)], dtype=np.float32)
        joint_world = np.array(joints[str(capture_id)][str(frame_idx)], dtype=np.float32)
        joint_cam = world2cam(joint_world.transpose(1, 0), camrot, campos.reshape(3, 1)).transpose(1, 0)
        joint_img = cam2pixel(joint_cam, focal, princpt)[:, :2]

        joint_valid = np.array(ann['joint_valid'], dtype=np.float32).reshape(self.joint_num*2)
        # if root is not valid -> root-relative 3D pose is also not valid. Therefore, mark all joints as invalid
        joint_valid[self.joint_type['right']] *= joint_valid[self.root_joint_idx['right']]
        joint_valid[self.joint_type['left']] *= joint_valid[self.root_joint_idx['left']]
        hand_type = ann['hand_type']
        hand_type_valid = np.array((ann['hand_type_valid']), dtype=np.float32)

        if (self.mode == 'val' or self.mode == 'test') and cfg.trans_test == 'rootnet':
            bbox = np.array(rootnet_result[str(aid)]['bbox'], dtype=np.float32)
            abs_depth = {'right': rootnet_result[str(aid)]['abs_depth'][0], 'left': rootnet_result[str(aid)]['abs_depth'][1]}
        else:
            img_width, img_height = img['width'], img['height']
            bbox = np.array(ann['bbox'], dtype=np.float32)  # x,y,w,h
            bbox = process_bbox(bbox, (img_height, img_width))
            abs_depth = {'right': joint_cam[self.root_joint_idx['right'], 2], 'left': joint_cam[self.root_joint_idx['left'], 2]}

        cam_param = {'focal': focal, 'princpt': princpt}
        joint = {'cam_coord': joint_cam, 'img_coord': joint_img, 'valid': joint_valid}
        data = {'img_path': img_path, 'seq_name': seq_name, 'cam_param': cam_param, 'bbox': bbox,
                'joint': joint, 'hand_type': hand_type, 'hand_type_valid': hand_type_valid,
                'abs_depth': abs_depth, 'file_name': img['file_name'], 'capture': capture_id,
                'cam': cam, 'frame': frame_idx}
        if hand_type == 'right' or hand_type == 'left':
            self.datalist_sh.append(data)
        else:
            self.datalist_ih.append(data)
        if seq_name not in self.sequence_names:
            self.sequence_names.append(seq_name)

    self.datalist = self.datalist_sh + self.datalist_ih
    print('Number of annotations in single hand sequences: ' + str(len(self.datalist_sh)))
    print('Number of annotations in interacting hand sequences: ' + str(len(self.datalist_ih)))
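# Usage sketch (assumption: this __init__ belongs to a torch Dataset subclass; the class
# name InterHandDataset below is hypothetical; substitute the repo's actual dataset class).
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

dataset = InterHandDataset(transforms.ToTensor(), mode='train', annot_subset='all')
loader = DataLoader(dataset, batch_size=16, shuffle=True, num_workers=4, pin_memory=True)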
def __getitem__(self, idx):
    data = copy.deepcopy(self.datalist[idx])
    img_path, img_shape, bbox, joint_cam, cam_param, mano_param = data['img_path'], data['img_shape'], data['bbox'], data['joint_cam'], data['cam_param'], data['mano_param']

    # img
    img = load_img(img_path)
    img, img2bb_trans, bb2img_trans, rot, _ = augmentation(img, bbox, self.data_split, exclude_flip=True)  # FreiHAND dataset only contains right hands. do not perform flip aug.
    img = self.transform(img.astype(np.float32)) / 255.

    if self.data_split == 'train':
        # mano coordinates
        mano_mesh_cam, mano_joint_cam, mano_pose, mano_shape = self.get_mano_coord(mano_param, cam_param)
        mano_coord_cam = np.concatenate((mano_mesh_cam, mano_joint_cam))
        focal, princpt = cam_param['focal'], cam_param['princpt']
        mano_coord_img = cam2pixel(mano_coord_cam, focal, princpt)

        # affine transform x,y coordinates, root-relative depth
        mano_coord_img_xy1 = np.concatenate((mano_coord_img[:, :2], np.ones_like(mano_coord_img[:, :1])), 1)
        mano_coord_img[:, :2] = np.dot(img2bb_trans, mano_coord_img_xy1.transpose(1, 0)).transpose(1, 0)[:, :2]
        root_joint_depth = mano_coord_cam[self.vertex_num + self.root_joint_idx][2]
        mano_coord_img[:, 2] = mano_coord_img[:, 2] - root_joint_depth
        mano_coord_img[:, 0] = mano_coord_img[:, 0] / cfg.input_img_shape[1] * cfg.output_hm_shape[2]
        mano_coord_img[:, 1] = mano_coord_img[:, 1] / cfg.input_img_shape[0] * cfg.output_hm_shape[1]
        mano_coord_img[:, 2] = (mano_coord_img[:, 2] / (cfg.bbox_3d_size / 2) + 1) / 2. * cfg.output_hm_shape[0]

        # check truncation
        mano_trunc = ((mano_coord_img[:, 0] >= 0) * (mano_coord_img[:, 0] < cfg.output_hm_shape[2]) * \
                      (mano_coord_img[:, 1] >= 0) * (mano_coord_img[:, 1] < cfg.output_hm_shape[1]) * \
                      (mano_coord_img[:, 2] >= 0) * (mano_coord_img[:, 2] < cfg.output_hm_shape[0])).reshape(-1, 1).astype(np.float32)

        # split mesh and joint coordinates
        mano_mesh_img = mano_coord_img[:self.vertex_num]
        mano_joint_img = mano_coord_img[self.vertex_num:]
        mano_mesh_trunc = mano_trunc[:self.vertex_num]
        mano_joint_trunc = mano_trunc[self.vertex_num:]

        # 3D data rotation augmentation
        rot_aug_mat = np.array([[np.cos(np.deg2rad(-rot)), -np.sin(np.deg2rad(-rot)), 0],
                                [np.sin(np.deg2rad(-rot)), np.cos(np.deg2rad(-rot)), 0],
                                [0, 0, 1]], dtype=np.float32)
        # parameter
        mano_pose = mano_pose.reshape(-1, 3)
        root_pose = mano_pose[self.root_joint_idx, :]
        root_pose, _ = cv2.Rodrigues(root_pose)
        root_pose, _ = cv2.Rodrigues(np.dot(rot_aug_mat, root_pose))
        mano_pose[self.root_joint_idx] = root_pose.reshape(3)
        mano_pose = mano_pose.reshape(-1)
        # mano coordinate
        mano_joint_cam = mano_joint_cam - mano_joint_cam[self.root_joint_idx, None]  # root-relative
        mano_joint_cam = np.dot(rot_aug_mat, mano_joint_cam.transpose(1, 0)).transpose(1, 0)

        orig_joint_img = np.zeros((self.joint_num, 3), dtype=np.float32)  # dummy
        orig_joint_cam = np.zeros((self.joint_num, 3), dtype=np.float32)  # dummy
        orig_joint_valid = np.zeros((self.joint_num, 1), dtype=np.float32)  # dummy
        orig_joint_trunc = np.zeros((self.joint_num, 1), dtype=np.float32)  # dummy

        inputs = {'img': img}
        targets = {'orig_joint_img': orig_joint_img, 'fit_joint_img': mano_joint_img, 'fit_mesh_img': mano_mesh_img,
                   'orig_joint_cam': orig_joint_cam, 'fit_joint_cam': mano_joint_cam,
                   'pose_param': mano_pose, 'shape_param': mano_shape}
        meta_info = {'orig_joint_valid': orig_joint_valid, 'orig_joint_trunc': orig_joint_trunc,
                     'fit_joint_trunc': mano_joint_trunc, 'fit_mesh_trunc': mano_mesh_trunc,
                     'is_valid_fit': float(True), 'is_3D': float(True)}
    else:
        inputs = {'img': img}
        targets = {}
        meta_info = {'bb2img_trans': bb2img_trans}

    return inputs, targets, meta_info
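# The same three normalizations recur in every loader here: x and y are rescaled from
# cropped-image pixels to heatmap columns/rows, and root-relative depth is mapped into
# [0, output_hm_shape[0]] using the 3D bbox size. A minimal sketch with hypothetical
# values (256x256 input, 64x64x64 heatmap, 0.3 m 3D bbox), independent of cfg:
import numpy as np

def to_heatmap_space(coord_img, input_img_shape=(256, 256),
                     output_hm_shape=(64, 64, 64), bbox_3d_size=0.3):
    """Map (x, y in cropped-image pixels, z root-relative depth) to heatmap voxel coords."""
    out = coord_img.copy()
    out[:, 0] = out[:, 0] / input_img_shape[1] * output_hm_shape[2]             # x -> heatmap width
    out[:, 1] = out[:, 1] / input_img_shape[0] * output_hm_shape[1]             # y -> heatmap height
    out[:, 2] = (out[:, 2] / (bbox_3d_size / 2) + 1) / 2. * output_hm_shape[0]  # z -> heatmap depth
    return out

coords = np.array([[128., 64., 0.0], [0., 0., -0.15]])  # root-relative z in meters
print(to_heatmap_space(coords))  # [[32. 16. 32.], [ 0.  0.  0.]]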
root_depth = 11250.5732421875  # obtain this from RootNet (https://github.com/mks0601/3DMPPE_ROOTNET_RELEASE/tree/master/demo)
root_depth /= 1000  # output of RootNet is in millimeters. change it to meters
root_img = np.array([root_xy[0], root_xy[1], root_depth])
root_cam = pixel2cam(root_img[None, :], focal, princpt)
mesh_lixel_img[:, 2] += root_depth
mesh_lixel_cam = pixel2cam(mesh_lixel_img, focal, princpt)
mesh_param_cam += root_cam.reshape(1, 3)

# visualize lixel mesh in 2D space
vis_img = original_img.copy()
vis_img = vis_mesh(vis_img, mesh_lixel_img)
cv2.imwrite('output_mesh_lixel.jpg', vis_img)

# visualize param mesh in 2D space
vis_img = original_img.copy()
mesh_param_img = cam2pixel(mesh_param_cam, focal, princpt)
vis_img = vis_mesh(vis_img, mesh_param_img)
cv2.imwrite('output_mesh_param.jpg', vis_img)

# save mesh (obj)
save_obj(mesh_lixel_cam, face, 'output_mesh_lixel.obj')
save_obj(mesh_param_cam, face, 'output_mesh_param.obj')

# render mesh from lixel
vis_img = original_img.copy()
rendered_img = render_mesh(vis_img, mesh_lixel_cam, face, {'focal': focal, 'princpt': princpt})
cv2.imwrite('rendered_mesh_lixel.jpg', rendered_img)

# render mesh from param
vis_img = original_img.copy()
rendered_img = render_mesh(vis_img, mesh_param_cam, face, {'focal': focal, 'princpt': princpt})
cv2.imwrite('rendered_mesh_param.jpg', rendered_img)
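# pixel2cam back-projects (x, y) pixel coordinates plus an absolute depth into camera
# coordinates with the standard pinhole model. A self-contained sketch of that
# back-projection (assumed behavior, not necessarily the repo's exact implementation):
import numpy as np

def pixel_to_cam(pixel_coord, focal, princpt):
    """pixel_coord: (N, 3) array of [x_px, y_px, depth]; focal: (fx, fy); princpt: (cx, cy)."""
    x = (pixel_coord[:, 0] - princpt[0]) / focal[0] * pixel_coord[:, 2]
    y = (pixel_coord[:, 1] - princpt[1]) / focal[1] * pixel_coord[:, 2]
    z = pixel_coord[:, 2]
    return np.stack((x, y, z), axis=1)

# example: a point at the principal point stays on the optical axis
print(pixel_to_cam(np.array([[320., 240., 11.25]]), focal=(1500., 1500.), princpt=(320., 240.)))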
def __getitem__(self, idx):
    data = copy.deepcopy(self.datalist[idx])
    img_path, img_shape, bbox, smpl_param, cam_param = data['img_path'], data['img_shape'], data['bbox'], data['smpl_param'], data['cam_param']

    # img
    img = load_img(img_path)
    img, img2bb_trans, bb2img_trans, rot, do_flip = augmentation(img, bbox, self.data_split)
    img = self.transform(img.astype(np.float32)) / 255.

    # muco gt
    muco_joint_img = data['joint_img']
    muco_joint_cam = data['joint_cam']
    muco_joint_cam = muco_joint_cam - muco_joint_cam[self.muco_root_joint_idx, None, :]  # root-relative
    muco_joint_valid = data['joint_valid']
    if do_flip:
        muco_joint_img[:, 0] = img_shape[1] - 1 - muco_joint_img[:, 0]
        muco_joint_cam[:, 0] = -muco_joint_cam[:, 0]
        for pair in self.muco_flip_pairs:
            muco_joint_img[pair[0], :], muco_joint_img[pair[1], :] = muco_joint_img[pair[1], :].copy(), muco_joint_img[pair[0], :].copy()
            muco_joint_cam[pair[0], :], muco_joint_cam[pair[1], :] = muco_joint_cam[pair[1], :].copy(), muco_joint_cam[pair[0], :].copy()
            muco_joint_valid[pair[0], :], muco_joint_valid[pair[1], :] = muco_joint_valid[pair[1], :].copy(), muco_joint_valid[pair[0], :].copy()

    muco_joint_img_xy1 = np.concatenate((muco_joint_img[:, :2], np.ones_like(muco_joint_img[:, :1])), 1)
    muco_joint_img[:, :2] = np.dot(img2bb_trans, muco_joint_img_xy1.transpose(1, 0)).transpose(1, 0)
    muco_joint_img[:, 0] = muco_joint_img[:, 0] / cfg.input_img_shape[1] * cfg.output_hm_shape[2]
    muco_joint_img[:, 1] = muco_joint_img[:, 1] / cfg.input_img_shape[0] * cfg.output_hm_shape[1]
    muco_joint_img[:, 2] = muco_joint_img[:, 2] - muco_joint_img[self.muco_root_joint_idx][2]  # root-relative
    muco_joint_img[:, 2] = (muco_joint_img[:, 2] / (cfg.bbox_3d_size * 1000 / 2) + 1) / 2. * cfg.output_hm_shape[0]  # change cfg.bbox_3d_size from meters to millimeters

    # check truncation
    muco_joint_trunc = muco_joint_valid * ((muco_joint_img[:, 0] >= 0) * (muco_joint_img[:, 0] < cfg.output_hm_shape[2]) * \
                                           (muco_joint_img[:, 1] >= 0) * (muco_joint_img[:, 1] < cfg.output_hm_shape[1]) * \
                                           (muco_joint_img[:, 2] >= 0) * (muco_joint_img[:, 2] < cfg.output_hm_shape[0])).reshape(-1, 1).astype(np.float32)

    # transform muco joints to target db joints
    muco_joint_img = transform_joint_to_other_db(muco_joint_img, self.muco_joints_name, self.joints_name)
    muco_joint_cam = transform_joint_to_other_db(muco_joint_cam, self.muco_joints_name, self.joints_name)
    muco_joint_valid = transform_joint_to_other_db(muco_joint_valid, self.muco_joints_name, self.joints_name)
    muco_joint_trunc = transform_joint_to_other_db(muco_joint_trunc, self.muco_joints_name, self.joints_name)

    if smpl_param is not None:
        # smpl coordinates
        smpl_mesh_cam, smpl_joint_cam, smpl_pose, smpl_shape = self.get_smpl_coord(smpl_param, cam_param, do_flip, img_shape)
        smpl_coord_cam = np.concatenate((smpl_mesh_cam, smpl_joint_cam))
        focal, princpt = cam_param['focal'], cam_param['princpt']
        smpl_coord_img = cam2pixel(smpl_coord_cam, focal, princpt)

        # affine transform x,y coordinates, root-relative depth
        smpl_coord_img_xy1 = np.concatenate((smpl_coord_img[:, :2], np.ones_like(smpl_coord_img[:, :1])), 1)
        smpl_coord_img[:, :2] = np.dot(img2bb_trans, smpl_coord_img_xy1.transpose(1, 0)).transpose(1, 0)[:, :2]
        smpl_coord_img[:, 2] = smpl_coord_img[:, 2] - smpl_coord_cam[self.vertex_num + self.root_joint_idx][2]
        smpl_coord_img[:, 0] = smpl_coord_img[:, 0] / cfg.input_img_shape[1] * cfg.output_hm_shape[2]
        smpl_coord_img[:, 1] = smpl_coord_img[:, 1] / cfg.input_img_shape[0] * cfg.output_hm_shape[1]
        smpl_coord_img[:, 2] = (smpl_coord_img[:, 2] / (cfg.bbox_3d_size * 1000 / 2) + 1) / 2. * cfg.output_hm_shape[0]  # change cfg.bbox_3d_size from meters to millimeters

        # check truncation
        smpl_trunc = ((smpl_coord_img[:, 0] >= 0) * (smpl_coord_img[:, 0] < cfg.output_hm_shape[2]) * \
                      (smpl_coord_img[:, 1] >= 0) * (smpl_coord_img[:, 1] < cfg.output_hm_shape[1]) * \
                      (smpl_coord_img[:, 2] >= 0) * (smpl_coord_img[:, 2] < cfg.output_hm_shape[0])).reshape(-1, 1).astype(np.float32)

        # split mesh and joint coordinates
        smpl_mesh_img = smpl_coord_img[:self.vertex_num]
        smpl_joint_img = smpl_coord_img[self.vertex_num:]
        smpl_mesh_trunc = smpl_trunc[:self.vertex_num]
        smpl_joint_trunc = smpl_trunc[self.vertex_num:]

        # if fitted mesh is too far from muco gt, discard it
        is_valid_fit = True
        error = self.get_fitting_error(data['joint_cam'], smpl_mesh_cam, do_flip)
        if error > self.fitting_thr:
            is_valid_fit = False
    else:
        smpl_joint_img = np.zeros((self.joint_num, 3), dtype=np.float32)  # dummy
        smpl_joint_cam = np.zeros((self.joint_num, 3), dtype=np.float32)  # dummy
        smpl_mesh_img = np.zeros((self.vertex_num, 3), dtype=np.float32)  # dummy
        smpl_pose = np.zeros((72), dtype=np.float32)  # dummy
        smpl_shape = np.zeros((10), dtype=np.float32)  # dummy
        smpl_joint_trunc = np.zeros((self.joint_num, 1), dtype=np.float32)  # dummy
        smpl_mesh_trunc = np.zeros((self.vertex_num, 1), dtype=np.float32)  # dummy
        is_valid_fit = False

    # 3D data rotation augmentation
    rot_aug_mat = np.array([[np.cos(np.deg2rad(-rot)), -np.sin(np.deg2rad(-rot)), 0],
                            [np.sin(np.deg2rad(-rot)), np.cos(np.deg2rad(-rot)), 0],
                            [0, 0, 1]], dtype=np.float32)
    # muco coordinate
    muco_joint_cam = np.dot(rot_aug_mat, muco_joint_cam.transpose(1, 0)).transpose(1, 0) / 1000  # millimeters to meters
    # parameter
    smpl_pose = smpl_pose.reshape(-1, 3)
    root_pose = smpl_pose[self.root_joint_idx, :]
    root_pose, _ = cv2.Rodrigues(root_pose)
    root_pose, _ = cv2.Rodrigues(np.dot(rot_aug_mat, root_pose))
    smpl_pose[self.root_joint_idx] = root_pose.reshape(3)
    smpl_pose = smpl_pose.reshape(-1)
    # smpl coordinate
    smpl_joint_cam = smpl_joint_cam - smpl_joint_cam[self.root_joint_idx, None]  # root-relative
    smpl_joint_cam = np.dot(rot_aug_mat, smpl_joint_cam.transpose(1, 0)).transpose(1, 0) / 1000  # millimeters to meters

    inputs = {'img': img}
    targets = {'orig_joint_img': muco_joint_img, 'fit_joint_img': smpl_joint_img, 'fit_mesh_img': smpl_mesh_img,
               'orig_joint_cam': muco_joint_cam, 'fit_joint_cam': smpl_joint_cam,
               'pose_param': smpl_pose, 'shape_param': smpl_shape}
    meta_info = {'orig_joint_valid': muco_joint_valid, 'orig_joint_trunc': muco_joint_trunc,
                 'fit_joint_trunc': smpl_joint_trunc, 'fit_mesh_trunc': smpl_mesh_trunc,
                 'is_valid_fit': float(is_valid_fit), 'is_3D': float(True)}
    return inputs, targets, meta_info
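# The root-pose update under rotation augmentation converts the global axis-angle to a
# rotation matrix, left-multiplies by the in-plane augmentation rotation, and converts
# back. A standalone sketch of just that step with hypothetical inputs, assuming cv2
# is available:
import numpy as np
import cv2

def rotate_root_pose(root_pose_aa, rot_deg):
    """Apply an in-plane (image-plane) rotation of rot_deg degrees to an axis-angle root pose."""
    rot_aug_mat = np.array(
        [[np.cos(np.deg2rad(-rot_deg)), -np.sin(np.deg2rad(-rot_deg)), 0],
         [np.sin(np.deg2rad(-rot_deg)),  np.cos(np.deg2rad(-rot_deg)), 0],
         [0, 0, 1]], dtype=np.float32)
    root_mat, _ = cv2.Rodrigues(root_pose_aa)                        # axis-angle -> 3x3 rotation
    root_pose_aa, _ = cv2.Rodrigues(np.dot(rot_aug_mat, root_mat))   # rotate, then back to axis-angle
    return root_pose_aa.reshape(3)

print(rotate_root_pose(np.array([0., 0., 0.], dtype=np.float32), rot_deg=30.))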
def __getitem__(self, idx):
    data = copy.deepcopy(self.datalist[idx])
    img_path, img_shape, bbox = data['img_path'], data['img_shape'], data['bbox']

    # image load and affine transform
    img = load_img(img_path)
    img, img2bb_trans, bb2img_trans, rot, do_flip = augmentation(img, bbox, self.data_split)
    img = self.transform(img.astype(np.float32)) / 255.

    if self.data_split == 'train':
        # coco gt
        coco_joint_img = data['joint_img']
        coco_joint_valid = data['joint_valid']
        if do_flip:
            coco_joint_img[:, 0] = img_shape[1] - 1 - coco_joint_img[:, 0]
            for pair in self.coco_flip_pairs:
                coco_joint_img[pair[0], :], coco_joint_img[pair[1], :] = coco_joint_img[pair[1], :].copy(), coco_joint_img[pair[0], :].copy()
                coco_joint_valid[pair[0], :], coco_joint_valid[pair[1], :] = coco_joint_valid[pair[1], :].copy(), coco_joint_valid[pair[0], :].copy()

        coco_joint_img_xy1 = np.concatenate((coco_joint_img[:, :2], np.ones_like(coco_joint_img[:, :1])), 1)
        coco_joint_img[:, :2] = np.dot(img2bb_trans, coco_joint_img_xy1.transpose(1, 0)).transpose(1, 0)
        coco_joint_img[:, 0] = coco_joint_img[:, 0] / cfg.input_img_shape[1] * cfg.output_hm_shape[2]
        coco_joint_img[:, 1] = coco_joint_img[:, 1] / cfg.input_img_shape[0] * cfg.output_hm_shape[1]

        # backup for calculating fitting error
        _coco_joint_img = coco_joint_img.copy()
        _coco_joint_valid = coco_joint_valid.copy()

        # check truncation
        coco_joint_trunc = coco_joint_valid * ((coco_joint_img[:, 0] >= 0) * (coco_joint_img[:, 0] < cfg.output_hm_shape[2]) * \
                                               (coco_joint_img[:, 1] >= 0) * (coco_joint_img[:, 1] < cfg.output_hm_shape[1])).reshape(-1, 1).astype(np.float32)

        # transform coco joints to target db joints
        coco_joint_img = transform_joint_to_other_db(coco_joint_img, self.coco_joints_name, self.joints_name)
        coco_joint_cam = np.zeros((self.joint_num, 3), dtype=np.float32)  # dummy
        coco_joint_valid = transform_joint_to_other_db(coco_joint_valid, self.coco_joints_name, self.joints_name)
        coco_joint_trunc = transform_joint_to_other_db(coco_joint_trunc, self.coco_joints_name, self.joints_name)

        smplify_result = data['smplify_result']
        if smplify_result is not None:
            # use fitted mesh
            smpl_param, cam_param = smplify_result['smpl_param'], smplify_result['cam_param']
            smpl_mesh_cam, smpl_joint_cam, smpl_pose, smpl_shape = self.get_smpl_coord(smpl_param, cam_param, do_flip, img_shape)
            smpl_coord_cam = np.concatenate((smpl_mesh_cam, smpl_joint_cam))
            smpl_coord_img = cam2pixel(smpl_coord_cam, cam_param['focal'], cam_param['princpt'])

            # x,y affine transform, root-relative depth
            smpl_coord_img_xy1 = np.concatenate((smpl_coord_img[:, :2], np.ones_like(smpl_coord_img[:, 0:1])), 1)
            smpl_coord_img[:, :2] = np.dot(img2bb_trans, smpl_coord_img_xy1.transpose(1, 0)).transpose(1, 0)[:, :2]
            smpl_coord_img[:, 2] = smpl_coord_img[:, 2] - smpl_coord_cam[self.vertex_num + self.root_joint_idx][2]
            smpl_coord_img[:, 0] = smpl_coord_img[:, 0] / cfg.input_img_shape[1] * cfg.output_hm_shape[2]
            smpl_coord_img[:, 1] = smpl_coord_img[:, 1] / cfg.input_img_shape[0] * cfg.output_hm_shape[1]
            smpl_coord_img[:, 2] = (smpl_coord_img[:, 2] / (cfg.bbox_3d_size / 2) + 1) / 2. * cfg.output_hm_shape[0]

            # check truncation
            smpl_trunc = ((smpl_coord_img[:, 0] >= 0) * (smpl_coord_img[:, 0] < cfg.output_hm_shape[2]) * \
                          (smpl_coord_img[:, 1] >= 0) * (smpl_coord_img[:, 1] < cfg.output_hm_shape[1]) * \
                          (smpl_coord_img[:, 2] >= 0) * (smpl_coord_img[:, 2] < cfg.output_hm_shape[0])).reshape(-1, 1).astype(np.float32)

            # split mesh and joint coordinates
            smpl_mesh_img = smpl_coord_img[:self.vertex_num]
            smpl_joint_img = smpl_coord_img[self.vertex_num:]
            smpl_mesh_trunc = smpl_trunc[:self.vertex_num]
            smpl_joint_trunc = smpl_trunc[self.vertex_num:]

            # if fitted mesh is too far from coco gt, discard it
            is_valid_fit = True
            error = self.get_fitting_error(_coco_joint_img, smpl_mesh_cam, cam_param, img2bb_trans, _coco_joint_valid)
            if error > self.fitting_thr:
                is_valid_fit = False
        else:
            smpl_joint_img = np.zeros((self.joint_num, 3), dtype=np.float32)  # dummy
            smpl_joint_cam = np.zeros((self.joint_num, 3), dtype=np.float32)  # dummy
            smpl_mesh_img = np.zeros((self.vertex_num, 3), dtype=np.float32)  # dummy
            smpl_pose = np.zeros((72), dtype=np.float32)  # dummy
            smpl_shape = np.zeros((10), dtype=np.float32)  # dummy
            smpl_joint_trunc = np.zeros((self.joint_num, 1), dtype=np.float32)
            smpl_mesh_trunc = np.zeros((self.vertex_num, 1), dtype=np.float32)
            is_valid_fit = False

        # 3D data rotation augmentation
        rot_aug_mat = np.array([[np.cos(np.deg2rad(-rot)), -np.sin(np.deg2rad(-rot)), 0],
                                [np.sin(np.deg2rad(-rot)), np.cos(np.deg2rad(-rot)), 0],
                                [0, 0, 1]], dtype=np.float32)
        # parameter
        smpl_pose = smpl_pose.reshape(-1, 3)
        root_pose = smpl_pose[self.root_joint_idx, :]
        root_pose, _ = cv2.Rodrigues(root_pose)
        root_pose, _ = cv2.Rodrigues(np.dot(rot_aug_mat, root_pose))
        smpl_pose[self.root_joint_idx] = root_pose.reshape(3)
        smpl_pose = smpl_pose.reshape(-1)
        # smpl coordinate
        smpl_joint_cam = smpl_joint_cam - smpl_joint_cam[self.root_joint_idx, None]  # root-relative
        smpl_joint_cam = np.dot(rot_aug_mat, smpl_joint_cam.transpose(1, 0)).transpose(1, 0)

        inputs = {'img': img}
        targets = {'orig_joint_img': coco_joint_img, 'fit_joint_img': smpl_joint_img, 'fit_mesh_img': smpl_mesh_img,
                   'orig_joint_cam': coco_joint_cam, 'fit_joint_cam': smpl_joint_cam,
                   'pose_param': smpl_pose, 'shape_param': smpl_shape}
        meta_info = {'orig_joint_valid': coco_joint_valid, 'orig_joint_trunc': coco_joint_trunc,
                     'fit_joint_trunc': smpl_joint_trunc, 'fit_mesh_trunc': smpl_mesh_trunc,
                     'is_valid_fit': float(is_valid_fit), 'is_3D': float(False)}
        return inputs, targets, meta_info
    else:
        inputs = {'img': img}
        targets = {}
        meta_info = {'bb2img_trans': bb2img_trans}
        return inputs, targets, meta_info
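# transform_joint_to_other_db reorders joints from the source skeleton into the target
# skeleton by matching joint names, leaving joints the source does not have at zero.
# A plausible standalone sketch of that mapping (assumed behavior, not necessarily the
# repo's exact implementation):
import numpy as np

def remap_joints(src_joints, src_names, dst_names):
    """Reorder a (J_src, C) joint array from src_names order to dst_names order; missing joints stay zero."""
    dst = np.zeros((len(dst_names),) + src_joints.shape[1:], dtype=np.float32)
    for i, name in enumerate(src_names):
        if name in dst_names:
            dst[dst_names.index(name)] = src_joints[i]
    return dst

src_names = ('Nose', 'L_Shoulder', 'R_Shoulder')
dst_names = ('R_Shoulder', 'L_Shoulder', 'Pelvis', 'Nose')
src = np.arange(9, dtype=np.float32).reshape(3, 3)
print(remap_joints(src, src_names, dst_names))  # Pelvis row stays zero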
def load_data(self):
    subject_list = self.get_subject()
    sampling_ratio = self.get_subsampling_ratio()

    # aggregate annotations from each subject
    db = COCO()
    cameras = {}
    joints = {}
    smpl_params = {}
    for subject in subject_list:
        # data load
        with open(osp.join(self.annot_path, 'Human36M_subject' + str(subject) + '_data.json'), 'r') as f:
            annot = json.load(f)
        if len(db.dataset) == 0:
            for k, v in annot.items():
                db.dataset[k] = v
        else:
            for k, v in annot.items():
                db.dataset[k] += v
        # camera load
        with open(osp.join(self.annot_path, 'Human36M_subject' + str(subject) + '_camera.json'), 'r') as f:
            cameras[str(subject)] = json.load(f)
        # joint coordinate load
        with open(osp.join(self.annot_path, 'Human36M_subject' + str(subject) + '_joint_3d.json'), 'r') as f:
            joints[str(subject)] = json.load(f)
        # smpl parameter load
        with open(osp.join(self.annot_path, 'Human36M_subject' + str(subject) + '_smpl_param.json'), 'r') as f:
            smpl_params[str(subject)] = json.load(f)
    db.createIndex()

    if self.data_split == 'test' and not cfg.use_gt_info:
        print("Get bounding box and root from " + self.human_bbox_root_dir)
        bbox_root_result = {}
        with open(self.human_bbox_root_dir) as f:
            annot = json.load(f)
        for i in range(len(annot)):
            bbox_root_result[str(annot[i]['image_id'])] = {'bbox': np.array(annot[i]['bbox']), 'root': np.array(annot[i]['root_cam'])}
    else:
        print("Get bounding box and root from groundtruth")

    datalist = []
    for aid in db.anns.keys():
        ann = db.anns[aid]
        image_id = ann['image_id']
        img = db.loadImgs(image_id)[0]
        img_path = osp.join(self.img_dir, img['file_name'])
        img_shape = (img['height'], img['width'])

        # check subject and frame_idx
        frame_idx = img['frame_idx']
        if frame_idx % sampling_ratio != 0:
            continue

        # check smpl parameter exist
        subject = img['subject']
        action_idx = img['action_idx']
        subaction_idx = img['subaction_idx']
        frame_idx = img['frame_idx']
        try:
            smpl_param = smpl_params[str(subject)][str(action_idx)][str(subaction_idx)][str(frame_idx)]
        except KeyError:
            smpl_param = None

        # camera parameter
        cam_idx = img['cam_idx']
        cam_param = cameras[str(subject)][str(cam_idx)]
        R, t, f, c = np.array(cam_param['R'], dtype=np.float32), np.array(cam_param['t'], dtype=np.float32), np.array(cam_param['f'], dtype=np.float32), np.array(cam_param['c'], dtype=np.float32)
        cam_param = {'R': R, 't': t, 'focal': f, 'princpt': c}

        # only use frontal camera following previous works (HMR and SPIN)
        if self.data_split == 'test' and str(cam_idx) != '4':
            continue

        # project world coordinates to camera and image coordinate spaces
        joint_world = np.array(joints[str(subject)][str(action_idx)][str(subaction_idx)][str(frame_idx)], dtype=np.float32)
        joint_cam = world2cam(joint_world, R, t)
        joint_img = cam2pixel(joint_cam, f, c)
        joint_valid = np.ones((self.h36m_joint_num, 1))

        if self.data_split == 'test' and not cfg.use_gt_info:
            bbox = bbox_root_result[str(image_id)]['bbox']  # bbox should be aspect-ratio preserved and extended. This is done in RootNet.
            root_joint_depth = bbox_root_result[str(image_id)]['root'][2]
        else:
            bbox = process_bbox(np.array(ann['bbox']), img['width'], img['height'])
            if bbox is None:
                continue
            root_joint_depth = joint_cam[self.h36m_root_joint_idx][2]

        datalist.append({
            'img_path': img_path,
            'img_id': image_id,
            'img_shape': img_shape,
            'bbox': bbox,
            'joint_img': joint_img,
            'joint_cam': joint_cam,
            'joint_valid': joint_valid,
            'smpl_param': smpl_param,
            'root_joint_depth': root_joint_depth,
            'cam_param': cam_param
        })

    return datalist
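# For reference, the world -> camera -> pixel chain used by world2cam and cam2pixel
# follows the standard extrinsic/intrinsic pinhole model. A self-contained sketch under
# the assumption that extrinsics are given as cam = R @ world + t (as in the Human3.6M
# camera files); the exact convention in the repo utilities may differ.
import numpy as np

def world_to_cam(world_coord, R, t):
    """world_coord: (N, 3); R: (3, 3); t: (3,). Returns camera-frame coordinates."""
    return np.dot(R, world_coord.T).T + t.reshape(1, 3)

def cam_to_pixel(cam_coord, focal, princpt):
    """Perspective projection: returns (N, 3) [x_px, y_px, depth]."""
    x = cam_coord[:, 0] / cam_coord[:, 2] * focal[0] + princpt[0]
    y = cam_coord[:, 1] / cam_coord[:, 2] * focal[1] + princpt[1]
    return np.stack((x, y, cam_coord[:, 2]), axis=1)

R, t = np.eye(3), np.array([0., 0., 3000.])  # camera 3 m in front of the world origin (mm)
joint_world = np.array([[100., -50., 0.]])
print(cam_to_pixel(world_to_cam(joint_world, R, t), focal=(1500., 1500.), princpt=(512., 512.)))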