def __getitem__(self, idx):
    """Build one sample as ``(inputs, targets, meta_info)``.

    Reads the annotation at ``self.datalist[idx]``, loads the image,
    augments image and joints together, and maps the joints to the output
    space through ``transform_input_to_output_space``.

    Args:
        idx: Index into ``self.datalist``.

    Returns:
        Tuple of three dicts: ``inputs`` (``'img'``), ``targets``
        (``'joint_coord'``, ``'rel_root_depth'``, ``'hand_type'``) and
        ``meta_info`` (validity flags, ``'inv_trans'`` and the integer
        ``'capture'`` / ``'cam'`` / ``'frame'`` identifiers).
    """
    sample = self.datalist[idx]
    img_path = sample['img_path']
    bbox = sample['bbox']
    joint = sample['joint']
    hand_type = sample['hand_type']
    hand_type_valid = sample['hand_type_valid']

    # Copy the annotation arrays so later processing cannot modify the
    # entries stored in self.datalist.
    cam_coord = joint['cam_coord'].copy()
    img_coord = joint['img_coord'].copy()
    joint_valid = joint['valid'].copy()
    hand_type = self.handtype_str2array(hand_type)

    # Image-space columns followed by the camera-space depth column.
    joint_coord = np.concatenate((img_coord, cam_coord[:, 2, None]), 1)

    # Image load.
    img = load_img(img_path)

    # Augmentation (applied jointly to image, joints and hand type).
    img, joint_coord, joint_valid, hand_type, inv_trans = augmentation(
        img, bbox, joint_coord, joint_valid, hand_type, self.mode,
        self.joint_type)

    # Depth of the left root joint relative to the right root joint.
    rel_root_depth = np.array(
        [
            joint_coord[self.root_joint_idx['left'], 2]
            - joint_coord[self.root_joint_idx['right'], 2]
        ],
        dtype=np.float32).reshape(1)

    # The relative depth is only marked valid when both hand-type entries
    # are 1 and both root joints are valid.
    if hand_type[0] * hand_type[1] == 1:
        root_valid = np.array(
            [
                joint_valid[self.root_joint_idx['right']]
                * joint_valid[self.root_joint_idx['left']]
            ],
            dtype=np.float32).reshape(1)
    else:
        root_valid = np.zeros((1), dtype=np.float32)

    # Transform to output heatmap space.
    (joint_coord, joint_valid, rel_root_depth,
     root_valid) = transform_input_to_output_space(
         joint_coord, joint_valid, rel_root_depth, root_valid,
         self.root_joint_idx, self.joint_type)

    img = self.transform(img.astype(np.float32)) / 255.

    inputs = {'img': img}
    targets = {
        'joint_coord': joint_coord,
        'rel_root_depth': rel_root_depth,
        'hand_type': hand_type,
    }
    meta_info = {
        'joint_valid': joint_valid,
        'root_valid': root_valid,
        'hand_type_valid': hand_type_valid,
        'inv_trans': inv_trans,
        'capture': int(sample['capture']),
        'cam': int(sample['cam']),
        'frame': int(sample['frame']),
    }
    return inputs, targets, meta_info
def evaluate(self, outs, cur_sample_idx):
    """Convert network outputs to camera-space meshes and joints.

    Args:
        outs: Sequence of per-sample output dicts. Each must provide
            ``'mesh_coord_img'`` and ``'bb2img_trans'`` (and
            ``'mesh_coord_cam'`` when ``cfg.stage == 'param'``).
        cur_sample_idx: Index in ``self.datalist`` of the annotation that
            belongs to ``outs[0]``.

    Returns:
        Dict with ``'joint_out'`` and ``'mesh_out'``, each a list holding
        one nested Python list per sample.

    Note:
        ``out['mesh_coord_img']`` is rewritten in place, so the caller's
        arrays are modified.
    """
    annots = self.datalist
    eval_result = {'joint_out': [], 'mesh_out': []}

    for offset, out in enumerate(outs):
        annot = annots[cur_sample_idx + offset]
        hm_shape = cfg.output_hm_shape
        in_shape = cfg.input_img_shape

        # x, y: resize to input image space, then apply the
        # bbox-to-image affine transform.
        mesh_img = out['mesh_coord_img']
        mesh_img[:, 0] = mesh_img[:, 0] / hm_shape[2] * in_shape[1]
        mesh_img[:, 1] = mesh_img[:, 1] / hm_shape[1] * in_shape[0]
        mesh_xy1 = np.concatenate(
            (mesh_img[:, :2], np.ones_like(mesh_img[:, :1])), 1)
        mesh_img[:, :2] = np.dot(
            out['bb2img_trans'],
            mesh_xy1.transpose(1, 0)).transpose(1, 0)[:, :2]

        # z: devoxelize and translate to absolute depth.
        root_joint_depth = annot['root_joint_depth']
        mesh_img[:, 2] = (mesh_img[:, 2] / hm_shape[0] * 2. - 1) * (
            cfg.bbox_3d_size / 2)
        mesh_img[:, 2] = mesh_img[:, 2] + root_joint_depth

        # Camera back-projection.
        cam_param = annot['cam_param']
        mesh_cam = pixel2cam(mesh_img, cam_param['focal'],
                             cam_param['princpt'])

        # In the 'param' stage the camera-space mesh supplied in the
        # output replaces the back-projected one.
        if cfg.stage == 'param':
            mesh_cam = out['mesh_coord_cam']
        joint_cam = np.dot(self.joint_regressor, mesh_cam)

        eval_result['mesh_out'].append(mesh_cam.tolist())
        eval_result['joint_out'].append(joint_cam.tolist())

        # Debug switch: set to True to dump an overlay image and an
        # .obj mesh per sample into the working directory.
        save_visualization = False
        if save_visualization:
            filename = annot['img_path'].split('/')[-1][:-4]
            img = load_img(annot['img_path'])[:, :, ::-1]
            img = vis_mesh(img, mesh_img, 0.5)
            cv2.imwrite(filename + '.jpg', img)
            save_obj(mesh_cam, self.mano.face, filename + '.obj')

    return eval_result
def __getitem__(self, idx):
    """Return the augmented image and the SMPL mesh for one sample.

    Args:
        idx: Index into ``self.datalist``.

    Returns:
        ``(inputs, targets, meta_info)`` where ``inputs['img']`` is the
        transformed image divided by 255,
        ``targets['fit_mesh_coord_cam']`` is the first value returned by
        ``self.get_smpl_coord`` and ``meta_info['bb2img_trans']`` is the
        third value returned by ``augmentation``.
    """
    # Deep copy so nothing downstream can modify the cached annotation.
    sample = copy.deepcopy(self.datalist[idx])
    img_path = sample['img_path']
    bbox = sample['bbox']
    smpl_param = sample['smpl_param']

    # Image: only the bbox-to-image transform is kept from augmentation.
    img = load_img(img_path)
    img, _, bb2img_trans, _, _ = augmentation(img, bbox, self.data_split)
    img = self.transform(img.astype(np.float32)) / 255.

    # SMPL coordinates; the joint output is not used here.
    smpl_mesh_cam, _ = self.get_smpl_coord(smpl_param)

    return (
        {'img': img},
        {'fit_mesh_coord_cam': smpl_mesh_cam},
        {'bb2img_trans': bb2img_trans},
    )
def save_test_files(self, num_samples=32):
    """Export the first samples as JPEG files plus two text listings.

    For each exported sample the image is written to
    ``../custom_data/img_<i>.jpg``, that path is appended to
    ``self.filenames`` and the bounding box (as a Python list) is appended
    to ``self.bbs``, one entry per line.

    Args:
        num_samples: Maximum number of samples to export. Defaults to 32,
            the previously hard-coded count. Fewer samples are exported
            when ``self.datalist`` is shorter, instead of raising
            ``IndexError``.
    """
    data = self.datalist
    print(len(data))

    # Never index past the end of the dataset.
    count = min(num_samples, len(data))
    try:
        for i in range(count):
            sample = data[i]
            img_path = sample['img_path']
            print(img_path)
            bbox = sample['bbox'].tolist()

            # Reverse the channel axis before handing the image to OpenCV
            # (presumably RGB -> BGR; confirm against load_img).
            img = load_img(img_path)[:, :, ::-1].copy()

            out_path = "../custom_data/img_{}.jpg".format(i)
            # NOTE(review): cv2.imwrite signals failure through its return
            # value, which is ignored here as in the original code.
            cv2.imwrite(out_path, img)
            self.filenames.write(out_path + "\n")
            self.bbs.write(str(bbox) + "\n")
    finally:
        # Flush whatever was written, even if an export failed midway.
        self.filenames.flush()
        self.bbs.flush()
image.assign(clip_0_1(image)) if len(losses) > 50: done = True for i in range(-51, -2): if (losses[i] - losses[i + 1]) > loss_tolerance * losses[i]: done = False losses.append(loss) return image, done if __name__ == "__main__": style_path = "../images/style.jpg" style_image, _ = load_img(style_path) images, yuvs = load_video("../videos/cat2.mp4", frame_interval) styled_images = [] losses = [] start = time.time() content_layers = ["block5_conv2"] style_layers = [ "block1_conv1", "block2_conv1", "block3_conv1", "block4_conv1", "block5_conv1",
def evaluate(self, outs, cur_sample_idx):
    """Compute per-sample MPJPE and PA-MPJPE on the H36M evaluation joints.

    Args:
        outs: Sequence of per-sample output dicts. Each must provide
            ``'mesh_coord_cam_target'``, ``'mesh_coord_img'`` and
            ``'bb2img_trans'`` (and ``'mesh_coord_cam'`` when
            ``cfg.stage == 'param'``).
        cur_sample_idx: Index in ``self.datalist`` of the annotation that
            belongs to ``outs[0]``.

    Returns:
        Dict with the lists ``'mpjpe_lixel'``, ``'pa_mpjpe_lixel'``,
        ``'mpjpe_param'`` and ``'pa_mpjpe_param'``. Errors are multiplied
        by 1000 (meter -> millimeter per the original comments). The two
        ``*_param`` lists are only filled in the ``'param'`` stage.

    Note:
        ``out['mesh_coord_img']`` is rewritten in place, so the caller's
        arrays are modified.
    """

    def regress_h36m(mesh_cam):
        """Return (root-relative evaluation joints, root joint depth)."""
        joints = np.dot(self.h36m_joint_regressor, mesh_cam)
        root_depth = joints[self.h36m_root_joint_idx, 2]
        joints = joints - joints[self.h36m_root_joint_idx, None]
        return joints[self.h36m_eval_joint, :], root_depth

    def mean_error_mm(pred, gt):
        """Mean per-joint Euclidean distance, scaled by 1000."""
        return np.sqrt(np.sum((pred - gt) ** 2, 1)).mean() * 1000

    annots = self.datalist
    eval_result = {
        'mpjpe_lixel': [],
        'pa_mpjpe_lixel': [],
        'mpjpe_param': [],
        'pa_mpjpe_param': [],
    }

    for offset, out in enumerate(outs):
        annot = annots[cur_sample_idx + offset]

        # H36M joints from the ground-truth mesh.
        gt_joints, gt_root_depth = regress_h36m(
            out['mesh_coord_cam_target'])

        # Mesh from lixel.
        # x, y: resize to input image space, then apply the
        # bbox-to-image affine transform.
        mesh_img = out['mesh_coord_img']
        mesh_img[:, 0] = (mesh_img[:, 0] / cfg.output_hm_shape[2]
                          * cfg.input_img_shape[1])
        mesh_img[:, 1] = (mesh_img[:, 1] / cfg.output_hm_shape[1]
                          * cfg.input_img_shape[0])
        mesh_xy1 = np.concatenate(
            (mesh_img[:, :2], np.ones_like(mesh_img[:, :1])), 1)
        mesh_img[:, :2] = np.dot(
            out['bb2img_trans'],
            mesh_xy1.transpose(1, 0)).transpose(1, 0)[:, :2]

        # z: devoxelize and translate to absolute depth, using the GT
        # root depth when cfg.use_gt_info is set and the annotation's
        # 'root_joint_depth' otherwise.
        if cfg.use_gt_info:
            root_joint_depth = gt_root_depth
        else:
            root_joint_depth = annot['root_joint_depth']
        mesh_img[:, 2] = (mesh_img[:, 2] / cfg.output_hm_shape[0] * 2.
                          - 1) * (cfg.bbox_3d_size / 2)
        mesh_img[:, 2] = mesh_img[:, 2] + root_joint_depth

        # Camera back-projection.
        cam_param = annot['cam_param']
        mesh_cam = pixel2cam(mesh_img, cam_param['focal'],
                             cam_param['princpt'])

        # H36M joints from the lixel mesh.
        pred_joints, _ = regress_h36m(mesh_cam)
        pred_aligned = rigid_align(pred_joints, gt_joints)
        eval_result['mpjpe_lixel'].append(
            mean_error_mm(pred_joints, gt_joints))
        eval_result['pa_mpjpe_lixel'].append(
            mean_error_mm(pred_aligned, gt_joints))

        # H36M joints from the parameter mesh.
        if cfg.stage == 'param':
            mesh_cam = out['mesh_coord_cam']
            pred_joints, _ = regress_h36m(mesh_cam)
            pred_aligned = rigid_align(pred_joints, gt_joints)
            eval_result['mpjpe_param'].append(
                mean_error_mm(pred_joints, gt_joints))
            eval_result['pa_mpjpe_param'].append(
                mean_error_mm(pred_aligned, gt_joints))

        # Debug switch: set to True to dump an overlay image and an
        # .obj mesh per sample into the working directory.
        save_visualization = False
        if save_visualization:
            path_parts = annot['img_path'].split('/')
            seq_name = path_parts[-2]
            img_name = path_parts[-1][:-4]
            filename = seq_name + '_' + img_name + '_' + str(offset)
            img = load_img(annot['img_path'])[:, :, ::-1]
            img = vis_mesh(img, mesh_img, 0.5)
            cv2.imwrite(filename + '.jpg', img)
            save_obj(mesh_cam, self.smpl.face, filename + '.obj')

    return eval_result
def __getitem__(self, idx):
    """Build one sample as ``(inputs, targets, meta_info)``.

    For ``self.data_split == 'train'`` the MANO mesh and joints are
    projected into the output heatmap volume and the rotation augmentation
    is applied to the root pose and the camera-space joints. For any other
    split only the image and ``'bb2img_trans'`` are returned.

    Args:
        idx: Index into ``self.datalist``.

    Returns:
        Tuple of three dicts ``(inputs, targets, meta_info)``.
    """
    data = copy.deepcopy(self.datalist[idx])
    # 'img_shape' and 'joint_cam' are read from the annotation (as in the
    # original code) but are not used below.
    img_path = data['img_path']
    _img_shape = data['img_shape']
    bbox = data['bbox']
    _joint_cam = data['joint_cam']
    cam_param = data['cam_param']
    mano_param = data['mano_param']

    # Image. Flip augmentation is excluded because, per the original
    # comment, FreiHAND only contains right hands.
    img = load_img(img_path)
    img, img2bb_trans, bb2img_trans, rot, _ = augmentation(
        img, bbox, self.data_split, exclude_flip=True)
    img = self.transform(img.astype(np.float32)) / 255.

    if self.data_split != 'train':
        return {'img': img}, {}, {'bb2img_trans': bb2img_trans}

    hm_shape = cfg.output_hm_shape
    in_shape = cfg.input_img_shape

    # MANO coordinates: mesh vertices first, joints appended after them.
    mano_mesh_cam, mano_joint_cam, mano_pose, mano_shape = (
        self.get_mano_coord(mano_param, cam_param))
    coord_cam = np.concatenate((mano_mesh_cam, mano_joint_cam))
    coord_img = cam2pixel(coord_cam, cam_param['focal'],
                          cam_param['princpt'])

    # Affine transform of x, y into the cropped box.
    coord_xy1 = np.concatenate(
        (coord_img[:, :2], np.ones_like(coord_img[:, :1])), 1)
    coord_img[:, :2] = np.dot(
        img2bb_trans, coord_xy1.transpose(1, 0)).transpose(1, 0)[:, :2]

    # Root-relative depth.
    root_joint_depth = coord_cam[self.vertex_num + self.root_joint_idx][2]
    coord_img[:, 2] = coord_img[:, 2] - root_joint_depth

    # Rescale all three axes to the output heatmap volume.
    coord_img[:, 0] = coord_img[:, 0] / in_shape[1] * hm_shape[2]
    coord_img[:, 1] = coord_img[:, 1] / in_shape[0] * hm_shape[1]
    coord_img[:, 2] = (coord_img[:, 2] / (cfg.bbox_3d_size / 2)
                       + 1) / 2. * hm_shape[0]

    # Check truncation: 1.0 where a point lies inside the volume.
    inside = (
        (coord_img[:, 0] >= 0) & (coord_img[:, 0] < hm_shape[2])
        & (coord_img[:, 1] >= 0) & (coord_img[:, 1] < hm_shape[1])
        & (coord_img[:, 2] >= 0) & (coord_img[:, 2] < hm_shape[0])
    )
    mano_trunc = inside.reshape(-1, 1).astype(np.float32)

    # Split mesh and joint coordinates.
    mano_mesh_img = coord_img[:self.vertex_num]
    mano_joint_img = coord_img[self.vertex_num:]
    mano_mesh_trunc = mano_trunc[:self.vertex_num]
    mano_joint_trunc = mano_trunc[self.vertex_num:]

    # 3D data rotation augmentation: rotation about z by -rot degrees.
    angle = np.deg2rad(-rot)
    cos_a, sin_a = np.cos(angle), np.sin(angle)
    rot_aug_mat = np.array(
        [[cos_a, -sin_a, 0],
         [sin_a, cos_a, 0],
         [0, 0, 1]],
        dtype=np.float32)

    # Parameter: rotate only the root pose (axis-angle -> matrix -> back).
    mano_pose = mano_pose.reshape(-1, 3)
    root_pose = mano_pose[self.root_joint_idx, :]
    root_pose, _ = cv2.Rodrigues(root_pose)
    root_pose, _ = cv2.Rodrigues(np.dot(rot_aug_mat, root_pose))
    mano_pose[self.root_joint_idx] = root_pose.reshape(3)
    mano_pose = mano_pose.reshape(-1)

    # MANO coordinate: root-relative, then rotated.
    mano_joint_cam = mano_joint_cam - mano_joint_cam[
        self.root_joint_idx, None]
    mano_joint_cam = np.dot(
        rot_aug_mat, mano_joint_cam.transpose(1, 0)).transpose(1, 0)

    # Dummy "orig" entries: zeros with validity 0.
    orig_joint_img = np.zeros((self.joint_num, 3), dtype=np.float32)
    orig_joint_cam = np.zeros((self.joint_num, 3), dtype=np.float32)
    orig_joint_valid = np.zeros((self.joint_num, 1), dtype=np.float32)
    orig_joint_trunc = np.zeros((self.joint_num, 1), dtype=np.float32)

    inputs = {'img': img}
    targets = {
        'orig_joint_img': orig_joint_img,
        'fit_joint_img': mano_joint_img,
        'fit_mesh_img': mano_mesh_img,
        'orig_joint_cam': orig_joint_cam,
        'fit_joint_cam': mano_joint_cam,
        'pose_param': mano_pose,
        'shape_param': mano_shape,
    }
    meta_info = {
        'orig_joint_valid': orig_joint_valid,
        'orig_joint_trunc': orig_joint_trunc,
        'fit_joint_trunc': mano_joint_trunc,
        'fit_mesh_trunc': mano_mesh_trunc,
        'is_valid_fit': float(True),
        'is_3D': float(True),
    }
    return inputs, targets, meta_info
def __getitem__(self, idx):
    """Build one training sample from MuCo joints and the SMPL fit.

    The MuCo joints are flipped (if augmentation flipped the image),
    mapped into the output heatmap volume and converted to the target
    joint set. When ``smpl_param`` is available the fitted SMPL mesh and
    joints go through the same mapping; otherwise zero-filled dummies are
    used and the fit is marked invalid.

    Args:
        idx: Index into ``self.datalist``.

    Returns:
        Tuple of three dicts ``(inputs, targets, meta_info)``.
    """
    data = copy.deepcopy(self.datalist[idx])
    img_path = data['img_path']
    img_shape = data['img_shape']
    bbox = data['bbox']
    smpl_param = data['smpl_param']
    cam_param = data['cam_param']

    hm_shape = cfg.output_hm_shape
    in_shape = cfg.input_img_shape

    def inside_volume(coord):
        """Return an (N, 1) float32 mask: 1 inside the heatmap volume."""
        inside = (
            (coord[:, 0] >= 0) & (coord[:, 0] < hm_shape[2])
            & (coord[:, 1] >= 0) & (coord[:, 1] < hm_shape[1])
            & (coord[:, 2] >= 0) & (coord[:, 2] < hm_shape[0])
        )
        return inside.reshape(-1, 1).astype(np.float32)

    def swap_rows(arr, first, second):
        """Exchange two rows of ``arr`` in place."""
        arr[[first, second]] = arr[[second, first]]

    # Image.
    img = load_img(img_path)
    img, img2bb_trans, _, rot, do_flip = augmentation(
        img, bbox, self.data_split)
    img = self.transform(img.astype(np.float32)) / 255.

    # MuCo ground truth.
    muco_joint_img = data['joint_img']
    muco_joint_cam = data['joint_cam']
    muco_joint_cam = muco_joint_cam - muco_joint_cam[
        self.muco_root_joint_idx, None, :]  # root-relative
    muco_joint_valid = data['joint_valid']

    if do_flip:
        # Mirror x, then exchange the paired joints.
        muco_joint_img[:, 0] = img_shape[1] - 1 - muco_joint_img[:, 0]
        muco_joint_cam[:, 0] = -muco_joint_cam[:, 0]
        for pair in self.muco_flip_pairs:
            swap_rows(muco_joint_img, pair[0], pair[1])
            swap_rows(muco_joint_cam, pair[0], pair[1])
            swap_rows(muco_joint_valid, pair[0], pair[1])

    # Affine transform of x, y, then rescale to heatmap resolution.
    muco_xy1 = np.concatenate(
        (muco_joint_img[:, :2], np.ones_like(muco_joint_img[:, :1])), 1)
    muco_joint_img[:, :2] = np.dot(
        img2bb_trans, muco_xy1.transpose(1, 0)).transpose(1, 0)
    muco_joint_img[:, 0] = muco_joint_img[:, 0] / in_shape[1] * hm_shape[2]
    muco_joint_img[:, 1] = muco_joint_img[:, 1] / in_shape[0] * hm_shape[1]

    # Root-relative depth, discretised. cfg.bbox_3d_size is converted
    # from meter to millimeter here (per the original comment).
    muco_joint_img[:, 2] = muco_joint_img[:, 2] - muco_joint_img[
        self.muco_root_joint_idx][2]
    muco_joint_img[:, 2] = (
        muco_joint_img[:, 2] / (cfg.bbox_3d_size * 1000 / 2)
        + 1) / 2. * hm_shape[0]

    # Check truncation.
    muco_joint_trunc = muco_joint_valid * inside_volume(muco_joint_img)

    # Transform MuCo joints to target db joints.
    muco_joint_img = transform_joint_to_other_db(
        muco_joint_img, self.muco_joints_name, self.joints_name)
    muco_joint_cam = transform_joint_to_other_db(
        muco_joint_cam, self.muco_joints_name, self.joints_name)
    muco_joint_valid = transform_joint_to_other_db(
        muco_joint_valid, self.muco_joints_name, self.joints_name)
    muco_joint_trunc = transform_joint_to_other_db(
        muco_joint_trunc, self.muco_joints_name, self.joints_name)

    if smpl_param is not None:
        # SMPL coordinates: mesh vertices first, joints appended.
        smpl_mesh_cam, smpl_joint_cam, smpl_pose, smpl_shape = (
            self.get_smpl_coord(smpl_param, cam_param, do_flip,
                                img_shape))
        smpl_coord_cam = np.concatenate((smpl_mesh_cam, smpl_joint_cam))
        smpl_coord_img = cam2pixel(smpl_coord_cam, cam_param['focal'],
                                   cam_param['princpt'])

        # Affine transform of x, y; root-relative depth.
        smpl_xy1 = np.concatenate(
            (smpl_coord_img[:, :2], np.ones_like(smpl_coord_img[:, :1])),
            1)
        smpl_coord_img[:, :2] = np.dot(
            img2bb_trans,
            smpl_xy1.transpose(1, 0)).transpose(1, 0)[:, :2]
        smpl_coord_img[:, 2] = smpl_coord_img[:, 2] - smpl_coord_cam[
            self.vertex_num + self.root_joint_idx][2]
        smpl_coord_img[:, 0] = (smpl_coord_img[:, 0] / in_shape[1]
                                * hm_shape[2])
        smpl_coord_img[:, 1] = (smpl_coord_img[:, 1] / in_shape[0]
                                * hm_shape[1])
        # Same meter -> millimeter conversion of cfg.bbox_3d_size.
        smpl_coord_img[:, 2] = (
            smpl_coord_img[:, 2] / (cfg.bbox_3d_size * 1000 / 2)
            + 1) / 2. * hm_shape[0]

        # Check truncation.
        smpl_trunc = inside_volume(smpl_coord_img)

        # Split mesh and joint coordinates.
        smpl_mesh_img = smpl_coord_img[:self.vertex_num]
        smpl_joint_img = smpl_coord_img[self.vertex_num:]
        smpl_mesh_trunc = smpl_trunc[:self.vertex_num]
        smpl_joint_trunc = smpl_trunc[self.vertex_num:]

        # If the fitted mesh is too far from the MuCo GT, discard it.
        error = self.get_fitting_error(data['joint_cam'], smpl_mesh_cam,
                                       do_flip)
        is_valid_fit = not (error > self.fitting_thr)
    else:
        # No fit available: zero-filled dummies, marked invalid.
        smpl_joint_img = np.zeros((self.joint_num, 3), dtype=np.float32)
        smpl_joint_cam = np.zeros((self.joint_num, 3), dtype=np.float32)
        smpl_mesh_img = np.zeros((self.vertex_num, 3), dtype=np.float32)
        smpl_pose = np.zeros((72), dtype=np.float32)
        smpl_shape = np.zeros((10), dtype=np.float32)
        smpl_joint_trunc = np.zeros((self.joint_num, 1), dtype=np.float32)
        smpl_mesh_trunc = np.zeros((self.vertex_num, 1), dtype=np.float32)
        is_valid_fit = False

    # 3D data rotation augmentation: rotation about z by -rot degrees.
    angle = np.deg2rad(-rot)
    cos_a, sin_a = np.cos(angle), np.sin(angle)
    rot_aug_mat = np.array(
        [[cos_a, -sin_a, 0],
         [sin_a, cos_a, 0],
         [0, 0, 1]],
        dtype=np.float32)

    # MuCo coordinate: rotate, then millimeter -> meter.
    muco_joint_cam = np.dot(
        rot_aug_mat, muco_joint_cam.transpose(1, 0)).transpose(1, 0) / 1000

    # Parameter: rotate only the root pose (axis-angle -> matrix -> back).
    smpl_pose = smpl_pose.reshape(-1, 3)
    root_pose = smpl_pose[self.root_joint_idx, :]
    root_pose, _ = cv2.Rodrigues(root_pose)
    root_pose, _ = cv2.Rodrigues(np.dot(rot_aug_mat, root_pose))
    smpl_pose[self.root_joint_idx] = root_pose.reshape(3)
    smpl_pose = smpl_pose.reshape(-1)

    # SMPL coordinate: root-relative, rotated, millimeter -> meter.
    smpl_joint_cam = smpl_joint_cam - smpl_joint_cam[
        self.root_joint_idx, None]
    smpl_joint_cam = np.dot(
        rot_aug_mat, smpl_joint_cam.transpose(1, 0)).transpose(1, 0) / 1000

    inputs = {'img': img}
    targets = {
        'orig_joint_img': muco_joint_img,
        'fit_joint_img': smpl_joint_img,
        'fit_mesh_img': smpl_mesh_img,
        'orig_joint_cam': muco_joint_cam,
        'fit_joint_cam': smpl_joint_cam,
        'pose_param': smpl_pose,
        'shape_param': smpl_shape,
    }
    meta_info = {
        'orig_joint_valid': muco_joint_valid,
        'orig_joint_trunc': muco_joint_trunc,
        'fit_joint_trunc': smpl_joint_trunc,
        'fit_mesh_trunc': smpl_mesh_trunc,
        'is_valid_fit': float(is_valid_fit),
        'is_3D': float(True),
    }
    return inputs, targets, meta_info
def __getitem__(self, idx):
    """Build one sample from COCO 2D joints and the SMPLify fit.

    For ``self.data_split == 'train'`` the COCO joints are flipped (if
    augmentation flipped the image), mapped to heatmap resolution and
    converted to the target joint set. When ``data['smplify_result']`` is
    available the fitted SMPL mesh and joints are mapped into the output
    heatmap volume; otherwise zero-filled dummies are used and the fit is
    marked invalid. For any other split only the image and
    ``'bb2img_trans'`` are returned.

    Args:
        idx: Index into ``self.datalist``.

    Returns:
        Tuple of three dicts ``(inputs, targets, meta_info)``.
    """
    data = copy.deepcopy(self.datalist[idx])
    img_path = data['img_path']
    img_shape = data['img_shape']
    bbox = data['bbox']

    # Image load and affine transform.
    img = load_img(img_path)
    img, img2bb_trans, bb2img_trans, rot, do_flip = augmentation(
        img, bbox, self.data_split)
    img = self.transform(img.astype(np.float32)) / 255.

    if self.data_split != 'train':
        return {'img': img}, {}, {'bb2img_trans': bb2img_trans}

    hm_shape = cfg.output_hm_shape
    in_shape = cfg.input_img_shape

    def swap_rows(arr, first, second):
        """Exchange two rows of ``arr`` in place."""
        arr[[first, second]] = arr[[second, first]]

    # COCO ground truth.
    coco_joint_img = data['joint_img']
    coco_joint_valid = data['joint_valid']
    if do_flip:
        # Mirror x, then exchange the paired joints.
        coco_joint_img[:, 0] = img_shape[1] - 1 - coco_joint_img[:, 0]
        for pair in self.coco_flip_pairs:
            swap_rows(coco_joint_img, pair[0], pair[1])
            swap_rows(coco_joint_valid, pair[0], pair[1])

    # Affine transform of x, y, then rescale to heatmap resolution.
    coco_xy1 = np.concatenate(
        (coco_joint_img[:, :2], np.ones_like(coco_joint_img[:, :1])), 1)
    coco_joint_img[:, :2] = np.dot(
        img2bb_trans, coco_xy1.transpose(1, 0)).transpose(1, 0)
    coco_joint_img[:, 0] = coco_joint_img[:, 0] / in_shape[1] * hm_shape[2]
    coco_joint_img[:, 1] = coco_joint_img[:, 1] / in_shape[0] * hm_shape[1]

    # Backup (still in the COCO joint order) for the fitting error.
    backup_joint_img = coco_joint_img.copy()
    backup_joint_valid = coco_joint_valid.copy()

    # Check truncation (x and y only).
    inside_xy = (
        (coco_joint_img[:, 0] >= 0) & (coco_joint_img[:, 0] < hm_shape[2])
        & (coco_joint_img[:, 1] >= 0) & (coco_joint_img[:, 1] < hm_shape[1])
    )
    coco_joint_trunc = coco_joint_valid * inside_xy.reshape(
        -1, 1).astype(np.float32)

    # Transform COCO joints to target db joints.
    coco_joint_img = transform_joint_to_other_db(
        coco_joint_img, self.coco_joints_name, self.joints_name)
    coco_joint_cam = np.zeros((self.joint_num, 3),
                              dtype=np.float32)  # dummy
    coco_joint_valid = transform_joint_to_other_db(
        coco_joint_valid, self.coco_joints_name, self.joints_name)
    coco_joint_trunc = transform_joint_to_other_db(
        coco_joint_trunc, self.coco_joints_name, self.joints_name)

    smplify_result = data['smplify_result']
    if smplify_result is not None:
        # Use the fitted mesh: vertices first, joints appended.
        smpl_param = smplify_result['smpl_param']
        cam_param = smplify_result['cam_param']
        smpl_mesh_cam, smpl_joint_cam, smpl_pose, smpl_shape = (
            self.get_smpl_coord(smpl_param, cam_param, do_flip,
                                img_shape))
        smpl_coord_cam = np.concatenate((smpl_mesh_cam, smpl_joint_cam))
        smpl_coord_img = cam2pixel(smpl_coord_cam, cam_param['focal'],
                                   cam_param['princpt'])

        # x, y affine transform; root-relative depth.
        smpl_xy1 = np.concatenate(
            (smpl_coord_img[:, :2], np.ones_like(smpl_coord_img[:, 0:1])),
            1)
        smpl_coord_img[:, :2] = np.dot(
            img2bb_trans,
            smpl_xy1.transpose(1, 0)).transpose(1, 0)[:, :2]
        smpl_coord_img[:, 2] = smpl_coord_img[:, 2] - smpl_coord_cam[
            self.vertex_num + self.root_joint_idx][2]
        smpl_coord_img[:, 0] = (smpl_coord_img[:, 0] / in_shape[1]
                                * hm_shape[2])
        smpl_coord_img[:, 1] = (smpl_coord_img[:, 1] / in_shape[0]
                                * hm_shape[1])
        smpl_coord_img[:, 2] = (
            smpl_coord_img[:, 2] / (cfg.bbox_3d_size / 2)
            + 1) / 2. * hm_shape[0]

        # Check truncation: 1.0 where a point lies inside the volume.
        inside = (
            (smpl_coord_img[:, 0] >= 0)
            & (smpl_coord_img[:, 0] < hm_shape[2])
            & (smpl_coord_img[:, 1] >= 0)
            & (smpl_coord_img[:, 1] < hm_shape[1])
            & (smpl_coord_img[:, 2] >= 0)
            & (smpl_coord_img[:, 2] < hm_shape[0])
        )
        smpl_trunc = inside.reshape(-1, 1).astype(np.float32)

        # Split mesh and joint coordinates.
        smpl_mesh_img = smpl_coord_img[:self.vertex_num]
        smpl_joint_img = smpl_coord_img[self.vertex_num:]
        smpl_mesh_trunc = smpl_trunc[:self.vertex_num]
        smpl_joint_trunc = smpl_trunc[self.vertex_num:]

        # If the fitted mesh is too far from the COCO joints, discard it.
        error = self.get_fitting_error(backup_joint_img, smpl_mesh_cam,
                                       cam_param, img2bb_trans,
                                       backup_joint_valid)
        is_valid_fit = not (error > self.fitting_thr)
    else:
        # No fit available: zero-filled dummies, marked invalid.
        smpl_joint_img = np.zeros((self.joint_num, 3), dtype=np.float32)
        smpl_joint_cam = np.zeros((self.joint_num, 3), dtype=np.float32)
        smpl_mesh_img = np.zeros((self.vertex_num, 3), dtype=np.float32)
        smpl_pose = np.zeros((72), dtype=np.float32)
        smpl_shape = np.zeros((10), dtype=np.float32)
        smpl_joint_trunc = np.zeros((self.joint_num, 1), dtype=np.float32)
        smpl_mesh_trunc = np.zeros((self.vertex_num, 1), dtype=np.float32)
        is_valid_fit = False

    # 3D data rotation augmentation: rotation about z by -rot degrees.
    angle = np.deg2rad(-rot)
    cos_a, sin_a = np.cos(angle), np.sin(angle)
    rot_aug_mat = np.array(
        [[cos_a, -sin_a, 0],
         [sin_a, cos_a, 0],
         [0, 0, 1]],
        dtype=np.float32)

    # Parameter: rotate only the root pose (axis-angle -> matrix -> back).
    smpl_pose = smpl_pose.reshape(-1, 3)
    root_pose = smpl_pose[self.root_joint_idx, :]
    root_pose, _ = cv2.Rodrigues(root_pose)
    root_pose, _ = cv2.Rodrigues(np.dot(rot_aug_mat, root_pose))
    smpl_pose[self.root_joint_idx] = root_pose.reshape(3)
    smpl_pose = smpl_pose.reshape(-1)

    # SMPL coordinate: root-relative, then rotated.
    smpl_joint_cam = smpl_joint_cam - smpl_joint_cam[
        self.root_joint_idx, None]
    smpl_joint_cam = np.dot(
        rot_aug_mat, smpl_joint_cam.transpose(1, 0)).transpose(1, 0)

    inputs = {'img': img}
    targets = {
        'orig_joint_img': coco_joint_img,
        'fit_joint_img': smpl_joint_img,
        'fit_mesh_img': smpl_mesh_img,
        'orig_joint_cam': coco_joint_cam,
        'fit_joint_cam': smpl_joint_cam,
        'pose_param': smpl_pose,
        'shape_param': smpl_shape,
    }
    meta_info = {
        'orig_joint_valid': coco_joint_valid,
        'orig_joint_trunc': coco_joint_trunc,
        'fit_joint_trunc': smpl_joint_trunc,
        'fit_mesh_trunc': smpl_mesh_trunc,
        'is_valid_fit': float(is_valid_fit),
        'is_3D': float(False),
    }
    return inputs, targets, meta_info
def __getitem__(self, idx):
    """Return a cropped input image plus depth-map targets for one frame.

    The input image is cropped around the joint bounding box seen from the
    frame's own camera. ``cfg.render_view_num`` cameras are then drawn at
    random from ``self.selected_cameras``; for each one the depth map is
    loaded, cropped the same way, and collected together with the crop's
    affine transform and the camera parameters.

    Args:
        idx: Index into ``self.framelist``.

    Returns:
        ``(inputs, targets, meta_info)`` with ``inputs['img']``,
        ``targets['depthmap']`` / ``targets['joint']`` and
        ``meta_info['cam_param']`` / ``meta_info['affine_trans']``. The
        last three lists have one entry per sampled camera.
    """
    frame = self.framelist[idx]
    seq_name = frame['seq_name']
    cam = frame['cam']
    frame_idx = frame['frame_idx']
    joint = frame['joint']
    joint_coord, joint_valid = joint['world_coord'], joint['valid']

    def clamped_bbox(cam_name):
        """Joint bbox (xmin, ymin, xmax, ymax) clipped to the image."""
        xmin, ymin, xmax, ymax = get_bbox(
            joint_coord, joint_valid, self.camrot[cam_name],
            self.campos[cam_name], self.focal[cam_name],
            self.princpt[cam_name])
        xmin = max(xmin, 0)
        ymin = max(ymin, 0)
        xmax = min(xmax, self.original_img_shape[1] - 1)
        ymax = min(ymax, self.original_img_shape[0] - 1)
        return np.array([xmin, ymin, xmax, ymax])

    def rescaled_xywh(box, array):
        """Scale ``box`` to ``array``'s resolution as (x, y, w, h)."""
        xmin, ymin, xmax, ymax = box
        xmin, xmax = (np.array([xmin, xmax])
                      / self.original_img_shape[1] * array.shape[1])
        ymin, ymax = (np.array([ymin, ymax])
                      / self.original_img_shape[0] * array.shape[0])
        return np.array([xmin, ymin, xmax - xmin + 1, ymax - ymin + 1])

    # Input data: bbox for the frame's own camera.
    bbox = clamped_bbox(cam)

    # Image read; the loaded image may differ in resolution from
    # self.original_img_shape, hence the rescaling of the bbox.
    img_path = osp.join(self.root_path, seq_name, 'images', 'cam' + cam,
                        'image{:04d}.png'.format(frame_idx))
    img = load_img(img_path)
    img = generate_patch_image(img, rescaled_xywh(bbox, img), False, 1.0,
                               0.0, cfg.input_img_shape)
    input_img = self.transform(img) / 255.

    target_depthmaps = []
    cam_params = []
    affine_transes = []
    for view_cam in random.sample(self.selected_cameras,
                                  cfg.render_view_num):
        # Bbox for this target view.
        bbox = clamped_bbox(view_cam)

        # Depth-map read.
        # NOTE(review): pickle.load can execute arbitrary code; this
        # assumes the depth-map files are trusted local data. Confirm.
        depthmap_path = osp.join(self.depthmap_root_path,
                                 '{:06d}'.format(frame_idx),
                                 'depthmap' + view_cam + '.pkl')
        with open(depthmap_path, 'rb') as f:
            depthmap = pickle.load(f).astype(np.float32)

        depthmap = generate_patch_image(
            depthmap[:, :, None], rescaled_xywh(bbox, depthmap), False,
            1.0, 0.0, cfg.rendered_img_shape)
        target_depthmaps.append(self.transform(depthmap))

        # Affine transform of the crop, from the un-rescaled bbox.
        xmin, ymin, xmax, ymax = bbox
        affine_transes.append(
            gen_trans_from_patch_cv(
                (xmin + xmax + 1) / 2., (ymin + ymax + 1) / 2.,
                xmax - xmin + 1, ymax - ymin + 1,
                cfg.rendered_img_shape[1], cfg.rendered_img_shape[0],
                1.0, 0.0).astype(np.float32))
        cam_params.append({
            'camrot': self.camrot[view_cam],
            'campos': self.campos[view_cam],
            'focal': self.focal[view_cam],
            'princpt': self.princpt[view_cam],
        })

    inputs = {'img': input_img}
    targets = {'depthmap': target_depthmaps, 'joint': joint}
    meta_info = {'cam_param': cam_params, 'affine_trans': affine_transes}
    return inputs, targets, meta_info