def visSMPLoutput_bboxSpace(smpl, pred_output, image=None, bUseSMPLX=False, waittime=-1, winsizeScale=4, color=None):
    """
    From the prediction output, obtain the SMPL mesh and joints.
    TODO: Currently assumes a single batch.
    Input:
        pred_output['pred_shape']
        pred_output['pred_rotmat'] or pred_output['pred_pose']
        pred_output['pred_camera']
        If waittime < 0, do not call glViewer.show()
    Example:
        visSMPLoutput(self.smpl, {"pred_rotmat": pred_rotmat, "pred_shape": pred_betas,
                      "pred_camera": pred_camera}, image=images[0])
    """
    smpl_output, smpl_output_bbox = getSMPLoutput_bboxSpace(smpl, pred_output, bUseSMPLX)

    if color is not None:
        smpl_output_bbox['body_mesh']['color'] = color
    glViewer.setMeshData([smpl_output_bbox['body_mesh']], bComputeNormal=True)
    glViewer.setSkeleton([np.reshape(smpl_output_bbox['body_joints'], (-1, 1))],
                         colorRGB=glViewer.g_colorSet['spin'])

    if image is not None:
        if type(image) == torch.Tensor:
            image = denormImg(image)
        smpl_output_bbox['img'] = image
        glViewer.setBackgroundTexture(image)
        glViewer.setWindowSize(image.shape[1] * winsizeScale, image.shape[0] * winsizeScale)
        glViewer.SetOrthoCamera(True)

    if waittime >= 0:
        glViewer.show(waittime)

    return smpl_output, smpl_output_bbox
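# A minimal usage sketch for the function above (names mirror the docstring
# example; `smpl`, `pred_rotmat`, `pred_betas`, `pred_camera`, and `images`
# are assumed to come from a model forward pass elsewhere):
#
#   smpl_output, smpl_output_bbox = visSMPLoutput_bboxSpace(
#       smpl,
#       {"pred_rotmat": pred_rotmat, "pred_shape": pred_betas, "pred_camera": pred_camera},
#       image=images[0],   # torch.Tensor inputs are denormalized internally
#       waittime=0)        # 0 blocks until a key press; a negative value skips glViewer.show()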
def _visualize_gui_naive(self, meshList, skelList=None, body_bbox_list=None, img_original=None, normal_compute=True):
    """
    Args:
        meshList: list of {'ver': pred_vertices, 'f': smpl.faces}
        skelList: list of [JointNum*3, 1] (where 1 is the number of frames in glViewer)
        body_bbox_list: list of [x, y, w, h]
    """
    if body_bbox_list is not None:
        for bbr in body_bbox_list:
            viewer2D.Vis_Bbox(img_original, bbr)
    # viewer2D.ImShow(img_original)

    glViewer.setWindowSize(img_original.shape[1], img_original.shape[0])
    # glViewer.setRenderOutputSize(inputImg.shape[1], inputImg.shape[0])
    glViewer.setBackgroundTexture(img_original)
    glViewer.SetOrthoCamera(True)
    glViewer.setMeshData(meshList, bComputeNormal=normal_compute)  # meshes = {'ver': pred_vertices, 'f': smplWrapper.f}

    if skelList is not None:
        glViewer.setSkeleton(skelList)

    if True:  # Save to file
        if True:  # Camera-view rendering
            # glViewer.setSaveFolderName(overlaidImageFolder)
            glViewer.setNearPlane(50)
            glViewer.setWindowSize(img_original.shape[1], img_original.shape[0])
            # glViewer.show_SMPL(bSaveToFile=True, bResetSaveImgCnt=False, countImg=False, mode='camera')
            glViewer.show(1)

        if False:  # Side-view rendering
            # glViewer.setSaveFolderName(sideImageFolder)
            glViewer.setNearPlane(50)
            glViewer.setWindowSize(img_original.shape[1], img_original.shape[0])
            glViewer.show_SMPL(bSaveToFile=True, bResetSaveImgCnt=False, countImg=True, zoom=1108, mode='youtube')
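# A minimal usage sketch for _visualize_gui_naive (illustrative only; it assumes
# mesh/skeleton data shaped as described in the docstring above):
#
#   meshList = [{'ver': pred_vertices, 'f': smpl.faces}]
#   skelList = [pred_joints.ravel()[:, np.newaxis]]   # [JointNum*3, 1]
#   self._visualize_gui_naive(meshList, skelList=skelList,
#                             body_bbox_list=[[x, y, w, h]],
#                             img_original=rawImg)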
def train_step(self, input_batch):
    self.model.train()

    # Get data from the batch
    images = input_batch['img']  # input image
    gt_keypoints_2d = input_batch['keypoints']  # 2D keypoints  #[N,49,3]
    gt_pose = input_batch['pose']  # SMPL pose parameters  #[N,72]
    gt_betas = input_batch['betas']  # SMPL beta parameters  #[N,10]
    gt_joints = input_batch['pose_3d']  # 3D pose  #[N,24,4]
    has_smpl = input_batch['has_smpl'].byte() == 1  # flag that indicates whether SMPL parameters are valid
    has_pose_3d = input_batch['has_pose_3d'].byte() == 1  # flag that indicates whether 3D pose is valid
    is_flipped = input_batch['is_flipped']  # flag that indicates whether the image was flipped during data augmentation
    rot_angle = input_batch['rot_angle']  # rotation angle used for data augmentation
    dataset_name = input_batch['dataset_name']  # name of the dataset the image comes from
    indices = input_batch['sample_index']  # index of the example inside its dataset
    batch_size = images.shape[0]

    # Debug: temporary scaling for h36m

    # Get GT vertices and model joints
    # Note that gt_model_joints is different from gt_joints as it comes from SMPL
    gt_out = self.smpl(betas=gt_betas, body_pose=gt_pose[:, 3:], global_orient=gt_pose[:, :3])
    gt_model_joints = gt_out.joints.detach()  # [N, 49, 3]
    gt_vertices = gt_out.vertices
    # else:
    #     gt_out = self.smpl(betas=gt_betas, body_pose=gt_pose[:, 3:-6], global_orient=gt_pose[:, :3])
    #     gt_model_joints = gt_out.joints.detach()  # [N, 49, 3]
    #     gt_vertices = gt_out.vertices

    # Get current best fits from the dictionary
    opt_pose, opt_betas, opt_validity = self.fits_dict[(dataset_name, indices.cpu(),
                                                        rot_angle.cpu(), is_flipped.cpu())]
    opt_pose = opt_pose.to(self.device)
    opt_betas = opt_betas.to(self.device)

    # if g_smplx == False:
    opt_output = self.smpl(betas=opt_betas, body_pose=opt_pose[:, 3:], global_orient=opt_pose[:, :3])
    opt_vertices = opt_output.vertices
    opt_joints = opt_output.joints.detach()
    # else:
    #     opt_output = self.smpl(betas=opt_betas, body_pose=opt_pose[:, 3:-6], global_orient=opt_pose[:, :3])
    #     opt_vertices = opt_output.vertices
    #     opt_joints = opt_output.joints.detach()

    # Assure that non-valid fits have GT values
    if len(has_smpl[opt_validity == 0]) > 0:
        assert min(has_smpl[opt_validity == 0])  # All should be True

    # De-normalize 2D keypoints from [-1,1] to pixel space
    gt_keypoints_2d_orig = gt_keypoints_2d.clone()
    gt_keypoints_2d_orig[:, :, :-1] = 0.5 * self.options.img_res * (gt_keypoints_2d_orig[:, :, :-1] + 1)

    # Estimate camera translation given the model joints and 2D keypoints
    # by minimizing a weighted least squares loss
    gt_cam_t = estimate_translation(gt_model_joints,
                                    gt_keypoints_2d_orig,
                                    focal_length=self.focal_length,
                                    img_size=self.options.img_res)
    opt_cam_t = estimate_translation(opt_joints,
                                     gt_keypoints_2d_orig,
                                     focal_length=self.focal_length,
                                     img_size=self.options.img_res)

    opt_joint_loss = self.smplify.get_fitting_loss(
        opt_pose, opt_betas, opt_cam_t,  # opt_pose: (N,72), opt_betas: (N,10), opt_cam_t: (N,3)
        0.5 * self.options.img_res * torch.ones(batch_size, 2, device=self.device),  # (N,2): (112, 112)
        gt_keypoints_2d_orig).mean(dim=-1)

    # Feed images into the network to predict camera and SMPL parameters
    pred_rotmat, pred_betas, pred_camera = self.model(images)

    # if g_smplx == False:  # Original
    pred_output = self.smpl(betas=pred_betas,
                            body_pose=pred_rotmat[:, 1:],
                            global_orient=pred_rotmat[:, 0].unsqueeze(1),
                            pose2rot=False)
    # else:
    #     pred_output = self.smpl(betas=pred_betas,
    #                             body_pose=pred_rotmat[:, 1:-2],
    #                             global_orient=pred_rotmat[:, 0].unsqueeze(1),
    #                             pose2rot=False)

    pred_vertices = pred_output.vertices
    pred_joints = pred_output.joints

    # Convert the weak-perspective camera [s, tx, ty] to a camera translation [tx, ty, tz] in 3D given the bounding box size.
    # This camera translation can be used in a full perspective projection.
    pred_cam_t = torch.stack([
        pred_camera[:, 1], pred_camera[:, 2],
        2 * self.focal_length / (self.options.img_res * pred_camera[:, 0] + 1e-9)
    ], dim=-1)

    camera_center = torch.zeros(batch_size, 2, device=self.device)
    pred_keypoints_2d = perspective_projection(
        pred_joints,
        rotation=torch.eye(3, device=self.device).unsqueeze(0).expand(batch_size, -1, -1),
        translation=pred_cam_t,
        focal_length=self.focal_length,
        camera_center=camera_center)
    # Normalize keypoints to [-1,1]
    pred_keypoints_2d = pred_keypoints_2d / (self.options.img_res / 2.)

    # Weak projection
    if self.options.bUseWeakProj:
        pred_keypoints_2d = weakProjection_gpu(pred_joints, pred_camera[:, 0], pred_camera[:, 1:])  # N, 49, 2

    bFootOriLoss = False
    if bFootOriLoss:  # Ignore hips, hip centers, and feet
        # LENGTH_THRESHOLD = 0.0089  # 1/112.0: it should be at least 5 pixels
        # Disable parts
        gt_keypoints_2d[:, 2 + 25, 2] = 0
        gt_keypoints_2d[:, 3 + 25, 2] = 0
        gt_keypoints_2d[:, 14 + 25, 2] = 0
        # Disable feet
        gt_keypoints_2d[:, 5 + 25, 2] = 0  # Left foot
        gt_keypoints_2d[:, 0 + 25, 2] = 0  # Right foot

    if self.options.run_smplify:
        # Convert predicted rotation matrices to axis-angle
        pred_rotmat_hom = torch.cat([
            pred_rotmat.detach().view(-1, 3, 3).detach(),
            torch.tensor([0, 0, 1], dtype=torch.float32,
                         device=self.device).view(1, 3, 1).expand(batch_size * 24, -1, -1)
        ], dim=-1)
        pred_pose = rotation_matrix_to_angle_axis(pred_rotmat_hom).contiguous().view(batch_size, -1)
        # tgm.rotation_matrix_to_angle_axis returns NaN for 0 rotation, so manually hack it
        pred_pose[torch.isnan(pred_pose)] = 0.0

        # Run SMPLify optimization starting from the network prediction
        new_opt_vertices, new_opt_joints, \
        new_opt_pose, new_opt_betas, \
        new_opt_cam_t, new_opt_joint_loss = self.smplify(
            pred_pose.detach(), pred_betas.detach(), pred_cam_t.detach(),
            0.5 * self.options.img_res * torch.ones(batch_size, 2, device=self.device),
            gt_keypoints_2d_orig)
        new_opt_joint_loss = new_opt_joint_loss.mean(dim=-1)

        # Will update the dictionary for the examples where the new loss is less than the current one
        update = (new_opt_joint_loss < opt_joint_loss)
        # print("new_opt_joint_loss{} vs opt_joint_loss{}".format(new_opt_joint_loss))

        if True:  # Visualize opt
            for b in range(batch_size):
                curImgVis = images[b]  # 3,224,224
                curImgVis = self.de_normalize_img(curImgVis).cpu().numpy()
                curImgVis = np.transpose(curImgVis, (1, 2, 0)) * 255.0
                curImgVis = curImgVis[:, :, [2, 1, 0]]  # Denormalize image
                curImgVis = np.ascontiguousarray(curImgVis, dtype=np.uint8)
                viewer2D.ImShow(curImgVis, name='rawIm')
                originalImg = curImgVis.copy()

                pred_camera_vis = pred_camera.detach().cpu().numpy()

                opt_vert_vis = opt_vertices[b].detach().cpu().numpy()
                opt_vert_vis *= pred_camera_vis[b, 0]
                opt_vert_vis[:, 0] += pred_camera_vis[b, 1]  # no need +1 (or 112). Rendering has this offset already
                opt_vert_vis[:, 1] += pred_camera_vis[b, 2]  # no need +1 (or 112). Rendering has this offset already
                opt_vert_vis *= 112
                opt_meshes = {'ver': opt_vert_vis, 'f': self.smpl.faces}

                gt_vert_vis = gt_vertices[b].detach().cpu().numpy()
                gt_vert_vis *= pred_camera_vis[b, 0]
                gt_vert_vis[:, 0] += pred_camera_vis[b, 1]  # no need +1 (or 112). Rendering has this offset already
                gt_vert_vis[:, 1] += pred_camera_vis[b, 2]  # no need +1 (or 112). Rendering has this offset already
                gt_vert_vis *= 112
                gt_meshes = {'ver': gt_vert_vis, 'f': self.smpl.faces}

                new_opt_output = self.smpl(betas=new_opt_betas,
                                           body_pose=new_opt_pose[:, 3:],
                                           global_orient=new_opt_pose[:, :3])
                new_opt_vertices = new_opt_output.vertices
                new_opt_joints = new_opt_output.joints
                new_opt_vert_vis = new_opt_vertices[b].detach().cpu().numpy()
                new_opt_vert_vis *= pred_camera_vis[b, 0]
                new_opt_vert_vis[:, 0] += pred_camera_vis[b, 1]  # no need +1 (or 112). Rendering has this offset already
                new_opt_vert_vis[:, 1] += pred_camera_vis[b, 2]  # no need +1 (or 112). Rendering has this offset already
                new_opt_vert_vis *= 112
                new_opt_meshes = {'ver': new_opt_vert_vis, 'f': self.smpl.faces}

                glViewer.setMeshData([new_opt_meshes, gt_meshes, new_opt_meshes], bComputeNormal=True)
                glViewer.setBackgroundTexture(originalImg)
                glViewer.setWindowSize(curImgVis.shape[1], curImgVis.shape[0])
                glViewer.SetOrthoCamera(True)
                print(has_smpl[b])
                glViewer.show()

        opt_joint_loss[update] = new_opt_joint_loss[update]
        opt_vertices[update, :] = new_opt_vertices[update, :]
        opt_joints[update, :] = new_opt_joints[update, :]
        opt_pose[update, :] = new_opt_pose[update, :]
        opt_betas[update, :] = new_opt_betas[update, :]
        opt_cam_t[update, :] = new_opt_cam_t[update, :]

        self.fits_dict[(dataset_name, indices.cpu(), rot_angle.cpu(), is_flipped.cpu(),
                        update.cpu())] = (opt_pose.cpu(), opt_betas.cpu())
    else:
        update = torch.zeros(batch_size, device=self.device).byte()

    # Replace the optimized parameters with the ground truth parameters, if available
    opt_vertices[has_smpl, :, :] = gt_vertices[has_smpl, :, :]
    opt_cam_t[has_smpl, :] = gt_cam_t[has_smpl, :]
    opt_joints[has_smpl, :, :] = gt_model_joints[has_smpl, :, :]
    opt_pose[has_smpl, :] = gt_pose[has_smpl, :]
    opt_betas[has_smpl, :] = gt_betas[has_smpl, :]

    # Assert whether a fit is valid by comparing the joint loss with the threshold
    valid_fit = (opt_joint_loss < self.options.smplify_threshold).to(self.device)
    if self.options.ablation_no_pseudoGT:
        valid_fit[:] = False  # Disable all pseudo-GT
    # Add the examples with GT parameters to the list of valid fits
    valid_fit = valid_fit | has_smpl
    # if len(valid_fit) > sum(valid_fit):
    #     print(">> Rejected fit: {}/{}".format(len(valid_fit) - sum(valid_fit), len(valid_fit)))

    opt_keypoints_2d = perspective_projection(
        opt_joints,
        rotation=torch.eye(3, device=self.device).unsqueeze(0).expand(batch_size, -1, -1),
        translation=opt_cam_t,
        focal_length=self.focal_length,
        camera_center=camera_center)
    opt_keypoints_2d = opt_keypoints_2d / (self.options.img_res / 2.)

    # Compute loss on SMPL parameters
    loss_regr_pose, loss_regr_betas = self.smpl_losses(pred_rotmat, pred_betas, opt_pose, opt_betas, valid_fit)

    # Compute 2D reprojection loss for the keypoints
    loss_keypoints = self.keypoint_loss(pred_keypoints_2d, gt_keypoints_2d,
                                        self.options.openpose_train_weight,
                                        self.options.gt_train_weight)

    # Compute 3D keypoint loss
    loss_keypoints_3d = self.keypoint_3d_loss(pred_joints, gt_joints, has_pose_3d)

    # Per-vertex loss for the shape
    loss_shape = self.shape_loss(pred_vertices, opt_vertices, valid_fit)

    # Regularization term for shape
    loss_regr_betas_noReject = torch.mean(pred_betas**2)

    # Compute total loss
    # The last component is a loss that forces the network to predict positive depth values
    if self.options.ablation_loss_2dkeyonly:  # 2D keypoints only
        loss = self.options.keypoint_loss_weight * loss_keypoints + \
               ((torch.exp(-pred_camera[:, 0] * 10)) ** 2).mean() + \
               self.options.beta_loss_weight * loss_regr_betas_noReject  # Beta regularization
    elif self.options.ablation_loss_noSMPLloss:  # 2D, no pose-parameter loss
        loss = self.options.keypoint_loss_weight * loss_keypoints + \
               self.options.keypoint_loss_weight * loss_keypoints_3d + \
               ((torch.exp(-pred_camera[:, 0] * 10)) ** 2).mean() + \
               self.options.beta_loss_weight * loss_regr_betas_noReject  # Beta regularization
    else:
        loss = self.options.shape_loss_weight * loss_shape + \
               self.options.keypoint_loss_weight * loss_keypoints + \
               self.options.keypoint_loss_weight * loss_keypoints_3d + \
               loss_regr_pose + self.options.beta_loss_weight * loss_regr_betas + \
               ((torch.exp(-pred_camera[:, 0] * 10)) ** 2).mean()

    # loss = self.options.keypoint_loss_weight * loss_keypoints  # Debug: 2D error only
    # print("DEBUG: 2donly loss")
    loss *= 60

    # Do backprop
    self.optimizer.zero_grad()
    loss.backward()
    self.optimizer.step()

    # Pack output arguments for tensorboard logging
    output = {
        'pred_vertices': pred_vertices.detach(),
        'opt_vertices': opt_vertices,
        'pred_cam_t': pred_cam_t.detach(),
        'opt_cam_t': opt_cam_t
    }
    losses = {
        'loss': loss.detach().item(),
        'loss_keypoints': loss_keypoints.detach().item(),
        'loss_keypoints_3d': loss_keypoints_3d.detach().item(),
        'loss_regr_pose': loss_regr_pose.detach().item(),
        'loss_regr_betas': loss_regr_betas.detach().item(),
        'loss_shape': loss_shape.detach().item()
    }

    if self.options.bDebug_visEFT:  # g_debugVisualize: debug-visualize input
        for b in range(batch_size):
            # Denormalize image
            curImgVis = images[b]  # 3,224,224
            curImgVis = self.de_normalize_img(curImgVis).cpu().numpy()
            curImgVis = np.transpose(curImgVis, (1, 2, 0)) * 255.0
            curImgVis = curImgVis[:, :, [2, 1, 0]]
            curImgVis = np.ascontiguousarray(curImgVis, dtype=np.uint8)
            viewer2D.ImShow(curImgVis, name='rawIm')
            originalImg = curImgVis.copy()

            # curImgVis = viewer2D.Vis_Skeleton_2D_general(gt_keypoints_2d_orig[b, :, :2].cpu().numpy(),
            #                                              gt_keypoints_2d_orig[b, :, 2], bVis=False, image=curImgVis)
            pred_keypoints_2d_vis = pred_keypoints_2d[b, :, :2].detach().cpu().numpy()
            pred_keypoints_2d_vis = 0.5 * self.options.img_res * (pred_keypoints_2d_vis + 1)  # 49: (25+24) x 3

            curImgVis = viewer2D.Vis_Skeleton_2D_general(pred_keypoints_2d_vis, bVis=False, image=curImgVis)
            viewer2D.ImShow(curImgVis, scale=2.0, waitTime=1)

            # Get predicted camera parameters
            pred_camera_vis = pred_camera.detach().cpu().numpy()

            ############### Visualize Mesh ###############
            pred_vert_vis = pred_vertices[b].detach().cpu().numpy()
            # meshVertVis = gt_vertices[b].detach().cpu().numpy()
            # meshVertVis = meshVertVis - pelvis  # centering
            pred_vert_vis *= pred_camera_vis[b, 0]
            pred_vert_vis[:, 0] += pred_camera_vis[b, 1]  # no need +1 (or 112). Rendering has this offset already
            pred_vert_vis[:, 1] += pred_camera_vis[b, 2]  # no need +1 (or 112). Rendering has this offset already
            pred_vert_vis *= 112
            pred_meshes = {'ver': pred_vert_vis, 'f': self.smpl.faces}

            opt_vert_vis = opt_vertices[b].detach().cpu().numpy()
            opt_vert_vis *= pred_camera_vis[b, 0]
            opt_vert_vis[:, 0] += pred_camera_vis[b, 1]  # no need +1 (or 112). Rendering has this offset already
            opt_vert_vis[:, 1] += pred_camera_vis[b, 2]  # no need +1 (or 112). Rendering has this offset already
            opt_vert_vis *= 112
            opt_meshes = {'ver': opt_vert_vis, 'f': self.smpl.faces}

            # glViewer.setMeshData([pred_meshes, opt_meshes], bComputeNormal=True)
            glViewer.setMeshData([pred_meshes, opt_meshes], bComputeNormal=True)
            # glViewer.setMeshData([opt_meshes], bComputeNormal=True)

            ############### Visualize Skeletons ###############
            # Vis predicted SMPL joints
            pred_joints_vis = pred_joints[b, :, :3].detach().cpu().numpy()  # [N,49,3]
            pred_joints_vis = pred_joints_vis.ravel()[:, np.newaxis]
            # Weak-perspective projection
            pred_joints_vis *= pred_camera_vis[b, 0]
            pred_joints_vis[::3] += pred_camera_vis[b, 1]
            pred_joints_vis[1::3] += pred_camera_vis[b, 2]
            pred_joints_vis *= 112  # 112 == 0.5 * 224
            glViewer.setSkeleton([pred_joints_vis])

            # GT joints
            gt_jointsVis = gt_joints[b, :, :3].cpu().numpy()  # [N,49,3]
            # gt_pelvis = (gt_smpljointsVis[25+2, :] + gt_smpljointsVis[25+3, :]) / 2
            # gt_smpljointsVis = gt_smpljointsVis - gt_pelvis
            gt_jointsVis = gt_jointsVis.ravel()[:, np.newaxis]
            gt_jointsVis *= pred_camera_vis[b, 0]
            gt_jointsVis[::3] += pred_camera_vis[b, 1]
            gt_jointsVis[1::3] += pred_camera_vis[b, 2]
            gt_jointsVis *= 112
            glViewer.addSkeleton([gt_jointsVis], jointType='spin')

            # # Vis SMPL's skeleton
            # gt_smpljointsVis = gt_model_joints[b, :, :3].cpu().numpy()  # [N,49,3]
            # # gt_pelvis = (gt_smpljointsVis[25+2, :] + gt_smpljointsVis[25+3, :]) / 2
            # # gt_smpljointsVis = gt_smpljointsVis - gt_pelvis
            # gt_smpljointsVis = gt_smpljointsVis.ravel()[:, np.newaxis]
            # gt_smpljointsVis *= pred_camera_vis[b, 0]
            # gt_smpljointsVis[::3] += pred_camera_vis[b, 1]
            # gt_smpljointsVis[1::3] += pred_camera_vis[b, 2]
            # gt_smpljointsVis *= 112
            # glViewer.addSkeleton([gt_smpljointsVis])

            # # Vis GT joints (not model (SMPL) joints!)
            # if has_pose_3d[b]:
            #     gt_jointsVis = gt_model_joints[b, :, :3].cpu().numpy()  # [N,49,3]
            #     # gt_jointsVis = gt_joints[b, :, :3].cpu().numpy()  # [N,49,3]
            #     # gt_pelvis = (gt_jointsVis[25+2, :] + gt_jointsVis[25+3, :]) / 2
            #     # gt_jointsVis = gt_jointsVis - gt_pelvis
            #     gt_jointsVis = gt_jointsVis.ravel()[:, np.newaxis]
            #     gt_jointsVis *= pred_camera_vis[b, 0]
            #     gt_jointsVis[::3] += pred_camera_vis[b, 1]
            #     gt_jointsVis[1::3] += pred_camera_vis[b, 2]
            #     gt_jointsVis *= 112
            #     glViewer.addSkeleton([gt_jointsVis])
            # # glViewer.show()

            glViewer.setBackgroundTexture(originalImg)
            glViewer.setWindowSize(curImgVis.shape[1], curImgVis.shape[0])
            glViewer.SetOrthoCamera(True)
            glViewer.show(0)
            # continue

    return output, losses
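# For reference, the weak-perspective projection used above (weakProjection_gpu)
# maps a 3D joint (X, Y, Z) to 2D as s * (X, Y) + (tx, ty): depth is dropped and a
# single predicted scale s is shared by all joints, consistent with the debug
# visualization above that scales by s and then shifts by (tx, ty). A minimal
# sketch under that assumption (the repo's implementation may differ in details):
def weak_projection_sketch(joints_3d, scale, trans2d):
    """joints_3d: [N, J, 3], scale: [N], trans2d: [N, 2] -> 2D joints [N, J, 2]."""
    joints_2d = joints_3d[:, :, :2] * scale[:, None, None]  # uniform scale; depth dropped
    return joints_2d + trans2d[:, None, :]                  # 2D translation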
def LoadAllH36mdata_wSMPL_perSeq(out_path):
    # data_dir = '/home/hjoo/data/h36m-fetch/human36m_50fps/'
    # data_dir = '/home/hjoo/data/h36m-fetch/human36m_10fps/'

    list_skel2Ds_h36m = []
    list_skel3Ds_h36m = []
    list_smplPose = []
    list_smplShape = []
    list_openpose = []
    list_imgNames = []
    list_scale = []
    list_center = []
    # list_joint2d_spin24 = []
    # list_joint3d_spin24 = []

    TRAIN_SUBJECTS = [1, 5, 6, 7, 8]
    actionList = [
        "Directions", "Discussion", "Eating", "Greeting", "Phoning", "Photo",
        "Posing", "Purchases", "Sitting", "SittingDown", "Smoking", "Waiting",
        "WalkDog", "Walking", "WalkTogether"
    ]
    subjectList = TRAIN_SUBJECTS

    for subId in subjectList:
        for action in actionList:
            gtPathList = sorted(
                glob.glob('{}/S{}/{}_*/*/gt_poses_coco_smpl.pkl'.format(h36mraw_dir, subId, action)))
            print("S{} - {}: {} files".format(subId, action, len(gtPathList)))

            for gtPath in gtPathList:
                with open(gtPath, 'rb') as f:
                    gt_data = pickle.load(f, encoding='latin1')

                # Get image list
                imgDir = os.path.dirname(gtPath)
                imgList_original = sorted(glob.glob(os.path.join(imgDir, '*.png')))
                folderLeg = len(h36mraw_dir) + 1
                imgList = [n[folderLeg:] for n in imgList_original]

                data2D_h36m = np.array(gt_data['2d'])  # List -> (N,17,2)
                data3D_h36m = np.array(gt_data['3d'])  # List -> (N,17,3)
                data3D_smplParams_pose = np.array(gt_data['smplParms']['poses_camCoord'])  # List -> (N,72)
                data3D_smplParams_shape = np.array(gt_data['smplParms']['betas'])  # (10,)
                N = data3D_smplParams_pose.shape[0]
                data3D_smplParams_shape = np.repeat(data3D_smplParams_shape[np.newaxis, :], N, axis=0)  # -> (N,10)

                # Scale the 3D skeleton (currently mm) -> meter
                data3D_h36m *= 0.001
                # Optional (centering)
                data3D_h36m = data3D_h36m - data3D_h36m[:, 0:1, :]

                scalelist = []
                centerlist = []
                bboxlist = []

                # Generate bounding boxes
                for i in range(len(data2D_h36m)):
                    min_pt = np.min(data2D_h36m[i], axis=0)
                    max_pt = np.max(data2D_h36m[i], axis=0)
                    bbox = [min_pt[0], min_pt[1], max_pt[0], max_pt[1]]
                    center = [(bbox[2] + bbox[0]) / 2, (bbox[3] + bbox[1]) / 2]
                    scale = scaleFactor * max(bbox[2] - bbox[0], bbox[3] - bbox[1]) / 200
                    bboxlist.append(bbox)
                    centerlist.append(center)
                    scalelist.append(scale)

                bDraw = True
                if bDraw:
                    rawImg = cv2.imread(imgFullPath)
                    # bbox_xyxy = conv_bboxinfo_centerscale_to_bboxXYXY(center, scale)
                    # rawImg = viewer2D.Vis_Bbox_minmaxPt(rawImg, bbox_xyxy[:2], bbox_xyxy[2:])
                    croppedImg, boxScale_o2n, bboxTopLeft = crop_bboxInfo(
                        rawImg, center, scale, (constants.IMG_RES, constants.IMG_RES))

                    # Visualize image
                    if False:
                        rawImg = viewer2D.Vis_Skeleton_2D_SPIN49(
                            data['keypoint2d'][0][:, :2],
                            pt2d_visibility=data['keypoint2d'][0][:, 2],
                            image=rawImg)
                        viewer2D.ImShow(rawImg, name='rawImg')
                        viewer2D.ImShow(croppedImg, name='croppedImg')

                    b = 0
                    ############### Visualize Mesh ###############
                    camParam_scale = pred_camera_vis[b, 0]
                    camParam_trans = pred_camera_vis[b, 1:]
                    pred_vert_vis = ours_vertices[b].copy()
                    pred_vert_vis = convert_smpl_to_bbox(pred_vert_vis, camParam_scale, camParam_trans)
                    # From cropped space to original
                    pred_vert_vis = convert_bbox_to_oriIm(pred_vert_vis, boxScale_o2n, bboxTopLeft,
                                                          rawImg.shape[1], rawImg.shape[0])

                    # Generate multi-level bboxes
                    bbox_list = multilvel_bbox_crop_gen(rawImg, pred_vert_vis, center, scale, bDebug=False)

                    if False:
                        pred_meshes = {'ver': pred_vert_vis, 'f': smpl.faces}
                        glViewer.setMeshData([pred_meshes], bComputeNormal=True)

                    ############### Visualize Skeletons ###############
                    # Vis predicted SMPL joints
                    pred_joints_vis = ours_joints_3d[b, :, :3].copy()  # (N,3)
                    pred_joints_vis = convert_smpl_to_bbox(pred_joints_vis, camParam_scale, camParam_trans)
                    pred_joints_vis = convert_bbox_to_oriIm(pred_joints_vis, boxScale_o2n, bboxTopLeft,
                                                            rawImg.shape[1], rawImg.shape[0])

                    glViewer.setBackgroundTexture(rawImg)
                    glViewer.setWindowSize(rawImg.shape[1], rawImg.shape[0])
                    glViewer.SetOrthoCamera(True)
                    glViewer.show(1)

                assert len(imgList) == len(data2D_h36m)
                assert len(imgList) == len(data3D_h36m)
                assert len(imgList) == len(data3D_smplParams_pose)
                assert len(imgList) == len(data3D_smplParams_shape)
                assert len(imgList) == len(scalelist)
                assert len(imgList) == len(centerlist)
                assert len(imgList) == len(bboxlist)

                list_skel2Ds_h36m.append(data2D_h36m)
                list_skel3Ds_h36m.append(data3D_h36m)
                list_smplPose.append(data3D_smplParams_pose)
                list_smplShape.append(data3D_smplParams_shape)
                list_imgNames += imgList
                list_scale += scalelist
                list_center += centerlist

                blankopenpose = np.zeros([N, 25, 3])
                list_openpose.append(blankopenpose)

                # Debug: 2D visualization
                if True:
                    for idx in range(data2D_h36m.shape[0]):
                        img = cv2.imread(imgList_original[idx])
                        img = viewer2D.Vis_Skeleton_2D_H36m(data2D_h36m[idx], image=img)
                        img = viewer2D.Vis_Bbox_minmaxPt(img, bboxlist[idx][:2], bboxlist[idx][2:])
                        viewer2D.ImShow(img)

                # Debug: 3D visualization of smpl_coco
                if False:
                    # data3D_coco_vis = np.reshape(data3D_coco, (data3D_coco.shape[0], -1)).transpose()  # (Dim, F)
                    # data3D_coco_vis *= 0.1  # mm to cm
                    # glViewer.setSkeleton([data3D_coco_vis], jointType='smplcoco')
                    # glViewer.show()

                    # Debug: 3D visualization, h36m
                    data3D_h36m_vis = np.reshape(data3D_h36m, (data3D_h36m.shape[0], -1)).transpose()  # (Dim, F)
                    data3D_h36m_vis *= 100  # meter to cm

                    # data3D_smpl24 = np.reshape(data3D_smpl24, (data3D_smpl24.shape[0], -1)).transpose()  # (Dim, F)
                    # data3D_smpl24 *= 0.1
                    glViewer.setSkeleton([data3D_h36m_vis], jointType='smplcoco')
                    glViewer.show()

                # break  # debug
            # break  # debug

    list_skel2Ds_h36m = np.vstack(list_skel2Ds_h36m)  # List of (N,17,2) -> (NM, 17, 2)
    list_skel3Ds_h36m = np.vstack(list_skel3Ds_h36m)  # List of (N,17,3) -> (NM, 17, 3)
    list_smplPose = np.vstack(list_smplPose)          # List of (N,72) -> (NM, 72)
    list_smplShape = np.vstack(list_smplShape)        # List of (N,10) -> (NM, 10)
    list_openpose = np.vstack(list_openpose)          # List of (N,25,3) -> (NM, 25, 3)

    assert len(list_imgNames) == list_skel2Ds_h36m.shape[0]
    assert len(list_imgNames) == list_skel3Ds_h36m.shape[0]
    assert len(list_imgNames) == list_smplPose.shape[0]
    assert len(list_imgNames) == list_smplShape.shape[0]
    assert len(list_imgNames) == list_openpose.shape[0]
    assert len(list_imgNames) == len(list_scale)
    assert len(list_imgNames) == len(list_center)

    # Convert H36M -> SPIN24
    # Convert joints to the global order
    # h36m_idx = [11, 6, 7, 8, 1, 2, 3, 12, 24, 14, 15, 17, 18, 19, 25, 26, 27]
    h36m_idx = [0, 4, 5, 6, 1, 2, 3, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]
    global_idx = [14, 3, 4, 5, 2, 1, 0, 16, 12, 17, 18, 9, 10, 11, 8, 7, 6]

    sampleNum = len(list_imgNames)
    joint2d_spin24 = np.zeros((sampleNum, 24, 3))
    joint2d_spin24[:, global_idx, :2] = list_skel2Ds_h36m[:, h36m_idx, :]
    joint2d_spin24[:, global_idx, 2] = 1

    joint3d_spin24 = np.zeros((sampleNum, 24, 4))
    joint3d_spin24[:, global_idx, :3] = list_skel3Ds_h36m[:, h36m_idx, :]
    joint3d_spin24[:, global_idx, 3] = 1

    list_has_smpl = np.ones((sampleNum,), dtype=np.uint8)

    # Store the data struct
    if not os.path.isdir(out_path):
        os.makedirs(out_path)
    out_file = os.path.join(out_path, 'h36m_training_fair_meter.npz')
    print("output: {}".format(out_file))
    np.savez(out_file,
             imgname=list_imgNames,
             center=list_center,
             scale=list_scale,
             part=joint2d_spin24,
             pose=list_smplPose,
             shape=list_smplShape,
             has_smpl=list_has_smpl,
             S=joint3d_spin24,
             openpose=list_openpose)
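# A quick sanity-check sketch for the npz written above (field names follow the
# np.savez call; the path is illustrative):
def check_h36m_npz_sketch(npz_path='h36m_training_fair_meter.npz'):
    data = np.load(npz_path, allow_pickle=True)
    print(data['part'].shape)   # (sampleNum, 24, 3): 2D joints in SPIN-24 order + visibility flag
    print(data['S'].shape)      # (sampleNum, 24, 4): 3D joints in meters + validity flag
    print(data['pose'].shape)   # (sampleNum, 72): SMPL pose parameters
    print(data['shape'].shape)  # (sampleNum, 10): SMPL betas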
def visEFT_singleSubject(inputDir, imDir, smplModelDir, bUseSMPLX):
    if bUseSMPLX:
        smpl = SMPLX(smplModelDir, batch_size=1, create_transl=False)
    else:
        smpl = SMPL(smplModelDir, batch_size=1, create_transl=False)

    fileList = listdir(inputDir)  # Check all fitting files
    print(">> Found {} files in the fitting folder {}".format(len(fileList), inputDir))
    totalCnt = 0
    erroneousCnt = 0

    # fileList = ['00_00_00008422_0.pkl', '00_00_00008422_1731.pkl', '00_00_00008422_3462.pkl']  # debug

    for f in sorted(fileList):
        # Load
        fileFullPath = join(inputDir, f)
        with open(fileFullPath, 'rb') as f:
            dataDict = pickle.load(f)
        print(f"Loaded: {fileFullPath}")

        if 'imageName' in dataDict.keys():
            # This pkl has only one instance. Wrap it to handle panoptic output, where a pkl has multiple instances
            dataDict = {0: dataDict}

        for jj, k in enumerate(dataDict):
            if jj % 50 != 0:
                continue
            data = dataDict[k]
            # print(data['subjectId'])
            # continue

            if 'smpltype' in data:
                if (data['smpltype'] == 'smpl' and bUseSMPLX) or (data['smpltype'] == 'smplx' and bUseSMPLX == False):
                    print("SMPL type mismatch error")
                    assert False

            imgFullPathOri = data['imageName'][0]
            imgFullPath = os.path.join(imDir, os.path.basename(imgFullPathOri))

            data['subjectId'] = 0  # TODO: debug
            fileName = "{}_{}".format(data['subjectId'], os.path.basename(imgFullPathOri)[:-4])
            if args.bRenderToFiles and os.path.exists(os.path.join(render_dirName, fileName + ".jpg")):
                continue

            if True:  # Additional path checking, if not valid
                if os.path.exists(imgFullPath) == False:
                    imgFullPath = getpath_level(imDir, imgFullPathOri, 1)
                if os.path.exists(imgFullPath) == False:
                    imgFullPath = getpath_level(imDir, imgFullPathOri, 2)
                if os.path.exists(imgFullPath) == False:
                    imgFullPath = getpath_level(imDir, imgFullPathOri, 3)

            scale = data['scale'][0]
            center = data['center'][0]
            # print(data['annotId'])

            ours_betas = torch.from_numpy(data['pred_shape'])
            ours_pose_rotmat = torch.from_numpy(data['pred_pose_rotmat'])
            # spin_betas = torch.from_numpy(data['opt_beta'])

            # Compute 2D reprojection error
            # if not (data['loss_keypoints_2d'] < 0.0001 or data['loss_keypoints_2d'] > 0.001):
            #     continue
            maxBeta = abs(torch.max(abs(ours_betas)).item())
            if data['loss_keypoints_2d'] > 0.0005 or maxBeta > 3:
                erroneousCnt += 1
            print(">>> loss2d: {}, maxBeta: {}".format(data['loss_keypoints_2d'], maxBeta))

            # spin_pose = torch.from_numpy(data['opt_pose'])
            pred_camera_vis = data['pred_camera']

            if os.path.exists(imgFullPath) == False:
                print(imgFullPath)
            assert os.path.exists(imgFullPath)
            rawImg = cv2.imread(imgFullPath)
            print(imgFullPath)

            croppedImg, boxScale_o2n, bboxTopLeft = crop_bboxInfo(rawImg, center, scale,
                                                                  (BBOX_IMG_RES, BBOX_IMG_RES))

            # Visualize 2D image
            if args.bRenderToFiles == False:
                viewer2D.ImShow(rawImg, name='rawImg', waitTime=10)  # You should press any key
                viewer2D.ImShow(croppedImg, name='croppedImg', waitTime=10)

            ours_output = smpl(betas=ours_betas,
                               body_pose=ours_pose_rotmat[:, 1:],
                               global_orient=ours_pose_rotmat[:, 0].unsqueeze(1),
                               pose2rot=False)
            ours_vertices = ours_output.vertices.detach().cpu().numpy()
            ours_joints_3d = ours_output.joints.detach().cpu().numpy()

            # Visualize 3D mesh and 3D skeleton in bbox space
            if True:
                b = 0
                camParam_scale = pred_camera_vis[b, 0]
                camParam_trans = pred_camera_vis[b, 1:]

                ############### Visualize Mesh ###############
                pred_vert_vis = ours_vertices[b].copy()
                pred_vert_vis = convert_smpl_to_bbox(pred_vert_vis, camParam_scale, camParam_trans)
                pred_meshes = {'ver': pred_vert_vis, 'f': smpl.faces}
                glViewer.setMeshData([pred_meshes], bComputeNormal=True)

                ############### Visualize Skeletons ###############
                # Vis predicted SMPL joints
                pred_joints_vis = ours_joints_3d[b, :, :3].copy()  # (N,3)
                pred_joints_vis = convert_smpl_to_bbox(pred_joints_vis, camParam_scale, camParam_trans)
                glViewer.setSkeleton([pred_joints_vis.ravel()[:, np.newaxis]])

                ############### Other 3D setup ###############
                glViewer.setBackgroundTexture(croppedImg)
                glViewer.setWindowSize(croppedImg.shape[1] * args.windowscale,
                                       croppedImg.shape[0] * args.windowscale)
                glViewer.SetOrthoCamera(True)
                print("Press 'q' in the 3D window to go to the next sample")
                glViewer.show(0)

            # Visualize 3D mesh and 3D skeleton in the original image space
            if True:
                b = 0
                camParam_scale = pred_camera_vis[b, 0]
                camParam_trans = pred_camera_vis[b, 1:]

                ############### Visualize Mesh ###############
                pred_vert_vis = ours_vertices[b].copy()
                pred_vert_vis = convert_smpl_to_bbox(pred_vert_vis, camParam_scale, camParam_trans)
                # From cropped space to original
                pred_vert_vis = convert_bbox_to_oriIm(pred_vert_vis, boxScale_o2n, bboxTopLeft,
                                                      rawImg.shape[1], rawImg.shape[0])
                pred_meshes = {'ver': pred_vert_vis, 'f': smpl.faces}
                glViewer.setMeshData([pred_meshes], bComputeNormal=True)

                ############### Visualize Skeletons ###############
                # Vis predicted SMPL joints
                pred_joints_vis = ours_joints_3d[b, :, :3].copy()  # (N,3)
                pred_joints_vis = convert_smpl_to_bbox(pred_joints_vis, camParam_scale, camParam_trans)
                pred_joints_vis = convert_bbox_to_oriIm(pred_joints_vis, boxScale_o2n, bboxTopLeft,
                                                        rawImg.shape[1], rawImg.shape[0])
                glViewer.setSkeleton([pred_joints_vis.ravel()[:, np.newaxis]])

                glViewer.setBackgroundTexture(rawImg)
                glViewer.setWindowSize(rawImg.shape[1] * args.magnifyFactor, rawImg.shape[0] * args.magnifyFactor)
                glViewer.SetOrthoCamera(True)
                print("Press 'q' in the 3D window to go to the next sample")

                if args.bRenderToFiles:  # Export rendered files
                    if os.path.exists(render_dirName) == False:  # Make an output folder if necessary
                        os.mkdir(render_dirName)
                    # subjId = data['subjectId'][22:24]
                    fileName = "{}_{}".format(data['subjectId'], os.path.basename(imgFullPathOri)[:-4])
                    # rawImg = cv2.putText(rawImg, data['subjectId'], (100,100), cv2.FONT_HERSHEY_PLAIN, 2, (255,255,0), 2)
                    glViewer.render_on_image(render_dirName, fileName, rawImg)
                    print(f"Render to {fileName}")
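# For orientation: the bbox-space conversion used above follows the same
# weak-perspective mapping written inline in the trainer's debug visualization
# (scale by camera s, shift by (tx, ty), then scale by half the crop resolution).
# A minimal sketch under that assumption (the actual convert_smpl_to_bbox helper
# may differ in details):
def convert_smpl_to_bbox_sketch(points, cam_scale, cam_trans, half_res=112):
    """points: (V, 3) SMPL-space vertices or joints -> bbox-space coordinates."""
    points = points * cam_scale       # weak-perspective scale (copy, not in-place)
    points[:, 0] += cam_trans[0]      # x translation
    points[:, 1] += cam_trans[1]      # y translation
    return points * half_res          # to bbox pixels (112 == 0.5 * 224)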
def visEFT_multiSubjects(inputDir, imDir, smplModelDir, bUseSMPLX=False):
    if bUseSMPLX:
        smpl = SMPLX(smplModelDir, batch_size=1, create_transl=False)
    else:
        smpl = SMPL(smplModelDir, batch_size=1, create_transl=False)

    fileList = listdir(inputDir)  # Check all fitting files
    print(">> Found {} files in the fitting folder {}".format(len(fileList), inputDir))
    totalCnt = 0
    erroneousCnt = 0

    # Merge samples from the same image
    data_perimage = {}
    for f in sorted(fileList):
        if "_init" in f:
            continue
        # Load
        imageName = f[:f.rfind('_')]
        if imageName not in data_perimage.keys():
            data_perimage[imageName] = []
        data_perimage[imageName].append(f)

    for imgName in data_perimage:
        eftFileNames = data_perimage[imgName]

        meshData = []
        skelData = []
        for f in eftFileNames:
            fileFullPath = join(inputDir, f)
            with open(fileFullPath, 'rb') as f:
                data = pickle.load(f)

            imgFullPathOri = data['imageName'][0]
            imgFullPath = os.path.join(imDir, os.path.basename(imgFullPathOri))

            if True:  # Additional path checking, if not valid
                if os.path.exists(imgFullPath) == False:
                    imgFullPath = getpath_level(imDir, imgFullPathOri, 1)
                if os.path.exists(imgFullPath) == False:
                    imgFullPath = getpath_level(imDir, imgFullPathOri, 2)
                if os.path.exists(imgFullPath) == False:
                    imgFullPath = getpath_level(imDir, imgFullPathOri, 3)

            scale = data['scale'][0]
            center = data['center'][0]

            ours_betas = torch.from_numpy(data['pred_shape'])
            ours_pose_rotmat = torch.from_numpy(data['pred_pose_rotmat'])
            # spin_betas = torch.from_numpy(data['opt_beta'])

            # Compute 2D reprojection error
            # if not (data['loss_keypoints_2d'] < 0.0001 or data['loss_keypoints_2d'] > 0.001):
            #     continue
            maxBeta = abs(torch.max(abs(ours_betas)).item())
            if data['loss_keypoints_2d'] > 0.0005 or maxBeta > 3:
                erroneousCnt += 1
            print(">>> loss2d: {}, maxBeta: {}".format(data['loss_keypoints_2d'], maxBeta))

            # spin_pose = torch.from_numpy(data['opt_pose'])
            pred_camera_vis = data['pred_camera']

            assert os.path.exists(imgFullPath)
            rawImg = cv2.imread(imgFullPath)
            print(imgFullPath)

            croppedImg, boxScale_o2n, bboxTopLeft = crop_bboxInfo(rawImg, center, scale,
                                                                  (constants.IMG_RES, constants.IMG_RES))

            # Visualize 2D image
            if args.bRenderToFiles == False:
                viewer2D.ImShow(rawImg, name='rawImg', waitTime=10)  # You should press any key
                viewer2D.ImShow(croppedImg, name='croppedImg', waitTime=10)

            if bUseSMPLX:
                ours_output = smpl(betas=ours_betas,
                                   body_pose=ours_pose_rotmat[:, 1:-2],
                                   global_orient=ours_pose_rotmat[:, 0].unsqueeze(1),
                                   pose2rot=False)
                # ours_output = smpl()  # Default test
            else:
                ours_output = smpl(betas=ours_betas,
                                   body_pose=ours_pose_rotmat[:, 1:],
                                   global_orient=ours_pose_rotmat[:, 0].unsqueeze(1),
                                   pose2rot=False)
                # ours_output = smpl()  # Default test

            ours_vertices = ours_output.vertices.detach().cpu().numpy()
            ours_joints_3d = ours_output.joints.detach().cpu().numpy()

            if False:  # Debugging
                # ours_vertices = ours_vertices - ours_joints_3d[0, 12, :]
                save_mesh_obj(ours_vertices[0], smpl.faces, 'test.obj')

            # Visualize 3D mesh and 3D skeleton in the original image space
            if True:
                b = 0
                camParam_scale = pred_camera_vis[b, 0]
                camParam_trans = pred_camera_vis[b, 1:]

                ############### Visualize Mesh ###############
                pred_vert_vis = ours_vertices[b].copy()
                pred_vert_vis = convert_smpl_to_bbox(pred_vert_vis, camParam_scale, camParam_trans)
                # From cropped space to original
                pred_vert_vis = convert_bbox_to_oriIm(pred_vert_vis, boxScale_o2n, bboxTopLeft,
                                                      rawImg.shape[1], rawImg.shape[0])
                pred_meshes = {'ver': pred_vert_vis, 'f': smpl.faces}
                # glViewer.setMeshData([pred_meshes], bComputeNormal=True)

                ############### Visualize Skeletons ###############
                # Vis predicted SMPL joints
                # pred_joints_vis = ours_joints_3d[b, -9:, :3].copy()  # (N,3)  # Debugging
                pred_joints_vis = ours_joints_3d[b, :, :3].copy()  # (N,3)
                pred_joints_vis = convert_smpl_to_bbox(pred_joints_vis, camParam_scale, camParam_trans)
                pred_joints_vis = convert_bbox_to_oriIm(pred_joints_vis, boxScale_o2n, bboxTopLeft,
                                                        rawImg.shape[1], rawImg.shape[0])

                meshData.append(pred_meshes)
                skelData.append(pred_joints_vis.ravel()[:, np.newaxis])
                # glViewer.setSkeleton([pred_joints_vis.ravel()[:, np.newaxis]])

                glViewer.setBackgroundTexture(rawImg)
                glViewer.setWindowSize(rawImg.shape[1] * args.magnifyFactor, rawImg.shape[0] * args.magnifyFactor)
                glViewer.SetOrthoCamera(True)
                # print("Press 'q' in the 3D window to go to the next sample")
                # glViewer.show(0)

        glViewer.setSkeleton(skelData)
        glViewer.setMeshData(meshData, bComputeNormal=True)

        if args.bRenderToFiles:  # Export rendered files
            if os.path.exists(render_dirName) == False:  # Make an output folder if necessary
                os.mkdir(render_dirName)
            fileName = imgFullPathOri[:-4].replace("/", "_")
            glViewer.render_on_image(render_dirName, fileName, rawImg)
            print(f"render to {fileName}")

        glViewer.show(args.displaytime)
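# The per-image grouping in visEFT_multiSubjects assumes EFT output files are
# named "<imageKey>_<suffix>.pkl", so the image key is everything before the last
# underscore. A minimal sketch of that assumed convention:
def image_key_sketch(pkl_filename):
    """e.g. '00_00_00008422_1731.pkl' -> '00_00_00008422' (assumed naming)."""
    return pkl_filename[:pkl_filename.rfind('_')]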
def run_evaluation(model, dataset_name, dataset, result_file,
                   batch_size=32, img_res=224,
                   num_workers=32, shuffle=False, log_freq=50, bVerbose=True):
    """Run evaluation on the datasets and metrics we report in the paper."""

    print(dataset_name)
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

    # # Transfer model to the GPU
    # model.to(device)

    # Load SMPL model
    global g_smpl_neutral, g_smpl_male, g_smpl_female
    if g_smpl_neutral is None:
        g_smpl_neutral = SMPL(config.SMPL_MODEL_DIR, create_transl=False).to(device)
        # g_smpl_neutral = SMPLX(config.SMPL_MODEL_DIR, create_transl=False).to(device)
        g_smpl_male = SMPL(config.SMPL_MODEL_DIR, gender='male', create_transl=False).to(device)
        g_smpl_female = SMPL(config.SMPL_MODEL_DIR, gender='female', create_transl=False).to(device)

        smpl_neutral = g_smpl_neutral
        smpl_male = g_smpl_male
        smpl_female = g_smpl_female
    else:
        smpl_neutral = g_smpl_neutral
        smpl_male = g_smpl_male
        smpl_female = g_smpl_female

    # renderer = PartRenderer()

    # Regressor for H36M joints
    J_regressor = torch.from_numpy(np.load(config.JOINT_REGRESSOR_H36M)).float()

    save_results = result_file is not None
    # Disable shuffling if you want to save the results
    if save_results:
        shuffle = False

    # Create dataloader for the dataset
    data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle, num_workers=num_workers)

    # Pose metrics
    # MPJPE and reconstruction error for the non-parametric and parametric shapes
    # mpjpe = np.zeros(len(dataset))
    # recon_err = np.zeros(len(dataset))
    quant_mpjpe = {}      # np.zeros(len(dataset))
    quant_recon_err = {}  # np.zeros(len(dataset))

    mpjpe = np.zeros(len(dataset))
    recon_err = np.zeros(len(dataset))
    mpjpe_smpl = np.zeros(len(dataset))
    recon_err_smpl = np.zeros(len(dataset))

    # Shape metrics
    # Mean per-vertex error
    shape_err = np.zeros(len(dataset))
    shape_err_smpl = np.zeros(len(dataset))

    # Mask and part metrics
    # Accuracy
    accuracy = 0.
    parts_accuracy = 0.

    # True positives, false positives and false negatives
    tp = np.zeros((2, 1))
    fp = np.zeros((2, 1))
    fn = np.zeros((2, 1))
    parts_tp = np.zeros((7, 1))
    parts_fp = np.zeros((7, 1))
    parts_fn = np.zeros((7, 1))

    # Pixel count accumulators
    pixel_count = 0
    parts_pixel_count = 0

    # Store SMPL parameters
    smpl_pose = np.zeros((len(dataset), 72))
    smpl_betas = np.zeros((len(dataset), 10))
    smpl_camera = np.zeros((len(dataset), 3))
    pred_joints = np.zeros((len(dataset), 17, 3))

    eval_pose = False
    eval_masks = False
    eval_parts = False

    # Choose the appropriate evaluation for each dataset
    if dataset_name == 'h36m-p1' or dataset_name == 'h36m-p2' or dataset_name == 'lspet-test' \
            or dataset_name == '3dpw' or dataset_name == 'coco2014-val-3d-amt' or dataset_name == 'ochuman-test' \
            or dataset_name == '3dpw-vibe' or dataset_name == '3dpw-crop' or dataset_name == '3dpw-headcrop' \
            or dataset_name == 'mpi-inf-3dhp-test':
        eval_pose = True
    elif dataset_name == 'lsp':
        eval_masks = True
        eval_parts = True
        annot_path = config.DATASET_FOLDERS['upi-s1h']

    joint_mapper_h36m = constants.H36M_TO_J17 if dataset_name == 'mpi-inf-3dhp-test' else constants.H36M_TO_J14
    joint_mapper_gt = constants.J24_TO_J17 if dataset_name == 'mpi-inf-3dhp-test' else constants.J24_TO_J14

    # Iterate over the entire dataset
    for step, batch in enumerate(tqdm(data_loader, desc='Eval', total=len(data_loader))):
        # Get ground truth annotations from the batch
        imgName = batch['imgname'][0]
        seqName = os.path.basename(os.path.dirname(imgName))
        gt_pose = batch['pose'].to(device)
        gt_betas = batch['betas'].to(device)
        gt_vertices = smpl_neutral(betas=gt_betas, body_pose=gt_pose[:, 3:],
                                   global_orient=gt_pose[:, :3]).vertices
        images = batch['img'].to(device)
        gender = batch['gender'].to(device)
        curr_batch_size = images.shape[0]

        # gt_bbox_scale = batch['scale'].cpu().numpy()
        # gt_bbox_center = batch['center'].cpu().numpy()

        with torch.no_grad():
            pred_rotmat, pred_betas, pred_camera = model(images)
            pred_output = smpl_neutral(betas=pred_betas,
                                       body_pose=pred_rotmat[:, 1:],
                                       global_orient=pred_rotmat[:, 0].unsqueeze(1),
                                       pose2rot=False)
            pred_vertices = pred_output.vertices

        if save_results:
            rot_pad = torch.tensor([0, 0, 1], dtype=torch.float32, device=device).view(1, 3, 1)
            rotmat = torch.cat((pred_rotmat.view(-1, 3, 3), rot_pad.expand(curr_batch_size * 24, -1, -1)), dim=-1)
            pred_pose = tgm.rotation_matrix_to_angle_axis(rotmat).contiguous().view(-1, 72)
            smpl_pose[step * batch_size:step * batch_size + curr_batch_size, :] = pred_pose.cpu().numpy()
            smpl_betas[step * batch_size:step * batch_size + curr_batch_size, :] = pred_betas.cpu().numpy()
            smpl_camera[step * batch_size:step * batch_size + curr_batch_size, :] = pred_camera.cpu().numpy()

        # 3D pose evaluation
        if eval_pose:
            # Regressor broadcasting
            J_regressor_batch = J_regressor[None, :].expand(pred_vertices.shape[0], -1, -1).to(device)

            # Get 14 ground truth joints
            if 'h36m' in dataset_name or 'mpi-inf' in dataset_name:
                gt_keypoints_3d = batch['pose_3d'].cuda()
                gt_keypoints_3d = gt_keypoints_3d[:, joint_mapper_gt, :-1]
            # For 3DPW get the 14 common joints from the rendered shape
            else:
                gt_vertices = smpl_male(global_orient=gt_pose[:, :3], body_pose=gt_pose[:, 3:],
                                        betas=gt_betas).vertices
                gt_vertices_female = smpl_female(global_orient=gt_pose[:, :3], body_pose=gt_pose[:, 3:],
                                                 betas=gt_betas).vertices

                if seqName == 'val2014':
                    gt_vertices_neutral = smpl_neutral(global_orient=gt_pose[:, :3], body_pose=gt_pose[:, 3:],
                                                       betas=gt_betas).vertices
                    gt_vertices = gt_vertices_neutral
                else:
                    gt_vertices[gender == 1, :, :] = gt_vertices_female[gender == 1, :, :]

                gt_keypoints_3d = torch.matmul(J_regressor_batch, gt_vertices)
                gt_pelvis = gt_keypoints_3d[:, [0], :].clone()
                gt_keypoints_3d = gt_keypoints_3d[:, joint_mapper_h36m, :]
                gt_keypoints_3d = gt_keypoints_3d - gt_pelvis

            # Get 14 predicted joints from the mesh
            pred_keypoints_3d = torch.matmul(J_regressor_batch, pred_vertices)
            if save_results:
                pred_joints[step * batch_size:step * batch_size + curr_batch_size, :, :] = \
                    pred_keypoints_3d.cpu().numpy()
            pred_pelvis = pred_keypoints_3d[:, [0], :].clone()
            pred_keypoints_3d = pred_keypoints_3d[:, joint_mapper_h36m, :]
            pred_keypoints_3d = pred_keypoints_3d - pred_pelvis

            # Absolute error (MPJPE)
            error = torch.sqrt(((pred_keypoints_3d - gt_keypoints_3d) ** 2).sum(dim=-1)).mean(dim=-1).cpu().numpy()
            error_upper = torch.sqrt(((pred_keypoints_3d - gt_keypoints_3d) ** 2).sum(dim=-1)).mean(dim=-1).cpu().numpy()
            # mpjpe[step * batch_size:step * batch_size + curr_batch_size] = error

            # Reconstruction error (PA-MPJPE)
            r_error = reconstruction_error(pred_keypoints_3d.cpu().numpy(),
                                           gt_keypoints_3d.cpu().numpy(), reduction=None)
            r_error_upper = reconstruction_error(pred_keypoints_3d.cpu().numpy(),
                                                 gt_keypoints_3d.cpu().numpy(), reduction=None)
            # recon_err[step * batch_size:step * batch_size + curr_batch_size] = r_error

            # Visualize GT vs prediction
            if False:
                from renderer import viewer2D
                from renderer import glViewer
                import humanModelViewer

                gt_cam_param = batch['cam_param'].cpu().numpy()
                pred_cam_param = pred_camera.detach().cpu().numpy()

                batchNum = gt_pose.shape[0]
                for i in range(batchNum):
                    curImgVis = deNormalizeBatchImg(images[i].cpu())
                    viewer2D.ImShow(curImgVis, name='rawIm', scale=1.0)

                    # Move meshes to bbox space
                    vert_gt = gt_vertices[i].cpu().numpy()
                    vert_gt = convert_smpl_to_bbox(vert_gt, gt_cam_param[i][0], gt_cam_param[i][1:])
                    vert_pred = pred_vertices[i].cpu().numpy()
                    vert_pred = convert_smpl_to_bbox(vert_pred, pred_cam_param[i][0], pred_cam_param[i][1:])

                    smpl_face = humanModelViewer.GetSMPLFace()
                    # meshes_gt = {'ver': gt_vertices[i].cpu().numpy()*100, 'f': smpl_face, 'color': (255,0,0)}
                    # meshes_pred = {'ver': pred_vertices[i].cpu().numpy()*100, 'f': smpl_face, 'color': (0,255,0)}
                    meshes_gt = {'ver': vert_gt, 'f': smpl_face, 'color': (200, 50, 50)}
                    meshes_pred = {'ver': vert_pred, 'f': smpl_face, 'color': (50, 200, 50)}

                    glViewer.setMeshData([meshes_gt, meshes_pred], bComputeNormal=True)
                    glViewer.setBackgroundTexture(curImgVis)  # Vis raw video as background
                    glViewer.setWindowSize(curImgVis.shape[1] * 5, curImgVis.shape[0] * 5)
                    glViewer.SetOrthoCamera(True)
                    glViewer.show(0)

            for ii, p in enumerate(batch['imgname'][:len(r_error)]):
                seqName = os.path.basename(os.path.dirname(p))
                # quant_mpjpe[step * batch_size:step * batch_size + curr_batch_size] = error
                if seqName not in quant_mpjpe.keys():
                    quant_mpjpe[seqName] = []
                    quant_recon_err[seqName] = []
                quant_mpjpe[seqName].append(error[ii])
                quant_recon_err[seqName].append(r_error[ii])

            # Visualize GT mesh and predicted skeleton
            if False:
                from renderer import viewer2D
                from renderer import glViewer
                import humanModelViewer

                gt_keypoints_3d_vis = gt_keypoints_3d.cpu().numpy()
                gt_keypoints_3d_vis = np.reshape(gt_keypoints_3d_vis, (gt_keypoints_3d_vis.shape[0], -1))  # N,14x3
                gt_keypoints_3d_vis = np.swapaxes(gt_keypoints_3d_vis, 0, 1) * 100

                pred_keypoints_3d_vis = pred_keypoints_3d.cpu().numpy()
                pred_keypoints_3d_vis = np.reshape(pred_keypoints_3d_vis, (pred_keypoints_3d_vis.shape[0], -1))  # N,14x3
                pred_keypoints_3d_vis = np.swapaxes(pred_keypoints_3d_vis, 0, 1) * 100

                # output_sample = output_sample[:, np.newaxis] * 0.1
                # gt_sample = gt_sample[:, np.newaxis] * 0.1

                # (skelNum, dim, frames)
                for f in range(gt_keypoints_3d_vis.shape[1]):
                    glViewer.setSkeleton([gt_keypoints_3d_vis[:, [f]], pred_keypoints_3d_vis[:, [f]]],
                                         jointType='smplcoco')  # (skelNum, dim, frames)
                    glViewer.show(0)

            # Reconstruction error
            # quant_recon_err[step * batch_size:step * batch_size + curr_batch_size] = r_error

            list_mpjpe = np.hstack([quant_mpjpe[k] for k in quant_mpjpe])
            list_reconError = np.hstack([quant_recon_err[k] for k in quant_recon_err])
            if bVerbose:
                print(">>> {} : MPJPE {:.02f} mm, error: {:.02f} mm | Total MPJPE {:.02f} mm, error {:.02f} mm".format(
                    seqName, np.mean(error) * 1000, np.mean(r_error) * 1000,
                    np.hstack(list_mpjpe).mean() * 1000, np.hstack(list_reconError).mean() * 1000))
            # print("MPJPE {}, error: {}".format(np.mean(error)*100, np.mean(r_error)*100))

        # If mask or part evaluation, render the mask and part images
        # if eval_masks or eval_parts:
        #     mask, parts = renderer(pred_vertices, pred_camera)

        # Mask evaluation (for LSP)
        if eval_masks:
            center = batch['center'].cpu().numpy()
            scale = batch['scale'].cpu().numpy()
            # Dimensions of the original image
            orig_shape = batch['orig_shape'].cpu().numpy()
            for i in range(curr_batch_size):
                # After rendering, convert the image back to the original resolution
                pred_mask = uncrop(mask[i].cpu().numpy(), center[i], scale[i], orig_shape[i]) > 0
                # Load gt mask
                gt_mask = cv2.imread(os.path.join(annot_path, batch['maskname'][i]), 0) > 0
                # Evaluation consistent with the original UP-3D code
                accuracy += (gt_mask == pred_mask).sum()
                pixel_count += np.prod(np.array(gt_mask.shape))
                for c in range(2):
                    cgt = gt_mask == c
                    cpred = pred_mask == c
                    tp[c] += (cgt & cpred).sum()
                    fp[c] += (~cgt & cpred).sum()
                    fn[c] += (cgt & ~cpred).sum()
                f1 = 2 * tp / (2 * tp + fp + fn)

        # Part evaluation (for LSP)
        if eval_parts:
            center = batch['center'].cpu().numpy()
            scale = batch['scale'].cpu().numpy()
            orig_shape = batch['orig_shape'].cpu().numpy()
            for i in range(curr_batch_size):
                pred_parts = uncrop(parts[i].cpu().numpy().astype(np.uint8), center[i], scale[i], orig_shape[i])
                # Load gt part segmentation
                gt_parts = cv2.imread(os.path.join(annot_path, batch['partname'][i]), 0)
                # Evaluation consistent with the original UP-3D code
                # 6 parts + background
                for c in range(7):
                    cgt = gt_parts == c
                    cpred = pred_parts == c
                    cpred[gt_parts == 255] = 0
                    parts_tp[c] += (cgt & cpred).sum()
                    parts_fp[c] += (~cgt & cpred).sum()
                    parts_fn[c] += (cgt & ~cpred).sum()
                gt_parts[gt_parts == 255] = 0
                pred_parts[pred_parts == 255] = 0
                parts_f1 = 2 * parts_tp / (2 * parts_tp + parts_fp + parts_fn)
                parts_accuracy += (gt_parts == pred_parts).sum()
                parts_pixel_count += np.prod(np.array(gt_parts.shape))

        # Print intermediate results during evaluation
        if bVerbose:
            if step % log_freq == log_freq - 1:
                if eval_pose:
                    print('MPJPE: ' + str(1000 * mpjpe[:step * batch_size].mean()))
                    print('Reconstruction Error: ' + str(1000 * recon_err[:step * batch_size].mean()))
                    print()
                if eval_masks:
                    print('Accuracy: ', accuracy / pixel_count)
                    print('F1: ', f1.mean())
                    print()
                if eval_parts:
                    print('Parts Accuracy: ', parts_accuracy / parts_pixel_count)
                    print('Parts F1 (BG): ', parts_f1[[0, 1, 2, 3, 4, 5, 6]].mean())
                    print()

        # if step == 3:  # Debug
        #     break

    # Save reconstructions to a file for further processing
    if save_results:
        np.savez(result_file, pred_joints=pred_joints, pose=smpl_pose, betas=smpl_betas, camera=smpl_camera)

    # Print final results during evaluation
    if bVerbose:
        print('*** Final Results ***')
        print()

    evalLog = {}
    if eval_pose:
        # if bVerbose:
        #     print('MPJPE: ' + str(1000 * mpjpe.mean()))
        #     print('Reconstruction Error: ' + str(1000 * recon_err.mean()))
        #     print()

        list_mpjpe = np.hstack([quant_mpjpe[k] for k in quant_mpjpe])
        list_reconError = np.hstack([quant_recon_err[k] for k in quant_recon_err])

        output_str = 'SeqNames; '
        for seq in quant_mpjpe:
            output_str += seq + ';'
        output_str += '\n MPJPE; '
        quant_mpjpe_avg_mm = np.hstack(list_mpjpe).mean() * 1000
        output_str += "Avg {:.02f} mm; ".format(quant_mpjpe_avg_mm)
        for seq in quant_mpjpe:
            output_str += '{:.02f}; '.format(1000 * np.hstack(quant_mpjpe[seq]).mean())

        output_str += '\n Recon Error; '
        quant_recon_error_avg_mm = np.hstack(list_reconError).mean() * 1000
        output_str += "Avg {:.02f}mm; ".format(quant_recon_error_avg_mm)
        for seq in quant_recon_err:
            output_str += '{:.02f}; '.format(1000 * np.hstack(quant_recon_err[seq]).mean())

        if bVerbose:
            print(output_str)
        else:
            print(">>> Test on 3DPW: MPJPE: {} | quant_recon_error_avg_mm: {}".format(
                quant_mpjpe_avg_mm, quant_recon_error_avg_mm))

        # Save output to dict
        # evalLog['checkpoint'] = args.checkpoint
        evalLog['testdb'] = dataset_name
        evalLog['datasize'] = len(data_loader.dataset)
        for seq in quant_mpjpe:
            quant_mpjpe[seq] = 1000 * np.hstack(quant_mpjpe[seq]).mean()
        for seq in quant_recon_err:
            quant_recon_err[seq] = 1000 * np.hstack(quant_recon_err[seq]).mean()
        evalLog['quant_mpjpe'] = quant_mpjpe                             # MPJPE
        evalLog['quant_recon_err'] = quant_recon_err                     # PA-MPJPE
        evalLog['quant_output_logstr'] = output_str
        evalLog['quant_mpjpe_avg_mm'] = quant_mpjpe_avg_mm               # MPJPE
        evalLog['quant_recon_error_avg_mm'] = quant_recon_error_avg_mm   # PA-MPJPE

        # return quant_mpjpe_avg_mm, quant_recon_error_avg_mm, evalLog
        return evalLog

    if bVerbose:
        if eval_masks:
            print('Accuracy: ', accuracy / pixel_count)
            print('F1: ', f1.mean())
            print()
        if eval_parts:
            print('Parts Accuracy: ', parts_accuracy / parts_pixel_count)
            print('Parts F1 (BG): ', parts_f1[[0, 1, 2, 3, 4, 5, 6]].mean())
            print()

    return -1  # Should return something
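# For clarity, the two pose metrics above are MPJPE (mean Euclidean distance
# between pelvis-centered predicted and GT joints) and the "reconstruction
# error" PA-MPJPE (the same distance after Procrustes alignment). A minimal
# numpy sketch of MPJPE; the repo's reconstruction_error additionally solves
# for the optimal similarity transform, which is omitted here:
def mpjpe_sketch(pred_joints, gt_joints):
    """pred_joints, gt_joints: (N, J, 3) arrays in meters -> (N,) per-sample MPJPE."""
    return np.sqrt(((pred_joints - gt_joints) ** 2).sum(axis=-1)).mean(axis=-1)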
        if False:
            pred_meshes = {'ver': pred_vert_vis, 'f': smpl.faces}
            glViewer.setMeshData([pred_meshes], bComputeNormal=True)

            ############### Visualize Skeletons ###############
            # Vis predicted SMPL joints
            pred_joints_vis = ours_joints_3d[b, :, :3].copy()  # (N,3)
            pred_joints_vis = convert_smpl_to_bbox(pred_joints_vis, camParam_scale, camParam_trans)
            pred_joints_vis = convert_bbox_to_oriIm(pred_joints_vis, boxScale_o2n, bboxTopLeft,
                                                    rawImg.shape[1], rawImg.shape[0])

            glViewer.setBackgroundTexture(rawImg)
            glViewer.setWindowSize(rawImg.shape[1], rawImg.shape[0])
            glViewer.SetOrthoCamera(True)
            glViewer.show(1)

        # Save data
        if dbName == 'mpii' or dbName == 'lsp':
            imgnames_.append(os.path.join('images', os.path.basename(imgName)))
        elif dbName == 'coco' or dbName == 'cocoall':
            imgnames_.append(os.path.join('train2014', os.path.basename(imgName)))
        elif dbName == 'coco_semmap':
            imgnames_.append(os.path.basename(imgName))  # No folder name
        elif dbName == 'pennaction' or dbName == 'posetrack' or dbName == 'posetrack_train':
def run_withWeakProj(self, init_pose, init_betas, init_cameras, camera_center, keypoints_2d,
                     bDebugVis=False, bboxInfo=None, imagevis=None,
                     ablation_smplify_noCamOptFirst=False, ablation_smplify_noPrior=False):
    """Perform body fitting.
    Input:
        init_pose: SMPL pose estimate
        init_betas: SMPL betas estimate
        init_cameras: weak-perspective camera estimate
        camera_center: camera center location
        keypoints_2d: keypoints used for the optimization
    Returns:
        vertices: vertices of the optimized shape
        joints: 3D joints of the optimized shape
        pose: SMPL pose parameters of the optimized shape
        betas: SMPL beta parameters of the optimized shape
        camera: optimized weak-perspective camera parameters
        reprojection_loss: final joint reprojection loss
    """
    # batch_size = init_pose.shape[0]

    # Make camera translation a learnable parameter
    # camera_translation = init_cam_t.clone()

    # Get joint confidence
    joints_2d = keypoints_2d[:, :, :2]
    joints_conf = keypoints_2d[:, :, -1]

    # Split SMPL pose into body pose and global orientation
    body_pose = init_pose[:, 3:].detach().clone()
    global_orient = init_pose[:, :3].detach().clone()
    betas = init_betas.detach().clone()
    camera = init_cameras.detach().clone()

    # Step 1: Optimize camera translation and body orientation
    # Optimize only camera translation and body orientation
    body_pose.requires_grad = False
    betas.requires_grad = False
    global_orient.requires_grad = True
    camera.requires_grad = True

    camera_opt_params = [global_orient, camera]
    camera_optimizer = torch.optim.Adam(camera_opt_params, lr=self.step_size, betas=(0.9, 0.999))

    # for i in range(self.num_iters * 10):
    # g_timer.tic()
    if ablation_smplify_noCamOptFirst == False:
        for i in range(self.num_iters):
            smpl_output = self.smpl(global_orient=global_orient, body_pose=body_pose, betas=betas)
            model_joints = smpl_output.joints
            loss = camera_fitting_loss_weakperspective(model_joints, camera, init_cameras,
                                                       joints_2d, joints_conf)
            camera_optimizer.zero_grad()
            loss.backward()
            camera_optimizer.step()
            # print(loss)

            # Render
            if False:
                from renderer import glViewer
                body_pose_all = torch.cat([global_orient, body_pose], dim=-1)  # [N,72]
                smpl_output, smpl_output_bbox = visSMPLoutput_bboxSpace(
                    self.smpl,
                    {"pred_pose": body_pose_all, "pred_shape": betas, "pred_camera": camera},
                    color=glViewer.g_colorSet['spin'],
                    image=imagevis)
                glViewer.show(1)

            # Render
            if False:
                root_imgname = os.path.basename(bboxInfo['imgname'])[:-4]
                renderRoot = f'/home/hjoo/temp/render_eft/smplify_{root_imgname}'
                imgname = '{:04d}'.format(i)
                renderSMPLoutput(renderRoot, 'overlaid', 'mesh', imgname=imgname)
                renderSMPLoutput(renderRoot, 'overlaid', 'skeleton', imgname=imgname)
                renderSMPLoutput(renderRoot, 'side', 'mesh', imgname=imgname)
    # g_timer.toc(average=True, bPrint=True, title="Single Camera Optimization")

    # Fix camera translation after optimizing the camera
    # camera.requires_grad = False

    # Step 2: Optimize body joints
    # Optimize only the body pose and global orientation of the body
    body_pose.requires_grad = True
    betas.requires_grad = True
    global_orient.requires_grad = True

    if ablation_smplify_noCamOptFirst == False:  # Original from SPIN
        camera.requires_grad = False
        body_opt_params = [body_pose, betas, global_orient]
    else:  # New
        camera.requires_grad = True
        body_opt_params = [body_pose, betas, global_orient, camera]

    # For joints ignored during fitting, set the confidence to 0
    joints_conf[:, self.ign_joints] = 0.

    # g_timer.tic()
    body_optimizer = torch.optim.Adam(body_opt_params, lr=self.step_size, betas=(0.9, 0.999))
    for i in range(self.num_iters):
        smpl_output = self.smpl(global_orient=global_orient, body_pose=body_pose, betas=betas)
        g_timer.tic()
        model_joints = smpl_output.joints

        # loss = body_fitting_loss(body_pose, betas, model_joints, camera, camera_center,
        #                          joints_2d, joints_conf, self.pose_prior,
        #                          focal_length=self.focal_length)
        if ablation_smplify_noPrior:
            # print('ablation_smplify_noPrior')
            loss, reprojection_loss = body_fitting_loss_weakperspective(
                body_pose, betas, model_joints, camera, joints_2d, joints_conf, self.pose_prior,
                angle_prior_weight=0)  # , pose_prior_weight=0)
        else:
            loss, reprojection_loss = body_fitting_loss_weakperspective(
                body_pose, betas, model_joints, camera, joints_2d, joints_conf, self.pose_prior)

        # Stop with a sufficiently small pixel error
        if reprojection_loss.mean() < 2.0:
            break

        body_optimizer.zero_grad()
        loss.backward()
        body_optimizer.step()
        g_timer.toc(average=False, bPrint=True, title="SMPLify iter")

        if bDebugVis:
            from renderer import glViewer
            body_pose_all = torch.cat([global_orient, body_pose], dim=-1)  # [N,72]
            smpl_output, smpl_output_bbox = visSMPLoutput_bboxSpace(
                self.smpl,
                {"pred_pose": body_pose_all, "pred_shape": betas, "pred_camera": camera},
                waittime=1,
                color=glViewer.g_colorSet['spin'],
                image=imagevis)

        # Render
        if False:
            root_imgname = os.path.basename(bboxInfo['imgname'])[:-4]
            renderRoot = f'/home/hjoo/temp/render_eft/smplify_{root_imgname}'
            imgname = '{:04d}'.format(i + self.num_iters)
            renderSMPLoutput(renderRoot, 'overlaid', 'mesh', imgname=imgname)
            renderSMPLoutput(renderRoot, 'overlaid', 'skeleton', imgname=imgname)
            renderSMPLoutput(renderRoot, 'side', 'mesh', imgname=imgname)
    # g_timer.toc(average=True, bPrint=True, title="Whole body optimization")

    # Get the final loss value
    with torch.no_grad():
        smpl_output = self.smpl(global_orient=global_orient, body_pose=body_pose,
                                betas=betas, return_full_pose=True)
        model_joints = smpl_output.joints
        # reprojection_loss = body_fitting_loss(body_pose, betas, model_joints, camera_translation, camera_center,
        #                                       joints_2d, joints_conf, self.pose_prior,
        #                                       focal_length=self.focal_length, output='reprojection')
        if ablation_smplify_noPrior:
            reprojection_loss = body_fitting_loss_weakperspective(
                body_pose, betas, model_joints, camera, joints_2d, joints_conf, self.pose_prior,
                angle_prior_weight=0,  # pose_prior_weight=0,
                output='reprojection')
        else:  # Original
            reprojection_loss = body_fitting_loss_weakperspective(
                body_pose, betas, model_joints, camera, joints_2d, joints_conf, self.pose_prior,
                output='reprojection')

    vertices = smpl_output.vertices.detach()
    joints = smpl_output.joints.detach()
    pose = torch.cat([global_orient, body_pose], dim=-1).detach()
    betas = betas.detach()

    if bDebugVis:
        from renderer import glViewer
        body_pose_all = torch.cat([global_orient, body_pose], dim=-1)  # [N,72]
        visSMPLoutput_bboxSpace(self.smpl,
                                {"pred_pose": body_pose_all, "pred_shape": betas, "pred_camera": camera},
                                color=glViewer.g_colorSet['spin'])

        if False:
            glViewer.show()
        elif False:  # Render to files in the original image space
            bboxCenter = bboxInfo['bboxCenter']
            bboxScale = bboxInfo['bboxScale']
            imgname = bboxInfo['imgname']

            # Get skeletons
            import cv2
            img_original = cv2.imread(imgname)
            # viewer2D.ImShow(img_original, waitTime=0)
            imgShape = img_original.shape[:2]
            smpl_output, smpl_output_bbox, smpl_output_imgspace = getSMPLoutput_imgSpace(
                self.smpl,
                {"pred_pose": body_pose_all, "pred_shape": betas, "pred_camera": camera},
                bboxCenter, bboxScale, imgShape)

            glViewer.setBackgroundTexture(img_original)  # Vis raw video as background
            glViewer.setWindowSize(img_original.shape[1] * 2, img_original.shape[0] * 2)

            smpl_output_imgspace['body_mesh']['color'] = glViewer.g_colorSet['spin']
            glViewer.setMeshData([smpl_output_imgspace['body_mesh']], bComputeNormal=True)
            glViewer.setSkeleton([])

            imgname = os.path.basename(imgname)[:-4]
            fileName = "smplify_{0}_{1:04d}".format(imgname, 0)
            # rawImg = cv2.putText(rawImg, data['subjectId'], (100,100), cv2.FONT_HERSHEY_PLAIN, 2, (255,255,0), 2)
            glViewer.render_on_image('/home/hjoo/temp/render_eft', fileName, img_original, scaleFactor=2)
            glViewer.show()
        else:
            root_imgname = os.path.basename(bboxInfo['imgname'])[:-4]
            # renderRoot = f'/home/hjoo/temp/render_eft/smplify_{root_imgname}'
            renderRoot = f'/home/hjoo/temp/render_rebuttal/smplify_{root_imgname}'
            imgname = '{:04d}'.format(i + self.num_iters)
            renderSMPLoutput(renderRoot, 'overlaid', 'mesh', imgname=imgname)
            renderSMPLoutput(renderRoot, 'overlaid', 'skeleton', imgname=imgname)
            renderSMPLoutput(renderRoot, 'side', 'mesh', imgname=imgname)
            renderSMPLoutput_merge(renderRoot)

    return vertices, joints, pose, betas, camera, reprojection_loss
            pred_meshes = {'ver': pred_vert_vis, 'f': smpl.faces}

            # pred_vert_vis = spin_vertices[b]
            # pred_vert_vis *= pred_camera_vis_init[b,0]
            # pred_vert_vis[:,0] += pred_camera_vis_init[b,1]  # no need +1 (or 112); rendering has this offset already
            # pred_vert_vis[:,1] += pred_camera_vis_init[b,2]  # no need +1 (or 112); rendering has this offset already
            # pred_vert_vis *= 112
            # spin_meshes = {'ver': pred_vert_vis, 'f': smpl.faces}

            glViewer.setMeshData([pred_meshes], bComputeNormal=True)
            # glViewer.setMeshData([pred_meshes, spin_meshes], bComputeNormal=True)
            # glViewer.SetMeshColor('red')

            glViewer.setBackgroundTexture(croppedImg)
            glViewer.setWindowSize(croppedImg.shape[1], croppedImg.shape[0])
            glViewer.SetOrthoCamera(True)
            glViewer.show(0)

        # Save data
        if dbName == 'mpii' or dbName == 'lsp':
            imgnames_.append(os.path.join('images', os.path.basename(imgName)))
        elif dbName == 'coco':
            imgnames_.append(os.path.join('train2014', os.path.basename(imgName)))
        elif dbName == 'coco_semmap':
            imgnames_.append(os.path.basename(imgName))  # No folder name
        elif dbName == 'pennaction' or dbName == 'posetrack' or dbName == 'posetrack_train':
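
# --- Illustrative sketch (not part of the original code) ---
# The visualization code above (and the debug blocks below) repeatedly map
# vertices from normalized bbox space into glViewer's pixel space with the
# weak-perspective camera: scale, shift x/y, then multiply by half the crop
# resolution (112 == 0.5 * 224). A small helper capturing that pattern:
import numpy as np

def to_glviewer_space_sketch(vertices, cam, half_res=112.0):
    """vertices: [V,3] (numpy); cam: (s, tx, ty) weak-perspective parameters."""
    v = np.array(vertices, dtype=np.float64, copy=True)
    v *= cam[0]          # weak-perspective scale
    v[:, 0] += cam[1]    # x translation (no +1 offset; rendering adds it)
    v[:, 1] += cam[2]    # y translation
    v *= half_res        # normalized [-1,1] space -> pixel units
    return v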
    def train_exemplar_step(self, input_batch):
        self.model.train()

        if self.options.bExemplarMode:
            self.exemplerTrainingMode()

        # Get data from the batch
        images = input_batch['img']                  # input image
        gt_keypoints_2d = input_batch['keypoints']   # 2D keypoints  [N,49,3]
        gt_pose = input_batch['pose']                # SMPL pose parameters  [N,72]
        gt_betas = input_batch['betas']              # SMPL beta parameters  [N,10]
        gt_joints = input_batch['pose_3d']           # 3D pose  [N,24,4]
        has_smpl = input_batch['has_smpl'].byte() == 1        # flag that indicates whether SMPL parameters are valid
        has_pose_3d = input_batch['has_pose_3d'].byte() == 1  # flag that indicates whether 3D pose is valid
        is_flipped = input_batch['is_flipped']       # flag that indicates whether image was flipped during data augmentation
        rot_angle = input_batch['rot_angle']         # rotation angle used for data augmentation
        dataset_name = input_batch['dataset_name']   # name of the dataset the image comes from
        indices = input_batch['sample_index']        # index of example inside its dataset
        batch_size = images.shape[0]

        # Get GT vertices and model joints
        # Note that gt_model_joints is different from gt_joints as it comes from SMPL
        gt_out = self.smpl(betas=gt_betas,
                           body_pose=gt_pose[:, 3:],
                           global_orient=gt_pose[:, :3])
        gt_model_joints = gt_out.joints  # [N,49,3]  Note: different from gt_joints!
        gt_vertices = gt_out.vertices

        # Get current best fits from the dictionary
        opt_pose, opt_betas, opt_validity = self.fits_dict[(dataset_name,
                                                            indices.cpu(),
                                                            rot_angle.cpu(),
                                                            is_flipped.cpu())]
        opt_pose = opt_pose.to(self.device)
        opt_betas = opt_betas.to(self.device)
        opt_output = self.smpl(betas=opt_betas,
                               body_pose=opt_pose[:, 3:],
                               global_orient=opt_pose[:, :3])
        opt_vertices = opt_output.vertices
        # opt_joints = opt_output.joints
        opt_joints = opt_output.joints.detach()  # for smpl-x

        # Ensure that non-valid fits have GT values to fall back on
        if len(has_smpl[opt_validity == 0]) > 0:
            assert min(has_smpl[opt_validity == 0])  # All should be True
        # else:  # Assume 3D DB!
        #     opt_pose = gt_pose
        #     opt_betas = gt_betas
        #     opt_vertices = gt_vertices
        #     opt_joints = gt_model_joints

        # De-normalize 2D keypoints from [-1,1] to pixel space
        gt_keypoints_2d_orig = gt_keypoints_2d.clone()
        gt_keypoints_2d_orig[:, :, :-1] = 0.5 * self.options.img_res * (
            gt_keypoints_2d_orig[:, :, :-1] + 1)  # 49: (25+24) x 3

        # Estimate camera translation given the model joints and 2D keypoints
        # by minimizing a weighted least squares loss
        # gt_cam_t = estimate_translation(gt_model_joints, gt_keypoints_2d_orig, focal_length=self.focal_length, img_size=self.options.img_res)
        opt_cam_t = estimate_translation(opt_joints,
                                         gt_keypoints_2d_orig,
                                         focal_length=self.focal_length,
                                         img_size=self.options.img_res)

        opt_joint2d_loss = self.smplify.get_fitting_loss(
            opt_pose, opt_betas, opt_cam_t,  # opt_pose (N,72), opt_betas (N,10), opt_cam_t (N,3)
            0.5 * self.options.img_res *
            torch.ones(batch_size, 2, device=self.device),  # (N,2): (112, 112)
            gt_keypoints_2d_orig).mean(dim=-1)

        # Feed images into the network to predict camera and SMPL parameters
        pred_rotmat, pred_betas, pred_camera = self.model(images)

        pred_output = self.smpl(betas=pred_betas,
                                body_pose=pred_rotmat[:, 1:],
                                global_orient=pred_rotmat[:, 0].unsqueeze(1),
                                pose2rot=False)
        pred_vertices = pred_output.vertices
        pred_joints_3d = pred_output.joints

        # # Convert weak-perspective camera [s, tx, ty] to camera translation [tx, ty, tz] in 3D given the bounding box size
        # # This camera translation can be used in a full perspective projection
        # pred_cam_t = torch.stack([pred_camera[:,1],
        #                           pred_camera[:,2],
        #                           2*self.focal_length/(self.options.img_res * pred_camera[:,0] + 1e-9)], dim=-1)
        # camera_center = torch.zeros(batch_size, 2, device=self.device)
        # pred_keypoints_2d = perspective_projection(pred_joints_3d,
        #                                            rotation=torch.eye(3, device=self.device).unsqueeze(0).expand(batch_size, -1, -1),
        #                                            translation=pred_cam_t,
        #                                            focal_length=self.focal_length,
        #                                            camera_center=camera_center)
        # # Normalize keypoints to [-1,1]
        # pred_keypoints_2d = pred_keypoints_2d / (self.options.img_res / 2.)

        # Center the joints and vertices at the pelvis (centering the mesh is more involved than the joints)
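        # (Sketch of the joint layout, inferred from the "(25+24)" comments in
        # this file: the 49-joint tensors concatenate 25 OpenPose keypoints with
        # the 24 SMPL-regressed joints, so SMPL joint k sits at row 25+k. Joints
        # 2 and 3 of the 24-joint convention appear to be the two hips, i.e.
        # rows 27 and 28 here, whose midpoint serves as the pelvis below.)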
        if True:
            pred_pelvis = (pred_joints_3d[:, 27:28, :3] +
                           pred_joints_3d[:, 28:29, :3]) / 2
            pred_joints_3d[:, :, :3] = pred_joints_3d[:, :, :3] - pred_pelvis  # centering
            pred_vertices = pred_vertices - pred_pelvis

            gt_pelvis = (gt_joints[:, 2:3, :3] + gt_joints[:, 3:4, :3]) / 2  # midpoint of the two hip joints
            gt_joints[:, :, :3] = gt_joints[:, :, :3] - gt_pelvis  # centering

            gt_model_pelvis = (gt_model_joints[:, 27:28, :3] +
                               gt_model_joints[:, 28:29, :3]) / 2
            gt_model_joints[:, :, :3] = gt_model_joints[:, :, :3] - gt_model_pelvis  # centering
            gt_vertices = gt_vertices - gt_model_pelvis

        # Replace the optimized parameters with the ground truth parameters, if available
        opt_vertices[has_smpl, :, :] = gt_vertices[has_smpl, :, :]
        # opt_cam_t[has_smpl, :] = gt_cam_t[has_smpl, :]
        opt_joints[has_smpl, :, :] = gt_model_joints[has_smpl, :, :]
        opt_pose[has_smpl, :] = gt_pose[has_smpl, :]
        opt_betas[has_smpl, :] = gt_betas[has_smpl, :]

        # Weak-perspective projection
        pred_keypoints_2d = weakProjection_gpu(pred_joints_3d,
                                               pred_camera[:, 0],
                                               pred_camera[:, 1:])  # (N,49,2)

        # A fit is valid if its joint loss is below the threshold
        valid_fit = (opt_joint2d_loss < self.options.smplify_threshold).to(self.device)
        # Add the examples with GT parameters to the list of valid fits
        valid_fit = valid_fit | has_smpl

        # opt_keypoints_2d = perspective_projection(opt_joints,
        #                                           rotation=torch.eye(3, device=self.device).unsqueeze(0).expand(batch_size, -1, -1),
        #                                           translation=opt_cam_t,
        #                                           focal_length=self.focal_length,
        #                                           camera_center=camera_center)
        # opt_keypoints_2d = opt_keypoints_2d / (self.options.img_res / 2.)

        # Compute loss on SMPL parameters
        loss_regr_pose, loss_regr_betas = self.smpl_losses(
            pred_rotmat, pred_betas, opt_pose, opt_betas, valid_fit)
        # loss_regr_pose, loss_regr_betas = self.smpl_losses(pred_rotmat, pred_betas, gt_pose, gt_betas, valid_fit)

        # Compute 2D reprojection loss for the keypoints
        loss_keypoints_2d = self.keypoint2d_loss(
            pred_keypoints_2d, gt_keypoints_2d,
            self.options.openpose_train_weight, self.options.gt_train_weight)

        # Compute 3D keypoint loss
        # loss_keypoints_3d = self.keypoint_3d_loss(pred_joints_3d, gt_joints, has_pose_3d)
        loss_keypoints_3d = self.keypoint_3d_loss_modelSkel(
            pred_joints_3d, gt_model_joints[:, 25:, :], has_pose_3d)

        # Per-vertex loss for the shape
        loss_shape = self.shape_loss(pred_vertices, opt_vertices, valid_fit)
        # loss_shape = self.shape_loss(pred_vertices, gt_vertices, valid_fit)

        # Compute total loss
        # The last component forces the network to predict positive depth values
        loss = self.options.shape_loss_weight * loss_shape +\
               self.options.keypoint_loss_weight * loss_keypoints_2d +\
               self.options.keypoint_loss_weight * loss_keypoints_3d +\
               loss_regr_pose + self.options.beta_loss_weight * loss_regr_betas +\
               ((torch.exp(-pred_camera[:, 0] * 10)) ** 2).mean()
        # Note: in exemplar mode this loss is redefined below!

        if True:  # Exemplar loss: 2D only + keep the original shape + camera regularization
            # For now just use opt_betas. Ideally, we could use the beta estimated from the direct result
            betaMax = abs(torch.max(abs(opt_betas)).item())
            pred_betasMax = abs(torch.max(abs(pred_betas)).item())
            # print(pred_betasMax)
            if False:  # betaMax < 2.0:
                loss_regr_betas_noReject = self.criterion_regr(pred_betas, opt_betas)
            else:
                loss_regr_betas_noReject = torch.mean(pred_betas**2)  # Prevent bending knee?
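            # (Sketch, not an original comment.) The ((exp(-s*10))**2).mean()
            # term in the loss below regularizes the weak-perspective scale
            # s = pred_camera[:, 0] toward positive values: s = 1.0 contributes
            # exp(-10)**2 ~ 2e-9 (negligible), s = 0.0 contributes 1.0, and
            # s = -0.1 contributes exp(1)**2 ~ 7.4, so shrinking or flipped
            # cameras (i.e. negative depth) are heavily penalized.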
            loss = self.options.keypoint_loss_weight * loss_keypoints_2d + \
                   self.options.beta_loss_weight * loss_regr_betas_noReject + \
                   ((torch.exp(-pred_camera[:, 0] * 10)) ** 2).mean()
            # print(loss_regr_betas)
            loss *= 60
            # print("loss2D: {}, loss3D: {}".format(self.options.keypoint_loss_weight * loss_keypoints_2d,
            #                                       self.options.keypoint_loss_weight * loss_keypoints_3d))

        # Do backprop
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

        # Pack output arguments for tensorboard logging
        output = {
            'pred_vertices': 0,  # pred_vertices.detach(),
            'opt_vertices': 0,
            'pred_cam_t': 0,     # pred_cam_t.detach(),
            'opt_cam_t': 0
        }

        # Save result (note: this replaces the logging dict above)
        output = {}
        output['pred_pose_rotmat'] = pred_rotmat.detach().cpu().numpy()
        output['pred_shape'] = pred_betas.detach().cpu().numpy()
        output['pred_camera'] = pred_camera.detach().cpu().numpy()
        output['opt_pose'] = opt_pose.detach().cpu().numpy()
        output['opt_beta'] = opt_betas.detach().cpu().numpy()
        output['sampleIdx'] = input_batch['sample_index'].detach().cpu().numpy()  # to use the loader directly
        output['imageName'] = input_batch['imgname']
        output['scale'] = input_batch['scale'].detach().cpu().numpy()
        output['center'] = input_batch['center'].detach().cpu().numpy()

        # To save a new db file
        output['keypoint2d'] = input_batch['keypoints_original'].detach().cpu().numpy()

        losses = {
            'loss': loss.detach().item(),
            'loss_keypoints': loss_keypoints_2d.detach().item(),
            'loss_keypoints_3d': loss_keypoints_3d.detach().item(),
            'loss_regr_pose': loss_regr_pose.detach().item(),
            'loss_regr_betas': loss_regr_betas.detach().item(),
            'loss_shape': loss_shape.detach().item()
        }

        if self.options.bDebug_visEFT:  # g_debugVisualize: debug visualize input
            for b in range(batch_size):
                # Denormalize image
                curImgVis = images[b]  # (3,224,224)
                curImgVis = self.de_normalize_img(curImgVis).cpu().numpy()
                curImgVis = np.transpose(curImgVis, (1, 2, 0)) * 255.0
                curImgVis = curImgVis[:, :, [2, 1, 0]]  # RGB -> BGR
                curImgVis = np.ascontiguousarray(curImgVis, dtype=np.uint8)
                originalImgVis = curImgVis.copy()
                viewer2D.ImShow(curImgVis, name='rawIm')

                curImgVis = viewer2D.Vis_Skeleton_2D_general(
                    gt_keypoints_2d_orig[b, :, :2].cpu().numpy(),
                    gt_keypoints_2d_orig[b, :, 2],
                    bVis=False,
                    image=curImgVis)

                pred_keypoints_2d_vis = pred_keypoints_2d[b, :, :2].detach().cpu().numpy()
                pred_keypoints_2d_vis = 0.5 * self.options.img_res * (
                    pred_keypoints_2d_vis + 1)  # 49: (25+24) x 3

                if glViewer.g_bShowSkeleton:
                    curImgVis = viewer2D.Vis_Skeleton_2D_general(
                        pred_keypoints_2d_vis, bVis=False, image=curImgVis)
                viewer2D.ImShow(curImgVis, scale=2.0, waitTime=1)

                # Vis opt-SMPL joints
                if False:
                    smpl_jointsVis = opt_joints[b, :, :3].cpu().numpy() * 100  # [N,49,3]
                    smpl_jointsVis = smpl_jointsVis.ravel()[:, np.newaxis]
                    glViewer.setSkeleton([smpl_jointsVis])
                    # glViewer.show()

                    # Vis opt-vertices
                    meshVertVis = opt_vertices[b].cpu().numpy() * 100
                    meshes = {'ver': meshVertVis, 'f': self.smpl.faces}
                    glViewer.setMeshData([meshes], bComputeNormal=True)

                # Get predicted camera parameters
                pred_camera_vis = pred_camera.detach().cpu().numpy()

                ############### Visualize Mesh ###############
                pred_vert_vis = pred_vertices[b].detach().cpu().numpy()
                # meshVertVis = gt_vertices[b].detach().cpu().numpy()
                # meshVertVis = meshVertVis - pelvis  # centering
                pred_vert_vis *= pred_camera_vis[b, 0]
                pred_vert_vis[:, 0] += pred_camera_vis[b, 1]  # no need +1 (or 112); rendering has this offset already
                pred_vert_vis[:, 1] += pred_camera_vis[b, 2]  # no need +1 (or 112); rendering has this offset already
                pred_vert_vis *= 112
                pred_meshes = {'ver': pred_vert_vis, 'f': self.smpl.faces}

                if has_smpl[b] == False:
                    opt_vertices[has_smpl, :, :] = gt_vertices[has_smpl, :, :]
                    opt_joints[has_smpl, :, :] = gt_model_joints[has_smpl, :, :]
                    opt_model_pelvis = (opt_joints[:, 27:28, :3] +
                                        opt_joints[:, 28:29, :3]) / 2
                    opt_vertices = opt_vertices - opt_model_pelvis

                opt_vert_vis = opt_vertices[b].detach().cpu().numpy()
                opt_vert_vis *= pred_camera_vis[b, 0]
                opt_vert_vis[:, 0] += pred_camera_vis[b, 1]  # no need +1 (or 112); rendering has this offset already
                opt_vert_vis[:, 1] += pred_camera_vis[b, 2]  # no need +1 (or 112); rendering has this offset already
                opt_vert_vis *= 112
                opt_meshes = {'ver': opt_vert_vis, 'f': self.smpl.faces}

                # glViewer.setMeshData([pred_meshes], bComputeNormal=True)
                glViewer.setMeshData([pred_meshes, opt_meshes], bComputeNormal=True)

                ############### Visualize Skeletons ###############
                # Vis pred-SMPL joints
                pred_joints_vis = pred_joints_3d[b, :, :3].detach().cpu().numpy()  # [N,49,3]
                pred_joints_vis = pred_joints_vis.ravel()[:, np.newaxis]

                # Weak-perspective projection
                pred_joints_vis *= pred_camera_vis[b, 0]
                pred_joints_vis[::3] += pred_camera_vis[b, 1]
                pred_joints_vis[1::3] += pred_camera_vis[b, 2]
                pred_joints_vis *= 112  # 112 == 0.5 * 224
                glViewer.setSkeleton([pred_joints_vis])

                # Vis SMPL's skeleton
                # gt_smpljointsVis = gt_model_joints[b,:,:3].cpu().numpy()  # [N,49,3]
                # # gt_pelvis = (gt_smpljointsVis[25+2,:] + gt_smpljointsVis[25+3,:]) / 2
                # # gt_smpljointsVis = gt_smpljointsVis - gt_pelvis
                # gt_smpljointsVis = gt_smpljointsVis.ravel()[:,np.newaxis]
                # gt_smpljointsVis *= pred_camera_vis[b,0]
                # gt_smpljointsVis[::3] += pred_camera_vis[b,1]
                # gt_smpljointsVis[1::3] += pred_camera_vis[b,2]
                # gt_smpljointsVis *= 112
                # glViewer.addSkeleton([gt_smpljointsVis])

                # # Vis GT joints (not model (SMPL) joints!)
                # if has_pose_3d[b]:
                #     gt_jointsVis = gt_model_joints[b,:,:3].cpu().numpy()  # [N,49,3]
                #     # gt_jointsVis = gt_joints[b,:,:3].cpu().numpy()  # [N,49,3]
                #     # gt_pelvis = (gt_jointsVis[25+2,:] + gt_jointsVis[25+3,:]) / 2
                #     # gt_jointsVis = gt_jointsVis - gt_pelvis
                #     gt_jointsVis = gt_jointsVis.ravel()[:,np.newaxis]
                #     gt_jointsVis *= pred_camera_vis[b,0]
                #     gt_jointsVis[::3] += pred_camera_vis[b,1]
                #     gt_jointsVis[1::3] += pred_camera_vis[b,2]
                #     gt_jointsVis *= 112
                #     glViewer.addSkeleton([gt_jointsVis])
                # # glViewer.show()

                glViewer.setBackgroundTexture(originalImgVis)
                glViewer.setWindowSize(curImgVis.shape[1], curImgVis.shape[0])
                glViewer.SetOrthoCamera(True)
                glViewer.show(1)

                # continue

        return output, losses
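
# --- Illustrative sketch (not part of the original trainer) ---
# Both train_step() and train_exemplar_step() center joints and vertices at the
# pelvis, taken as the midpoint of the two hip joints (rows 27/28 of the
# 49-joint tensors, rows 2/3 of the 24-joint GT tensor). A minimal helper
# expressing that shared pattern:
def pelvis_center_sketch(joints, vertices, hip_a=27, hip_b=28):
    """joints: [N,J,C] with xyz in channels 0..2; vertices: [N,V,3]."""
    pelvis = (joints[:, hip_a:hip_a + 1, :3] + joints[:, hip_b:hip_b + 1, :3]) / 2
    joints = joints.clone()
    joints[:, :, :3] = joints[:, :, :3] - pelvis  # shift joints to the pelvis origin
    return joints, vertices - pelvis              # apply the same shift to the mesh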