# Common imports for the evaluation variants below. Repo-specific symbols
# (SMPL, BaseDataset, SurrealDataset, PartRenderer, uncrop, reconstruction_error,
# vis_smpl_iuv, cfg / config / path_config, constants, args) are assumed to be
# provided by each project's own code base.
import os
from os.path import join

import cv2
import numpy as np
import scipy.io
import torch
import torchgeometry as tgm
from torch.utils.data import DataLoader
from tqdm import tqdm


# Variant 1: evaluation loop for a model that returns a dict with
# 'pred_vertices' and 'camera'; builds its own dataloader from opt/options.
def run_evaluation(model, opt, options, dataset_name, log_freq=50):
    """Run evaluation on the datasets and metrics we report in the paper."""
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

    # Create SMPL model
    smpl = SMPL().to(device)
    if dataset_name == '3dpw' or dataset_name == 'surreal':
        smpl_male = SMPL(cfg.MALE_SMPL_FILE).to(device)
        smpl_female = SMPL(cfg.FEMALE_SMPL_FILE).to(device)

    batch_size = opt.batch_size

    # Create dataloader for the dataset
    if dataset_name == 'surreal':
        dataset = SurrealDataset(options, use_augmentation=False, is_train=False, use_IUV=False)
    else:
        dataset = BaseDataset(options, dataset_name, use_augmentation=False, is_train=False, use_IUV=False)
    data_loader = DataLoader(dataset,
                             batch_size=opt.batch_size,
                             shuffle=False,
                             num_workers=int(opt.num_workers),
                             pin_memory=True)
    print('data loader finished')

    # Transfer model to the GPU
    model.to(device)
    model.eval()

    # Pose metrics
    # MPJPE and reconstruction error for the non-parametric and parametric shapes
    mpjpe = np.zeros(len(dataset))
    mpjpe_pa = np.zeros(len(dataset))

    # Shape metrics
    # Mean per-vertex error
    shape_err = np.zeros(len(dataset))

    # Mask and part metrics
    # Accuracy
    accuracy = 0.
    parts_accuracy = 0.
    # True positives, false positives and false negatives
    tp = np.zeros((2, 1))
    fp = np.zeros((2, 1))
    fn = np.zeros((2, 1))
    parts_tp = np.zeros((7, 1))
    parts_fp = np.zeros((7, 1))
    parts_fn = np.zeros((7, 1))
    # Pixel count accumulators
    pixel_count = 0
    parts_pixel_count = 0

    eval_pose = False
    eval_shape = False
    eval_masks = False
    eval_parts = False
    joint_mapper = cfg.J24_TO_J17 if dataset_name == 'mpi-inf-3dhp' else cfg.J24_TO_J14

    # Choose appropriate evaluation for each dataset
    if 'h36m' in dataset_name or dataset_name == '3dpw' or dataset_name == 'mpi-inf-3dhp':
        eval_pose = True
    elif dataset_name in ['up-3d', 'surreal']:
        eval_shape = True
    elif dataset_name == 'lsp':
        eval_masks = True
        eval_parts = True
        annot_path = cfg.DATASET_FOLDERS['upi-s1h']

    if eval_parts or eval_masks:
        from utils.part_utils import PartRenderer
        renderer = PartRenderer()

    # Iterate over the entire dataset
    for step, batch in enumerate(tqdm(data_loader, desc='Eval', total=len(data_loader))):
        # Get ground truth annotations from the batch
        gt_pose = batch['pose'].to(device)
        gt_betas = batch['betas'].to(device)
        gt_vertices = smpl(gt_pose, gt_betas)
        images = batch['img'].to(device)
        curr_batch_size = images.shape[0]

        # Run inference
        with torch.no_grad():
            out_dict = model(images)

        pred_vertices = out_dict['pred_vertices']
        camera = out_dict['camera']

        # 3D pose evaluation
        if eval_pose:
            # Get 14 ground truth joints
            if 'h36m' in dataset_name or 'mpi-inf' in dataset_name:
                gt_keypoints_3d = batch['pose_3d'].cuda()
                gt_keypoints_3d = gt_keypoints_3d[:, joint_mapper, :-1]
                gt_pelvis = (gt_keypoints_3d[:, [2]] + gt_keypoints_3d[:, [3]]) / 2
                gt_keypoints_3d = gt_keypoints_3d - gt_pelvis
            else:
                gender = batch['gender'].to(device)
                gt_vertices = smpl_male(gt_pose, gt_betas)
                gt_vertices_female = smpl_female(gt_pose, gt_betas)
                gt_vertices[gender == 1, :, :] = gt_vertices_female[gender == 1, :, :]
                gt_keypoints_3d = smpl.get_train_joints(gt_vertices)[:, joint_mapper]
                # gt_keypoints_3d = smpl.get_lsp_joints(gt_vertices)  # joints regressor used in CMR
                gt_pelvis = (gt_keypoints_3d[:, [2]] + gt_keypoints_3d[:, [3]]) / 2
                gt_keypoints_3d = gt_keypoints_3d - gt_pelvis

            # Get 14 predicted joints from the non-parametric mesh
            pred_keypoints_3d = smpl.get_train_joints(pred_vertices)[:, joint_mapper]
            # pred_keypoints_3d = smpl.get_lsp_joints(pred_vertices)  # joints regressor used in CMR
            pred_pelvis = (pred_keypoints_3d[:, [2]] + pred_keypoints_3d[:, [3]]) / 2
            pred_keypoints_3d = pred_keypoints_3d - pred_pelvis

            # Absolute error (MPJPE)
            error = torch.sqrt(((pred_keypoints_3d - gt_keypoints_3d) ** 2).sum(dim=-1)).mean(dim=-1).cpu().numpy()
            mpjpe[step * batch_size:step * batch_size + curr_batch_size] = error

            # Reconstruction error (Procrustes-aligned MPJPE)
            r_error = reconstruction_error(pred_keypoints_3d.cpu().numpy(),
                                           gt_keypoints_3d.cpu().numpy(),
                                           reduction=None)
            mpjpe_pa[step * batch_size:step * batch_size + curr_batch_size] = r_error

        # Shape evaluation (mean per-vertex error)
        if eval_shape:
            if dataset_name == 'surreal':
                gender = batch['gender'].to(device)
                gt_vertices = smpl_male(gt_pose, gt_betas)
                gt_vertices_female = smpl_female(gt_pose, gt_betas)
                gt_vertices[gender == 1, :, :] = gt_vertices_female[gender == 1, :, :]
            gt_pelvis_mesh = smpl.get_eval_joints(gt_vertices)
            pred_pelvis_mesh = smpl.get_eval_joints(pred_vertices)
            gt_pelvis_mesh = (gt_pelvis_mesh[:, [2]] + gt_pelvis_mesh[:, [3]]) / 2
            pred_pelvis_mesh = (pred_pelvis_mesh[:, [2]] + pred_pelvis_mesh[:, [3]]) / 2
            # se = torch.sqrt(((pred_vertices - gt_vertices) ** 2).sum(dim=-1)).mean(dim=-1).cpu().numpy()
            se = torch.sqrt(((pred_vertices - pred_pelvis_mesh - gt_vertices + gt_pelvis_mesh) ** 2).sum(dim=-1)).mean(dim=-1).cpu().numpy()
            shape_err[step * batch_size:step * batch_size + curr_batch_size] = se

        # If mask or part evaluation, render the mask and part images
        if eval_masks or eval_parts:
            mask, parts = renderer(pred_vertices, camera)

        # Mask evaluation (for LSP)
        if eval_masks:
            center = batch['center'].cpu().numpy()
            scale = batch['scale'].cpu().numpy()
            # Dimensions of original image
            orig_shape = batch['orig_shape'].cpu().numpy()
            for i in range(curr_batch_size):
                # After rendering, convert image back to original resolution
                pred_mask = uncrop(mask[i].cpu().numpy(), center[i], scale[i], orig_shape[i]) > 0
                # Load gt mask
                gt_mask = cv2.imread(os.path.join(annot_path, batch['maskname'][i]), 0) > 0
                # Evaluation consistent with the original UP-3D code
                accuracy += (gt_mask == pred_mask).sum()
                pixel_count += np.prod(np.array(gt_mask.shape))
                for c in range(2):
                    cgt = gt_mask == c
                    cpred = pred_mask == c
                    tp[c] += (cgt & cpred).sum()
                    fp[c] += (~cgt & cpred).sum()
                    fn[c] += (cgt & ~cpred).sum()
                f1 = 2 * tp / (2 * tp + fp + fn)

        # Part evaluation (for LSP)
        if eval_parts:
            center = batch['center'].cpu().numpy()
            scale = batch['scale'].cpu().numpy()
            orig_shape = batch['orig_shape'].cpu().numpy()
            for i in range(curr_batch_size):
                pred_parts = uncrop(parts[i].cpu().numpy().astype(np.uint8), center[i], scale[i], orig_shape[i])
                # Load gt part segmentation
                gt_parts = cv2.imread(os.path.join(annot_path, batch['partname'][i]), 0)
                # Evaluation consistent with the original UP-3D code
                # 6 parts + background
                for c in range(7):
                    cgt = gt_parts == c
                    cpred = pred_parts == c
                    cpred[gt_parts == 255] = 0
                    parts_tp[c] += (cgt & cpred).sum()
                    parts_fp[c] += (~cgt & cpred).sum()
                    parts_fn[c] += (cgt & ~cpred).sum()
                gt_parts[gt_parts == 255] = 0
                pred_parts[pred_parts == 255] = 0
                parts_f1 = 2 * parts_tp / (2 * parts_tp + parts_fp + parts_fn)
                parts_accuracy += (gt_parts == pred_parts).sum()
                parts_pixel_count += np.prod(np.array(gt_parts.shape))

        # Print intermediate results during evaluation
        if step % log_freq == log_freq - 1:
            if eval_pose:
                print('MPJPE: ' + str(1000 * mpjpe[:step * batch_size].mean()))
                print('MPJPE-PA: ' + str(1000 * mpjpe_pa[:step * batch_size].mean()))
                print()
            if eval_shape:
                print('Shape Error: ' + str(1000 * shape_err[:step * batch_size].mean()))
                print()
            if eval_masks:
                print('Accuracy: ', accuracy / pixel_count)
                print('F1: ', f1.mean())
                print()
            if eval_parts:
                print('Parts Accuracy: ', parts_accuracy / parts_pixel_count)
                print('Parts F1 (BG): ', parts_f1[[0, 1, 2, 3, 4, 5, 6]].mean())
                print()

    # Print final results after evaluation
    print('*** Final Results ***')
    print()
    if eval_pose:
        print('MPJPE: ' + str(1000 * mpjpe.mean()))
        print('MPJPE-PA: ' + str(1000 * mpjpe_pa.mean()))
        print()
    if eval_shape:
        print('Shape Error: ' + str(1000 * shape_err.mean()))
        print()
    if eval_masks:
        print('Accuracy: ', accuracy / pixel_count)
        print('F1: ', f1.mean())
        print()
    if eval_parts:
        print('Parts Accuracy: ', parts_accuracy / parts_pixel_count)
        print('Parts F1 (BG): ', parts_f1[[0, 1, 2, 3, 4, 5, 6]].mean())
        print()

    # Save final results to a .txt file
    txt_name = join(opt.save_root, dataset_name + '.txt')
    with open(txt_name, 'w') as f:
        f.write('*** Final Results ***\n')
        if eval_pose:
            f.write('MPJPE: ' + str(1000 * mpjpe.mean()) + '\n')
            f.write('MPJPE-PA: ' + str(1000 * mpjpe_pa.mean()) + '\n')
        if eval_shape:
            f.write('Shape Error: ' + str(1000 * shape_err.mean()) + '\n')
        if eval_masks:
            f.write('Accuracy: ' + str(accuracy / pixel_count) + '\n')
            f.write('F1: ' + str(f1.mean()) + '\n')
        if eval_parts:
            f.write('Parts Accuracy: ' + str(parts_accuracy / parts_pixel_count) + '\n')
            f.write('Parts F1 (BG): ' + str(parts_f1[[0, 1, 2, 3, 4, 5, 6]].mean()) + '\n')
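
# The `reconstruction_error` called throughout these loops comes from each repo's
# utilities and is not shown here. As a reference, below is a minimal sketch of
# what such a Procrustes-aligned MPJPE (PA-MPJPE) typically computes, assuming
# (N, J, 3) arrays in meters. It is an illustration, not the repos' exact code
# (e.g. the PyMAF-style variant further down also returns the aligned joints).
def reconstruction_error_sketch(pred, gt, reduction='mean'):
    """PA-MPJPE: align each prediction to its GT with a similarity transform."""
    errors = []
    for p, g in zip(pred, gt):
        mu_p, mu_g = p.mean(axis=0), g.mean(axis=0)
        p0, g0 = p - mu_p, g - mu_g
        # Optimal rotation via SVD of the 3x3 covariance (orthogonal Procrustes)
        U, S, Vt = np.linalg.svd(p0.T @ g0)
        R = Vt.T @ U.T
        if np.linalg.det(R) < 0:  # avoid reflections
            Vt[-1] *= -1
            S[-1] *= -1
            R = Vt.T @ U.T
        scale = S.sum() / (p0 ** 2).sum()
        aligned = scale * p0 @ R.T + mu_g
        errors.append(np.sqrt(((aligned - g) ** 2).sum(axis=-1)).mean())
    errors = np.array(errors)
    return errors.mean() if reduction == 'mean' else errors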
# Variant 2: evaluation loop for a model that returns both a non-parametric
# mesh and an SMPL-parametric mesh.
def run_evaluation(model, dataset_name, dataset, mesh,
                   batch_size=32, img_res=224, num_workers=32,
                   shuffle=False, log_freq=50):
    """Run evaluation on the datasets and metrics we report in the paper."""
    renderer = PartRenderer()

    # Create SMPL model
    smpl = SMPL().cuda()

    # Regressor for H36M joints
    J_regressor = torch.from_numpy(np.load(cfg.JOINT_REGRESSOR_H36M)).float()

    # Create dataloader for the dataset
    data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle, num_workers=num_workers)

    # Transfer model to the GPU
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    model.to(device)
    model.eval()

    # Pose metrics
    # MPJPE and reconstruction error for the non-parametric and parametric shapes
    mpjpe = np.zeros(len(dataset))
    recon_err = np.zeros(len(dataset))
    mpjpe_smpl = np.zeros(len(dataset))
    recon_err_smpl = np.zeros(len(dataset))

    # Shape metrics
    # Mean per-vertex error
    shape_err = np.zeros(len(dataset))
    shape_err_smpl = np.zeros(len(dataset))

    # Mask and part metrics
    # Accuracy
    accuracy = 0.
    parts_accuracy = 0.
    # True positives, false positives and false negatives
    tp = np.zeros((2, 1))
    fp = np.zeros((2, 1))
    fn = np.zeros((2, 1))
    parts_tp = np.zeros((7, 1))
    parts_fp = np.zeros((7, 1))
    parts_fn = np.zeros((7, 1))
    # Pixel count accumulators
    pixel_count = 0
    parts_pixel_count = 0

    eval_pose = False
    eval_shape = False
    eval_masks = False
    eval_parts = False

    # Choose appropriate evaluation for each dataset
    if dataset_name == 'h36m-p1' or dataset_name == 'h36m-p2':
        eval_pose = True
    elif dataset_name == 'up-3d':
        eval_shape = True
    elif dataset_name == 'lsp':
        eval_masks = True
        eval_parts = True
        annot_path = cfg.DATASET_FOLDERS['upi-s1h']

    # Iterate over the entire dataset
    for step, batch in enumerate(tqdm(data_loader, desc='Eval', total=len(data_loader))):
        # Get ground truth annotations from the batch
        gt_pose = batch['pose'].to(device)
        gt_betas = batch['betas'].to(device)
        gt_vertices = smpl(gt_pose, gt_betas)
        images = batch['img'].to(device)
        curr_batch_size = images.shape[0]

        # Run inference
        with torch.no_grad():
            pred_vertices, pred_vertices_smpl, camera, pred_rotmat, pred_betas = model(images)

        # 3D pose evaluation
        if eval_pose:
            # Regressor broadcasting
            J_regressor_batch = J_regressor[None, :].expand(pred_vertices.shape[0], -1, -1).to(device)

            # Get 14 ground truth joints
            gt_keypoints_3d = batch['pose_3d'].cuda()
            gt_keypoints_3d = gt_keypoints_3d[:, cfg.J24_TO_J14, :-1]

            # Get 14 predicted joints from the non-parametric mesh
            pred_keypoints_3d = torch.matmul(J_regressor_batch, pred_vertices)
            pred_pelvis = pred_keypoints_3d[:, [0], :].clone()
            pred_keypoints_3d = pred_keypoints_3d[:, cfg.H36M_TO_J14, :]
            pred_keypoints_3d = pred_keypoints_3d - pred_pelvis

            # Get 14 predicted joints from the SMPL mesh
            pred_keypoints_3d_smpl = torch.matmul(J_regressor_batch, pred_vertices_smpl)
            pred_pelvis_smpl = pred_keypoints_3d_smpl[:, [0], :].clone()
            pred_keypoints_3d_smpl = pred_keypoints_3d_smpl[:, cfg.H36M_TO_J14, :]
            pred_keypoints_3d_smpl = pred_keypoints_3d_smpl - pred_pelvis_smpl

            # Compute error metrics
            # Absolute error (MPJPE)
            error = torch.sqrt(((pred_keypoints_3d - gt_keypoints_3d) ** 2).sum(dim=-1)).mean(dim=-1).cpu().numpy()
            error_smpl = torch.sqrt(((pred_keypoints_3d_smpl - gt_keypoints_3d) ** 2).sum(dim=-1)).mean(dim=-1).cpu().numpy()
            mpjpe[step * batch_size:step * batch_size + curr_batch_size] = error
            mpjpe_smpl[step * batch_size:step * batch_size + curr_batch_size] = error_smpl

            # Reconstruction error (Procrustes-aligned MPJPE)
            r_error = reconstruction_error(pred_keypoints_3d.cpu().numpy(),
                                           gt_keypoints_3d.cpu().numpy(), reduction=None)
            r_error_smpl = reconstruction_error(pred_keypoints_3d_smpl.cpu().numpy(),
                                                gt_keypoints_3d.cpu().numpy(), reduction=None)
            recon_err[step * batch_size:step * batch_size + curr_batch_size] = r_error
            recon_err_smpl[step * batch_size:step * batch_size + curr_batch_size] = r_error_smpl

        # Shape evaluation (mean per-vertex error)
        if eval_shape:
            se = torch.sqrt(((pred_vertices - gt_vertices) ** 2).sum(dim=-1)).mean(dim=-1).cpu().numpy()
            se_smpl = torch.sqrt(((pred_vertices_smpl - gt_vertices) ** 2).sum(dim=-1)).mean(dim=-1).cpu().numpy()
            shape_err[step * batch_size:step * batch_size + curr_batch_size] = se
            shape_err_smpl[step * batch_size:step * batch_size + curr_batch_size] = se_smpl

        # If mask or part evaluation, render the mask and part images
        if eval_masks or eval_parts:
            mask, parts = renderer(pred_vertices, camera)

        # Mask evaluation (for LSP)
        if eval_masks:
            center = batch['center'].cpu().numpy()
            scale = batch['scale'].cpu().numpy()
            # Dimensions of original image
            orig_shape = batch['orig_shape'].cpu().numpy()
            for i in range(curr_batch_size):
                # After rendering, convert image back to original resolution
                pred_mask = uncrop(mask[i].cpu().numpy(), center[i], scale[i], orig_shape[i]) > 0
                # Load gt mask
                gt_mask = cv2.imread(os.path.join(annot_path, batch['maskname'][i]), 0) > 0
                # Evaluation consistent with the original UP-3D code
                accuracy += (gt_mask == pred_mask).sum()
                pixel_count += np.prod(np.array(gt_mask.shape))
                for c in range(2):
                    cgt = gt_mask == c
                    cpred = pred_mask == c
                    tp[c] += (cgt & cpred).sum()
                    fp[c] += (~cgt & cpred).sum()
                    fn[c] += (cgt & ~cpred).sum()
                f1 = 2 * tp / (2 * tp + fp + fn)

        # Part evaluation (for LSP)
        if eval_parts:
            center = batch['center'].cpu().numpy()
            scale = batch['scale'].cpu().numpy()
            orig_shape = batch['orig_shape'].cpu().numpy()
            for i in range(curr_batch_size):
                pred_parts = uncrop(parts[i].cpu().numpy().astype(np.uint8), center[i], scale[i], orig_shape[i])
                # Load gt part segmentation
                gt_parts = cv2.imread(os.path.join(annot_path, batch['partname'][i]), 0)
                # Evaluation consistent with the original UP-3D code
                # 6 parts + background
                for c in range(7):
                    cgt = gt_parts == c
                    cpred = pred_parts == c
                    cpred[gt_parts == 255] = 0
                    parts_tp[c] += (cgt & cpred).sum()
                    parts_fp[c] += (~cgt & cpred).sum()
                    parts_fn[c] += (cgt & ~cpred).sum()
                gt_parts[gt_parts == 255] = 0
                pred_parts[pred_parts == 255] = 0
                parts_f1 = 2 * parts_tp / (2 * parts_tp + parts_fp + parts_fn)
                parts_accuracy += (gt_parts == pred_parts).sum()
                parts_pixel_count += np.prod(np.array(gt_parts.shape))

        # Print intermediate results during evaluation
        if step % log_freq == log_freq - 1:
            if eval_pose:
                print('MPJPE (NonParam): ' + str(1000 * mpjpe[:step * batch_size].mean()))
                print('Reconstruction Error (NonParam): ' + str(1000 * recon_err[:step * batch_size].mean()))
                print('MPJPE (Param): ' + str(1000 * mpjpe_smpl[:step * batch_size].mean()))
                print('Reconstruction Error (Param): ' + str(1000 * recon_err_smpl[:step * batch_size].mean()))
                print()
            if eval_shape:
                print('Shape Error (NonParam): ' + str(1000 * shape_err[:step * batch_size].mean()))
                print('Shape Error (Param): ' + str(1000 * shape_err_smpl[:step * batch_size].mean()))
                print()
            if eval_masks:
                print('Accuracy: ', accuracy / pixel_count)
                print('F1: ', f1.mean())
                print()
            if eval_parts:
                print('Parts Accuracy: ', parts_accuracy / parts_pixel_count)
                print('Parts F1 (BG): ', parts_f1[[0, 1, 2, 3, 4, 5, 6]].mean())
                print()

    # Print final results after evaluation
    print('*** Final Results ***')
    print()
    if eval_pose:
        print('MPJPE (NonParam): ' + str(1000 * mpjpe.mean()))
        print('Reconstruction Error (NonParam): ' + str(1000 * recon_err.mean()))
        print('MPJPE (Param): ' + str(1000 * mpjpe_smpl.mean()))
        print('Reconstruction Error (Param): ' + str(1000 * recon_err_smpl.mean()))
        print()
    if eval_shape:
        print('Shape Error (NonParam): ' + str(1000 * shape_err.mean()))
        print('Shape Error (Param): ' + str(1000 * shape_err_smpl.mean()))
        print()
    if eval_masks:
        print('Accuracy: ', accuracy / pixel_count)
        print('F1: ', f1.mean())
        print()
    if eval_parts:
        print('Parts Accuracy: ', parts_accuracy / parts_pixel_count)
        print('Parts F1 (BG): ', parts_f1[[0, 1, 2, 3, 4, 5, 6]].mean())
        print()
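
# A toy illustration of the "regressor broadcasting" used above: a (17, 6890)
# H36M joint regressor is expanded across the batch and multiplied against the
# SMPL vertices to obtain 3D joints. The shapes are the standard SMPL/H36M
# ones; the random tensors below are placeholders, not real data.
def _joint_regression_demo():
    J_regressor = torch.rand(17, 6890)                  # per-joint vertex weights
    vertices = torch.rand(32, 6890, 3)                  # batch of SMPL meshes
    J_batch = J_regressor[None, :].expand(32, -1, -1)   # (32, 17, 6890), no copy
    joints = torch.matmul(J_batch, vertices)            # -> (32, 17, 3)
    return joints.shape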
# Variant 3: evaluation loop for an SMPL-parameter regressor (rotation
# matrices, betas, camera); can dump its predictions to an .npz file.
def run_evaluation(model, dataset_name, dataset, result_file,
                   batch_size=32, img_res=224, num_workers=32,
                   shuffle=False, log_freq=50):
    """Run evaluation on the datasets and metrics we report in the paper."""
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

    # Transfer model to the GPU
    model.to(device)

    # Load SMPL models
    smpl_neutral = SMPL(config.SMPL_MODEL_DIR, create_transl=False).to(device)
    smpl_male = SMPL(config.SMPL_MODEL_DIR, gender='male', create_transl=False).to(device)
    smpl_female = SMPL(config.SMPL_MODEL_DIR, gender='female', create_transl=False).to(device)

    renderer = PartRenderer()

    # Regressor for H36M joints
    J_regressor = torch.from_numpy(np.load(config.JOINT_REGRESSOR_H36M)).float()

    save_results = result_file is not None
    # Disable shuffling if you want to save the results
    if save_results:
        shuffle = False

    # Create dataloader for the dataset
    data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle, num_workers=num_workers)

    # Pose metrics
    # MPJPE and reconstruction error for the non-parametric and parametric shapes
    mpjpe = np.zeros(len(dataset))
    recon_err = np.zeros(len(dataset))
    mpjpe_smpl = np.zeros(len(dataset))
    recon_err_smpl = np.zeros(len(dataset))

    # Shape metrics
    # Mean per-vertex error
    shape_err = np.zeros(len(dataset))
    shape_err_smpl = np.zeros(len(dataset))

    # Mask and part metrics
    # Accuracy
    accuracy = 0.
    parts_accuracy = 0.
    # True positives, false positives and false negatives
    tp = np.zeros((2, 1))
    fp = np.zeros((2, 1))
    fn = np.zeros((2, 1))
    parts_tp = np.zeros((7, 1))
    parts_fp = np.zeros((7, 1))
    parts_fn = np.zeros((7, 1))
    # Pixel count accumulators
    pixel_count = 0
    parts_pixel_count = 0

    # Store SMPL parameters
    smpl_pose = np.zeros((len(dataset), 72))
    smpl_betas = np.zeros((len(dataset), 10))
    smpl_camera = np.zeros((len(dataset), 3))
    pred_joints = np.zeros((len(dataset), 17, 3))

    eval_pose = False
    eval_masks = False
    eval_parts = False

    # Choose appropriate evaluation for each dataset
    if dataset_name in ['h36m-p1', 'h36m-p2', '3dpw', 'mpi-inf-3dhp']:
        eval_pose = True
    elif dataset_name == 'lsp':
        eval_masks = True
        eval_parts = True
        annot_path = config.DATASET_FOLDERS['upi-s1h']

    joint_mapper_h36m = constants.H36M_TO_J17 if dataset_name == 'mpi-inf-3dhp' else constants.H36M_TO_J14
    joint_mapper_gt = constants.J24_TO_J17 if dataset_name == 'mpi-inf-3dhp' else constants.J24_TO_J14

    # Iterate over the entire dataset
    for step, batch in enumerate(tqdm(data_loader, desc='Eval', total=len(data_loader))):
        # Get ground truth annotations from the batch
        gt_pose = batch['pose'].to(device)
        gt_betas = batch['betas'].to(device)
        gt_vertices = smpl_neutral(betas=gt_betas, body_pose=gt_pose[:, 3:], global_orient=gt_pose[:, :3]).vertices
        images = batch['img'].to(device)
        gender = batch['gender'].to(device)
        curr_batch_size = images.shape[0]

        with torch.no_grad():
            pred_rotmat, pred_betas, pred_camera = model(images)
            pred_output = smpl_neutral(betas=pred_betas,
                                       body_pose=pred_rotmat[:, 1:],
                                       global_orient=pred_rotmat[:, 0].unsqueeze(1),
                                       pose2rot=False)
            pred_vertices = pred_output.vertices

        if save_results:
            # torchgeometry expects (N, 3, 4) matrices, so pad a dummy column
            rot_pad = torch.tensor([0, 0, 1], dtype=torch.float32, device=device).view(1, 3, 1)
            rotmat = torch.cat((pred_rotmat.view(-1, 3, 3), rot_pad.expand(curr_batch_size * 24, -1, -1)), dim=-1)
            pred_pose = tgm.rotation_matrix_to_angle_axis(rotmat).contiguous().view(-1, 72)
            smpl_pose[step * batch_size:step * batch_size + curr_batch_size, :] = pred_pose.cpu().numpy()
            smpl_betas[step * batch_size:step * batch_size + curr_batch_size, :] = pred_betas.cpu().numpy()
            smpl_camera[step * batch_size:step * batch_size + curr_batch_size, :] = pred_camera.cpu().numpy()

        # 3D pose evaluation
        if eval_pose:
            # Regressor broadcasting
            J_regressor_batch = J_regressor[None, :].expand(pred_vertices.shape[0], -1, -1).to(device)

            # Get 14 ground truth joints
            if 'h36m' in dataset_name or 'mpi-inf' in dataset_name:
                gt_keypoints_3d = batch['pose_3d'].cuda()
                gt_keypoints_3d = gt_keypoints_3d[:, joint_mapper_gt, :-1]
            # For 3DPW get the 14 common joints from the rendered shape
            else:
                gt_vertices = smpl_male(global_orient=gt_pose[:, :3], body_pose=gt_pose[:, 3:], betas=gt_betas).vertices
                gt_vertices_female = smpl_female(global_orient=gt_pose[:, :3], body_pose=gt_pose[:, 3:], betas=gt_betas).vertices
                gt_vertices[gender == 1, :, :] = gt_vertices_female[gender == 1, :, :]
                gt_keypoints_3d = torch.matmul(J_regressor_batch, gt_vertices)
                gt_pelvis = gt_keypoints_3d[:, [0], :].clone()
                gt_keypoints_3d = gt_keypoints_3d[:, joint_mapper_h36m, :]
                gt_keypoints_3d = gt_keypoints_3d - gt_pelvis

            # Get 14 predicted joints from the mesh
            pred_keypoints_3d = torch.matmul(J_regressor_batch, pred_vertices)
            if save_results:
                pred_joints[step * batch_size:step * batch_size + curr_batch_size, :, :] = pred_keypoints_3d.cpu().numpy()
            pred_pelvis = pred_keypoints_3d[:, [0], :].clone()
            pred_keypoints_3d = pred_keypoints_3d[:, joint_mapper_h36m, :]
            pred_keypoints_3d = pred_keypoints_3d - pred_pelvis

            # Absolute error (MPJPE)
            error = torch.sqrt(((pred_keypoints_3d - gt_keypoints_3d) ** 2).sum(dim=-1)).mean(dim=-1).cpu().numpy()
            mpjpe[step * batch_size:step * batch_size + curr_batch_size] = error

            # Reconstruction error (Procrustes-aligned MPJPE)
            r_error = reconstruction_error(pred_keypoints_3d.cpu().numpy(),
                                           gt_keypoints_3d.cpu().numpy(), reduction=None)
            recon_err[step * batch_size:step * batch_size + curr_batch_size] = r_error

        # If mask or part evaluation, render the mask and part images
        if eval_masks or eval_parts:
            mask, parts = renderer(pred_vertices, pred_camera)

        # Mask evaluation (for LSP)
        if eval_masks:
            center = batch['center'].cpu().numpy()
            scale = batch['scale'].cpu().numpy()
            # Dimensions of original image
            orig_shape = batch['orig_shape'].cpu().numpy()
            for i in range(curr_batch_size):
                # After rendering, convert image back to original resolution
                pred_mask = uncrop(mask[i].cpu().numpy(), center[i], scale[i], orig_shape[i]) > 0
                # Load gt mask
                gt_mask = cv2.imread(os.path.join(annot_path, batch['maskname'][i]), 0) > 0
                # Evaluation consistent with the original UP-3D code
                accuracy += (gt_mask == pred_mask).sum()
                pixel_count += np.prod(np.array(gt_mask.shape))
                for c in range(2):
                    cgt = gt_mask == c
                    cpred = pred_mask == c
                    tp[c] += (cgt & cpred).sum()
                    fp[c] += (~cgt & cpred).sum()
                    fn[c] += (cgt & ~cpred).sum()
                f1 = 2 * tp / (2 * tp + fp + fn)

        # Part evaluation (for LSP)
        if eval_parts:
            center = batch['center'].cpu().numpy()
            scale = batch['scale'].cpu().numpy()
            orig_shape = batch['orig_shape'].cpu().numpy()
            for i in range(curr_batch_size):
                pred_parts = uncrop(parts[i].cpu().numpy().astype(np.uint8), center[i], scale[i], orig_shape[i])
                # Load gt part segmentation
                gt_parts = cv2.imread(os.path.join(annot_path, batch['partname'][i]), 0)
                # Evaluation consistent with the original UP-3D code
                # 6 parts + background
                for c in range(7):
                    cgt = gt_parts == c
                    cpred = pred_parts == c
                    cpred[gt_parts == 255] = 0
                    parts_tp[c] += (cgt & cpred).sum()
                    parts_fp[c] += (~cgt & cpred).sum()
                    parts_fn[c] += (cgt & ~cpred).sum()
                gt_parts[gt_parts == 255] = 0
                pred_parts[pred_parts == 255] = 0
                parts_f1 = 2 * parts_tp / (2 * parts_tp + parts_fp + parts_fn)
                parts_accuracy += (gt_parts == pred_parts).sum()
                parts_pixel_count += np.prod(np.array(gt_parts.shape))

        # Print intermediate results during evaluation
        if step % log_freq == log_freq - 1:
            if eval_pose:
                print('MPJPE: ' + str(1000 * mpjpe[:step * batch_size].mean()))
                print('Reconstruction Error: ' + str(1000 * recon_err[:step * batch_size].mean()))
                print()
            if eval_masks:
                print('Accuracy: ', accuracy / pixel_count)
                print('F1: ', f1.mean())
                print()
            if eval_parts:
                print('Parts Accuracy: ', parts_accuracy / parts_pixel_count)
                print('Parts F1 (BG): ', parts_f1[[0, 1, 2, 3, 4, 5, 6]].mean())
                print()

    # Save reconstructions to a file for further processing
    if save_results:
        np.savez(result_file, pred_joints=pred_joints, pose=smpl_pose, betas=smpl_betas, camera=smpl_camera)

    # Print final results after evaluation
    print('*** Final Results ***')
    print()
    if eval_pose:
        print('MPJPE: ' + str(1000 * mpjpe.mean()))
        print('Reconstruction Error: ' + str(1000 * recon_err.mean()))
        print()
    if eval_masks:
        print('Accuracy: ', accuracy / pixel_count)
        print('F1: ', f1.mean())
        print()
    if eval_parts:
        print('Parts Accuracy: ', parts_accuracy / parts_pixel_count)
        print('Parts F1 (BG): ', parts_f1[[0, 1, 2, 3, 4, 5, 6]].mean())
        print()
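
# The variant above can dump its predictions via np.savez. A small sketch of
# reading such a file back; the key names match the savez call above, while the
# file path is hypothetical.
def _load_saved_results(result_file='eval_results.npz'):
    res = np.load(result_file)
    # pred_joints: (len(dataset), 17, 3); pose: (len(dataset), 72)
    # betas: (len(dataset), 10); camera: (len(dataset), 3)
    return {k: res[k] for k in ('pred_joints', 'pose', 'betas', 'camera')}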
# Variant 4: like Variant 3, but accumulates MPJPE / reconstruction error per
# sequence and can dump all inputs and outputs to a pickle file.

# Module-level cache for the SMPL models used by the variant below
g_smpl_neutral = None
g_smpl_male = None
g_smpl_female = None


def run_evaluation(model, dataset_name, dataset, result_file,
                   batch_size=32, img_res=224, num_workers=32,
                   shuffle=False, log_freq=50, bVerbose=True):
    """Run evaluation on the datasets and metrics we report in the paper."""
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

    # # Transfer model to the GPU
    # model.to(device)

    # Load SMPL models (cached in globals so repeated evaluations reuse them)
    global g_smpl_neutral, g_smpl_male, g_smpl_female
    if g_smpl_neutral is None:
        g_smpl_neutral = SMPL(config.SMPL_MODEL_DIR, create_transl=False).to(device)
        g_smpl_male = SMPL(config.SMPL_MODEL_DIR, gender='male', create_transl=False).to(device)
        g_smpl_female = SMPL(config.SMPL_MODEL_DIR, gender='female', create_transl=False).to(device)
    smpl_neutral = g_smpl_neutral
    smpl_male = g_smpl_male
    smpl_female = g_smpl_female

    # renderer = PartRenderer()

    # Regressor for H36M joints
    J_regressor = torch.from_numpy(np.load(config.JOINT_REGRESSOR_H36M)).float()

    save_results = result_file is not None
    # Disable shuffling if you want to save the results
    if save_results:
        shuffle = False

    # Create dataloader for the dataset
    data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle, num_workers=num_workers)

    # Pose metrics
    # MPJPE and reconstruction error, also accumulated per sequence
    quant_mpjpe = {}       # per-sequence MPJPE lists
    quant_recon_err = {}   # per-sequence reconstruction-error lists
    mpjpe = np.zeros(len(dataset))
    recon_err = np.zeros(len(dataset))
    mpjpe_smpl = np.zeros(len(dataset))
    recon_err_smpl = np.zeros(len(dataset))

    # Shape metrics
    # Mean per-vertex error
    shape_err = np.zeros(len(dataset))
    shape_err_smpl = np.zeros(len(dataset))

    # Mask and part metrics
    # Accuracy
    accuracy = 0.
    parts_accuracy = 0.
    # True positives, false positives and false negatives
    tp = np.zeros((2, 1))
    fp = np.zeros((2, 1))
    fn = np.zeros((2, 1))
    parts_tp = np.zeros((7, 1))
    parts_fp = np.zeros((7, 1))
    parts_fn = np.zeros((7, 1))
    # Pixel count accumulators
    pixel_count = 0
    parts_pixel_count = 0

    # Store SMPL parameters and per-sample outputs
    output_pred_pose = np.zeros((len(dataset), 72))
    output_pred_betas = np.zeros((len(dataset), 10))
    output_pred_camera = np.zeros((len(dataset), 3))
    output_pred_joints = np.zeros((len(dataset), 14, 3))
    output_gt_pose = np.zeros((len(dataset), 72))
    output_gt_betas = np.zeros((len(dataset), 10))
    output_gt_joints = np.zeros((len(dataset), 14, 3))
    output_error_MPJPE = np.zeros((len(dataset)))
    output_error_recon = np.zeros((len(dataset)))
    output_imgNames = []
    output_cropScale = np.zeros((len(dataset)))
    output_cropCenter = np.zeros((len(dataset), 2))
    outputStartPointer = 0

    eval_pose = False
    eval_masks = False
    eval_parts = False

    # Choose appropriate evaluation for each dataset
    if dataset_name in ['h36m-p1', 'h36m-p2', '3dpw', 'mpi-inf-3dhp']:
        eval_pose = True
    elif dataset_name == 'lsp':
        eval_masks = True
        eval_parts = True
        annot_path = config.DATASET_FOLDERS['upi-s1h']

    joint_mapper_h36m = constants.H36M_TO_J17 if dataset_name == 'mpi-inf-3dhp' else constants.H36M_TO_J14
    joint_mapper_gt = constants.J24_TO_J17 if dataset_name == 'mpi-inf-3dhp' else constants.J24_TO_J14

    # Iterate over the entire dataset
    for step, batch in enumerate(tqdm(data_loader, desc='Eval', total=len(data_loader))):
        # Get ground truth annotations from the batch
        imgName = batch['imgname'][0]
        seqName = os.path.basename(os.path.dirname(imgName))
        gt_pose = batch['pose'].to(device)
        gt_betas = batch['betas'].to(device)
        gt_vertices = smpl_neutral(betas=gt_betas, body_pose=gt_pose[:, 3:], global_orient=gt_pose[:, :3]).vertices
        images = batch['img'].to(device)
        gender = batch['gender'].to(device)
        curr_batch_size = images.shape[0]

        with torch.no_grad():
            pred_rotmat, pred_betas, pred_camera = model(images)
            pred_output = smpl_neutral(betas=pred_betas,
                                       body_pose=pred_rotmat[:, 1:],
                                       global_orient=pred_rotmat[:, 0].unsqueeze(1),
                                       pose2rot=False)
            pred_vertices = pred_output.vertices

        # 3D pose evaluation
        if eval_pose:
            # Regressor broadcasting
            J_regressor_batch = J_regressor[None, :].expand(pred_vertices.shape[0], -1, -1).to(device)

            # Get 14 ground truth joints
            if 'h36m' in dataset_name or 'mpi-inf' in dataset_name:
                gt_keypoints_3d = batch['pose_3d'].cuda()
                gt_keypoints_3d = gt_keypoints_3d[:, joint_mapper_gt, :-1]
            # For 3DPW get the 14 common joints from the rendered shape
            else:
                gt_vertices = smpl_male(global_orient=gt_pose[:, :3], body_pose=gt_pose[:, 3:], betas=gt_betas).vertices
                gt_vertices_female = smpl_female(global_orient=gt_pose[:, :3], body_pose=gt_pose[:, 3:], betas=gt_betas).vertices
                gt_vertices[gender == 1, :, :] = gt_vertices_female[gender == 1, :, :]
                gt_keypoints_3d = torch.matmul(J_regressor_batch, gt_vertices)
                gt_pelvis = gt_keypoints_3d[:, [0], :].clone()
                gt_keypoints_3d = gt_keypoints_3d[:, joint_mapper_h36m, :]
                gt_keypoints_3d = gt_keypoints_3d - gt_pelvis

            if False:  # debug: visualize GT and predicted meshes
                from renderer import viewer2D
                from renderer import glViewer
                import humanModelViewer
                batchNum = gt_pose.shape[0]
                for i in range(batchNum):
                    smpl_face = humanModelViewer.GetSMPLFace()
                    meshes_gt = {'ver': gt_vertices[i].cpu().numpy() * 100, 'f': smpl_face}
                    meshes_pred = {'ver': pred_vertices[i].cpu().numpy() * 100, 'f': smpl_face}
                    glViewer.setMeshData([meshes_gt, meshes_pred], bComputeNormal=True)
                    glViewer.show(5)

            # Get 14 predicted joints from the mesh
            pred_keypoints_3d = torch.matmul(J_regressor_batch, pred_vertices)
            pred_pelvis = pred_keypoints_3d[:, [0], :].clone()
            pred_keypoints_3d = pred_keypoints_3d[:, joint_mapper_h36m, :]
            pred_keypoints_3d = pred_keypoints_3d - pred_pelvis

            if False:  # debug: visualize GT and predicted skeletons
                from renderer import viewer2D
                from renderer import glViewer
                import humanModelViewer
                gt_keypoints_3d_vis = gt_keypoints_3d.cpu().numpy()
                gt_keypoints_3d_vis = np.reshape(gt_keypoints_3d_vis, (gt_keypoints_3d_vis.shape[0], -1))  # (N, 14x3)
                gt_keypoints_3d_vis = np.swapaxes(gt_keypoints_3d_vis, 0, 1) * 100
                pred_keypoints_3d_vis = pred_keypoints_3d.cpu().numpy()
                pred_keypoints_3d_vis = np.reshape(pred_keypoints_3d_vis, (pred_keypoints_3d_vis.shape[0], -1))  # (N, 14x3)
                pred_keypoints_3d_vis = np.swapaxes(pred_keypoints_3d_vis, 0, 1) * 100
                # setSkeleton expects (skelNum, dim, frames)
                glViewer.setSkeleton([gt_keypoints_3d_vis, pred_keypoints_3d_vis], jointType='smplcoco')
                glViewer.show()

            # Absolute error (MPJPE)
            error = torch.sqrt(((pred_keypoints_3d - gt_keypoints_3d) ** 2).sum(dim=-1)).mean(dim=-1).cpu().numpy()
            # mpjpe[step * batch_size:step * batch_size + curr_batch_size] = error

            # Reconstruction error (Procrustes-aligned MPJPE)
            r_error = reconstruction_error(pred_keypoints_3d.cpu().numpy(),
                                           gt_keypoints_3d.cpu().numpy(), reduction=None)
            # recon_err[step * batch_size:step * batch_size + curr_batch_size] = r_error

            # Accumulate errors per sequence
            for ii, p in enumerate(batch['imgname'][:len(r_error)]):
                seqName = os.path.basename(os.path.dirname(p))
                if seqName not in quant_mpjpe.keys():
                    quant_mpjpe[seqName] = []
                    quant_recon_err[seqName] = []
                quant_mpjpe[seqName].append(error[ii])
                quant_recon_err[seqName].append(r_error[ii])

            list_mpjpe = np.hstack([quant_mpjpe[k] for k in quant_mpjpe])
            list_reconError = np.hstack([quant_recon_err[k] for k in quant_recon_err])
            if bVerbose:
                print(">>> {} : MPJPE {:.02f} mm, error: {:.02f} mm | Total MPJPE {:.02f} mm, error {:.02f} mm".format(
                    seqName, np.mean(error) * 1000, np.mean(r_error) * 1000,
                    np.hstack(list_mpjpe).mean() * 1000, np.hstack(list_reconError).mean() * 1000))

        # If mask or part evaluation, render the mask and part images.
        # Note: the renderer is disabled in this variant, so enabling
        # eval_masks/eval_parts would require restoring the call below.
        # if eval_masks or eval_parts:
        #     mask, parts = renderer(pred_vertices, pred_camera)

        # Mask evaluation (for LSP)
        if eval_masks:
            center = batch['center'].cpu().numpy()
            scale = batch['scale'].cpu().numpy()
            # Dimensions of original image
            orig_shape = batch['orig_shape'].cpu().numpy()
            for i in range(curr_batch_size):
                # After rendering, convert image back to original resolution
                pred_mask = uncrop(mask[i].cpu().numpy(), center[i], scale[i], orig_shape[i]) > 0
                # Load gt mask
                gt_mask = cv2.imread(os.path.join(annot_path, batch['maskname'][i]), 0) > 0
                # Evaluation consistent with the original UP-3D code
                accuracy += (gt_mask == pred_mask).sum()
                pixel_count += np.prod(np.array(gt_mask.shape))
                for c in range(2):
                    cgt = gt_mask == c
                    cpred = pred_mask == c
                    tp[c] += (cgt & cpred).sum()
                    fp[c] += (~cgt & cpred).sum()
                    fn[c] += (cgt & ~cpred).sum()
                f1 = 2 * tp / (2 * tp + fp + fn)

        # Part evaluation (for LSP)
        if eval_parts:
            center = batch['center'].cpu().numpy()
            scale = batch['scale'].cpu().numpy()
            orig_shape = batch['orig_shape'].cpu().numpy()
            for i in range(curr_batch_size):
                pred_parts = uncrop(parts[i].cpu().numpy().astype(np.uint8), center[i], scale[i], orig_shape[i])
                # Load gt part segmentation
                gt_parts = cv2.imread(os.path.join(annot_path, batch['partname'][i]), 0)
                # Evaluation consistent with the original UP-3D code
                # 6 parts + background
                for c in range(7):
                    cgt = gt_parts == c
                    cpred = pred_parts == c
                    cpred[gt_parts == 255] = 0
                    parts_tp[c] += (cgt & cpred).sum()
                    parts_fp[c] += (~cgt & cpred).sum()
                    parts_fn[c] += (cgt & ~cpred).sum()
                gt_parts[gt_parts == 255] = 0
                pred_parts[pred_parts == 255] = 0
                parts_f1 = 2 * parts_tp / (2 * parts_tp + parts_fp + parts_fn)
                parts_accuracy += (gt_parts == pred_parts).sum()
                parts_pixel_count += np.prod(np.array(gt_parts.shape))

        # Print intermediate results during evaluation.
        # Note: mpjpe/recon_err stay zero in this variant (the per-batch
        # assignments above are commented out); the per-sequence dicts carry
        # the real numbers.
        if bVerbose:
            if step % log_freq == log_freq - 1:
                if eval_pose:
                    print('MPJPE: ' + str(1000 * mpjpe[:step * batch_size].mean()))
                    print('Reconstruction Error: ' + str(1000 * recon_err[:step * batch_size].mean()))
                    print()
                if eval_masks:
                    print('Accuracy: ', accuracy / pixel_count)
                    print('F1: ', f1.mean())
                    print()
                if eval_parts:
                    print('Parts Accuracy: ', parts_accuracy / parts_pixel_count)
                    print('Parts F1 (BG): ', parts_f1[[0, 1, 2, 3, 4, 5, 6]].mean())
                    print()

        if save_results:
            # torchgeometry expects (N, 3, 4) matrices, so pad a dummy column
            rot_pad = torch.tensor([0, 0, 1], dtype=torch.float32, device=device).view(1, 3, 1)
            rotmat = torch.cat((pred_rotmat.view(-1, 3, 3), rot_pad.expand(curr_batch_size * 24, -1, -1)), dim=-1)
            pred_pose = tgm.rotation_matrix_to_angle_axis(rotmat).contiguous().view(-1, 72)

            output_pred_pose[outputStartPointer:outputStartPointer + curr_batch_size, :] = pred_pose.cpu().numpy()
            output_pred_betas[outputStartPointer:outputStartPointer + curr_batch_size, :] = pred_betas.cpu().numpy()
            output_pred_camera[outputStartPointer:outputStartPointer + curr_batch_size, :] = pred_camera.cpu().numpy()
            output_pred_joints[outputStartPointer:outputStartPointer + curr_batch_size, :] = pred_keypoints_3d.cpu().numpy()
            output_gt_pose[outputStartPointer:outputStartPointer + curr_batch_size, :] = gt_pose.cpu().numpy()
            output_gt_betas[outputStartPointer:outputStartPointer + curr_batch_size, :] = gt_betas.cpu().numpy()
            output_gt_joints[outputStartPointer:outputStartPointer + curr_batch_size, :] = gt_keypoints_3d.cpu().numpy()
            output_error_MPJPE[outputStartPointer:outputStartPointer + curr_batch_size] = error * 1000
            output_error_recon[outputStartPointer:outputStartPointer + curr_batch_size] = r_error * 1000
            output_cropScale[outputStartPointer:outputStartPointer + curr_batch_size] = batch['scale'].cpu().numpy()
            output_cropCenter[outputStartPointer:outputStartPointer + curr_batch_size, :] = batch['center'].cpu().numpy()
            output_imgNames += batch['imgname']
            outputStartPointer += curr_batch_size

            # if outputStartPointer > 100:  # debug
            #     break

    # Collect outputs, truncated to the number of processed samples
    output = {}
    finalLen = len(output_imgNames)
    output['imageNames'] = output_imgNames
    output['pred_pose'] = output_pred_pose[:finalLen]
    output['pred_betas'] = output_pred_betas[:finalLen]
    output['pred_camera'] = output_pred_camera[:finalLen]
    output['pred_joints'] = output_pred_joints[:finalLen]
    output['gt_pose'] = output_gt_pose[:finalLen]
    output['gt_betas'] = output_gt_betas[:finalLen]
    output['gt_joints'] = output_gt_joints[:finalLen]
    output['error_MPJPE'] = output_error_MPJPE[:finalLen]
    output['error_recon'] = output_error_recon[:finalLen]
    output['cropScale'] = output_cropScale[:finalLen]
    output['cropCenter'] = output_cropCenter[:finalLen]

    # Save reconstructions to a file for further processing
    if save_results:
        import pickle
        # np.savez(result_file, pred_joints=pred_joints, pred_pose=pred_pose, pred_betas=pred_betas, pred_camera=pred_camera)
        with open(result_file, 'wb') as f:
            pickle.dump(output, f)
        print("Saved to: {}".format(result_file))

    # Print final results after evaluation
    if bVerbose:
        print('*** Final Results ***')
        print()
    if eval_pose:
        list_mpjpe = np.hstack([quant_mpjpe[k] for k in quant_mpjpe])
        list_reconError = np.hstack([quant_recon_err[k] for k in quant_recon_err])

        output_str = 'SeqNames; '
        for seq in quant_mpjpe:
            output_str += seq + ';'
        output_str += '\n MPJPE; '
        quant_mpjpe_avg_mm = np.hstack(list_mpjpe).mean() * 1000
        output_str += "Avg {:.02f} mm; ".format(quant_mpjpe_avg_mm)
        for seq in quant_mpjpe:
            output_str += '{:.02f}; '.format(1000 * np.hstack(quant_mpjpe[seq]).mean())

        output_str += '\n Recon Error; '
        quant_recon_error_avg_mm = np.hstack(list_reconError).mean() * 1000
        output_str += "Avg {:.02f}mm; ".format(quant_recon_error_avg_mm)
        for seq in quant_recon_err:
            output_str += '{:.02f}; '.format(1000 * np.hstack(quant_recon_err[seq]).mean())

        if bVerbose:
            print(output_str)
        else:
            print(">>> Test on 3DPW: MPJPE: {} | quant_recon_error_avg_mm: {}".format(
                quant_mpjpe_avg_mm, quant_recon_error_avg_mm))

        return quant_mpjpe_avg_mm, quant_recon_error_avg_mm

    if bVerbose:
        if eval_masks:
            print('Accuracy: ', accuracy / pixel_count)
            print('F1: ', f1.mean())
            print()
        if eval_parts:
            print('Parts Accuracy: ', parts_accuracy / parts_pixel_count)
            print('Parts F1 (BG): ', parts_f1[[0, 1, 2, 3, 4, 5, 6]].mean())
            print()

    return -1  # should return something
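
# A toy run of the per-sequence bookkeeping used in the variant above: errors
# are collected into dicts keyed by sequence name, then flattened with
# np.hstack for the dataset-wide average. The sequence names and values below
# are made up for illustration.
def _per_sequence_demo():
    quant_mpjpe = {'downtown_walking_00': [0.051, 0.049],
                   'office_phoneCall_00': [0.062]}
    list_mpjpe = np.hstack([quant_mpjpe[k] for k in quant_mpjpe])
    return list_mpjpe.mean() * 1000  # dataset-wide MPJPE in mm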
# Variant 5: evaluation loop driven by a global `args`; supports an 'hmr' or
# 'pymaf_net' regressor, per-vertex error (PVE), and per-action H36M breakdowns.
def run_evaluation(model, dataset):
    """Run evaluation on the datasets and metrics we report in the paper."""
    shuffle = args.shuffle
    log_freq = args.log_freq
    batch_size = args.batch_size
    dataset_name = args.dataset
    result_file = args.result_file
    num_workers = args.num_workers
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

    # Transfer model to the GPU
    model.to(device)

    # Load SMPL models
    smpl_neutral = SMPL(path_config.SMPL_MODEL_DIR, create_transl=False).to(device)
    smpl_male = SMPL(path_config.SMPL_MODEL_DIR, gender='male', create_transl=False).to(device)
    smpl_female = SMPL(path_config.SMPL_MODEL_DIR, gender='female', create_transl=False).to(device)

    renderer = PartRenderer()

    # Regressor for H36M joints
    J_regressor = torch.from_numpy(np.load(path_config.JOINT_REGRESSOR_H36M)).float()

    save_results = result_file is not None
    # Disable shuffling if you want to save the results
    if save_results:
        shuffle = False

    # Create dataloader for the dataset
    data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle, num_workers=num_workers)

    # Pose metrics
    # MPJPE and reconstruction error for the non-parametric and parametric shapes
    mpjpe = np.zeros(len(dataset))
    recon_err = np.zeros(len(dataset))
    mpjpe_smpl = np.zeros(len(dataset))
    recon_err_smpl = np.zeros(len(dataset))
    pve = np.zeros(len(dataset))

    # Shape metrics
    # Mean per-vertex error
    shape_err = np.zeros(len(dataset))
    shape_err_smpl = np.zeros(len(dataset))

    # Mask and part metrics
    # Accuracy
    accuracy = 0.
    parts_accuracy = 0.
    # True positives, false positives and false negatives
    tp = np.zeros((2, 1))
    fp = np.zeros((2, 1))
    fn = np.zeros((2, 1))
    parts_tp = np.zeros((7, 1))
    parts_fp = np.zeros((7, 1))
    parts_fn = np.zeros((7, 1))
    # Pixel count accumulators
    pixel_count = 0
    parts_pixel_count = 0

    # Store SMPL parameters
    smpl_pose = np.zeros((len(dataset), 72))
    smpl_betas = np.zeros((len(dataset), 10))
    smpl_camera = np.zeros((len(dataset), 3))
    pred_joints = np.zeros((len(dataset), 17, 3))

    action_idxes = {}
    idx_counter = 0
    # Per-action metrics (H36M)
    act_PVE = {}
    act_MPJPE = {}
    act_paMPJPE = {}

    eval_pose = False
    eval_masks = False
    eval_parts = False

    # Choose appropriate evaluation for each dataset
    if dataset_name in ['h36m-p1', 'h36m-p2', 'h36m-p2-mosh', '3dpw', 'mpi-inf-3dhp', '3doh50k']:
        eval_pose = True
    elif dataset_name == 'lsp':
        eval_masks = True
        eval_parts = True
        annot_path = path_config.DATASET_FOLDERS['upi-s1h']

    joint_mapper_h36m = constants.H36M_TO_J17 if dataset_name == 'mpi-inf-3dhp' else constants.H36M_TO_J14
    joint_mapper_gt = constants.J24_TO_J17 if dataset_name == 'mpi-inf-3dhp' else constants.J24_TO_J14

    # Iterate over the entire dataset
    cnt = 0
    results_dict = {'id': [], 'pred': [], 'pred_pa': [], 'gt': []}
    for step, batch in enumerate(tqdm(data_loader, desc='Eval', total=len(data_loader))):
        # Get ground truth annotations from the batch
        gt_pose = batch['pose'].to(device)
        gt_betas = batch['betas'].to(device)
        gt_smpl_out = smpl_neutral(betas=gt_betas, body_pose=gt_pose[:, 3:], global_orient=gt_pose[:, :3])
        gt_vertices_nt = gt_smpl_out.vertices
        images = batch['img'].to(device)
        gender = batch['gender'].to(device)
        curr_batch_size = images.shape[0]

        if save_results:
            s_id = np.array([int(item.split('/')[-3][-1]) for item in batch['imgname']]) * 10000
            s_id += np.array([int(item.split('/')[-1][4:-4]) for item in batch['imgname']])
            results_dict['id'].append(s_id)

        if dataset_name == 'h36m-p2':
            action = [im_path.split('/')[-1].split('.')[0].split('_')[1] for im_path in batch['imgname']]
            for act_i in range(len(action)):
                if action[act_i] in action_idxes:
                    action_idxes[action[act_i]].append(idx_counter + act_i)
                else:
                    action_idxes[action[act_i]] = [idx_counter + act_i]
            idx_counter += len(action)

        with torch.no_grad():
            if args.regressor == 'hmr':
                # shapes: (32, 24, 3, 3), (32, 10), (32, 3)
                pred_rotmat, pred_betas, pred_camera = model(images)
            elif args.regressor == 'pymaf_net':
                preds_dict, _ = model(images)
                pred_rotmat = preds_dict['smpl_out'][-1]['rotmat'].contiguous().view(-1, 24, 3, 3)
                pred_betas = preds_dict['smpl_out'][-1]['theta'][:, 3:13].contiguous()
                pred_camera = preds_dict['smpl_out'][-1]['theta'][:, :3].contiguous()

            pred_output = smpl_neutral(betas=pred_betas,
                                       body_pose=pred_rotmat[:, 1:],
                                       global_orient=pred_rotmat[:, 0].unsqueeze(1),
                                       pose2rot=False)
            pred_vertices = pred_output.vertices

        if save_results:
            # torchgeometry expects (N, 3, 4) matrices, so pad a dummy column
            rot_pad = torch.tensor([0, 0, 1], dtype=torch.float32, device=device).view(1, 3, 1)
            rotmat = torch.cat((pred_rotmat.view(-1, 3, 3), rot_pad.expand(curr_batch_size * 24, -1, -1)), dim=-1)
            pred_pose = tgm.rotation_matrix_to_angle_axis(rotmat).contiguous().view(-1, 72)
            smpl_pose[step * batch_size:step * batch_size + curr_batch_size, :] = pred_pose.cpu().numpy()
            smpl_betas[step * batch_size:step * batch_size + curr_batch_size, :] = pred_betas.cpu().numpy()
            smpl_camera[step * batch_size:step * batch_size + curr_batch_size, :] = pred_camera.cpu().numpy()

        # 3D pose evaluation
        if eval_pose:
            # Regressor broadcasting
            J_regressor_batch = J_regressor[None, :].expand(pred_vertices.shape[0], -1, -1).to(device)

            # Get 14 ground truth joints
            if 'h36m' in dataset_name or 'mpi-inf' in dataset_name or '3doh50k' in dataset_name:
                gt_keypoints_3d = batch['pose_3d'].cuda()
                gt_keypoints_3d = gt_keypoints_3d[:, joint_mapper_gt, :-1]
            # For 3DPW get the 14 common joints from the rendered shape
            else:
                gt_vertices = smpl_male(global_orient=gt_pose[:, :3], body_pose=gt_pose[:, 3:], betas=gt_betas).vertices
                gt_vertices_female = smpl_female(global_orient=gt_pose[:, :3], body_pose=gt_pose[:, 3:], betas=gt_betas).vertices
                gt_vertices[gender == 1, :, :] = gt_vertices_female[gender == 1, :, :]
                gt_keypoints_3d = torch.matmul(J_regressor_batch, gt_vertices)
                gt_pelvis = gt_keypoints_3d[:, [0], :].clone()
                gt_keypoints_3d = gt_keypoints_3d[:, joint_mapper_h36m, :]
                gt_keypoints_3d = gt_keypoints_3d - gt_pelvis

            # Per-vertex error (PVE)
            if '3dpw' in dataset_name:
                per_vertex_error = torch.sqrt(((pred_vertices - gt_vertices) ** 2).sum(dim=-1)).mean(dim=-1).cpu().numpy()
            else:
                per_vertex_error = torch.sqrt(((pred_vertices - gt_vertices_nt) ** 2).sum(dim=-1)).mean(dim=-1).cpu().numpy()
            pve[step * batch_size:step * batch_size + curr_batch_size] = per_vertex_error

            # Get 14 predicted joints from the mesh
            pred_keypoints_3d = torch.matmul(J_regressor_batch, pred_vertices)
            if save_results:
                pred_joints[step * batch_size:step * batch_size + curr_batch_size, :, :] = pred_keypoints_3d.cpu().numpy()
            pred_pelvis = pred_keypoints_3d[:, [0], :].clone()
            pred_keypoints_3d = pred_keypoints_3d[:, joint_mapper_h36m, :]
            pred_keypoints_3d = pred_keypoints_3d - pred_pelvis

            # Absolute error (MPJPE)
            error = torch.sqrt(((pred_keypoints_3d - gt_keypoints_3d) ** 2).sum(dim=-1)).mean(dim=-1).cpu().numpy()
            mpjpe[step * batch_size:step * batch_size + curr_batch_size] = error

            # Reconstruction error (Procrustes-aligned MPJPE)
            r_error, pred_keypoints_3d_pa = reconstruction_error(pred_keypoints_3d.cpu().numpy(),
                                                                 gt_keypoints_3d.cpu().numpy(),
                                                                 reduction=None)
            recon_err[step * batch_size:step * batch_size + curr_batch_size] = r_error

            if save_results:
                results_dict['gt'].append(gt_keypoints_3d.cpu().numpy())
                results_dict['pred'].append(pred_keypoints_3d.cpu().numpy())
                results_dict['pred_pa'].append(pred_keypoints_3d_pa)

        if args.vis_demo:
            imgnames = [i_n.split('/')[-1] for i_n in batch['imgname']]

            if args.regressor == 'hmr':
                iuv_pred = None

            # Undo ImageNet normalization before visualization
            images_vis = images * torch.tensor([0.229, 0.224, 0.225], device=images.device).reshape(1, 3, 1, 1)
            images_vis = images_vis + torch.tensor([0.485, 0.456, 0.406], device=images.device).reshape(1, 3, 1, 1)
            vis_smpl_iuv(images_vis.cpu().numpy(),
                         pred_camera.cpu().numpy(),
                         pred_output.vertices.cpu().numpy(),
                         smpl_neutral.faces,
                         iuv_pred,
                         100 * per_vertex_error,
                         imgnames,
                         os.path.join('./notebooks/output/demo_results', dataset_name,
                                      args.checkpoint.split('/')[-3]),
                         args)

        # If mask or part evaluation, render the mask and part images
        if eval_masks or eval_parts:
            mask, parts = renderer(pred_vertices, pred_camera)

        # Mask evaluation (for LSP)
        if eval_masks:
            center = batch['center'].cpu().numpy()
            scale = batch['scale'].cpu().numpy()
            # Dimensions of original image
            orig_shape = batch['orig_shape'].cpu().numpy()
            for i in range(curr_batch_size):
                # After rendering, convert image back to original resolution
                pred_mask = uncrop(mask[i].cpu().numpy(), center[i], scale[i], orig_shape[i]) > 0
                # Load gt mask
                gt_mask = cv2.imread(os.path.join(annot_path, batch['maskname'][i]), 0) > 0
                # Evaluation consistent with the original UP-3D code
                accuracy += (gt_mask == pred_mask).sum()
                pixel_count += np.prod(np.array(gt_mask.shape))
                for c in range(2):
                    cgt = gt_mask == c
                    cpred = pred_mask == c
                    tp[c] += (cgt & cpred).sum()
                    fp[c] += (~cgt & cpred).sum()
                    fn[c] += (cgt & ~cpred).sum()
                f1 = 2 * tp / (2 * tp + fp + fn)

        # Part evaluation (for LSP)
        if eval_parts:
            center = batch['center'].cpu().numpy()
            scale = batch['scale'].cpu().numpy()
            orig_shape = batch['orig_shape'].cpu().numpy()
            for i in range(curr_batch_size):
                pred_parts = uncrop(parts[i].cpu().numpy().astype(np.uint8), center[i], scale[i], orig_shape[i])
                # Load gt part segmentation
                gt_parts = cv2.imread(os.path.join(annot_path, batch['partname'][i]), 0)
                # Evaluation consistent with the original UP-3D code
                # 6 parts + background
                for c in range(7):
                    cgt = gt_parts == c
                    cpred = pred_parts == c
                    cpred[gt_parts == 255] = 0
                    parts_tp[c] += (cgt & cpred).sum()
                    parts_fp[c] += (~cgt & cpred).sum()
                    parts_fn[c] += (cgt & ~cpred).sum()
                gt_parts[gt_parts == 255] = 0
                pred_parts[pred_parts == 255] = 0
                parts_f1 = 2 * parts_tp / (2 * parts_tp + parts_fp + parts_fn)
                parts_accuracy += (gt_parts == pred_parts).sum()
                parts_pixel_count += np.prod(np.array(gt_parts.shape))

        # Print intermediate results during evaluation
        if step % log_freq == log_freq - 1:
            if eval_pose:
                print('MPJPE: ' + str(1000 * mpjpe[:step * batch_size].mean()))
                print('Reconstruction Error: ' + str(1000 * recon_err[:step * batch_size].mean()))
                print()
            if eval_masks:
                print('Accuracy: ', accuracy / pixel_count)
                print('F1: ', f1.mean())
                print()
            if eval_parts:
                print('Parts Accuracy: ', parts_accuracy / parts_pixel_count)
                print('Parts F1 (BG): ', parts_f1[[0, 1, 2, 3, 4, 5, 6]].mean())
                print()

    # Save reconstructions to a file for further processing
    if save_results:
        np.savez(result_file, pred_joints=pred_joints, pose=smpl_pose, betas=smpl_betas, camera=smpl_camera)
        for k in results_dict.keys():
            results_dict[k] = np.concatenate(results_dict[k])
            print(k, results_dict[k].shape)
        scipy.io.savemat(result_file + '.mat', results_dict)

    # Print final results after evaluation
    print('*** Final Results ***')
    try:
        print(os.path.split(args.checkpoint)[-3:], args.dataset)
    except Exception:
        pass
    if eval_pose:
        print('PVE: ' + str(1000 * pve.mean()))
        print('MPJPE: ' + str(1000 * mpjpe.mean()))
        print('Reconstruction Error: ' + str(1000 * recon_err.mean()))
        print()
    if eval_masks:
        print('Accuracy: ', accuracy / pixel_count)
        print('F1: ', f1.mean())
        print()
    if eval_parts:
        print('Parts Accuracy: ', parts_accuracy / parts_pixel_count)
        print('Parts F1 (BG): ', parts_f1[[0, 1, 2, 3, 4, 5, 6]].mean())
        print()

    if dataset_name == 'h36m-p2':
        print('Note: PVE is not available for h36m-p2. To evaluate PVE, use h36m-p2-mosh instead.')
        for act in action_idxes:
            act_idx = action_idxes[act]
            act_pve = [pve[i] for i in act_idx]
            act_errors = [mpjpe[i] for i in act_idx]
            act_errors_pa = [recon_err[i] for i in act_idx]

            act_errors_mean = np.mean(np.array(act_errors)) * 1000.
            act_errors_pa_mean = np.mean(np.array(act_errors_pa)) * 1000.
            act_pve_mean = np.mean(np.array(act_pve)) * 1000.
            act_MPJPE[act] = act_errors_mean
            act_paMPJPE[act] = act_errors_pa_mean
            act_PVE[act] = act_pve_mean

        act_err_info = ['action err']
        act_row = [str(act_paMPJPE[act]) for act in action_idxes] + [act for act in action_idxes]
        act_err_info.extend(act_row)
        print(act_err_info)
    else:
        act_row = None
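
# A toy example of the H36M action parsing used in the h36m-p2 branch above.
# The path format is an assumption about the dataset layout (e.g.
# 'S9_Directions_1.54138969_000001.jpg').
def _action_from_imgname_demo():
    im_path = 'images/S9_Directions_1.54138969_000001.jpg'
    action = im_path.split('/')[-1].split('.')[0].split('_')[1]
    return action  # -> 'Directions'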