def init_fn(self):
    """Build the dataset, network, losses, MVSMPLify module and renderer."""
    opts = self.options

    # Mixed 2D/3D training data.
    self.train_ds = MixedDataset(opts, ignore_3d=opts.ignore_3d, is_train=True)

    # HMR regression network (feature extraction model) and its optimizer.
    self.model = hmr(config.SMPL_MEAN_PARAMS, pretrained=True).to(self.device)
    self.optimizer = torch.optim.Adam(params=self.model.parameters(),
                                      lr=opts.lr,
                                      weight_decay=0)

    # SMPL body model used to decode predicted parameters into meshes.
    self.smpl = SMPL(config.SMPL_MODEL_DIR,
                     batch_size=16,
                     create_transl=False).to(self.device)

    # Losses: per-vertex shape (L1); keypoint loss kept unreduced so
    # per-joint confidence weighting can be applied later; plain MSE for
    # SMPL parameter supervision when it is available.
    self.criterion_shape = nn.L1Loss().to(self.device)
    self.criterion_keypoints = nn.MSELoss(reduction='none').to(self.device)
    self.criterion_regr = nn.MSELoss().to(self.device)

    # Registered for checkpoint saving/loading.
    self.models_dict = {'model': self.model}
    self.optimizers_dict = {'optimizer': self.optimizer}

    self.focal_length = constants.FOCAL_LENGTH

    # In-the-loop multi-view SMPLify fitting module.
    self.mvsmplify = MVSMPLify(step_size=1e-2, batch_size=16, num_iters=100,
                               focal_length=self.focal_length)

    print(opts.pretrained_checkpoint)
    if opts.pretrained_checkpoint is not None:
        self.load_pretrained(checkpoint_file=opts.pretrained_checkpoint)

    # Dictionary of per-sample fits, kept up to date during training.
    self.fits_dict = FitsDict(opts, self.train_ds)

    # Renderer for visualization.
    self.renderer = Renderer(focal_length=self.focal_length, img_res=224,
                             faces=self.smpl.faces)
def init_fn(self):
    """Build the dataset, network, losses, SMPLify module and renderer."""
    opts = self.options

    # Mixed 2D/3D training data.
    self.train_ds = MixedDataset(opts, ignore_3d=opts.ignore_3d, is_train=True)

    # HMR regression network and its optimizer.
    self.model = hmr(config.SMPL_MEAN_PARAMS, pretrained=True).to(self.device)
    self.optimizer = torch.optim.Adam(params=self.model.parameters(),
                                      lr=opts.lr,
                                      weight_decay=0)

    # SMPL body model used to decode predicted parameters into meshes.
    self.smpl = SMPL(config.SMPL_MODEL_DIR,
                     batch_size=opts.batch_size,
                     create_transl=False).to(self.device)

    # Per-vertex loss on the shape.
    self.criterion_shape = nn.L1Loss().to(self.device)
    # Keypoint (2D and 3D) loss; unreduced so confidence weighting can be
    # applied by the caller.
    self.criterion_keypoints = nn.MSELoss(reduction='none').to(self.device)
    # Loss for SMPL parameter regression.
    self.criterion_regr = nn.MSELoss().to(self.device)

    # Registered for checkpoint saving/loading.
    self.models_dict = {'model': self.model}
    self.optimizers_dict = {'optimizer': self.optimizer}

    self.focal_length = constants.FOCAL_LENGTH
    self.conf_thresh = opts.conf_thresh

    # In-the-loop SMPLify fitting module.
    self.smplify = SMPLify(step_size=1e-2,
                           batch_size=opts.batch_size,
                           num_iters=opts.num_smplify_iters,
                           focal_length=self.focal_length,
                           prior_mul=0.1,
                           conf_thresh=self.conf_thresh)

    if opts.pretrained_checkpoint is not None:
        self.load_pretrained(checkpoint_file=opts.pretrained_checkpoint)

    # Dictionary of per-sample fits.
    self.fits_dict = FitsDict(opts, self.train_ds)

    # Renderer for visualization.
    self.renderer = Renderer(focal_length=self.focal_length,
                             img_res=opts.img_res,
                             faces=self.smpl.faces)
def init_fn(self):
    """Build dataset, GraphCNN + SMPL regressor, losses and renderer."""
    opts = self.options

    # Training dataset.
    self.train_ds = create_dataset(opts.dataset, opts)

    # Mesh object holding the graph structure (adjacency, template vertices).
    self.mesh = Mesh()
    self.faces = self.mesh.faces.to(self.device)

    # GraphCNN that regresses a mesh from image features.
    self.graph_cnn = GraphCNN(self.mesh.adjmat,
                              self.mesh.ref_vertices.t(),
                              num_channels=opts.num_channels,
                              num_layers=opts.num_layers).to(self.device)
    # Network that regresses SMPL parameters from the GraphCNN output.
    self.smpl_param_regressor = SMPLParamRegressor().to(self.device)

    # One optimizer drives both models jointly.
    joint_params = list(self.graph_cnn.parameters()) + \
        list(self.smpl_param_regressor.parameters())
    self.optimizer = torch.optim.Adam(params=joint_params,
                                      lr=opts.lr,
                                      betas=(opts.adam_beta1, 0.999),
                                      weight_decay=opts.wd)

    # SMPL body model.
    self.smpl = SMPL().to(self.device)

    # Loss functions: per-vertex shape (L1), unreduced keypoint MSE
    # (confidence-weighted later), SMPL parameter MSE.
    self.criterion_shape = nn.L1Loss().to(self.device)
    self.criterion_keypoints = nn.MSELoss(reduction='none').to(self.device)
    self.criterion_regr = nn.MSELoss().to(self.device)

    # Pack models and optimizers in a dict - necessary for checkpointing.
    self.models_dict = {
        'graph_cnn': self.graph_cnn,
        'smpl_param_regressor': self.smpl_param_regressor
    }
    self.optimizers_dict = {'optimizer': self.optimizer}

    # Renderer for visualization.
    self.renderer = Renderer(faces=self.smpl.faces.cpu().numpy())

    # LSP indices from the full list of keypoints.
    self.to_lsp = list(range(14))

    # Optionally start training from a pretrained checkpoint.
    # Note that this is different from resuming training (use --resume).
    if opts.pretrained_checkpoint is not None:
        self.load_pretrained(checkpoint_file=opts.pretrained_checkpoint)
def bbox_from_json(bbox_file):
    """Get center and scale of bounding box from bounding box annotations.

    The expected format is [top_left(x), top_left(y), width, height].

    Args:
        bbox_file: path to a JSON file with a 'bbox' entry in the format above.

    Returns:
        (center, scale): `center` is a float32 array [cx, cy] of the box
        center in pixels; `scale` is the larger box side divided by 200
        (the person-crop convention used by this codebase).
    """
    # BUGFIX: the original body had an unrelated chunk of model-loading /
    # inference demo code pasted into the middle of this function; it
    # referenced undefined globals (`args`, `device`) and did not contribute
    # to the returned values, so it has been removed.
    with open(bbox_file, 'r') as f:
        bbox = np.array(json.load(f)['bbox']).astype(np.float32)
    ul_corner = bbox[:2]
    center = ul_corner + 0.5 * bbox[2:]
    # Use the larger side so the (square) crop covers the whole box.
    width = max(bbox[2], bbox[3])
    scale = width / 200.0
    # make sure the bounding box is rectangular
    return center, scale
def __init__(self, focal_length=5000., render_res=224):
    """Configure the neural mesh renderer used for masks and part maps."""
    # Rendering parameters.
    self.focal_length = focal_length
    self.render_res = render_res

    # Neural 3D mesh renderer for rendering masks and part segmentations;
    # lighting is fully ambient and anti-aliasing is disabled.
    self.neural_renderer = nr.Renderer(dist_coeffs=None,
                                       orig_size=self.render_res,
                                       image_size=render_res,
                                       light_intensity_ambient=1,
                                       light_intensity_directional=0,
                                       anti_aliasing=False)

    # SMPL topology plus per-vertex texture / part lookup tables on the GPU.
    self.faces = SMPL().faces.cuda().int()
    vertex_textures = np.load(cfg.VERTEX_TEXTURE_FILE)
    self.textures = torch.from_numpy(vertex_textures).cuda().float()
    self.cube_parts = torch.cuda.FloatTensor(np.load(cfg.CUBE_PARTS_FILE))
def init_fn(self):
    """Build dataset, network, scheduler, losses, renderer and feature queue."""
    opts = self.options

    # Mixed 2D/3D training data.
    self.train_ds = MixedDataset(opts, ignore_3d=opts.ignore_3d, is_train=True)

    # HMR regression network with optimizer and exponential LR decay.
    self.model = hmr(config.SMPL_MEAN_PARAMS, pretrained=True).to(self.device)
    self.optimizer = torch.optim.Adam(params=self.model.parameters(),
                                      lr=opts.lr)
    self.lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(self.optimizer,
                                                               gamma=0.95)

    # SMPL body model used to decode predicted parameters into meshes.
    self.smpl = SMPL(config.SMPL_MODEL_DIR,
                     batch_size=opts.batch_size,
                     create_transl=False).to(self.device)

    # Consistency losses (contrastive NT-Xent + MSE).
    self.criterion_consistency_contrastive = NTXent(
        tau=opts.tau, kernel=opts.kernel).to(self.device)
    self.criterion_consistency_mse = nn.MSELoss().to(self.device)
    # Per-vertex loss on the shape.
    self.criterion_shape = nn.L1Loss().to(self.device)
    # Keypoint (2D and 3D) loss; unreduced so confidence weighting can be
    # applied by the caller.
    self.criterion_keypoints = nn.MSELoss(reduction='none').to(self.device)
    # Loss for SMPL parameter regression.
    self.criterion_regr = nn.MSELoss().to(self.device)

    # Registered for checkpoint saving/loading.
    self.models_dict = {'model': self.model}
    self.optimizers_dict = {'optimizer': self.optimizer}

    self.focal_length = constants.FOCAL_LENGTH

    if opts.pretrained_checkpoint is not None:
        self.load_pretrained(checkpoint_file=opts.pretrained_checkpoint)

    # Renderer for visualization.
    self.renderer = Renderer(focal_length=self.focal_length,
                             img_res=opts.img_res,
                             faces=self.smpl.faces)

    # Flag controlling whether raw input images are used.
    self.input_img = opts.input_img

    # Queue of features used by the consistency objective.
    self.feat_queue = FeatQueue(max_queue_size=opts.max_queue_size)
def __init__(self, filename='data/mesh_downsampling.npz',
             num_downsampling=1, nsize=1,
             device=torch.device('cuda')):
    """Load graph operators and a normalized SMPL template mesh.

    Args:
        filename: .npz file holding precomputed adjacency (A), upsampling (U)
            and downsampling (D) matrices for the mesh hierarchy.
        num_downsampling: default number of downsampling steps to apply.
        nsize: neighborhood size forwarded to `get_graph_params`.
        device: device that matrices, template vertices and faces are moved to.
    """
    self._A, self._U, self._D = get_graph_params(filename=filename, nsize=nsize)
    self._A = [a.to(device) for a in self._A]
    self._U = [u.to(device) for u in self._U]
    self._D = [d.to(device) for d in self._D]
    self.num_downsampling = num_downsampling

    # Load template vertices from SMPL and normalize them (centered,
    # max-abs-coordinate scaled to 1).
    smpl = SMPL()
    # BUGFIX: clone before normalizing — the original applied `-=` / `/=`
    # directly to `smpl.v_template`, silently mutating the SMPL model's
    # template buffer in place.
    ref_vertices = smpl.v_template.clone()
    center = 0.5 * (ref_vertices.max(dim=0)[0] + ref_vertices.min(dim=0)[0])[None]
    ref_vertices -= center
    ref_vertices /= ref_vertices.abs().max().item()

    self._ref_vertices = ref_vertices.to(device)
    self.faces = smpl.faces.int().to(device)
def __init__(self, mesh, num_layers, num_channels, pretrained_checkpoint=None):
    """Bundle the GraphCNN, the SMPL parameter regressor and the SMPL model.

    When `pretrained_checkpoint` is given, each sub-network is restored
    independently so a partial checkpoint only produces a warning.
    """
    super(CMR, self).__init__()
    self.graph_cnn = GraphCNN(mesh.adjmat, mesh.ref_vertices.t(),
                              num_layers, num_channels)
    self.smpl_param_regressor = SMPLParamRegressor()
    self.smpl = SMPL()
    self.mesh = mesh

    if pretrained_checkpoint is None:
        return

    checkpoint = torch.load(pretrained_checkpoint)
    for key, module in (('graph_cnn', self.graph_cnn),
                        ('smpl_param_regressor', self.smpl_param_regressor)):
        try:
            module.load_state_dict(checkpoint[key])
        except KeyError:
            print('Warning: %s was not found in checkpoint' % key)
def run_evaluation(model, dataset_name, dataset, result_file,
                   batch_size=32, img_res=224,
                   num_workers=32, shuffle=False, log_freq=50, options=None):
    """Run evaluation on the datasets and metrics we report in the paper.

    Runs the regressor over `dataset`, projects the predicted COCO joints to
    2D, accumulates them in COCO-keypoint format and finally calls
    `dataset.evaluate(...)` on the result.

    NOTE(review): this function reads the module-level globals `args` and
    `cfg` near the end — confirm both exist in the defining module.
    """
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    # Transfer model to the GPU
    model.to(device)
    # Load SMPL model (neutral is the one actually used for predictions below)
    smpl_neutral = SMPL(path_config.SMPL_MODEL_DIR,
                        create_transl=False).to(device)
    smpl_male = SMPL(path_config.SMPL_MODEL_DIR,
                     gender='male',
                     create_transl=False).to(device)
    smpl_female = SMPL(path_config.SMPL_MODEL_DIR,
                       gender='female',
                       create_transl=False).to(device)
    renderer = PartRenderer()
    # Regressor for H36m joints
    J_regressor = torch.from_numpy(np.load(path_config.JOINT_REGRESSOR_H36M)).float()
    save_results = result_file is not None
    # Disable shuffling if you want to save the results
    if save_results:
        shuffle = False
    # Create dataloader for the dataset
    data_loader = DataLoader(dataset, batch_size=batch_size,
                             shuffle=shuffle, num_workers=num_workers)
    fits_dict = None
    # Pose metrics
    # MPJPE and Reconstruction error for the non-parametric and parametric shapes
    # (these accumulators are never read in this COCO code path)
    mpjpe = np.zeros(len(dataset))
    recon_err = np.zeros(len(dataset))
    mpjpe_smpl = np.zeros(len(dataset))
    recon_err_smpl = np.zeros(len(dataset))
    # Store SMPL parameters (written to `result_file` when save_results)
    smpl_pose = np.zeros((len(dataset), 72))
    smpl_betas = np.zeros((len(dataset), 10))
    smpl_camera = np.zeros((len(dataset), 3))
    pred_joints = np.zeros((len(dataset), 17, 3))
    # joint_mapper_coco = constants.H36M_TO_JCOCO
    joint_mapper_gt = constants.J24_TO_JCOCO
    focal_length = 5000
    num_joints = 17
    num_samples = len(dataset)
    print('dataset length: {}'.format(num_samples))
    # COCO-style accumulators: (x, y, conf) per joint, one box record per image
    all_preds = np.zeros((num_samples, num_joints, 3), dtype=np.float32)
    all_boxes = np.zeros((num_samples, 6))
    image_path = []
    filenames = []
    imgnums = []
    idx = 0
    with torch.no_grad():
        for step, batch in enumerate(tqdm(data_loader, desc='Eval',
                                          total=len(data_loader))):
            # When a single image name is requested, skip batches without it
            if len(options.vis_imname) > 0:
                imgnames = [i_n.split('/')[-1] for i_n in batch['imgname']]
                name_hit = False
                for i_n in imgnames:
                    if options.vis_imname in i_n:
                        name_hit = True
                        print('vis: ' + i_n)
                if not name_hit:
                    continue
            images = batch['img'].to(device)
            scale = batch['scale'].numpy()
            center = batch['center'].numpy()
            num_images = images.size(0)
            gt_keypoints_2d = batch['keypoints']  # 2D keypoints
            # De-normalize 2D keypoints from [-1,1] to pixel space
            gt_keypoints_2d_orig = gt_keypoints_2d.clone()
            gt_keypoints_2d_orig[:, :, :-1] = \
                0.5 * img_res * (gt_keypoints_2d_orig[:, :, :-1] + 1)
            # Run the chosen regressor; both branches produce rotation
            # matrices, betas and a weak-perspective camera
            if options.regressor == 'hmr':
                pred_rotmat, pred_betas, pred_camera = model(images)
                # torch.Size([32, 24, 3, 3]) torch.Size([32, 10]) torch.Size([32, 3])
            elif options.regressor == 'pymaf_net':
                preds_dict, _ = model(images)
                pred_rotmat = preds_dict['smpl_out'][-1]['rotmat'].contiguous().view(-1, 24, 3, 3)
                pred_betas = preds_dict['smpl_out'][-1]['theta'][:, 3:13].contiguous()
                pred_camera = preds_dict['smpl_out'][-1]['theta'][:, :3].contiguous()
            # Decode SMPL parameters to a posed mesh (neutral model)
            pred_output = smpl_neutral(betas=pred_betas,
                                       body_pose=pred_rotmat[:, 1:],
                                       global_orient=pred_rotmat[:, 0].unsqueeze(1),
                                       pose2rot=False)
            # pred_vertices = pred_output.vertices
            # Last 24 joints of the SMPL output, remapped to COCO order
            pred_J24 = pred_output.joints[:, -24:]
            pred_JCOCO = pred_J24[:, constants.J24_TO_JCOCO]
            # Convert Weak Perspective Camera [s, tx, ty] to camera translation [tx, ty, tz] in 3D given the bounding box size
            # This camera translation can be used in a full perspective projection
            pred_cam_t = torch.stack([pred_camera[:, 1],
                                      pred_camera[:, 2],
                                      2 * constants.FOCAL_LENGTH / (img_res * pred_camera[:, 0] + 1e-9)],
                                     dim=-1)
            camera_center = torch.zeros(len(pred_JCOCO), 2,
                                        device=pred_camera.device)
            pred_keypoints_2d = perspective_projection(
                pred_JCOCO,
                rotation=torch.eye(3, device=pred_camera.device).unsqueeze(0).expand(len(pred_JCOCO), -1, -1),
                translation=pred_cam_t,
                focal_length=constants.FOCAL_LENGTH,
                camera_center=camera_center)
            # Shift from a centered frame to pixel coordinates of the crop
            coords = pred_keypoints_2d + (img_res / 2.)
            coords = coords.cpu().numpy()
            gt_keypoints_coco = gt_keypoints_2d_orig[:, -24:][:, constants.J24_TO_JCOCO]
            # Confidence-weighted mean 2D joint error per image (for demo vis)
            vert_errors_batch = []
            for i, (gt2d, pred2d) in enumerate(zip(gt_keypoints_coco.cpu().numpy(),
                                                   coords.copy())):
                vert_error = np.sqrt(np.sum((gt2d[:, :2] - pred2d[:, :2]) ** 2, axis=1))
                vert_error *= gt2d[:, 2]
                vert_mean_error = np.sum(vert_error) / np.sum(gt2d[:, 2] > 0)
                vert_errors_batch.append(10 * vert_mean_error)
            if options.vis_demo:
                imgnames = [i_n.split('/')[-1] for i_n in batch['imgname']]
                if options.regressor == 'hmr':
                    iuv_pred = None
                # Undo ImageNet normalization for display
                images_vis = images * torch.tensor([0.229, 0.224, 0.225],
                                                   device=images.device).reshape(1, 3, 1, 1)
                images_vis = images_vis + torch.tensor([0.485, 0.456, 0.406],
                                                       device=images.device).reshape(1, 3, 1, 1)
                vis_smpl_iuv(images_vis.cpu().numpy(),
                             pred_camera.cpu().numpy(),
                             pred_output.vertices.cpu().numpy(),
                             smpl_neutral.faces,
                             iuv_pred,
                             vert_errors_batch,
                             imgnames,
                             os.path.join('./notebooks/output/demo_results',
                                          dataset_name,
                                          options.checkpoint.split('/')[-3]),
                             options)
            preds = coords.copy()
            scale_ = np.array([scale, scale]).transpose()
            # Transform back from crop coordinates to original image coordinates
            for i in range(coords.shape[0]):
                preds[i] = transform_preds(coords[i], center[i], scale_[i],
                                           [img_res, img_res])
            # All predictions get confidence 1; box score column is set to 1
            all_preds[idx:idx + num_images, :, 0:2] = preds[:, :, 0:2]
            all_preds[idx:idx + num_images, :, 2:3] = 1.
            all_boxes[idx:idx + num_images, 5] = 1.
            image_path.extend(batch['imgname'])
            idx += num_images
        # In single-image visualization mode, stop after the pass
        if len(options.vis_imname) > 0:
            exit()
    # Derive a short checkpoint tag used to label the evaluation output
    if args.checkpoint is None or 'model_checkpoint.pt' in args.checkpoint:
        ckp_name = 'spin_model'
    else:
        ckp_name = args.checkpoint.split('/')
        ckp_name = ckp_name[2].split('_')[1] + '_' + ckp_name[-1].split('.')[0]
    name_values, perf_indicator = dataset.evaluate(cfg, all_preds,
                                                   options.output_dir,
                                                   all_boxes, image_path,
                                                   ckp_name, filenames,
                                                   imgnums)
    model_name = options.regressor
    if isinstance(name_values, list):
        for name_value in name_values:
            _print_name_value(name_value, model_name)
    else:
        _print_name_value(name_values, model_name)
    # Save reconstructions to a file for further processing
    if save_results:
        np.savez(result_file, pred_joints=pred_joints, pose=smpl_pose,
                 betas=smpl_betas, camera=smpl_camera)
def init(self):
    """Build datasets, SMPL(+D) models, UV lookups, the frameVIBE network,
    its optimizer and the VIBE loss.

    NOTE(review): assumes `self.options` and `self.device` are set by the
    enclosing trainer before this is called — confirm against the base class.
    """
    # Create training and testing dataset
    self.train_ds = MGNDataset(self.options, split='train')
    self.test_ds = MGNDataset(self.options, split='test')

    # test data loader is fixed and disable shuffle as it is unnecessary.
    self.test_data_loader = CheckpointDataLoader(
        self.test_ds,
        batch_size=self.options.batch_size,
        num_workers=self.options.num_workers,
        pin_memory=self.options.pin_memory,
        shuffle=False)

    # Create SMPL Mesh (graph) object for GCN
    self.mesh = Mesh(self.options, self.options.num_downsampling)
    # Faces replicated along the batch dimension for batched rendering
    self.faces = torch.cat(
        self.options.batch_size * [self.mesh.faces.to(self.device)[None]],
        dim=0)
    self.faces = self.faces.type(torch.LongTensor).to(self.device)

    # Create SMPL blending model and edges
    self.smpl = SMPL(self.options.smpl_model_path, self.device)
    # self.smplEdge = torch.Tensor(np.load(self.options.smpl_edges_path)) \
    #     .long().to(self.device)

    # create SMPL+D blending model
    self.smplD = Smpl(self.options.smpl_model_path)

    # read SMPL .obj file to get uv coordinates
    _, self.smpl_tri_ind, uv_coord, tri_uv_ind = read_Obj(
        self.options.smpl_objfile_path)
    # Flip V so the texture origin matches the renderer's convention
    uv_coord[:, 1] = 1 - uv_coord[:, 1]
    # Expand per-triangle UV indices to per-vertex UVs (first occurrence wins)
    expUV = uv_coord[tri_uv_ind.flatten()]
    unique, index = np.unique(self.smpl_tri_ind.flatten(), return_index=True)
    self.smpl_verts_uvs = torch.as_tensor(expUV[index, :]).float().to(self.device)
    self.smpl_tri_ind = torch.as_tensor(self.smpl_tri_ind).to(self.device)

    # camera for projection
    self.perspCam = perspCamera()

    # mean and std of displacements
    self.dispPara = torch.Tensor(
        np.load(self.options.MGN_offsMeanStd_path)).to(self.device)

    # load average pose and shape and convert to camera coodinate;
    # avg pose is decided by the image id we use for training (0-11)
    avgPose_objCoord = np.load(self.options.MGN_avgPose_path)
    # Replace the global orientation with a fixed frontal rotation
    avgPose_objCoord[:3] = rotationMatrix_to_axisAngle(  # for 0,6, front only
        torch.tensor([[[1, 0, 0],
                       [0, -1, 0],
                       [0, 0, -1]]]))
    self.avgPose = axisAngle_to_Rot6d(
        torch.Tensor(avgPose_objCoord[None]).reshape(-1, 3)
        ).reshape(1, -1).to(self.device)
    self.avgBeta = torch.Tensor(
        np.load(self.options.MGN_avgBeta_path)[None]).to(self.device)
    # 1.2755 is for our settings
    self.avgCam = torch.Tensor([1.2755, 0, 0])[None].to(self.device)

    self.model = frameVIBE(
        self.options.smpl_model_path,
        self.mesh,
        self.avgPose,
        self.avgBeta,
        self.avgCam,
        self.options.num_channels,
        self.options.num_layers,
        self.smpl_verts_uvs,
        self.smpl_tri_ind).to(self.device)

    # Setup a optimizer for models
    self.optimizer = torch.optim.Adam(
        params=list(self.model.parameters()),
        lr=self.options.lr,
        betas=(self.options.adam_beta1, 0.999),
        weight_decay=self.options.wd)

    self.criterion = VIBELoss(
        e_loss_weight=50,       # for kp 2d, help to estimate camera, global orientation
        e_3d_loss_weight=1,     # for kp 3d, bvt
        e_pose_loss_weight=10,  # for body pose parameters
        e_shape_loss_weight=1,  # for body shape parameters
        e_disp_loss_weight=1,   # for displacements
        e_tex_loss_weight=1,    # for uv image
        d_motion_loss_weight=0)

    # Pack models and optimizers in a dict - necessary for checkpointing
    self.models_dict = {self.options.model: self.model}
    self.optimizers_dict = {'optimizer': self.optimizer}
def run_evaluation(hmr_model, dataset, eval_size, batch_size=32,
                   num_workers=32, log_freq=50):
    """Run evaluation on the datasets and metrics we report in the paper.

    Computes MPJPE and reconstruction error against gendered SMPL ground
    truth. `eval_size` is forwarded to the model via `size_to_scale` —
    NOTE(review): semantics of that helper are not visible here; confirm.
    """
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    # focal length
    focal_length = constants.FOCAL_LENGTH
    # Transfer hmr_model to the GPU
    hmr_model.to(device)
    # Load SMPL hmr_model (neutral for predictions, male/female for GT meshes)
    smpl_neutral = SMPL(config.SMPL_MODEL_DIR, create_transl=False).to(device)
    smpl_male = SMPL(config.SMPL_MODEL_DIR, gender='male',
                     create_transl=False).to(device)
    smpl_female = SMPL(config.SMPL_MODEL_DIR, gender='female',
                       create_transl=False).to(device)
    # Regressor for H36m joints
    J_regressor = torch.from_numpy(np.load(config.JOINT_REGRESSOR_H36M)).float()
    # Create dataloader for the dataset
    data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=False,
                             num_workers=num_workers)
    # Pose metrics
    # MPJPE and Reconstruction error for the non-parametric and parametric shapes
    mpjpe = np.zeros((len(dataset)))
    recon_err = np.zeros((len(dataset)))
    joint_mapper_h36m = constants.H36M_TO_J14
    # Iterate over the entire dataset
    for step, batch in enumerate(tqdm(data_loader, desc='Eval',
                                      total=len(data_loader))):
        # Get ground truth annotations from the batch
        gt_pose = batch['pose'].to(device)
        gt_betas = batch['betas'].to(device)
        gender = batch['gender'].to(device)
        images = batch['img_up']
        curr_batch_size = images.shape[0]
        with torch.no_grad():
            images = images.to(device)
            pred_rotmat, pred_betas, pred_camera, _ = hmr_model(
                images, scale=size_to_scale(eval_size))
            # Decode predicted parameters to a neutral-model mesh
            pred_output = smpl_neutral(betas=pred_betas,
                                       body_pose=pred_rotmat[:, 1:],
                                       global_orient=pred_rotmat[:, 0].unsqueeze(1),
                                       pose2rot=False)
            pred_vertices = pred_output.vertices
            J_regressor_batch = J_regressor[None, :].expand(
                pred_vertices.shape[0], -1, -1).to(device)
            # Gendered ground-truth meshes: start from male, overwrite
            # female samples (gender == 1)
            gt_vertices = smpl_male(global_orient=gt_pose[:, :3],
                                    body_pose=gt_pose[:, 3:],
                                    betas=gt_betas).vertices
            gt_vertices_female = smpl_female(global_orient=gt_pose[:, :3],
                                             body_pose=gt_pose[:, 3:],
                                             betas=gt_betas).vertices
            gt_vertices[gender == 1, :, :] = gt_vertices_female[gender == 1, :, :]
            # H36M joints from GT vertices, pelvis-centered
            gt_keypoints_3d = torch.matmul(J_regressor_batch, gt_vertices)
            gt_pelvis = gt_keypoints_3d[:, [0], :].clone()
            gt_keypoints_3d = gt_keypoints_3d[:, joint_mapper_h36m, :]
            gt_keypoints_3d = gt_keypoints_3d - gt_pelvis
            # H36M joints from predicted vertices, pelvis-centered
            pred_keypoints_3d = torch.matmul(J_regressor_batch, pred_vertices)
            pred_pelvis = pred_keypoints_3d[:, [0], :].clone()
            pred_keypoints_3d = pred_keypoints_3d[:, joint_mapper_h36m, :]
            pred_keypoints_3d = pred_keypoints_3d - pred_pelvis
            # Absolute error (MPJPE)
            error = torch.sqrt(((pred_keypoints_3d - gt_keypoints_3d) ** 2)
                               .sum(dim=-1)).mean(dim=-1).cpu().numpy()
            mpjpe[step * batch_size:step * batch_size + curr_batch_size] = error
            # Reconstuction_error
            r_error = reconstruction_error(pred_keypoints_3d.cpu().numpy(),
                                           gt_keypoints_3d.cpu().numpy(),
                                           reduction=None)
            recon_err[step * batch_size:step * batch_size + curr_batch_size] = r_error
        # Print intermediate results during evaluation
        if step % log_freq == log_freq - 1:
            print('MPJPE: ' + str(1000 * mpjpe[:step * batch_size].mean()))
            print('Reconstruction Error: ' + str(1000 * recon_err[:step * batch_size].mean()))
    # Print final results during evaluation
    print('*** Final Results ***')
    print()
    print('MPJPE: {}'.format(1000 * mpjpe.mean()))
    print('Reconstruction Error: {}'.format(1000 * recon_err.mean()))
    print()
def main():
    """Run the DaNet demo: predict SMPL + IUV maps for every .jpg in
    `args.img_dir` and save side-by-side visualizations to `args.out_dir`."""
    args = parse_args()
    args.batch_size = 1
    cfg_from_file(args.cfg_file)
    cfg.DANET.REFINEMENT = EasyDict(cfg.DANET.REFINEMENT)
    cfg.MSRES_MODEL.EXTRA = EasyDict(cfg.MSRES_MODEL.EXTRA)

    device = torch.device(
        'cuda') if torch.cuda.is_available() else torch.device('cpu')

    # Select the SMPL body model matching the configured gender.
    if cfg.DANET.SMPL_MODEL_TYPE == 'male':
        smpl_male = SMPL(path_config.SMPL_MODEL_DIR, gender='male',
                         create_transl=False).to(device)
        smpl = smpl_male
    elif cfg.DANET.SMPL_MODEL_TYPE == 'neutral':
        smpl_neutral = SMPL(path_config.SMPL_MODEL_DIR,
                            create_transl=False).to(device)
        smpl = smpl_neutral
    elif cfg.DANET.SMPL_MODEL_TYPE == 'female':
        smpl_female = SMPL(path_config.SMPL_MODEL_DIR, gender='female',
                           create_transl=False).to(device)
        smpl = smpl_female

    if args.use_opendr:
        from utils.renderer import opendr_render
        dr_render = opendr_render()

    # IUV renderer
    iuv_renderer = IUV_Renderer()

    if not os.path.exists(args.out_dir):
        os.makedirs(args.out_dir)

    ### Model ###
    model = DaNet(args, path_config.SMPL_MEAN_PARAMS, pretrained=False).to(device)
    checkpoint = torch.load(args.checkpoint)
    model.load_state_dict(checkpoint['model'], strict=False)
    model.eval()

    img_path_list = [
        os.path.join(args.img_dir, name) for name in os.listdir(args.img_dir)
        if name.endswith('.jpg')
    ]

    for path in img_path_list:
        image = Image.open(path).convert('RGB')
        img_id = path.split('/')[-1][:-4]
        image_tensor = torchvision.transforms.ToTensor()(image).unsqueeze(0).cuda()

        # run inference
        pred_dict = model.infer_net(image_tensor)
        para_pred = pred_dict['para']
        camera_pred = para_pred[:, 0:3].contiguous()
        betas_pred = para_pred[:, 3:13].contiguous()
        rotmat_pred = para_pred[:, 13:].contiguous().view(-1, 24, 3, 3)

        # input image as RGBA
        image_np = image_tensor[0].cpu().numpy()
        image_np = np.transpose(image_np, (1, 2, 0))
        ones_np = np.ones(image_np.shape[:2]) * 255
        ones_np = ones_np[:, :, None]
        image_in_rgba = np.concatenate((image_np, ones_np), axis=2)

        # estimated global IUV
        global_iuv = iuv_map2img(
            *pred_dict['visualization']['iuv_pred'])[0].cpu().numpy()
        global_iuv = np.transpose(global_iuv, (1, 2, 0))
        global_iuv = resize(global_iuv, image_np.shape[:2])
        global_iuv_rgba = np.concatenate((global_iuv, ones_np), axis=2)

        # estimated partial IUV
        part_iuv_pred = pred_dict['visualization']['part_iuv_pred'][0]
        p_iuv_vis = []
        # NOTE: distinct loop variable (the original shadowed the outer `i`).
        for p_ind in range(part_iuv_pred.size(0)):
            p_u_vis, p_v_vis, p_i_vis = [
                part_iuv_pred[p_ind, iuv].unsqueeze(0) for iuv in range(3)
            ]
            if p_u_vis.size(1) == 25:
                p_iuv_vis_i = iuv_map2img(p_u_vis.detach(), p_v_vis.detach(),
                                          p_i_vis.detach())
            else:
                p_iuv_vis_i = iuv_map2img(
                    p_u_vis.detach(), p_v_vis.detach(), p_i_vis.detach(),
                    ind_mapping=[0] + model.img2iuv.dp2smpl_mapping[p_ind])
            p_iuv_vis.append(p_iuv_vis_i)
        part_iuv = torch.cat(p_iuv_vis, dim=0)
        part_iuv = make_grid(part_iuv, nrow=6, padding=0).cpu().numpy()
        part_iuv = np.transpose(part_iuv, (1, 2, 0))
        part_iuv_rgba = np.concatenate(
            (part_iuv, np.ones(part_iuv.shape[:2])[:, :, None] * 255), axis=2)

        # rendered IUV of the predicted SMPL model
        smpl_output = smpl(betas=betas_pred,
                           body_pose=rotmat_pred[:, 1:],
                           global_orient=rotmat_pred[:, 0].unsqueeze(1),
                           pose2rot=False)
        verts_pred = smpl_output.vertices
        render_iuv = iuv_renderer.verts2uvimg(verts_pred, camera_pred)
        render_iuv = render_iuv[0].cpu().numpy()
        render_iuv = np.transpose(render_iuv, (1, 2, 0))
        render_iuv = resize(render_iuv, image_np.shape[:2])

        # overlay the rendered IUV on the input image
        img_render_iuv = image_np.copy()
        img_render_iuv[render_iuv > 0] = render_iuv[render_iuv > 0]
        img_render_iuv_rgba = np.concatenate((img_render_iuv, ones_np), axis=2)

        img_vis_list = [
            image_in_rgba, global_iuv_rgba, part_iuv_rgba, img_render_iuv_rgba
        ]

        if args.use_opendr:
            # visualize the predicted SMPL model using the opendr renderer
            K = iuv_renderer.K[0].cpu().numpy()
            # BUGFIX: was `smpl_neutral.faces`, which is a NameError when
            # SMPL_MODEL_TYPE is 'male' or 'female'; use the selected model.
            _, _, img_smpl, smpl_rgba = dr_render.render(
                image_tensor[0].cpu().numpy(), camera_pred[0].cpu().numpy(), K,
                verts_pred.cpu().numpy(), smpl.faces)
            img_smpl_rgba = np.concatenate((img_smpl, ones_np), axis=2)
            img_vis_list.extend([img_smpl_rgba, smpl_rgba])

        # concatenate panels horizontally, clamp to [0, 1] and save
        img_vis = np.concatenate(img_vis_list, axis=1)
        img_vis[img_vis < 0.0] = 0.0
        img_vis[img_vis > 1.0] = 1.0
        imsave(os.path.join(args.out_dir, img_id + '_result.png'), img_vis)

    print('Demo results have been saved in {}.'.format(args.out_dir))
def run_evaluation(model, dataset_name, dataset, result_file,
                   batch_size=32, img_res=224,
                   num_workers=32, shuffle=False, log_freq=50):
    """Run evaluation on the datasets and metrics we report in the paper.

    Supports pose evaluation (MPJPE, reconstruction error) on H36M /
    3DPW / MPI-INF-3DHP and mask/part rendering on LSP; optionally dumps
    predicted SMPL parameters and joints to `result_file`.
    """
    device = torch.device(
        'cuda') if torch.cuda.is_available() else torch.device('cpu')

    # Transfer model to the GPU
    model.to(device)

    # Load SMPL model (neutral for predictions, male/female for 3DPW GT)
    smpl_neutral = SMPL(config.SMPL_MODEL_DIR, create_transl=False).to(device)
    smpl_male = SMPL(config.SMPL_MODEL_DIR, gender='male',
                     create_transl=False).to(device)
    smpl_female = SMPL(config.SMPL_MODEL_DIR, gender='female',
                       create_transl=False).to(device)

    renderer = PartRenderer()

    # Regressor for H36m joints
    J_regressor = torch.from_numpy(np.load(
        config.JOINT_REGRESSOR_H36M)).float()

    save_results = result_file is not None
    # Disable shuffling if you want to save the results
    if save_results:
        shuffle = False
    # Create dataloader for the dataset
    data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle,
                             num_workers=num_workers)

    # Pose metrics
    # MPJPE and Reconstruction error for the non-parametric and parametric shapes
    mpjpe = np.zeros(len(dataset))
    recon_err = np.zeros(len(dataset))
    mpjpe_smpl = np.zeros(len(dataset))
    recon_err_smpl = np.zeros(len(dataset))

    # Shape metrics
    # Mean per-vertex error
    shape_err = np.zeros(len(dataset))
    shape_err_smpl = np.zeros(len(dataset))

    # Store SMPL parameters
    smpl_pose = np.zeros((len(dataset), 72))
    smpl_betas = np.zeros((len(dataset), 10))
    smpl_camera = np.zeros((len(dataset), 3))
    pred_joints = np.zeros((len(dataset), 17, 3))

    eval_pose = False
    eval_masks = False
    eval_parts = False
    # Choose appropriate evaluation for each dataset
    if dataset_name == 'h36m-p1' or dataset_name == 'h36m-p2' \
            or dataset_name == '3dpw' or dataset_name == 'mpi-inf-3dhp':
        eval_pose = True
    elif dataset_name == 'lsp':
        eval_masks = True
        eval_parts = True
        annot_path = config.DATASET_FOLDERS['upi-s1h']

    # Joint index maps: 17 joints for MPI-INF-3DHP, 14 otherwise
    joint_mapper_h36m = constants.H36M_TO_J17 \
        if dataset_name == 'mpi-inf-3dhp' else constants.H36M_TO_J14
    joint_mapper_gt = constants.J24_TO_J17 \
        if dataset_name == 'mpi-inf-3dhp' else constants.J24_TO_J14

    # Iterate over the entire dataset
    for step, batch in enumerate(
            tqdm(data_loader, desc='Eval', total=len(data_loader))):
        # Get ground truth annotations from the batch
        gt_pose = batch['pose'].to(device)
        gt_betas = batch['betas'].to(device)
        gt_vertices = smpl_neutral(betas=gt_betas, body_pose=gt_pose[:, 3:],
                                   global_orient=gt_pose[:, :3]).vertices
        images = batch['img'].to(device)
        gender = batch['gender'].to(device)
        curr_batch_size = images.shape[0]

        with torch.no_grad():
            pred_rotmat, pred_betas, pred_camera = model(images)
            pred_output = smpl_neutral(
                betas=pred_betas,
                body_pose=pred_rotmat[:, 1:],
                global_orient=pred_rotmat[:, 0].unsqueeze(1),
                pose2rot=False)
            pred_vertices = pred_output.vertices

        if save_results:
            # Convert 3x3 rotations back to axis-angle for compact storage
            rot_pad = torch.tensor([0, 0, 1], dtype=torch.float32,
                                   device=device).view(1, 3, 1)
            rotmat = torch.cat((pred_rotmat.view(-1, 3, 3),
                                rot_pad.expand(curr_batch_size * 24, -1, -1)),
                               dim=-1)
            pred_pose = tgm.rotation_matrix_to_angle_axis(
                rotmat).contiguous().view(-1, 72)
            smpl_pose[step * batch_size:step * batch_size +
                      curr_batch_size, :] = pred_pose.cpu().numpy()
            smpl_betas[step * batch_size:step * batch_size +
                       curr_batch_size, :] = pred_betas.cpu().numpy()
            smpl_camera[step * batch_size:step * batch_size +
                        curr_batch_size, :] = pred_camera.cpu().numpy()

        # 3D pose evaluation
        if eval_pose:
            # Regressor broadcasting
            J_regressor_batch = J_regressor[None, :].expand(
                pred_vertices.shape[0], -1, -1).to(device)
            # Get 14 ground truth joints
            if 'h36m' in dataset_name or 'mpi-inf' in dataset_name:
                gt_keypoints_3d = batch['pose_3d'].cuda()
                gt_keypoints_3d = gt_keypoints_3d[:, joint_mapper_gt, :-1]
            # For 3DPW get the 14 common joints from the rendered shape
            else:
                gt_vertices = smpl_male(global_orient=gt_pose[:, :3],
                                        body_pose=gt_pose[:, 3:],
                                        betas=gt_betas).vertices
                gt_vertices_female = smpl_female(global_orient=gt_pose[:, :3],
                                                 body_pose=gt_pose[:, 3:],
                                                 betas=gt_betas).vertices
                gt_vertices[gender == 1, :, :] = \
                    gt_vertices_female[gender == 1, :, :]
                gt_keypoints_3d = torch.matmul(J_regressor_batch, gt_vertices)
                gt_pelvis = gt_keypoints_3d[:, [0], :].clone()
                gt_keypoints_3d = gt_keypoints_3d[:, joint_mapper_h36m, :]
                gt_keypoints_3d = gt_keypoints_3d - gt_pelvis

            # Get 14 predicted joints from the mesh
            pred_keypoints_3d = torch.matmul(J_regressor_batch, pred_vertices)
            if save_results:
                pred_joints[step * batch_size:step * batch_size +
                            curr_batch_size, :, :] = \
                    pred_keypoints_3d.cpu().numpy()
            pred_pelvis = pred_keypoints_3d[:, [0], :].clone()
            pred_keypoints_3d = pred_keypoints_3d[:, joint_mapper_h36m, :]
            pred_keypoints_3d = pred_keypoints_3d - pred_pelvis

            # Absolute error (MPJPE)
            error = torch.sqrt(((pred_keypoints_3d - gt_keypoints_3d)**2)
                               .sum(dim=-1)).mean(dim=-1).cpu().numpy()
            mpjpe[step * batch_size:step * batch_size + curr_batch_size] = error

            # Reconstuction_error
            r_error = reconstruction_error(pred_keypoints_3d.cpu().numpy(),
                                           gt_keypoints_3d.cpu().numpy(),
                                           reduction=None)
            recon_err[step * batch_size:step * batch_size +
                      curr_batch_size] = r_error

        # If mask or part evaluation, render the mask and part images
        if eval_masks or eval_parts:
            mask, parts = renderer(pred_vertices, pred_camera)

        # Print intermediate results during evaluation
        if step % log_freq == log_freq - 1:
            if eval_pose:
                print('MPJPE: ' + str(1000 * mpjpe[:step * batch_size].mean()))
                print('Reconstruction Error: ' +
                      str(1000 * recon_err[:step * batch_size].mean()))
                print()

    # Save reconstructions to a file for further processing
    if save_results:
        np.savez(result_file, pred_joints=pred_joints, pose=smpl_pose,
                 betas=smpl_betas, camera=smpl_camera)
        np.savez('error.npz', mpjpe=mpjpe, recon_err=recon_err)

    # Print final results during evaluation
    print('*** Final Results ***')
    print()
    if eval_pose:
        print('MPJPE: ' + str(1000 * mpjpe.mean()))
        print('Reconstruction Error: ' + str(1000 * recon_err.mean()))
        print()
    # Tail of an image-preprocessing helper whose `def` lies outside this
    # chunk: convert the HWC uint8 image to a CHW float tensor in [0, 1] and
    # also return an ImageNet-normalized, batched copy.
    img = img.astype(np.float32) / 255.
    img = torch.from_numpy(img).permute(2, 0, 1)
    norm_img = normalize_img(img.clone())[None]
    return img, norm_img


if __name__ == '__main__':
    args = parser.parse_args()
    device = torch.device(
        'cuda') if torch.cuda.is_available() else torch.device('cpu')

    # Load trained model
    model = hmr(config.SMPL_MEAN_PARAMS).to(device)
    checkpoint = torch.load(args.trained_model)
    model.load_state_dict(checkpoint['model'], strict=False)

    # SMPL body model used to decode the predicted parameters
    smpl = SMPL(config.SMPL_MODEL_DIR, batch_size=1,
                create_transl=False).to(device)
    model.eval()

    # Generate rendered image
    renderer = Renderer(focal_length=constants.FOCAL_LENGTH,
                        img_res=constants.IMG_RES, faces=smpl.faces)

    # Processs the image and predict the parameters
    img, norm_img = process_image(args.test_image, args.bbox,
                                  input_res=constants.IMG_RES)
    with torch.no_grad():
        pred_rotmat, pred_betas, pred_camera = model(norm_img.to(device))
        pred_output = smpl(betas=pred_betas,
                           body_pose=pred_rotmat[:, 1:],
                           global_orient=pred_rotmat[:, 0].unsqueeze(1),
                           pose2rot=False)
def run_evaluation(model, dataset_name, dataset, mesh, batch_size=32, img_res=224, num_workers=32, shuffle=False, log_freq=50):
    """Run evaluation on the datasets and metrics we report in the paper.

    Depending on ``dataset_name``:
      - 'h36m-p1' / 'h36m-p2': MPJPE and reconstruction error for both the
        non-parametric (regressed) mesh and the parametric (SMPL) mesh;
      - 'up-3d': mean per-vertex shape error for both meshes;
      - 'lsp': silhouette-mask and body-part segmentation accuracy / F1.

    Results are printed every ``log_freq`` steps and once at the end.
    ``mesh`` and ``img_res`` are not referenced in this body; presumably kept
    for interface compatibility with callers — TODO confirm.
    """
    renderer = PartRenderer()
    # Create SMPL model
    # NOTE(review): hard-coded to CUDA, unlike `model` below which falls
    # back to CPU when CUDA is unavailable — confirm CUDA is required here.
    smpl = SMPL().cuda()
    # Regressor for H36m joints
    J_regressor = torch.from_numpy(np.load(cfg.JOINT_REGRESSOR_H36M)).float()
    # Create dataloader for the dataset
    data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle, num_workers=num_workers)
    # Transfer model to the GPU
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    model.to(device)
    model.eval()
    # Pose metrics
    # MPJPE and Reconstruction error for the non-parametric and parametric shapes
    # (per-sample accumulators, filled by dataloader position — requires shuffle=False
    # for the indices to correspond to dataset order)
    mpjpe = np.zeros(len(dataset))
    recon_err = np.zeros(len(dataset))
    mpjpe_smpl = np.zeros(len(dataset))
    recon_err_smpl = np.zeros(len(dataset))
    # Shape metrics
    # Mean per-vertex error
    shape_err = np.zeros(len(dataset))
    shape_err_smpl = np.zeros(len(dataset))
    # Mask and part metrics
    # Accuracy
    accuracy = 0.
    parts_accuracy = 0.
    # True positive, false positive and false negative
    tp = np.zeros((2, 1))
    fp = np.zeros((2, 1))
    fn = np.zeros((2, 1))
    parts_tp = np.zeros((7, 1))
    parts_fp = np.zeros((7, 1))
    parts_fn = np.zeros((7, 1))
    # Pixel count accumulators
    pixel_count = 0
    parts_pixel_count = 0
    eval_pose = False
    eval_shape = False
    eval_masks = False
    eval_parts = False
    # Choose appropriate evaluation for each dataset
    if dataset_name == 'h36m-p1' or dataset_name == 'h36m-p2':
        eval_pose = True
    elif dataset_name == 'up-3d':
        eval_shape = True
    elif dataset_name == 'lsp':
        eval_masks = True
        eval_parts = True
        # annot_path is only defined on this branch; the mask/part loops below
        # assume dataset_name == 'lsp'
        annot_path = cfg.DATASET_FOLDERS['upi-s1h']
    # Iterate over the entire dataset
    for step, batch in enumerate(
            tqdm(data_loader, desc='Eval', total=len(data_loader))):
        # Get ground truth annotations from the batch
        gt_pose = batch['pose'].to(device)
        gt_betas = batch['betas'].to(device)
        gt_vertices = smpl(gt_pose, gt_betas)
        images = batch['img'].to(device)
        curr_batch_size = images.shape[0]
        # Run inference
        with torch.no_grad():
            pred_vertices, pred_vertices_smpl, camera, pred_rotmat, pred_betas = model(
                images)
        # 3D pose evaluation
        if eval_pose:
            # Regressor broadcasting: expand the H36M joint regressor to the batch
            J_regressor_batch = J_regressor[None, :].expand(
                pred_vertices.shape[0], -1, -1).to(device)
            # Get 14 ground truth joints
            gt_keypoints_3d = batch['pose_3d'].cuda()
            gt_keypoints_3d = gt_keypoints_3d[:, cfg.J24_TO_J14, :-1]
            # Get 14 predicted joints from the non-parametic mesh,
            # pelvis-centered (joint 0) before the J14 remap
            pred_keypoints_3d = torch.matmul(J_regressor_batch, pred_vertices)
            pred_pelvis = pred_keypoints_3d[:, [0], :].clone()
            pred_keypoints_3d = pred_keypoints_3d[:, cfg.H36M_TO_J14, :]
            pred_keypoints_3d = pred_keypoints_3d - pred_pelvis
            # Get 14 predicted joints from the SMPL mesh (same centering)
            pred_keypoints_3d_smpl = torch.matmul(J_regressor_batch,
                                                  pred_vertices_smpl)
            pred_pelvis_smpl = pred_keypoints_3d_smpl[:, [0], :].clone()
            pred_keypoints_3d_smpl = pred_keypoints_3d_smpl[:, cfg.H36M_TO_J14, :]
            pred_keypoints_3d_smpl = pred_keypoints_3d_smpl - pred_pelvis_smpl
            # Compute error metrics
            # Absolute error (MPJPE)
            error = torch.sqrt(
                ((pred_keypoints_3d - gt_keypoints_3d)**2).sum(dim=-1)).mean(dim=-1).cpu().numpy()
            error_smpl = torch.sqrt(
                ((pred_keypoints_3d_smpl - gt_keypoints_3d)**2).sum(dim=-1)).mean(dim=-1).cpu().numpy()
            mpjpe[step * batch_size:step * batch_size + curr_batch_size] = error
            mpjpe_smpl[step * batch_size:step * batch_size + curr_batch_size] = error_smpl
            # Reconstuction_error (Procrustes-aligned, per sample)
            r_error = reconstruction_error(pred_keypoints_3d.cpu().numpy(),
                                           gt_keypoints_3d.cpu().numpy(),
                                           reduction=None)
            r_error_smpl = reconstruction_error(
                pred_keypoints_3d_smpl.cpu().numpy(),
                gt_keypoints_3d.cpu().numpy(),
                reduction=None)
            recon_err[step * batch_size:step * batch_size + curr_batch_size] = r_error
            recon_err_smpl[step * batch_size:step * batch_size + curr_batch_size] = r_error_smpl
        # Shape evaluation (Mean per-vertex error)
        if eval_shape:
            se = torch.sqrt(
                ((pred_vertices - gt_vertices)**2).sum(dim=-1)).mean(dim=-1).cpu().numpy()
            se_smpl = torch.sqrt(
                ((pred_vertices_smpl - gt_vertices)**2).sum(dim=-1)).mean(dim=-1).cpu().numpy()
            shape_err[step * batch_size:step * batch_size + curr_batch_size] = se
            shape_err_smpl[step * batch_size:step * batch_size + curr_batch_size] = se_smpl
        # If mask or part evaluation, render the mask and part images
        if eval_masks or eval_parts:
            mask, parts = renderer(pred_vertices, camera)
        # Mask evaluation (for LSP)
        if eval_masks:
            center = batch['center'].cpu().numpy()
            scale = batch['scale'].cpu().numpy()
            # Dimensions of original image
            orig_shape = batch['orig_shape'].cpu().numpy()
            for i in range(curr_batch_size):
                # After rendering, convert imate back to original resolution
                pred_mask = uncrop(mask[i].cpu().numpy(), center[i], scale[i],
                                   orig_shape[i]) > 0
                # Load gt mask
                gt_mask = cv2.imread(
                    os.path.join(annot_path, batch['maskname'][i]), 0) > 0
                # Evaluation consistent with the original UP-3D code
                accuracy += (gt_mask == pred_mask).sum()
                pixel_count += np.prod(np.array(gt_mask.shape))
                # Per-class (background / foreground) TP/FP/FN counts
                for c in range(2):
                    cgt = gt_mask == c
                    cpred = pred_mask == c
                    tp[c] += (cgt & cpred).sum()
                    fp[c] += (~cgt & cpred).sum()
                    fn[c] += (cgt & ~cpred).sum()
            f1 = 2 * tp / (2 * tp + fp + fn)
        # Part evaluation (for LSP)
        if eval_parts:
            center = batch['center'].cpu().numpy()
            scale = batch['scale'].cpu().numpy()
            orig_shape = batch['orig_shape'].cpu().numpy()
            for i in range(curr_batch_size):
                pred_parts = uncrop(parts[i].cpu().numpy().astype(np.uint8),
                                    center[i], scale[i], orig_shape[i])
                # Load gt part segmentation
                gt_parts = cv2.imread(
                    os.path.join(annot_path, batch['partname'][i]), 0)
                # Evaluation consistent with the original UP-3D code
                # 6 parts + background; label 255 marks ignored pixels
                for c in range(7):
                    cgt = gt_parts == c
                    cpred = pred_parts == c
                    cpred[gt_parts == 255] = 0
                    parts_tp[c] += (cgt & cpred).sum()
                    parts_fp[c] += (~cgt & cpred).sum()
                    parts_fn[c] += (cgt & ~cpred).sum()
                gt_parts[gt_parts == 255] = 0
                pred_parts[pred_parts == 255] = 0
                parts_f1 = 2 * parts_tp / (2 * parts_tp + parts_fp + parts_fn)
                parts_accuracy += (gt_parts == pred_parts).sum()
                parts_pixel_count += np.prod(np.array(gt_parts.shape))
        # Print intermediate results during evaluation
        # NOTE(review): the slice [:step * batch_size] excludes the current batch
        if step % log_freq == log_freq - 1:
            if eval_pose:
                print('MPJPE (NonParam): ' +
                      str(1000 * mpjpe[:step * batch_size].mean()))
                print('Reconstruction Error (NonParam): ' +
                      str(1000 * recon_err[:step * batch_size].mean()))
                print('MPJPE (Param): ' +
                      str(1000 * mpjpe_smpl[:step * batch_size].mean()))
                print('Reconstruction Error (Param): ' +
                      str(1000 * recon_err_smpl[:step * batch_size].mean()))
                print()
            if eval_shape:
                print('Shape Error (NonParam): ' +
                      str(1000 * shape_err[:step * batch_size].mean()))
                print('Shape Error (Param): ' +
                      str(1000 * shape_err_smpl[:step * batch_size].mean()))
                print()
            if eval_masks:
                print('Accuracy: ', accuracy / pixel_count)
                print('F1: ', f1.mean())
                print()
            if eval_parts:
                print('Parts Accuracy: ', parts_accuracy / parts_pixel_count)
                print('Parts F1 (BG): ', parts_f1[[0, 1, 2, 3, 4, 5, 6]].mean())
                print()
    # Print final results during evaluation
    print('*** Final Results ***')
    print()
    if eval_pose:
        print('MPJPE (NonParam): ' + str(1000 * mpjpe.mean()))
        print('Reconstruction Error (NonParam): ' + str(1000 * recon_err.mean()))
        print('MPJPE (Param): ' + str(1000 * mpjpe_smpl.mean()))
        print('Reconstruction Error (Param): ' + str(1000 * recon_err_smpl.mean()))
        print()
    if eval_shape:
        print('Shape Error (NonParam): ' + str(1000 * shape_err.mean()))
        print('Shape Error (Param): ' + str(1000 * shape_err_smpl.mean()))
        print()
    if eval_masks:
        print('Accuracy: ', accuracy / pixel_count)
        print('F1: ', f1.mean())
        print()
    if eval_parts:
        print('Parts Accuracy: ', parts_accuracy / parts_pixel_count)
        print('Parts F1 (BG): ', parts_f1[[0, 1, 2, 3, 4, 5, 6]].mean())
        print()
args = parser.parse_args() img_path = args.img_path checkpoint_path = args.checkpoint normalize_img = Normalize(mean=constants.IMG_NORM_MEAN, std=constants.IMG_NORM_STD) device = torch.device( 'cuda') if torch.cuda.is_available() else torch.device('cpu') hmr_model = hmr(config.SMPL_MEAN_PARAMS) checkpoint = torch.load(checkpoint_path) hmr_model.load_state_dict(checkpoint, strict=False) hmr_model.eval() hmr_model.to(device) smpl_neutral = SMPL(config.SMPL_MODEL_DIR, create_transl=False).to(device) img_renderer = Renderer(focal_length=constants.FOCAL_LENGTH, img_res=constants.IMG_RES, faces=smpl_neutral.faces) img = imageio.imread(img_path) im_size = img.shape[0] im_scale = size_to_scale(im_size) img_up = scipy.misc.imresize(img, [224, 224]) img_up = np.transpose(img_up.astype('float32'), (2, 0, 1)) / 255.0 img_up = normalize_img(torch.from_numpy(img_up).float()) images = img_up[None].to(device) with torch.no_grad(): pred_rotmat, pred_betas, pred_camera, _ = hmr_model(images, scale=im_scale)
def init_fn(self):
    """Set up datasets, networks, optimizer, losses and rendering helpers.

    Called once at trainer construction. Populates (among others):
    ``self.CNet`` / ``self.LNet`` (networks), ``self.optimizer``,
    ``self.models_dict`` / ``self.optimizers_dict`` (used for
    checkpointing), the UV sampling weights, and the loss criteria.
    """
    # create training dataset (with IUV maps enabled)
    self.train_ds = create_dataset(self.options.dataset, self.options, use_IUV=True)

    # Dense-prediction resolution: image resolution halved once per warp level
    self.dp_res = int(self.options.img_res // (2**self.options.warp_level))

    self.CNet = DPNet(warp_lv=self.options.warp_level,
                      norm_type=self.options.norm_type).to(self.device)
    self.LNet = get_LNet(self.options).to(self.device)

    # Neutral SMPL plus gendered variants (female/male model files)
    self.smpl = SMPL().to(self.device)
    self.female_smpl = SMPL(cfg.FEMALE_SMPL_FILE).to(self.device)
    self.male_smpl = SMPL(cfg.MALE_SMPL_FILE).to(self.device)

    uv_res = self.options.uv_res
    self.uv_type = self.options.uv_type
    self.sampler = Index_UV_Generator(UV_height=uv_res, UV_width=-1,
                                      uv_type=self.uv_type).to(self.device)

    # Per-pixel UV loss weights are precomputed once and cached on disk
    weight_file = 'data/weight_p24_h{:04d}_w{:04d}_{}.npy'.format(
        uv_res, uv_res, self.uv_type)
    if not os.path.exists(weight_file):
        cal_uv_weight(self.sampler, weight_file)
    uv_weight = torch.from_numpy(np.load(weight_file)).to(
        self.device).float()
    # Zero out pixels outside the UV mask, then normalize to mean 1
    uv_weight = uv_weight * self.sampler.mask.to(uv_weight.device).float()
    uv_weight = uv_weight / uv_weight.mean()
    # Shape (1, H, W, 1) so it broadcasts over batch and channel dims
    self.uv_weight = uv_weight[None, :, :, None]
    # Normalizer for the total-variation loss terms
    self.tv_factor = (uv_res - 1) * (uv_res - 1)

    # Setup an optimizer
    # In the 'dp' stage only CNet is trained; otherwise both nets are trained
    # jointly. models_dict/optimizers_dict drive checkpoint save/restore.
    if self.options.stage == 'dp':
        self.optimizer = torch.optim.Adam(
            params=list(self.CNet.parameters()),
            lr=self.options.lr,
            betas=(self.options.adam_beta1, 0.999),
            weight_decay=self.options.wd)
        self.models_dict = {'CNet': self.CNet}
        self.optimizers_dict = {'optimizer': self.optimizer}
    else:
        self.optimizer = torch.optim.Adam(
            params=list(self.LNet.parameters()) + list(self.CNet.parameters()),
            lr=self.options.lr,
            betas=(self.options.adam_beta1, 0.999),
            weight_decay=self.options.wd)
        self.models_dict = {'CNet': self.CNet, 'LNet': self.LNet}
        self.optimizers_dict = {'optimizer': self.optimizer}

    # Create loss functions
    self.criterion_shape = nn.L1Loss().to(self.device)
    self.criterion_uv = nn.L1Loss().to(self.device)
    # reduction='none' so per-keypoint confidence weighting can be applied later
    self.criterion_keypoints = nn.MSELoss(reduction='none').to(self.device)
    self.criterion_keypoints_3d = nn.L1Loss(reduction='none').to(
        self.device)
    self.criterion_regr = nn.MSELoss().to(self.device)

    # LSP indices from full list of keypoints
    self.to_lsp = list(range(14))
    self.renderer = Renderer(faces=self.smpl.faces.cpu().numpy())

    # Optionally start training from a pretrained checkpoint
    # Note that this is different from resuming training
    # For the latter use --resume
    if self.options.pretrained_checkpoint is not None:
        self.load_pretrained(
            checkpoint_file=self.options.pretrained_checkpoint)
def run_evaluation(model, dataset_name, dataset, mesh, batch_size=32, img_res=224,
                   num_workers=32, shuffle=False, log_freq=50):
    """Run evaluation on the datasets and metrics we report in the paper.

    Depending on ``dataset_name``:
      - 'lsp': per-sample sum of squared 2D keypoint errors for both the
        non-parametric (regressed) mesh and the parametric (SMPL) mesh;
      - 'up-3d': mean per-vertex shape error for both meshes.

    Intermediate metrics are printed every ``log_freq`` steps and final
    metrics once at the end. ``mesh`` and ``img_res`` are not used in this
    body; presumably kept for interface compatibility — TODO confirm.
    """
    renderer = PartRenderer()
    # Create SMPL model
    # NOTE(review): hard-coded to CUDA; `model` below falls back to CPU —
    # confirm CUDA is actually required here.
    smpl = SMPL().cuda()
    # Create dataloader for the dataset
    data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle,
                             num_workers=num_workers)
    # Transfer model to the GPU (or CPU when CUDA is unavailable)
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    model.to(device)
    model.eval()

    # 2D pose metrics (per-sample sum of squared pixel errors)
    pose_2d_m = np.zeros(len(dataset))
    pose_2d_m_smpl = np.zeros(len(dataset))
    # Shape metrics: mean per-vertex error
    shape_err = np.zeros(len(dataset))
    shape_err_smpl = np.zeros(len(dataset))

    eval_2d_pose = False
    eval_shape = False
    # Choose appropriate evaluation for each dataset
    if dataset_name == 'up-3d':
        eval_shape = True
    elif dataset_name == 'lsp':
        eval_2d_pose = True

    # Iterate over the entire dataset
    for step, batch in enumerate(tqdm(data_loader, desc='Eval', total=len(data_loader))):
        # Get ground truth annotations from the batch
        gt_pose = batch['pose'].to(device)
        gt_betas = batch['betas'].to(device)
        gt_vertices = smpl(gt_pose, gt_betas)
        # BUGFIX: use the selected device instead of hard-coded CUDA so this
        # line is consistent with the device fallback above and does not
        # crash on CPU-only machines.
        images = batch['img'].to(device)
        curr_batch_size = images.shape[0]
        gt_keypoints_2d = batch['keypoints'].cpu().numpy()

        # Run inference
        with torch.no_grad():
            pred_vertices, pred_vertices_smpl, camera, pred_rotmat, pred_betas = model(images)

        # Render mask/part images. NOTE(review): the outputs are unused in
        # this function; the call is kept for exact behavioral parity with
        # the original code.
        if eval_2d_pose:
            mask, parts = renderer(pred_vertices, camera)

        # 2D pose evaluation (for LSP)
        if eval_2d_pose:
            # Get 3D and projected 2D keypoints from the regressed shape.
            # These are batch-level quantities, so compute them once per
            # batch (the original recomputed them inside the per-sample
            # loop below — pure loop-invariant work).
            pred_keypoints_3d = smpl.get_joints(pred_vertices)
            pred_keypoints_2d = orthographic_projection(pred_keypoints_3d,
                                                        camera)[:, :, :2]
            pred_keypoints_3d_smpl = smpl.get_joints(pred_vertices_smpl)
            pred_keypoints_2d_smpl = orthographic_projection(
                pred_keypoints_3d_smpl, camera.detach())[:, :, :2]
            # First 14 keypoints correspond to the LSP joints
            pred_kp_batch = pred_keypoints_2d.cpu().numpy()[:, list(range(14))]
            pred_kp_smpl_batch = pred_keypoints_2d_smpl.cpu().numpy()[:, list(range(14))]
            for i in range(curr_batch_size):
                gt_kp = gt_keypoints_2d[i, list(range(14))]
                # Sum of squared errors over the 14 LSP keypoints
                # (gt_kp[:, :2] drops the confidence column)
                pose_2d_m[step * batch_size + i] = np.sum(
                    (gt_kp[:, :2] - pred_kp_batch[i])**2)
                pose_2d_m_smpl[step * batch_size + i] = np.sum(
                    (gt_kp[:, :2] - pred_kp_smpl_batch[i])**2)

        # Shape evaluation (Mean per-vertex error)
        if eval_shape:
            se = torch.sqrt(((pred_vertices - gt_vertices) ** 2).sum(dim=-1)).mean(dim=-1).cpu().numpy()
            se_smpl = torch.sqrt(((pred_vertices_smpl - gt_vertices) ** 2).sum(dim=-1)).mean(dim=-1).cpu().numpy()
            shape_err[step * batch_size:step * batch_size + curr_batch_size] = se
            shape_err_smpl[step * batch_size:step * batch_size + curr_batch_size] = se_smpl

        # Print intermediate results during evaluation
        # NOTE(review): the slice [:step * batch_size] excludes the current
        # batch, matching the original reporting behavior.
        if step % log_freq == log_freq - 1:
            if eval_2d_pose:
                print('2D keypoints (NonParam): ' + str(1000 * pose_2d_m[:step * batch_size].mean()))
                print('2D keypoints (Param): ' + str(1000 * pose_2d_m_smpl[:step * batch_size].mean()))
                print()
            if eval_shape:
                print('Shape Error (NonParam): ' + str(1000 * shape_err[:step * batch_size].mean()))
                print('Shape Error (Param): ' + str(1000 * shape_err_smpl[:step * batch_size].mean()))
                print()

    # Print and store final results during evaluation
    print('*** Final Results ***')
    print()
    if eval_2d_pose:
        print('2D keypoints (NonParam): ' + str(1000 * pose_2d_m.mean()))
        print('2D keypoints (Param): ' + str(1000 * pose_2d_m_smpl.mean()))
        print()
        # store results
        #np.savez("../eval-output/CMR_no_extra_lsp_model_alt.npz", imgnames = dataset.imgname, kp_2d_err_graph = pose_2d_m, kp_2d_err_smpl = pose_2d_m_smpl)
    if eval_shape:
        print('Shape Error (NonParam): ' + str(1000 * shape_err.mean()))
        print('Shape Error (Param): ' + str(1000 * shape_err_smpl.mean()))
        print()
def run_evaluation(model, dataset_name, dataset, result_file, batch_size=32, img_res=224,
                   num_workers=32, shuffle=False, log_freq=50):
    """Run evaluation on the datasets and metrics we report in the paper.

    Depending on ``dataset_name``:
      - 'h36m-p1' / 'h36m-p2' / '3dpw' / 'mpi-inf-3dhp': MPJPE and
        reconstruction error from H36M-regressed joints;
      - 'lsp': silhouette-mask and body-part segmentation accuracy / F1.

    If ``result_file`` is not None, predicted SMPL pose/betas/camera and
    joints are additionally saved there via ``np.savez`` (shuffling is
    forced off so array rows correspond to dataset order).
    ``img_res`` is not referenced in this body; presumably kept for
    interface compatibility — TODO confirm.
    """
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    # Transfer model to the GPU
    model.to(device)
    # Load SMPL model: neutral for predictions, gendered for 3DPW ground truth
    smpl_neutral = SMPL(config.SMPL_MODEL_DIR,
                        create_transl=False).to(device)
    smpl_male = SMPL(config.SMPL_MODEL_DIR,
                     gender='male',
                     create_transl=False).to(device)
    smpl_female = SMPL(config.SMPL_MODEL_DIR,
                       gender='female',
                       create_transl=False).to(device)
    renderer = PartRenderer()
    # Regressor for H36m joints
    J_regressor = torch.from_numpy(np.load(config.JOINT_REGRESSOR_H36M)).float()
    save_results = result_file is not None
    # Disable shuffling if you want to save the results
    if save_results:
        shuffle = False
    # Create dataloader for the dataset
    data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle,
                             num_workers=num_workers)
    # Pose metrics
    # MPJPE and Reconstruction error for the non-parametric and parametric shapes
    mpjpe = np.zeros(len(dataset))
    recon_err = np.zeros(len(dataset))
    # NOTE(review): the *_smpl accumulators are allocated but never written in
    # this function
    mpjpe_smpl = np.zeros(len(dataset))
    recon_err_smpl = np.zeros(len(dataset))
    # Shape metrics
    # Mean per-vertex error
    shape_err = np.zeros(len(dataset))
    shape_err_smpl = np.zeros(len(dataset))
    # Mask and part metrics
    # Accuracy
    accuracy = 0.
    parts_accuracy = 0.
    # True positive, false positive and false negative
    tp = np.zeros((2, 1))
    fp = np.zeros((2, 1))
    fn = np.zeros((2, 1))
    parts_tp = np.zeros((7, 1))
    parts_fp = np.zeros((7, 1))
    parts_fn = np.zeros((7, 1))
    # Pixel count accumulators
    pixel_count = 0
    parts_pixel_count = 0
    # Store SMPL parameters (filled only when save_results is True)
    smpl_pose = np.zeros((len(dataset), 72))
    smpl_betas = np.zeros((len(dataset), 10))
    smpl_camera = np.zeros((len(dataset), 3))
    pred_joints = np.zeros((len(dataset), 17, 3))
    eval_pose = False
    eval_masks = False
    eval_parts = False
    # Choose appropriate evaluation for each dataset
    if dataset_name == 'h36m-p1' or dataset_name == 'h36m-p2' or dataset_name == '3dpw' or dataset_name == 'mpi-inf-3dhp':
        eval_pose = True
    elif dataset_name == 'lsp':
        eval_masks = True
        eval_parts = True
        # annot_path is only defined on this branch; the mask/part loops below
        # assume dataset_name == 'lsp'
        annot_path = config.DATASET_FOLDERS['upi-s1h']
    # mpi-inf-3dhp evaluates on 17 joints, all other pose datasets on 14
    joint_mapper_h36m = constants.H36M_TO_J17 if dataset_name == 'mpi-inf-3dhp' else constants.H36M_TO_J14
    joint_mapper_gt = constants.J24_TO_J17 if dataset_name == 'mpi-inf-3dhp' else constants.J24_TO_J14
    # Iterate over the entire dataset
    for step, batch in enumerate(tqdm(data_loader, desc='Eval', total=len(data_loader))):
        # Get ground truth annotations from the batch
        gt_pose = batch['pose'].to(device)
        gt_betas = batch['betas'].to(device)
        gt_vertices = smpl_neutral(betas=gt_betas, body_pose=gt_pose[:, 3:],
                                   global_orient=gt_pose[:, :3]).vertices
        images = batch['img'].to(device)
        gender = batch['gender'].to(device)
        curr_batch_size = images.shape[0]

        with torch.no_grad():
            pred_rotmat, pred_betas, pred_camera = model(images)
            # pose2rot=False: the network outputs rotation matrices directly
            pred_output = smpl_neutral(betas=pred_betas,
                                       body_pose=pred_rotmat[:, 1:],
                                       global_orient=pred_rotmat[:, 0].unsqueeze(1),
                                       pose2rot=False)
            pred_vertices = pred_output.vertices

        if save_results:
            # Convert predicted rotation matrices back to 72-dim axis-angle:
            # pad each 3x3 to 3x4 with [0, 0, 1] as required by tgm
            rot_pad = torch.tensor([0, 0, 1], dtype=torch.float32, device=device).view(1, 3, 1)
            rotmat = torch.cat((pred_rotmat.view(-1, 3, 3),
                                rot_pad.expand(curr_batch_size * 24, -1, -1)), dim=-1)
            pred_pose = tgm.rotation_matrix_to_angle_axis(rotmat).contiguous().view(-1, 72)
            smpl_pose[step * batch_size:step * batch_size + curr_batch_size, :] = pred_pose.cpu().numpy()
            smpl_betas[step * batch_size:step * batch_size + curr_batch_size, :] = pred_betas.cpu().numpy()
            smpl_camera[step * batch_size:step * batch_size + curr_batch_size, :] = pred_camera.cpu().numpy()

        # 3D pose evaluation
        if eval_pose:
            # Regressor broadcasting: expand the H36M joint regressor to the batch
            J_regressor_batch = J_regressor[None, :].expand(pred_vertices.shape[0], -1, -1).to(device)
            # Get 14 ground truth joints
            if 'h36m' in dataset_name or 'mpi-inf' in dataset_name:
                gt_keypoints_3d = batch['pose_3d'].cuda()
                gt_keypoints_3d = gt_keypoints_3d[:, joint_mapper_gt, :-1]
            # For 3DPW get the 14 common joints from the rendered shape
            else:
                # Use the gendered SMPL model matching each sample
                # (gender == 1 marks female samples)
                gt_vertices = smpl_male(global_orient=gt_pose[:, :3], body_pose=gt_pose[:, 3:], betas=gt_betas).vertices
                gt_vertices_female = smpl_female(global_orient=gt_pose[:, :3], body_pose=gt_pose[:, 3:], betas=gt_betas).vertices
                gt_vertices[gender == 1, :, :] = gt_vertices_female[gender == 1, :, :]
                gt_keypoints_3d = torch.matmul(J_regressor_batch, gt_vertices)
                # Pelvis-center (joint 0) before remapping to the common joint set
                gt_pelvis = gt_keypoints_3d[:, [0], :].clone()
                gt_keypoints_3d = gt_keypoints_3d[:, joint_mapper_h36m, :]
                gt_keypoints_3d = gt_keypoints_3d - gt_pelvis

            # Get 14 predicted joints from the mesh
            pred_keypoints_3d = torch.matmul(J_regressor_batch, pred_vertices)
            if save_results:
                pred_joints[step * batch_size:step * batch_size + curr_batch_size, :, :] = pred_keypoints_3d.cpu().numpy()
            pred_pelvis = pred_keypoints_3d[:, [0], :].clone()
            pred_keypoints_3d = pred_keypoints_3d[:, joint_mapper_h36m, :]
            pred_keypoints_3d = pred_keypoints_3d - pred_pelvis

            # Absolute error (MPJPE)
            error = torch.sqrt(((pred_keypoints_3d - gt_keypoints_3d) ** 2).sum(dim=-1)).mean(dim=-1).cpu().numpy()
            mpjpe[step * batch_size:step * batch_size + curr_batch_size] = error

            # Reconstuction_error (Procrustes-aligned, per sample)
            r_error = reconstruction_error(pred_keypoints_3d.cpu().numpy(), gt_keypoints_3d.cpu().numpy(), reduction=None)
            recon_err[step * batch_size:step * batch_size + curr_batch_size] = r_error

        # If mask or part evaluation, render the mask and part images
        if eval_masks or eval_parts:
            mask, parts = renderer(pred_vertices, pred_camera)

        # Mask evaluation (for LSP)
        if eval_masks:
            center = batch['center'].cpu().numpy()
            scale = batch['scale'].cpu().numpy()
            # Dimensions of original image
            orig_shape = batch['orig_shape'].cpu().numpy()
            for i in range(curr_batch_size):
                # After rendering, convert imate back to original resolution
                pred_mask = uncrop(mask[i].cpu().numpy(), center[i], scale[i], orig_shape[i]) > 0
                # Load gt mask
                gt_mask = cv2.imread(os.path.join(annot_path, batch['maskname'][i]), 0) > 0
                # Evaluation consistent with the original UP-3D code
                accuracy += (gt_mask == pred_mask).sum()
                pixel_count += np.prod(np.array(gt_mask.shape))
                # Per-class (background / foreground) TP/FP/FN counts
                for c in range(2):
                    cgt = gt_mask == c
                    cpred = pred_mask == c
                    tp[c] += (cgt & cpred).sum()
                    fp[c] += (~cgt & cpred).sum()
                    fn[c] += (cgt & ~cpred).sum()
            f1 = 2 * tp / (2 * tp + fp + fn)

        # Part evaluation (for LSP)
        if eval_parts:
            center = batch['center'].cpu().numpy()
            scale = batch['scale'].cpu().numpy()
            orig_shape = batch['orig_shape'].cpu().numpy()
            for i in range(curr_batch_size):
                pred_parts = uncrop(parts[i].cpu().numpy().astype(np.uint8), center[i], scale[i], orig_shape[i])
                # Load gt part segmentation
                gt_parts = cv2.imread(os.path.join(annot_path, batch['partname'][i]), 0)
                # Evaluation consistent with the original UP-3D code
                # 6 parts + background; label 255 marks ignored pixels
                for c in range(7):
                    cgt = gt_parts == c
                    cpred = pred_parts == c
                    cpred[gt_parts == 255] = 0
                    parts_tp[c] += (cgt & cpred).sum()
                    parts_fp[c] += (~cgt & cpred).sum()
                    parts_fn[c] += (cgt & ~cpred).sum()
                gt_parts[gt_parts == 255] = 0
                pred_parts[pred_parts == 255] = 0
                parts_f1 = 2 * parts_tp / (2 * parts_tp + parts_fp + parts_fn)
                parts_accuracy += (gt_parts == pred_parts).sum()
                parts_pixel_count += np.prod(np.array(gt_parts.shape))

        # Print intermediate results during evaluation
        # NOTE(review): the slice [:step * batch_size] excludes the current batch
        if step % log_freq == log_freq - 1:
            if eval_pose:
                print('MPJPE: ' + str(1000 * mpjpe[:step * batch_size].mean()))
                print('Reconstruction Error: ' + str(1000 * recon_err[:step * batch_size].mean()))
                print()
            if eval_masks:
                print('Accuracy: ', accuracy / pixel_count)
                print('F1: ', f1.mean())
                print()
            if eval_parts:
                print('Parts Accuracy: ', parts_accuracy / parts_pixel_count)
                print('Parts F1 (BG): ', parts_f1[[0, 1, 2, 3, 4, 5, 6]].mean())
                print()

    # Save reconstructions to a file for further processing
    if save_results:
        np.savez(result_file, pred_joints=pred_joints, pose=smpl_pose, betas=smpl_betas, camera=smpl_camera)
    # Print final results during evaluation
    print('*** Final Results ***')
    print()
    if eval_pose:
        print('MPJPE: ' + str(1000 * mpjpe.mean()))
        print('Reconstruction Error: ' + str(1000 * recon_err.mean()))
        print()
    if eval_masks:
        print('Accuracy: ', accuracy / pixel_count)
        print('F1: ', f1.mean())
        print()
    if eval_parts:
        print('Parts Accuracy: ', parts_accuracy / parts_pixel_count)
        print('Parts F1 (BG): ', parts_f1[[0, 1, 2, 3, 4, 5, 6]].mean())
        print()
extract_img=True) h36m_extract(cfg.H36M_ROOT_ORIGIN, out_path, protocol=2, extract_img=False) # LSP dataset preprocessing (test set) lsp_dataset_extract(cfg.LSP_ROOT, out_path) # UP-3D dataset preprocessing (lsp_test set) up_3d_extract(cfg.UP_3D_ROOT, out_path, 'lsp_test') if args.gt_iuv: device = torch.device( 'cuda') if torch.cuda.is_available() else torch.device('cpu') smpl = SMPL().to(device) uv_type = args.uv_type if uv_type == 'SMPL': data = objfile.read_obj_full( 'data/uv_sampler/smpl_fbx_template.obj') elif uv_type == 'BF': data = objfile.read_obj_full( 'data/uv_sampler/smpl_boundry_free_template.obj') vt = np.array(data['texcoords']) face = [f[0] for f in data['faces']] face = np.array(face) - 1 vt_face = [f[2] for f in data['faces']] vt_face = np.array(vt_face) - 1 renderer = UVRenderer(faces=face,
def run_evaluation(model, dataset_name, dataset, result_file, batch_size=32, img_res=224, num_workers=32, shuffle=False, log_freq=50, bVerbose= True): """Run evaluation on the datasets and metrics we report in the paper. """ device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu') # # Transfer model to the GPU # model.to(device) # Load SMPL model global g_smpl_neutral, g_smpl_male, g_smpl_female if g_smpl_neutral is None: g_smpl_neutral = SMPL(config.SMPL_MODEL_DIR, create_transl=False).to(device) g_smpl_male = SMPL(config.SMPL_MODEL_DIR, gender='male', create_transl=False).to(device) g_smpl_female = SMPL(config.SMPL_MODEL_DIR, gender='female', create_transl=False).to(device) smpl_neutral = g_smpl_neutral smpl_male = g_smpl_male smpl_female = g_smpl_female else: smpl_neutral = g_smpl_neutral smpl_male = g_smpl_male smpl_female = g_smpl_female # renderer = PartRenderer() # Regressor for H36m joints J_regressor = torch.from_numpy(np.load(config.JOINT_REGRESSOR_H36M)).float() save_results = result_file is not None # Disable shuffling if you want to save the results if save_results: shuffle=False # Create dataloader for the dataset data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle, num_workers=num_workers) # Pose metrics # MPJPE and Reconstruction error for the non-parametric and parametric shapes # mpjpe = np.zeros(len(dataset)) # recon_err = np.zeros(len(dataset)) quant_mpjpe = {}#np.zeros(len(dataset)) quant_recon_err = {}#np.zeros(len(dataset)) mpjpe = np.zeros(len(dataset)) recon_err = np.zeros(len(dataset)) mpjpe_smpl = np.zeros(len(dataset)) recon_err_smpl = np.zeros(len(dataset)) # Shape metrics # Mean per-vertex error shape_err = np.zeros(len(dataset)) shape_err_smpl = np.zeros(len(dataset)) # Mask and part metrics # Accuracy accuracy = 0. parts_accuracy = 0. 
# True positive, false positive and false negative tp = np.zeros((2,1)) fp = np.zeros((2,1)) fn = np.zeros((2,1)) parts_tp = np.zeros((7,1)) parts_fp = np.zeros((7,1)) parts_fn = np.zeros((7,1)) # Pixel count accumulators pixel_count = 0 parts_pixel_count = 0 # Store SMPL parameters output_pred_pose = np.zeros((len(dataset), 72)) output_pred_betas = np.zeros((len(dataset), 10)) output_pred_camera = np.zeros((len(dataset), 3)) output_pred_joints = np.zeros((len(dataset), 14, 3)) output_gt_pose = np.zeros((len(dataset), 72)) output_gt_betas = np.zeros((len(dataset), 10)) output_gt_joints = np.zeros((len(dataset), 14, 3)) output_error_MPJPE = np.zeros((len(dataset))) output_error_recon = np.zeros((len(dataset))) output_imgNames =[] output_cropScale = np.zeros((len(dataset))) output_cropCenter = np.zeros((len(dataset), 2)) outputStartPointer = 0 eval_pose = False eval_masks = False eval_parts = False # Choose appropriate evaluation for each dataset if dataset_name == 'h36m-p1' or dataset_name == 'h36m-p2' or dataset_name == '3dpw' or dataset_name == 'mpi-inf-3dhp': eval_pose = True elif dataset_name == 'lsp': eval_masks = True eval_parts = True annot_path = config.DATASET_FOLDERS['upi-s1h'] joint_mapper_h36m = constants.H36M_TO_J17 if dataset_name == 'mpi-inf-3dhp' else constants.H36M_TO_J14 joint_mapper_gt = constants.J24_TO_J17 if dataset_name == 'mpi-inf-3dhp' else constants.J24_TO_J14 # Iterate over the entire dataset for step, batch in enumerate(tqdm(data_loader, desc='Eval', total=len(data_loader))): # Get ground truth annotations from the batch imgName = batch['imgname'][0] seqName = os.path.basename ( os.path.dirname(imgName) ) gt_pose = batch['pose'].to(device) gt_betas = batch['betas'].to(device) gt_vertices = smpl_neutral(betas=gt_betas, body_pose=gt_pose[:, 3:], global_orient=gt_pose[:, :3]).vertices images = batch['img'].to(device) gender = batch['gender'].to(device) curr_batch_size = images.shape[0] with torch.no_grad(): pred_rotmat, pred_betas, 
pred_camera = model(images) pred_output = smpl_neutral(betas=pred_betas, body_pose=pred_rotmat[:,1:], global_orient=pred_rotmat[:,0].unsqueeze(1), pose2rot=False) pred_vertices = pred_output.vertices # 3D pose evaluation if eval_pose: # Regressor broadcasting J_regressor_batch = J_regressor[None, :].expand(pred_vertices.shape[0], -1, -1).to(device) # Get 14 ground truth joints if 'h36m' in dataset_name or 'mpi-inf' in dataset_name: gt_keypoints_3d = batch['pose_3d'].cuda() gt_keypoints_3d = gt_keypoints_3d[:, joint_mapper_gt, :-1] # For 3DPW get the 14 common joints from the rendered shape else: gt_vertices = smpl_male(global_orient=gt_pose[:,:3], body_pose=gt_pose[:,3:], betas=gt_betas).vertices gt_vertices_female = smpl_female(global_orient=gt_pose[:,:3], body_pose=gt_pose[:,3:], betas=gt_betas).vertices gt_vertices[gender==1, :, :] = gt_vertices_female[gender==1, :, :] gt_keypoints_3d = torch.matmul(J_regressor_batch, gt_vertices) gt_pelvis = gt_keypoints_3d[:, [0],:].clone() gt_keypoints_3d = gt_keypoints_3d[:, joint_mapper_h36m, :] gt_keypoints_3d = gt_keypoints_3d - gt_pelvis if False: from renderer import viewer2D from renderer import glViewer import humanModelViewer batchNum = gt_pose.shape[0] for i in range(batchNum): smpl_face = humanModelViewer.GetSMPLFace() meshes_gt = {'ver': gt_vertices[i].cpu().numpy()*100, 'f': smpl_face} meshes_pred = {'ver': pred_vertices[i].cpu().numpy()*100, 'f': smpl_face} glViewer.setMeshData([meshes_gt, meshes_pred], bComputeNormal= True) glViewer.show(5) # Get 14 predicted joints from the mesh pred_keypoints_3d = torch.matmul(J_regressor_batch, pred_vertices) # if save_results: # pred_joints[step * batch_size:step * batch_size + curr_batch_size, :, :] = pred_keypoints_3d.cpu().numpy() pred_pelvis = pred_keypoints_3d[:, [0],:].clone() pred_keypoints_3d = pred_keypoints_3d[:, joint_mapper_h36m, :] pred_keypoints_3d = pred_keypoints_3d - pred_pelvis #Visualize GT mesh and SPIN output mesh if False: from renderer import viewer2D 
from renderer import glViewer import humanModelViewer gt_keypoints_3d_vis = gt_keypoints_3d.cpu().numpy() gt_keypoints_3d_vis = np.reshape(gt_keypoints_3d_vis, (gt_keypoints_3d_vis.shape[0],-1)) #N,14x3 gt_keypoints_3d_vis = np.swapaxes(gt_keypoints_3d_vis, 0,1) *100 pred_keypoints_3d_vis = pred_keypoints_3d.cpu().numpy() pred_keypoints_3d_vis = np.reshape(pred_keypoints_3d_vis, (pred_keypoints_3d_vis.shape[0],-1)) #N,14x3 pred_keypoints_3d_vis = np.swapaxes(pred_keypoints_3d_vis, 0,1) *100 # output_sample = output_sample[ : , np.newaxis]*0.1 # gt_sample = gt_sample[: , np.newaxis]*0.1 # (skelNum, dim, frames) glViewer.setSkeleton( [gt_keypoints_3d_vis, pred_keypoints_3d_vis] ,jointType='smplcoco')#(skelNum, dim, frames) glViewer.show() # Absolute error (MPJPE) error = torch.sqrt(((pred_keypoints_3d - gt_keypoints_3d) ** 2).sum(dim=-1)).mean(dim=-1).cpu().numpy() # mpjpe[step * batch_size:step * batch_size + curr_batch_size] = error # Reconstuction_error r_error = reconstruction_error(pred_keypoints_3d.cpu().numpy(), gt_keypoints_3d.cpu().numpy(), reduction=None) # recon_err[step * batch_size:step * batch_size + curr_batch_size] = r_error for ii, p in enumerate(batch['imgname'][:len(r_error)]): seqName = os.path.basename( os.path.dirname(p)) # quant_mpjpe[step * batch_size:step * batch_size + curr_batch_size] = error if seqName not in quant_mpjpe.keys(): quant_mpjpe[seqName] = [] quant_recon_err[seqName] = [] quant_mpjpe[seqName].append(error[ii]) quant_recon_err[seqName].append(r_error[ii]) # Reconstuction_error # quant_recon_err[step * batch_size:step * batch_size + curr_batch_size] = r_error list_mpjpe = np.hstack([ quant_mpjpe[k] for k in quant_mpjpe]) list_reconError = np.hstack([ quant_recon_err[k] for k in quant_recon_err]) if bVerbose: print(">>> {} : MPJPE {:.02f} mm, error: {:.02f} mm | Total MPJPE {:.02f} mm, error {:.02f} mm".format(seqName, np.mean(error)*1000, np.mean(r_error)*1000, np.hstack(list_mpjpe).mean()*1000, 
np.hstack(list_reconError).mean()*1000) ) # print("MPJPE {}, error: {}".format(np.mean(error)*100, np.mean(r_error)*100)) # If mask or part evaluation, render the mask and part images # if eval_masks or eval_parts: # mask, parts = renderer(pred_vertices, pred_camera) # Mask evaluation (for LSP) if eval_masks: center = batch['center'].cpu().numpy() scale = batch['scale'].cpu().numpy() # Dimensions of original image orig_shape = batch['orig_shape'].cpu().numpy() for i in range(curr_batch_size): # After rendering, convert imate back to original resolution pred_mask = uncrop(mask[i].cpu().numpy(), center[i], scale[i], orig_shape[i]) > 0 # Load gt mask gt_mask = cv2.imread(os.path.join(annot_path, batch['maskname'][i]), 0) > 0 # Evaluation consistent with the original UP-3D code accuracy += (gt_mask == pred_mask).sum() pixel_count += np.prod(np.array(gt_mask.shape)) for c in range(2): cgt = gt_mask == c cpred = pred_mask == c tp[c] += (cgt & cpred).sum() fp[c] += (~cgt & cpred).sum() fn[c] += (cgt & ~cpred).sum() f1 = 2 * tp / (2 * tp + fp + fn) # Part evaluation (for LSP) if eval_parts: center = batch['center'].cpu().numpy() scale = batch['scale'].cpu().numpy() orig_shape = batch['orig_shape'].cpu().numpy() for i in range(curr_batch_size): pred_parts = uncrop(parts[i].cpu().numpy().astype(np.uint8), center[i], scale[i], orig_shape[i]) # Load gt part segmentation gt_parts = cv2.imread(os.path.join(annot_path, batch['partname'][i]), 0) # Evaluation consistent with the original UP-3D code # 6 parts + background for c in range(7): cgt = gt_parts == c cpred = pred_parts == c cpred[gt_parts == 255] = 0 parts_tp[c] += (cgt & cpred).sum() parts_fp[c] += (~cgt & cpred).sum() parts_fn[c] += (cgt & ~cpred).sum() gt_parts[gt_parts == 255] = 0 pred_parts[pred_parts == 255] = 0 parts_f1 = 2 * parts_tp / (2 * parts_tp + parts_fp + parts_fn) parts_accuracy += (gt_parts == pred_parts).sum() parts_pixel_count += np.prod(np.array(gt_parts.shape)) # Print intermediate results during 
evaluation if bVerbose: if step % log_freq == log_freq - 1: if eval_pose: print('MPJPE: ' + str(1000 * mpjpe[:step * batch_size].mean())) print('Reconstruction Error: ' + str(1000 * recon_err[:step * batch_size].mean())) print() if eval_masks: print('Accuracy: ', accuracy / pixel_count) print('F1: ', f1.mean()) print() if eval_parts: print('Parts Accuracy: ', parts_accuracy / parts_pixel_count) print('Parts F1 (BG): ', parts_f1[[0,1,2,3,4,5,6]].mean()) print() if save_results: rot_pad = torch.tensor([0,0,1], dtype=torch.float32, device=device).view(1,3,1) rotmat = torch.cat((pred_rotmat.view(-1, 3, 3), rot_pad.expand(curr_batch_size * 24, -1, -1)), dim=-1) pred_pose = tgm.rotation_matrix_to_angle_axis(rotmat).contiguous().view(-1, 72) output_pred_pose[outputStartPointer:outputStartPointer+curr_batch_size, :] = pred_pose.cpu().numpy() output_pred_betas[outputStartPointer:outputStartPointer+curr_batch_size, :] = pred_betas.cpu().numpy() output_pred_camera[outputStartPointer:outputStartPointer+curr_batch_size, :] = pred_camera.cpu().numpy() output_pred_pose[outputStartPointer:outputStartPointer+curr_batch_size, :] = pred_pose.cpu().numpy() output_pred_betas[outputStartPointer:outputStartPointer+curr_batch_size, :] = pred_betas.cpu().numpy() output_pred_camera[outputStartPointer:outputStartPointer+curr_batch_size, :] = pred_camera.cpu().numpy() output_pred_joints[outputStartPointer:outputStartPointer+curr_batch_size, :] = pred_keypoints_3d.cpu().numpy() output_gt_pose[outputStartPointer:outputStartPointer+curr_batch_size, :] = gt_pose.cpu().numpy() output_gt_betas[outputStartPointer:outputStartPointer+curr_batch_size, :] = gt_betas.cpu().numpy() output_gt_joints[outputStartPointer:outputStartPointer+curr_batch_size, :] = gt_keypoints_3d.cpu().numpy() output_error_MPJPE[outputStartPointer:outputStartPointer+curr_batch_size,] = error *1000 output_error_recon[outputStartPointer:outputStartPointer+curr_batch_size] = r_error*1000 
output_cropScale[outputStartPointer:outputStartPointer+curr_batch_size] = batch['scale'].cpu().numpy() output_cropCenter[outputStartPointer:outputStartPointer+curr_batch_size, :] = batch['center'].cpu().numpy() output_imgNames +=batch['imgname'] outputStartPointer +=curr_batch_size # if outputStartPointer>100: #Debug # break # if len(output_imgNames) < output_pred_pose.shape[0]: output ={} finalLen = len(output_imgNames) output['imageNames'] = output_imgNames output['pred_pose'] = output_pred_pose[:finalLen] output['pred_betas'] = output_pred_betas[:finalLen] output['pred_camera'] = output_pred_camera[:finalLen] output['pred_joints'] = output_pred_joints[:finalLen] output['gt_pose'] = output_gt_pose[:finalLen] output['gt_betas'] = output_gt_betas[:finalLen] output['gt_joints'] = output_gt_joints[:finalLen] output['error_MPJPE'] = output_error_MPJPE[:finalLen] output['error_recon'] = output_error_recon[:finalLen] output['cropScale'] = output_cropScale[:finalLen] output['cropCenter'] = output_cropCenter[:finalLen] # Save reconstructions to a file for further processing if save_results: import pickle # np.savez(result_file, pred_joints=pred_joints, pred_pose=pred_pose, pred_betas=pred_betas, pred_camera=pred_camera) with open(result_file,'wb') as f: pickle.dump(output, f) f.close() print("Saved to:{}".format(result_file)) # Print final results during evaluation if bVerbose: print('*** Final Results ***') print() if eval_pose: # if bVerbose: # print('MPJPE: ' + str(1000 * mpjpe.mean())) # print('Reconstruction Error: ' + str(1000 * recon_err.mean())) # print() list_mpjpe = np.hstack([ quant_mpjpe[k] for k in quant_mpjpe]) list_reconError = np.hstack([ quant_recon_err[k] for k in quant_recon_err]) output_str ='SeqNames; ' for seq in quant_mpjpe: output_str += seq + ';' output_str +='\n MPJPE; ' quant_mpjpe_avg_mm = np.hstack(list_mpjpe).mean()*1000 output_str += "Avg {:.02f} mm; ".format( quant_mpjpe_avg_mm) for seq in quant_mpjpe: output_str += '{:.02f}; '.format(1000 
* np.hstack(quant_mpjpe[seq]).mean()) output_str +='\n Recon Error; ' quant_recon_error_avg_mm = np.hstack(list_reconError).mean()*1000 output_str +="Avg {:.02f}mm; ".format( quant_recon_error_avg_mm ) for seq in quant_recon_err: output_str += '{:.02f}; '.format(1000 * np.hstack(quant_recon_err[seq]).mean()) if bVerbose: print(output_str) else: print(">>> Test on 3DPW: MPJPE: {} | quant_recon_error_avg_mm: {}".format(quant_mpjpe_avg_mm, quant_recon_error_avg_mm) ) return quant_mpjpe_avg_mm, quant_recon_error_avg_mm if bVerbose: if eval_masks: print('Accuracy: ', accuracy / pixel_count) print('F1: ', f1.mean()) print() if eval_parts: print('Parts Accuracy: ', parts_accuracy / parts_pixel_count) print('Parts F1 (BG): ', parts_f1[[0,1,2,3,4,5,6]].mean()) print() return -1 #Should return something
def run_evaluation(model, dataset):
    """Run evaluation on the datasets and metrics we report in the paper.

    Depending on ``args.dataset`` this accumulates either 3D pose metrics
    (MPJPE, Procrustes-aligned reconstruction error, per-vertex error) or
    LSP silhouette/part-segmentation metrics.  Intermediate metrics are
    printed every ``args.log_freq`` steps; if ``args.result_file`` is set,
    per-sample predictions are also saved to disk (.npz + .mat).

    Args:
        model: trained regressor ('hmr' or 'pymaf_net', per ``args.regressor``).
        dataset: evaluation dataset yielding image batches plus annotations.

    NOTE(review): evaluation settings are read from the module-level ``args``
    namespace rather than passed in explicitly.
    """
    shuffle = args.shuffle
    log_freq = args.log_freq
    batch_size = args.batch_size
    dataset_name = args.dataset
    result_file = args.result_file
    num_workers = args.num_workers
    device = torch.device('cuda') if torch.cuda.is_available() \
        else torch.device('cpu')

    # Transfer model to the GPU
    model.to(device)

    # Load SMPL models: neutral for predictions, gendered for 3DPW ground truth
    smpl_neutral = SMPL(path_config.SMPL_MODEL_DIR,
                        create_transl=False).to(device)
    smpl_male = SMPL(path_config.SMPL_MODEL_DIR, gender='male',
                     create_transl=False).to(device)
    smpl_female = SMPL(path_config.SMPL_MODEL_DIR, gender='female',
                       create_transl=False).to(device)

    renderer = PartRenderer()

    # Regressor for H36m joints
    J_regressor = torch.from_numpy(np.load(
        path_config.JOINT_REGRESSOR_H36M)).float()

    save_results = result_file is not None
    # Disable shuffling if you want to save the results
    if save_results:
        shuffle = False

    # Create dataloader for the dataset
    data_loader = DataLoader(dataset,
                             batch_size=batch_size,
                             shuffle=shuffle,
                             num_workers=num_workers)

    # Pose metrics
    # MPJPE, Reconstruction error and per-vertex error accumulators
    # (FIX: removed unused locals cnt / mpjpe_smpl / recon_err_smpl /
    # shape_err / shape_err_smpl that were never read in this function)
    mpjpe = np.zeros(len(dataset))
    recon_err = np.zeros(len(dataset))
    pve = np.zeros(len(dataset))

    # Mask and part metrics
    # Accuracy
    accuracy = 0.
    parts_accuracy = 0.
    # True positive, false positive and false negative
    tp = np.zeros((2, 1))
    fp = np.zeros((2, 1))
    fn = np.zeros((2, 1))
    parts_tp = np.zeros((7, 1))
    parts_fp = np.zeros((7, 1))
    parts_fn = np.zeros((7, 1))
    # Pixel count accumulators
    pixel_count = 0
    parts_pixel_count = 0

    # Store SMPL parameters
    smpl_pose = np.zeros((len(dataset), 72))
    smpl_betas = np.zeros((len(dataset), 10))
    smpl_camera = np.zeros((len(dataset), 3))
    pred_joints = np.zeros((len(dataset), 17, 3))

    # Per-action bookkeeping (used for h36m-p2 only)
    action_idxes = {}
    idx_counter = 0
    act_PVE = {}
    act_MPJPE = {}
    act_paMPJPE = {}

    eval_pose = False
    eval_masks = False
    eval_parts = False
    # Choose appropriate evaluation for each dataset
    if dataset_name == 'h36m-p1' or dataset_name == 'h36m-p2' or dataset_name == 'h36m-p2-mosh' \
            or dataset_name == '3dpw' or dataset_name == 'mpi-inf-3dhp' or dataset_name == '3doh50k':
        eval_pose = True
    elif dataset_name == 'lsp':
        eval_masks = True
        eval_parts = True
        annot_path = path_config.DATASET_FOLDERS['upi-s1h']

    joint_mapper_h36m = constants.H36M_TO_J17 if dataset_name == 'mpi-inf-3dhp' \
        else constants.H36M_TO_J14
    joint_mapper_gt = constants.J24_TO_J17 if dataset_name == 'mpi-inf-3dhp' \
        else constants.J24_TO_J14

    # Iterate over the entire dataset
    results_dict = {'id': [], 'pred': [], 'pred_pa': [], 'gt': []}
    for step, batch in enumerate(
            tqdm(data_loader, desc='Eval', total=len(data_loader))):
        # Get ground truth annotations from the batch
        gt_pose = batch['pose'].to(device)
        gt_betas = batch['betas'].to(device)
        gt_smpl_out = smpl_neutral(betas=gt_betas,
                                   body_pose=gt_pose[:, 3:],
                                   global_orient=gt_pose[:, :3])
        gt_vertices_nt = gt_smpl_out.vertices
        images = batch['img'].to(device)
        gender = batch['gender'].to(device)
        curr_batch_size = images.shape[0]

        if save_results:
            # Build a per-sample id out of the image path components
            s_id = np.array([int(item.split('/')[-3][-1])
                             for item in batch['imgname']]) * 10000
            s_id += np.array([int(item.split('/')[-1][4:-4])
                              for item in batch['imgname']])
            results_dict['id'].append(s_id)

        if dataset_name == 'h36m-p2':
            # Record which global sample indices belong to which action
            action = [
                im_path.split('/')[-1].split('.')[0].split('_')[1]
                for im_path in batch['imgname']
            ]
            for act_i in range(len(action)):
                if action[act_i] in action_idxes:
                    action_idxes[action[act_i]].append(idx_counter + act_i)
                else:
                    action_idxes[action[act_i]] = [idx_counter + act_i]
            idx_counter += len(action)

        with torch.no_grad():
            if args.regressor == 'hmr':
                pred_rotmat, pred_betas, pred_camera = model(images)
                # torch.Size([32, 24, 3, 3]) torch.Size([32, 10]) torch.Size([32, 3])
            elif args.regressor == 'pymaf_net':
                preds_dict, _ = model(images)
                pred_rotmat = preds_dict['smpl_out'][-1]['rotmat'].contiguous(
                ).view(-1, 24, 3, 3)
                pred_betas = preds_dict['smpl_out'][-1][
                    'theta'][:, 3:13].contiguous()
                pred_camera = preds_dict['smpl_out'][-1][
                    'theta'][:, :3].contiguous()

            pred_output = smpl_neutral(
                betas=pred_betas,
                body_pose=pred_rotmat[:, 1:],
                global_orient=pred_rotmat[:, 0].unsqueeze(1),
                pose2rot=False)
            pred_vertices = pred_output.vertices

        if save_results:
            # Convert predicted rotation matrices to axis-angle for storage
            rot_pad = torch.tensor([0, 0, 1], dtype=torch.float32,
                                   device=device).view(1, 3, 1)
            rotmat = torch.cat((pred_rotmat.view(-1, 3, 3),
                                rot_pad.expand(curr_batch_size * 24, -1, -1)),
                               dim=-1)
            pred_pose = tgm.rotation_matrix_to_angle_axis(
                rotmat).contiguous().view(-1, 72)
            smpl_pose[step * batch_size:step * batch_size +
                      curr_batch_size, :] = pred_pose.cpu().numpy()
            smpl_betas[step * batch_size:step * batch_size +
                       curr_batch_size, :] = pred_betas.cpu().numpy()
            smpl_camera[step * batch_size:step * batch_size +
                        curr_batch_size, :] = pred_camera.cpu().numpy()

        # 3D pose evaluation
        if eval_pose:
            # Regressor broadcasting
            J_regressor_batch = J_regressor[None, :].expand(
                pred_vertices.shape[0], -1, -1).to(device)
            # Get 14 ground truth joints
            if 'h36m' in dataset_name or 'mpi-inf' in dataset_name or '3doh50k' in dataset_name:
                gt_keypoints_3d = batch['pose_3d'].cuda()
                gt_keypoints_3d = gt_keypoints_3d[:, joint_mapper_gt, :-1]
            # For 3DPW get the 14 common joints from the rendered shape
            else:
                gt_vertices = smpl_male(global_orient=gt_pose[:, :3],
                                        body_pose=gt_pose[:, 3:],
                                        betas=gt_betas).vertices
                gt_vertices_female = smpl_female(global_orient=gt_pose[:, :3],
                                                 body_pose=gt_pose[:, 3:],
                                                 betas=gt_betas).vertices
                gt_vertices[gender == 1, :, :] = gt_vertices_female[gender == 1, :, :]
                gt_keypoints_3d = torch.matmul(J_regressor_batch, gt_vertices)
                gt_pelvis = gt_keypoints_3d[:, [0], :].clone()
                gt_keypoints_3d = gt_keypoints_3d[:, joint_mapper_h36m, :]
                gt_keypoints_3d = gt_keypoints_3d - gt_pelvis

            # Per-vertex error: gendered GT mesh for 3DPW, neutral otherwise
            if '3dpw' in dataset_name:
                per_vertex_error = torch.sqrt(
                    ((pred_vertices - gt_vertices)**2).sum(dim=-1)).mean(
                        dim=-1).cpu().numpy()
            else:
                per_vertex_error = torch.sqrt(
                    ((pred_vertices - gt_vertices_nt)**2).sum(dim=-1)).mean(
                        dim=-1).cpu().numpy()
            pve[step * batch_size:step * batch_size +
                curr_batch_size] = per_vertex_error

            # Get 14 predicted joints from the mesh
            pred_keypoints_3d = torch.matmul(J_regressor_batch, pred_vertices)
            if save_results:
                pred_joints[step * batch_size:step * batch_size +
                            curr_batch_size, :, :] = pred_keypoints_3d.cpu().numpy()
            pred_pelvis = pred_keypoints_3d[:, [0], :].clone()
            pred_keypoints_3d = pred_keypoints_3d[:, joint_mapper_h36m, :]
            pred_keypoints_3d = pred_keypoints_3d - pred_pelvis

            # Absolute error (MPJPE)
            error = torch.sqrt(
                ((pred_keypoints_3d - gt_keypoints_3d)**2).sum(
                    dim=-1)).mean(dim=-1).cpu().numpy()
            mpjpe[step * batch_size:step * batch_size + curr_batch_size] = error

            # Reconstruction error (Procrustes-aligned MPJPE)
            r_error, pred_keypoints_3d_pa = reconstruction_error(
                pred_keypoints_3d.cpu().numpy(),
                gt_keypoints_3d.cpu().numpy(),
                reduction=None)
            recon_err[step * batch_size:step * batch_size +
                      curr_batch_size] = r_error

            if save_results:
                results_dict['gt'].append(gt_keypoints_3d.cpu().numpy())
                results_dict['pred'].append(pred_keypoints_3d.cpu().numpy())
                results_dict['pred_pa'].append(pred_keypoints_3d_pa)

        if args.vis_demo:
            imgnames = [i_n.split('/')[-1] for i_n in batch['imgname']]

            # FIX: initialize unconditionally.  Previously iuv_pred was only
            # assigned when args.regressor == 'hmr', so any other regressor
            # raised NameError at the vis_smpl_iuv call below.
            iuv_pred = None

            # Undo ImageNet normalization before visualizing
            images_vis = images * torch.tensor(
                [0.229, 0.224, 0.225], device=images.device).reshape(1, 3, 1, 1)
            images_vis = images_vis + torch.tensor(
                [0.485, 0.456, 0.406], device=images.device).reshape(1, 3, 1, 1)
            vis_smpl_iuv(
                images_vis.cpu().numpy(), pred_camera.cpu().numpy(),
                pred_output.vertices.cpu().numpy(), smpl_neutral.faces,
                iuv_pred, 100 * per_vertex_error, imgnames,
                os.path.join('./notebooks/output/demo_results', dataset_name,
                             args.checkpoint.split('/')[-3]), args)

        # If mask or part evaluation, render the mask and part images
        if eval_masks or eval_parts:
            mask, parts = renderer(pred_vertices, pred_camera)

        # Mask evaluation (for LSP)
        if eval_masks:
            center = batch['center'].cpu().numpy()
            scale = batch['scale'].cpu().numpy()
            # Dimensions of original image
            orig_shape = batch['orig_shape'].cpu().numpy()
            for i in range(curr_batch_size):
                # After rendering, convert image back to original resolution
                pred_mask = uncrop(mask[i].cpu().numpy(), center[i], scale[i],
                                   orig_shape[i]) > 0
                # Load gt mask
                gt_mask = cv2.imread(
                    os.path.join(annot_path, batch['maskname'][i]), 0) > 0
                # Evaluation consistent with the original UP-3D code
                accuracy += (gt_mask == pred_mask).sum()
                pixel_count += np.prod(np.array(gt_mask.shape))
                for c in range(2):
                    cgt = gt_mask == c
                    cpred = pred_mask == c
                    tp[c] += (cgt & cpred).sum()
                    fp[c] += (~cgt & cpred).sum()
                    fn[c] += (cgt & ~cpred).sum()
                f1 = 2 * tp / (2 * tp + fp + fn)

        # Part evaluation (for LSP)
        if eval_parts:
            center = batch['center'].cpu().numpy()
            scale = batch['scale'].cpu().numpy()
            orig_shape = batch['orig_shape'].cpu().numpy()
            for i in range(curr_batch_size):
                pred_parts = uncrop(parts[i].cpu().numpy().astype(np.uint8),
                                    center[i], scale[i], orig_shape[i])
                # Load gt part segmentation
                gt_parts = cv2.imread(
                    os.path.join(annot_path, batch['partname'][i]), 0)
                # Evaluation consistent with the original UP-3D code
                # 6 parts + background
                for c in range(7):
                    cgt = gt_parts == c
                    cpred = pred_parts == c
                    cpred[gt_parts == 255] = 0
                    parts_tp[c] += (cgt & cpred).sum()
                    parts_fp[c] += (~cgt & cpred).sum()
                    parts_fn[c] += (cgt & ~cpred).sum()
                gt_parts[gt_parts == 255] = 0
                pred_parts[pred_parts == 255] = 0
                parts_f1 = 2 * parts_tp / (2 * parts_tp + parts_fp + parts_fn)
                parts_accuracy += (gt_parts == pred_parts).sum()
                parts_pixel_count += np.prod(np.array(gt_parts.shape))

        # Print intermediate results during evaluation
        if step % log_freq == log_freq - 1:
            if eval_pose:
                print('MPJPE: ' + str(1000 * mpjpe[:step * batch_size].mean()))
                print('Reconstruction Error: ' +
                      str(1000 * recon_err[:step * batch_size].mean()))
                print()
            if eval_masks:
                print('Accuracy: ', accuracy / pixel_count)
                print('F1: ', f1.mean())
                print()
            if eval_parts:
                print('Parts Accuracy: ', parts_accuracy / parts_pixel_count)
                print('Parts F1 (BG): ', parts_f1[[0, 1, 2, 3, 4, 5, 6]].mean())
                print()

    # Save reconstructions to a file for further processing
    if save_results:
        np.savez(result_file,
                 pred_joints=pred_joints,
                 pose=smpl_pose,
                 betas=smpl_betas,
                 camera=smpl_camera)
        for k in results_dict.keys():
            results_dict[k] = np.concatenate(results_dict[k])
            print(k, results_dict[k].shape)

        scipy.io.savemat(result_file + '.mat', results_dict)

    # Print final results during evaluation
    print('*** Final Results ***')
    try:
        print(os.path.split(args.checkpoint)[-3:], args.dataset)
    except Exception:
        # FIX: narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # are no longer swallowed; this print is best-effort diagnostics only.
        pass
    if eval_pose:
        print('PVE: ' + str(1000 * pve.mean()))
        print('MPJPE: ' + str(1000 * mpjpe.mean()))
        print('Reconstruction Error: ' + str(1000 * recon_err.mean()))
        print()
    if eval_masks:
        print('Accuracy: ', accuracy / pixel_count)
        print('F1: ', f1.mean())
        print()
    if eval_parts:
        print('Parts Accuracy: ', parts_accuracy / parts_pixel_count)
        print('Parts F1 (BG): ', parts_f1[[0, 1, 2, 3, 4, 5, 6]].mean())
        print()

    if dataset_name == 'h36m-p2':
        print(
            'Note: PVE is not available for h36m-p2. To evaluate PVE, use h36m-p2-mosh instead.'
        )
        # Aggregate the per-sample errors per action
        for act in action_idxes:
            act_idx = action_idxes[act]
            act_pve = [pve[i] for i in act_idx]
            act_errors = [mpjpe[i] for i in act_idx]
            act_errors_pa = [recon_err[i] for i in act_idx]

            act_errors_mean = np.mean(np.array(act_errors)) * 1000.
            act_errors_pa_mean = np.mean(np.array(act_errors_pa)) * 1000.
            act_pve_mean = np.mean(np.array(act_pve)) * 1000.
            act_MPJPE[act] = act_errors_mean
            act_paMPJPE[act] = act_errors_pa_mean
            act_PVE[act] = act_pve_mean

        act_err_info = ['action err']
        act_row = [str(act_paMPJPE[act]) for act in action_idxes] + \
                  [act for act in action_idxes]
        act_err_info.extend(act_row)
        print(act_err_info)
    else:
        act_row = None
def init_fn(self):
    """Set up model, losses, optimizer, datasets and loaders for training.

    Called once by the trainer before the training loop.  Supports both
    single-device and distributed (DDP) setups; in the distributed
    single-GPU-per-process case the global batch size and worker count are
    divided across ``ngpus_per_node``.
    """
    if self.options.rank == 0:
        # Only the master process logs the command-line arguments
        self.summary_writer.add_text('command_args', print_args())

    if self.options.regressor == 'hmr':
        # HMR/SPIN model
        self.model = hmr(path_config.SMPL_MEAN_PARAMS, pretrained=True)
        self.smpl = SMPL(path_config.SMPL_MODEL_DIR,
                         batch_size=cfg.TRAIN.BATCH_SIZE,
                         create_transl=False).to(self.device)
    elif self.options.regressor == 'pymaf_net':
        # PyMAF model; reuse the SMPL instance owned by its first regressor
        self.model = pymaf_net(path_config.SMPL_MEAN_PARAMS, pretrained=True)
        self.smpl = self.model.regressor[0].smpl

    if self.options.distributed:
        # For multiprocessing distributed, DistributedDataParallel constructor
        # should always set the single device scope, otherwise,
        # DistributedDataParallel will use all available devices.
        if self.options.gpu is not None:
            torch.cuda.set_device(self.options.gpu)
            self.model.cuda(self.options.gpu)
            # When using a single GPU per process and per
            # DistributedDataParallel, we need to divide the batch size
            # ourselves based on the total number of GPUs we have
            self.options.batch_size = int(self.options.batch_size /
                                          self.options.ngpus_per_node)
            self.options.workers = int(
                (self.options.workers + self.options.ngpus_per_node - 1) /
                self.options.ngpus_per_node)
            self.model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(
                self.model)
            self.model = torch.nn.parallel.DistributedDataParallel(
                self.model,
                device_ids=[self.options.gpu],
                output_device=self.options.gpu,
                find_unused_parameters=True)
        else:
            self.model.cuda()
            # DistributedDataParallel will divide and allocate batch_size to all
            # available GPUs if device_ids are not set
            self.model = torch.nn.parallel.DistributedDataParallel(
                self.model, find_unused_parameters=True)
        # Checkpoint the underlying module, not the DDP wrapper
        self.models_dict = {'model': self.model.module}
    else:
        self.model = self.model.to(self.device)
        self.models_dict = {'model': self.model}

    cudnn.benchmark = True

    # Per-vertex loss on the shape
    self.criterion_shape = nn.L1Loss().to(self.device)
    # Keypoint (2D and 3D) loss
    # No reduction because confidence weighting needs to be applied
    self.criterion_keypoints = nn.MSELoss(reduction='none').to(self.device)
    # Loss for SMPL parameter regression
    self.criterion_regr = nn.MSELoss().to(self.device)

    self.focal_length = constants.FOCAL_LENGTH

    if self.options.pretrained_checkpoint is not None:
        self.load_pretrained(
            checkpoint_file=self.options.pretrained_checkpoint)

    self.optimizer = torch.optim.Adam(params=self.model.parameters(),
                                      lr=cfg.SOLVER.BASE_LR,
                                      weight_decay=0)
    self.optimizers_dict = {'optimizer': self.optimizer}

    if self.options.single_dataset:
        self.train_ds = BaseDataset(self.options,
                                    self.options.single_dataname,
                                    is_train=True)
    else:
        self.train_ds = MixedDataset(self.options, is_train=True)

    self.valid_ds = BaseDataset(self.options,
                                self.options.eval_dataset,
                                is_train=False)

    if self.options.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            self.train_ds)
        val_sampler = None
    else:
        train_sampler = None
        val_sampler = None

    self.train_data_loader = DataLoader(self.train_ds,
                                        batch_size=self.options.batch_size,
                                        num_workers=self.options.workers,
                                        pin_memory=cfg.TRAIN.PIN_MEMORY,
                                        shuffle=(train_sampler is None),
                                        sampler=train_sampler)

    self.valid_loader = DataLoader(dataset=self.valid_ds,
                                   batch_size=cfg.TEST.BATCH_SIZE,
                                   shuffle=False,
                                   num_workers=cfg.TRAIN.NUM_WORKERS,
                                   pin_memory=cfg.TRAIN.PIN_MEMORY,
                                   sampler=val_sampler)

    # Load dictionary of fits
    self.fits_dict = FitsDict(self.options, self.train_ds)
    self.evaluation_accumulators = dict.fromkeys([
        'pred_j3d', 'target_j3d', 'target_theta', 'pred_verts', 'target_verts'
    ])

    # Create renderer (optional; training proceeds without visualization)
    try:
        self.renderer = OpenDRenderer()
    except Exception:
        # FIX: narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # are not swallowed; any construction failure still falls back to
        # training without a renderer.
        print('No renderer for visualization.')
        self.renderer = None

    if cfg.MODEL.PyMAF.AUX_SUPV_ON:
        self.iuv_maker = IUV_Renderer(
            output_size=cfg.MODEL.PyMAF.DP_HEATMAP_SIZE)

    self.decay_steps_ind = 1
    self.decay_epochs_ind = 1
def run_evaluation(model,
                   dataset,
                   result_file,
                   batch_size=32,
                   img_res=224,
                   num_workers=32,
                   shuffle=False,
                   options=None):
    """Run evaluation on the datasets and metrics we report in the paper.

    Projects the predicted SMPL joints (mapped to COCO order) into 2D with a
    full-perspective camera, transforms them back to original-image
    coordinates, and scores them via the dataset's own ``evaluate`` method
    (COCO-style keypoint evaluation).  Supports the 'hmr' and 'danet'
    regressors (selected by ``options.regressor``).
    """
    device = torch.device(
        'cuda') if torch.cuda.is_available() else torch.device('cpu')

    # Transfer model to the GPU
    model.to(device)

    # Load SMPL model
    smpl_neutral = SMPL(path_config.SMPL_MODEL_DIR,
                        create_transl=False).to(device)

    save_results = result_file is not None
    # Disable shuffling if you want to save the results
    if save_results:
        shuffle = False
    # Create dataloader for the dataset
    data_loader = DataLoader(dataset,
                             batch_size=batch_size,
                             shuffle=shuffle,
                             num_workers=num_workers)

    # Store SMPL parameters
    # NOTE(review): these arrays are written to the result file at the end
    # but are never filled inside the loop — the saved file contains zeros.
    smpl_pose = np.zeros((len(dataset), 72))
    smpl_betas = np.zeros((len(dataset), 10))
    smpl_camera = np.zeros((len(dataset), 3))
    pred_joints = np.zeros((len(dataset), 17, 3))

    # Accumulators in the format expected by dataset.evaluate()
    num_joints = 17
    num_samples = len(dataset)
    print('dataset length: {}'.format(num_samples))
    all_preds = np.zeros((num_samples, num_joints, 3), dtype=np.float32)
    all_boxes = np.zeros((num_samples, 6))
    image_path = []
    filenames = []
    imgnums = []
    idx = 0
    with torch.no_grad():
        end = time.time()
        for step, batch in enumerate(
                tqdm(data_loader, desc='Eval', total=len(data_loader))):
            images = batch['img'].to(device)
            scale = batch['scale'].numpy()
            center = batch['center'].numpy()
            num_images = images.size(0)

            gt_keypoints_2d = batch['keypoints']  # 2D keypoints
            # De-normalize 2D keypoints from [-1,1] to pixel space
            gt_keypoints_2d_orig = gt_keypoints_2d.clone()
            gt_keypoints_2d_orig[:, :, :-1] = 0.5 * img_res * (
                gt_keypoints_2d_orig[:, :, :-1] + 1)

            # Regress SMPL parameters with the selected network
            if options.regressor == 'hmr':
                pred_rotmat, pred_betas, pred_camera = model(images)
            elif options.regressor == 'danet':
                danet_pred_dict = model.infer_net(images)
                para_pred = danet_pred_dict['para']
                pred_camera = para_pred[:, 0:3].contiguous()
                pred_betas = para_pred[:, 3:13].contiguous()
                pred_rotmat = para_pred[:, 13:].contiguous().view(-1, 24, 3, 3)

            pred_output = smpl_neutral(
                betas=pred_betas,
                body_pose=pred_rotmat[:, 1:],
                global_orient=pred_rotmat[:, 0].unsqueeze(1),
                pose2rot=False)
            # pred_vertices = pred_output.vertices
            # Take the last 24 joints (SMPL joints) and map them to COCO order
            pred_J24 = pred_output.joints[:, -24:]
            pred_JCOCO = pred_J24[:, constants.J24_TO_JCOCO]

            # Convert Weak Perspective Camera [s, tx, ty] to camera translation [tx, ty, tz] in 3D given the bounding box size
            # This camera translation can be used in a full perspective projection
            pred_cam_t = torch.stack([
                pred_camera[:, 1], pred_camera[:, 2], 2 *
                constants.FOCAL_LENGTH / (img_res * pred_camera[:, 0] + 1e-9)
            ],
                                     dim=-1)

            camera_center = torch.zeros(len(pred_JCOCO), 2,
                                        device=pred_camera.device)
            pred_keypoints_2d = perspective_projection(
                pred_JCOCO,
                rotation=torch.eye(
                    3, device=pred_camera.device).unsqueeze(0).expand(
                        len(pred_JCOCO), -1, -1),
                translation=pred_cam_t,
                focal_length=constants.FOCAL_LENGTH,
                camera_center=camera_center)

            # Shift projected keypoints from centered to crop-image coordinates
            coords = pred_keypoints_2d + (img_res / 2.)
            coords = coords.cpu().numpy()
            # Normalize keypoints to [-1,1]
            # pred_keypoints_2d = pred_keypoints_2d / (img_res / 2.)

            gt_keypoints_coco = gt_keypoints_2d_orig[:, -24:][:, constants.
                                                              J24_TO_JCOCO]

            preds = coords.copy()
            scale_ = np.array([scale, scale]).transpose()

            # Transform back
            for i in range(coords.shape[0]):
                preds[i] = transform_preds(coords[i], center[i], scale_[i],
                                           [img_res, img_res])

            all_preds[idx:idx + num_images, :, 0:2] = preds[:, :, 0:2]
            all_preds[idx:idx + num_images, :, 2:3] = 1.
            # double check this all_boxes parts
            all_boxes[idx:idx + num_images, 0:2] = center[:, 0:2]
            all_boxes[idx:idx + num_images, 2:4] = scale_[:, 0:2]
            all_boxes[idx:idx + num_images, 4] = np.prod(scale_ * 200, 1)
            all_boxes[idx:idx + num_images, 5] = 1.
            image_path.extend(batch['imgname'])

            idx += num_images

    # Score the accumulated predictions with the dataset's COCO evaluator
    ckp_name = options.regressor
    name_values, perf_indicator = dataset.evaluate(all_preds,
                                                   options.output_dir,
                                                   all_boxes, image_path,
                                                   ckp_name, filenames,
                                                   imgnums)

    model_name = options.regressor
    if isinstance(name_values, list):
        for name_value in name_values:
            _print_name_value(name_value, model_name)
    else:
        _print_name_value(name_values, model_name)

    # Save reconstructions to a file for further processing
    if save_results:
        np.savez(result_file,
                 pred_joints=pred_joints,
                 pose=smpl_pose,
                 betas=smpl_betas,
                 camera=smpl_camera)
def run_evaluation(model, opt, options, dataset_name, log_freq=50):
    """Run evaluation on the datasets and metrics we report in the paper.

    Evaluates ``model`` on ``dataset_name``, accumulating 3D pose errors
    (MPJPE / PA-MPJPE), mean per-vertex shape error, or LSP mask/part
    metrics depending on the dataset, then prints the final numbers and
    writes them to ``<opt.save_root>/<dataset_name>.txt``.
    """
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

    # Create SMPL model
    smpl = SMPL().to(device)
    if dataset_name == '3dpw' or dataset_name == 'surreal':
        # Gendered models for datasets with gendered ground-truth shapes
        smpl_male = SMPL(cfg.MALE_SMPL_FILE).to(device)
        smpl_female = SMPL(cfg.FEMALE_SMPL_FILE).to(device)

    batch_size = opt.batch_size

    # Create dataloader for the dataset
    if dataset_name == 'surreal':
        dataset = SurrealDataset(options,
                                 use_augmentation=False,
                                 is_train=False,
                                 use_IUV=False)
    else:
        dataset = BaseDataset(options,
                              dataset_name,
                              use_augmentation=False,
                              is_train=False,
                              use_IUV=False)

    data_loader = DataLoader(dataset,
                             batch_size=opt.batch_size,
                             shuffle=False,
                             num_workers=int(opt.num_workers),
                             pin_memory=True)
    print('data loader finish')

    # Transfer model to the GPU and switch to inference mode
    # (FIX: removed a second, redundant recomputation of `device`)
    model.to(device)
    model.eval()

    # Pose metrics
    # MPJPE and Reconstruction error for the non-parametric and parametric shapes
    mpjpe = np.zeros(len(dataset))
    mpjpe_pa = np.zeros(len(dataset))

    # Shape metrics
    # Mean per-vertex error
    shape_err = np.zeros(len(dataset))

    # Mask and part metrics
    # Accuracy
    accuracy = 0.
    parts_accuracy = 0.
    # True positive, false positive and false negative
    tp = np.zeros((2, 1))
    fp = np.zeros((2, 1))
    fn = np.zeros((2, 1))
    parts_tp = np.zeros((7, 1))
    parts_fp = np.zeros((7, 1))
    parts_fn = np.zeros((7, 1))
    # Pixel count accumulators
    pixel_count = 0
    parts_pixel_count = 0

    eval_pose = False
    eval_shape = False
    eval_masks = False
    eval_parts = False
    joint_mapper = cfg.J24_TO_J17 if dataset_name == 'mpi-inf-3dhp' else cfg.J24_TO_J14
    # Choose appropriate evaluation for each dataset
    if 'h36m' in dataset_name or dataset_name == '3dpw' or dataset_name == 'mpi-inf-3dhp':
        eval_pose = True
    elif dataset_name in ['up-3d', 'surreal']:
        eval_shape = True
    elif dataset_name == 'lsp':
        eval_masks = True
        eval_parts = True
        annot_path = cfg.DATASET_FOLDERS['upi-s1h']

    if eval_parts or eval_masks:
        from utils.part_utils import PartRenderer
        renderer = PartRenderer()

    # Iterate over the entire dataset
    for step, batch in enumerate(
            tqdm(data_loader, desc='Eval', total=len(data_loader))):
        # Get ground truth annotations from the batch
        gt_pose = batch['pose'].to(device)
        gt_betas = batch['betas'].to(device)
        gt_vertices = smpl(gt_pose, gt_betas)
        images = batch['img'].to(device)
        curr_batch_size = images.shape[0]

        # Run inference
        with torch.no_grad():
            out_dict = model(images)
            pred_vertices = out_dict['pred_vertices']
            camera = out_dict['camera']

        # 3D pose evaluation
        if eval_pose:
            # Get 14 ground truth joints
            if 'h36m' in dataset_name or 'mpi-inf' in dataset_name:
                gt_keypoints_3d = batch['pose_3d'].cuda()
                gt_keypoints_3d = gt_keypoints_3d[:, joint_mapper, :-1]
                # Pelvis = midpoint of the two hip joints
                gt_pelvis = (gt_keypoints_3d[:, [2]] + gt_keypoints_3d[:, [3]]) / 2
                gt_keypoints_3d = gt_keypoints_3d - gt_pelvis
            else:
                # 3DPW: regress joints from the gendered ground-truth mesh
                gender = batch['gender'].to(device)
                gt_vertices = smpl_male(gt_pose, gt_betas)
                gt_vertices_female = smpl_female(gt_pose, gt_betas)
                gt_vertices[gender == 1, :, :] = gt_vertices_female[gender == 1, :, :]
                gt_keypoints_3d = smpl.get_train_joints(gt_vertices)[:, joint_mapper]
                # gt_keypoints_3d = smpl.get_lsp_joints(gt_vertices)  # joints_regressor used in cmr
                gt_pelvis = (gt_keypoints_3d[:, [2]] + gt_keypoints_3d[:, [3]]) / 2
                gt_keypoints_3d = gt_keypoints_3d - gt_pelvis

            # Get 14 predicted joints from the non-parametic mesh
            pred_keypoints_3d = smpl.get_train_joints(pred_vertices)[:, joint_mapper]
            # pred_keypoints_3d = smpl.get_lsp_joints(pred_vertices)  # joints_regressor used in cmr
            pred_pelvis = (pred_keypoints_3d[:, [2]] + pred_keypoints_3d[:, [3]]) / 2
            pred_keypoints_3d = pred_keypoints_3d - pred_pelvis

            # Absolute error (MPJPE)
            error = torch.sqrt(
                ((pred_keypoints_3d - gt_keypoints_3d) ** 2).sum(
                    dim=-1)).mean(dim=-1).cpu().numpy()
            mpjpe[step * batch_size:step * batch_size + curr_batch_size] = error

            # Reconstruction error (Procrustes-aligned MPJPE)
            r_error = reconstruction_error(pred_keypoints_3d.cpu().numpy(),
                                           gt_keypoints_3d.cpu().numpy(),
                                           reduction=None)
            mpjpe_pa[step * batch_size:step * batch_size + curr_batch_size] = r_error

        # Shape evaluation (Mean per-vertex error)
        if eval_shape:
            if dataset_name == 'surreal':
                gender = batch['gender'].to(device)
                gt_vertices = smpl_male(gt_pose, gt_betas)
                gt_vertices_female = smpl_female(gt_pose, gt_betas)
                gt_vertices[gender == 1, :, :] = gt_vertices_female[gender == 1, :, :]
            # Align both meshes at the pelvis before measuring vertex error
            gt_pelvis_mesh = smpl.get_eval_joints(gt_vertices)
            pred_pelvis_mesh = smpl.get_eval_joints(pred_vertices)
            gt_pelvis_mesh = (gt_pelvis_mesh[:, [2]] + gt_pelvis_mesh[:, [3]]) / 2
            pred_pelvis_mesh = (pred_pelvis_mesh[:, [2]] + pred_pelvis_mesh[:, [3]]) / 2
            # se = torch.sqrt(((pred_vertices - gt_vertices) ** 2).sum(dim=-1)).mean(dim=-1).cpu().numpy()
            se = torch.sqrt(
                ((pred_vertices - pred_pelvis_mesh - gt_vertices + gt_pelvis_mesh) ** 2).sum(
                    dim=-1)).mean(dim=-1).cpu().numpy()
            shape_err[step * batch_size:step * batch_size + curr_batch_size] = se

        # If mask or part evaluation, render the mask and part images
        if eval_masks or eval_parts:
            mask, parts = renderer(pred_vertices, camera)

        # Mask evaluation (for LSP)
        if eval_masks:
            center = batch['center'].cpu().numpy()
            scale = batch['scale'].cpu().numpy()
            # Dimensions of original image
            orig_shape = batch['orig_shape'].cpu().numpy()
            for i in range(curr_batch_size):
                # After rendering, convert image back to original resolution
                pred_mask = uncrop(mask[i].cpu().numpy(), center[i], scale[i],
                                   orig_shape[i]) > 0
                # Load gt mask
                gt_mask = cv2.imread(
                    os.path.join(annot_path, batch['maskname'][i]), 0) > 0
                # Evaluation consistent with the original UP-3D code
                accuracy += (gt_mask == pred_mask).sum()
                pixel_count += np.prod(np.array(gt_mask.shape))
                for c in range(2):
                    cgt = gt_mask == c
                    cpred = pred_mask == c
                    tp[c] += (cgt & cpred).sum()
                    fp[c] += (~cgt & cpred).sum()
                    fn[c] += (cgt & ~cpred).sum()
                f1 = 2 * tp / (2 * tp + fp + fn)

        # Part evaluation (for LSP)
        if eval_parts:
            center = batch['center'].cpu().numpy()
            scale = batch['scale'].cpu().numpy()
            orig_shape = batch['orig_shape'].cpu().numpy()
            for i in range(curr_batch_size):
                pred_parts = uncrop(parts[i].cpu().numpy().astype(np.uint8),
                                    center[i], scale[i], orig_shape[i])
                # Load gt part segmentation
                gt_parts = cv2.imread(
                    os.path.join(annot_path, batch['partname'][i]), 0)
                # Evaluation consistent with the original UP-3D code
                # 6 parts + background
                for c in range(7):
                    cgt = gt_parts == c
                    cpred = pred_parts == c
                    cpred[gt_parts == 255] = 0
                    parts_tp[c] += (cgt & cpred).sum()
                    parts_fp[c] += (~cgt & cpred).sum()
                    parts_fn[c] += (cgt & ~cpred).sum()
                gt_parts[gt_parts == 255] = 0
                pred_parts[pred_parts == 255] = 0
                parts_f1 = 2 * parts_tp / (2 * parts_tp + parts_fp + parts_fn)
                parts_accuracy += (gt_parts == pred_parts).sum()
                parts_pixel_count += np.prod(np.array(gt_parts.shape))

        # Print intermediate results during evaluation
        if step % log_freq == log_freq - 1:
            if eval_pose:
                print('MPJPE: ' + str(1000 * mpjpe[:step * batch_size].mean()))
                print('MPJPE-PA: ' + str(1000 * mpjpe_pa[:step * batch_size].mean()))
                print()
            if eval_shape:
                print('Shape Error: ' + str(1000 * shape_err[:step * batch_size].mean()))
                print()
            if eval_masks:
                print('Accuracy: ', accuracy / pixel_count)
                print('F1: ', f1.mean())
                print()
            if eval_parts:
                print('Parts Accuracy: ', parts_accuracy / parts_pixel_count)
                print('Parts F1 (BG): ', parts_f1[[0, 1, 2, 3, 4, 5, 6]].mean())
                print()

    # Print final results during evaluation
    print('*** Final Results ***')
    print()
    if eval_pose:
        print('MPJPE: ' + str(1000 * mpjpe.mean()))
        print('MPJPE-PA: ' + str(1000 * mpjpe_pa.mean()))
        print()
    if eval_shape:
        print('Shape Error: ' + str(1000 * shape_err.mean()))
        print()
    if eval_masks:
        print('Accuracy: ', accuracy / pixel_count)
        print('F1: ', f1.mean())
        print()
    if eval_parts:
        print('Parts Accuracy: ', parts_accuracy / parts_pixel_count)
        print('Parts F1 (BG): ', parts_f1[[0, 1, 2, 3, 4, 5, 6]].mean())
        print()

    # Save final results to .txt file
    # FIX: use a context manager so the file handle is always closed
    # (the original opened the file and never closed it).
    txt_name = join(opt.save_root, dataset_name + '.txt')
    with open(txt_name, 'w') as f:
        f.write('*** Final Results ***')
        f.write('\n')
        if eval_pose:
            f.write('MPJPE: ' + str(1000 * mpjpe.mean()))
            f.write('\n')
            f.write('MPJPE-PA: ' + str(1000 * mpjpe_pa.mean()))
            f.write('\n')
        if eval_shape:
            f.write('Shape Error: ' + str(1000 * shape_err.mean()))
            f.write('\n')
        if eval_masks:
            f.write('Accuracy: ' + str(accuracy / pixel_count))
            f.write('\n')
            f.write('F1: ' + str(f1.mean()))
            f.write('\n')
        if eval_parts:
            f.write('Parts Accuracy: ' + str(parts_accuracy / parts_pixel_count))
            f.write('\n')
            f.write('Parts F1 (BG): ' + str(parts_f1[[0, 1, 2, 3, 4, 5, 6]].mean()))
            f.write('\n')
def run_evaluation(model, args, dataset, mesh):
    """Run evaluation on the datasets and metrics we report in the paper.

    Args:
        model: Network mapping a batch of images to (non-parametric vertices,
            SMPL vertices, camera, SMPL rotmats, SMPL betas).
        args: Namespace providing batch_size, num_workers, num_imgs,
            write_imgs, eval_pck, dataset, crop_setting, pck_eval_threshold.
        dataset: Evaluation dataset; wrapped in a DataLoader below.
        mesh: Unused here; kept for interface compatibility with callers.
    """
    # Pick the device FIRST so every module lands on the same one.
    # (Previously SMPL().cuda() ran before this fallback and crashed on
    # CPU-only machines.)
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

    # Create SMPL model
    smpl = SMPL().to(device)

    # Regressor for H36M joints.
    # NOTE(review): loaded but never used in this function; kept so a missing
    # regressor file still fails loudly here — confirm whether it can go.
    J_regressor = torch.from_numpy(np.load(cfg.JOINT_REGRESSOR_H36M)).float()

    # Create dataloader for the dataset
    data_loader = DataLoader(dataset, batch_size=args.batch_size,
                             shuffle=False, num_workers=args.num_workers)

    # Transfer model to the GPU (or CPU) and switch to inference mode
    model.to(device)
    model.eval()

    # Build the renderer once, outside the loop (it was re-created on
    # every batch before).
    renderer = Renderer(faces=smpl.faces.cpu().numpy()) if args.write_imgs == 'True' else None

    # Predicted 2D keypoints, one (1, 19, 2) array per image/step
    all_kps = {}

    # Iterate over the entire dataset
    for step, batch in enumerate(tqdm(data_loader, desc='Eval', total=args.num_imgs)):
        images = batch['img'].to(device)

        # Run inference
        with torch.no_grad():
            pred_vertices, pred_vertices_smpl, camera, pred_rotmat, pred_betas = model(images)
            pred_keypoints_3d_smpl = smpl.get_joints(pred_vertices_smpl)
            pred_keypoints_2d_smpl = orthographic_projection(
                pred_keypoints_3d_smpl,
                camera.detach())[:, :, :2].cpu().data.numpy()

        # We use custom keypoints for evaluation: 14 MPII joints followed by
        # the COCO face joints (see paper / supplementary for details).
        eval_part = np.zeros((1, 19, 2))
        eval_part[0, :14, :] = pred_keypoints_2d_smpl[0][:14]
        eval_part[0, 14:, :] = pred_keypoints_2d_smpl[0][19:]
        all_kps[step] = eval_part

        if renderer is not None:
            write_imgs(batch, pred_keypoints_2d_smpl, pred_vertices_smpl,
                       camera, args, renderer)

    # PCK evaluation against the ground-truth keypoints for the whole set
    if args.eval_pck == 'True':
        gt_kp_path = os.path.join(cfg.BASE_DATA_DIR, args.dataset,
                                  args.crop_setting, 'keypoints.pkl')
        log_dir = os.path.join(cfg.BASE_DATA_DIR, 'cmr_pck_results.txt')
        with open(gt_kp_path, 'rb') as f:
            gt = pkl.load(f)

        calc = CalcPCK(all_kps, gt,
                       num_imgs=cfg.DATASET_SIZES[args.dataset][args.crop_setting],
                       log_dir=log_dir,
                       dataset=args.dataset,
                       crop_setting=args.crop_setting,
                       pck_eval_threshold=args.pck_eval_threshold)
        calc.eval()