Example #1
    def init_fn(self):
        self.train_ds = MixedDataset(self.options, ignore_3d=self.options.ignore_3d, is_train=True)
        self.model = hmr(config.SMPL_MEAN_PARAMS, pretrained=True).to(self.device)      # feature extraction model
        self.optimizer = torch.optim.Adam(params=self.model.parameters(),
                                          lr=self.options.lr,
                                          weight_decay=0)
        self.smpl = SMPL(config.SMPL_MODEL_DIR,
                         batch_size=16,
                         create_transl=False).to(self.device)
        # Per-vertex loss on the shape
        self.criterion_shape = nn.L1Loss().to(self.device)
        # Keypoint loss (2D and 3D)
        self.criterion_keypoints = nn.MSELoss(reduction='none').to(self.device)
        # SMPL parameter loss, used when ground-truth parameters are available
        self.criterion_regr = nn.MSELoss().to(self.device)

        self.models_dict = {'model': self.model}
        self.optimizers_dict = {'optimizer': self.optimizer}
        self.focal_length = constants.FOCAL_LENGTH
        # initialize MVSMPLify
        self.mvsmplify = MVSMPLify(step_size=1e-2, batch_size=16, num_iters=100, focal_length=self.focal_length)
        print(self.options.pretrained_checkpoint)
        if self.options.pretrained_checkpoint is not None:
            self.load_pretrained(checkpoint_file=self.options.pretrained_checkpoint)
        # load dictionary of fits
        self.fits_dict = FitsDict(self.options, self.train_ds)
        # create renderer
        self.renderer = Renderer(focal_length=self.focal_length, img_res=224, faces=self.smpl.faces)
Example #2
    def init_fn(self):
        self.train_ds = MixedDataset(self.options, ignore_3d=self.options.ignore_3d, is_train=True)

        self.model = hmr(config.SMPL_MEAN_PARAMS, pretrained=True).to(self.device)
        self.optimizer = torch.optim.Adam(params=self.model.parameters(),
                                          lr=self.options.lr,
                                          weight_decay=0)
        self.smpl = SMPL(config.SMPL_MODEL_DIR,
                         batch_size=self.options.batch_size,
                         create_transl=False).to(self.device)
        # Per-vertex loss on the shape
        self.criterion_shape = nn.L1Loss().to(self.device)
        # Keypoint (2D and 3D) loss
        # No reduction because confidence weighting needs to be applied
        self.criterion_keypoints = nn.MSELoss(reduction='none').to(self.device)
        # Loss for SMPL parameter regression
        self.criterion_regr = nn.MSELoss().to(self.device)
        self.models_dict = {'model': self.model}
        self.optimizers_dict = {'optimizer': self.optimizer}
        self.focal_length = constants.FOCAL_LENGTH
        self.conf_thresh = self.options.conf_thresh

        # Initialize SMPLify fitting module
        self.smplify = SMPLify(step_size=1e-2, batch_size=self.options.batch_size, num_iters=self.options.num_smplify_iters, focal_length=self.focal_length, prior_mul=0.1, conf_thresh=self.conf_thresh)
        if self.options.pretrained_checkpoint is not None:
            self.load_pretrained(checkpoint_file=self.options.pretrained_checkpoint)

        # Load dictionary of fits
        self.fits_dict = FitsDict(self.options, self.train_ds)

        # Create renderer
        self.renderer = Renderer(focal_length=self.focal_length, img_res=self.options.img_res, faces=self.smpl.faces)
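Note: the reduction='none' keypoint criterion above is typically combined with per-keypoint confidence weights. A minimal sketch of that pattern (the tensor shapes and the confidence-in-last-column convention are assumptions, not code from this example):

import torch
import torch.nn as nn

criterion_keypoints = nn.MSELoss(reduction='none')

def keypoint_loss(pred_keypoints_2d, gt_keypoints_2d):
    # gt_keypoints_2d is assumed to be (B, K, 3) with confidence in the last column;
    # pred_keypoints_2d is (B, K, 2). The per-element MSE is weighted by confidence.
    conf = gt_keypoints_2d[:, :, -1].unsqueeze(-1).clone()
    loss = conf * criterion_keypoints(pred_keypoints_2d, gt_keypoints_2d[:, :, :-1])
    return loss.mean()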
Example #3
    def init_fn(self):
        # create training dataset
        self.train_ds = create_dataset(self.options.dataset, self.options)

        # create Mesh object
        self.mesh = Mesh()
        self.faces = self.mesh.faces.to(self.device)

        # create GraphCNN
        self.graph_cnn = GraphCNN(self.mesh.adjmat,
                                  self.mesh.ref_vertices.t(),
                                  num_channels=self.options.num_channels,
                                  num_layers=self.options.num_layers).to(
                                      self.device)

        # SMPL Parameter regressor
        self.smpl_param_regressor = SMPLParamRegressor().to(self.device)

        # Setup a joint optimizer for the 2 models
        self.optimizer = torch.optim.Adam(
            params=list(self.graph_cnn.parameters()) +
            list(self.smpl_param_regressor.parameters()),
            lr=self.options.lr,
            betas=(self.options.adam_beta1, 0.999),
            weight_decay=self.options.wd)

        # SMPL model
        self.smpl = SMPL().to(self.device)

        # Create loss functions
        self.criterion_shape = nn.L1Loss().to(self.device)
        self.criterion_keypoints = nn.MSELoss(reduction='none').to(self.device)
        self.criterion_regr = nn.MSELoss().to(self.device)

        # Pack models and optimizers in a dict - necessary for checkpointing
        self.models_dict = {
            'graph_cnn': self.graph_cnn,
            'smpl_param_regressor': self.smpl_param_regressor
        }
        self.optimizers_dict = {'optimizer': self.optimizer}

        # Renderer for visualization
        self.renderer = Renderer(faces=self.smpl.faces.cpu().numpy())

        # LSP indices from full list of keypoints
        self.to_lsp = list(range(14))

        # Optionally start training from a pretrained checkpoint
        # Note that this is different from resuming training
        # For the latter use --resume
        if self.options.pretrained_checkpoint is not None:
            self.load_pretrained(
                checkpoint_file=self.options.pretrained_checkpoint)
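The models_dict / optimizers_dict packing above exists so that a generic checkpoint routine can save and restore every component by name. A hypothetical sketch of such a saver (the save_checkpoint name and file layout are assumptions; Example #8 below shows the matching load side reading keys such as 'graph_cnn'):

import torch

def save_checkpoint(models_dict, optimizers_dict, checkpoint_file):
    # Store every model and optimizer state under its dictionary key,
    # mirroring how a checkpoint is later read back via checkpoint['graph_cnn'] etc.
    checkpoint = {name: model.state_dict() for name, model in models_dict.items()}
    checkpoint.update({name: opt.state_dict() for name, opt in optimizers_dict.items()})
    torch.save(checkpoint, checkpoint_file)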
Example #4
def bbox_from_json(bbox_file):
    """Get center and scale of bounding box from bounding box annotations.
    The expected format is [top_left(x), top_left(y), width, height].
    """
    with open(bbox_file, 'r') as f:
        bbox = np.array(json.load(f)['bbox']).astype(np.float32)
    ul_corner = bbox[:2]
    center = ul_corner + 0.5 * bbox[2:]
    # make sure the bounding box is rectangular
    width = max(bbox[2], bbox[3])
    scale = width / 200.0
    return center, scale


# Demo-style code shown with this example (module-level, not part of bbox_from_json):
# Load pretrained model
model = hmr(config.SMPL_MEAN_PARAMS).to(device)
checkpoint = torch.load(args.checkpoint)
model.load_state_dict(checkpoint['model'], strict=False)

# Load SMPL model
smpl = SMPL(config.SMPL_MODEL_DIR, batch_size=1,
            create_transl=False).to(device)
model.eval()

# Setup renderer for visualization
renderer = Renderer(focal_length=constants.FOCAL_LENGTH,
                    img_res=constants.IMG_RES,
                    faces=smpl.faces)

# Preprocess input image and generate predictions
img, norm_img = process_image(args.img,
                              args.bbox,
                              args.openpose,
                              input_res=constants.IMG_RES)
with torch.no_grad():
    pred_rotmat, pred_betas, pred_camera = model(norm_img.to(device))
    pred_output = smpl(betas=pred_betas,
                       body_pose=pred_rotmat[:, 1:],
                       global_orient=pred_rotmat[:, 0].unsqueeze(1),
                       pose2rot=False)
    pred_vertices = pred_output.vertices

# Calculate camera parameters for rendering
camera_translation = torch.stack([
    pred_camera[:, 1], pred_camera[:, 2],
    2 * constants.FOCAL_LENGTH / (constants.IMG_RES * pred_camera[:, 0] + 1e-9)
], dim=-1)
camera_translation = camera_translation[0].cpu().numpy()
pred_vertices = pred_vertices[0].cpu().numpy()
img = img.permute(1, 2, 0).cpu().numpy()
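The snippet stops right before rendering. A hedged continuation, assuming a SPIN-style Renderer that is called as renderer(vertices, camera_translation, image) and that OpenCV is available for writing the result:

import cv2

# Hypothetical continuation: overlay the predicted mesh on the input image and save it.
img_shape = renderer(pred_vertices, camera_translation, img)
cv2.imwrite('demo_result.png', 255 * img_shape[:, :, ::-1])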
Example #5
 def __init__(self, focal_length=5000., render_res=224):
     # Parameters for rendering
     self.focal_length = focal_length
     self.render_res = render_res
     # We use Neural 3D mesh renderer for rendering masks and part segmentations
     self.neural_renderer = nr.Renderer(dist_coeffs=None, orig_size=self.render_res,
                                        image_size=render_res,
                                        light_intensity_ambient=1,
                                        light_intensity_directional=0,
                                        anti_aliasing=False)
     self.faces = SMPL().faces.cuda().int()
     textures = np.load(cfg.VERTEX_TEXTURE_FILE)
     self.textures = torch.from_numpy(textures).cuda().float()
     self.cube_parts = torch.cuda.FloatTensor(np.load(cfg.CUBE_PARTS_FILE))
Example #6
    def init_fn(self):
        self.train_ds = MixedDataset(self.options,
                                     ignore_3d=self.options.ignore_3d,
                                     is_train=True)

        self.model = hmr(config.SMPL_MEAN_PARAMS,
                         pretrained=True).to(self.device)

        self.optimizer = torch.optim.Adam(params=self.model.parameters(),
                                          lr=self.options.lr)
        self.lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(
            self.optimizer, gamma=0.95)

        self.smpl = SMPL(config.SMPL_MODEL_DIR,
                         batch_size=self.options.batch_size,
                         create_transl=False).to(self.device)

        # Consistency losses (contrastive and MSE)
        self.criterion_consistency_contrastive = NTXent(
            tau=self.options.tau, kernel=self.options.kernel).to(self.device)
        self.criterion_consistency_mse = nn.MSELoss().to(self.device)
        # Per-vertex loss on the shape
        self.criterion_shape = nn.L1Loss().to(self.device)
        # Keypoint (2D and 3D) loss
        # No reduction because confidence weighting needs to be applied
        self.criterion_keypoints = nn.MSELoss(reduction='none').to(self.device)
        # Loss for SMPL parameter regression
        self.criterion_regr = nn.MSELoss().to(self.device)
        self.models_dict = {'model': self.model}
        self.optimizers_dict = {'optimizer': self.optimizer}
        self.focal_length = constants.FOCAL_LENGTH

        if self.options.pretrained_checkpoint is not None:
            self.load_pretrained(
                checkpoint_file=self.options.pretrained_checkpoint)

        # Create renderer
        self.renderer = Renderer(focal_length=self.focal_length,
                                 img_res=self.options.img_res,
                                 faces=self.smpl.faces)

        # Create input image flag
        self.input_img = self.options.input_img

        # Initialize the feature queue
        self.feat_queue = FeatQueue(max_queue_size=self.options.max_queue_size)
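A minimal, hypothetical sketch of how the ExponentialLR scheduler created above is usually advanced: the optimizer steps every batch, while the scheduler steps once per epoch (train_loader, compute_loss and num_epochs are placeholder names, not part of this example):

for epoch in range(num_epochs):
    for batch in train_loader:
        loss = compute_loss(batch)   # placeholder for the actual training losses
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    lr_scheduler.step()              # multiplies the learning rate by gamma=0.95 once per epoch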
Example #7
    def __init__(self, filename='data/mesh_downsampling.npz',
                 num_downsampling=1, nsize=1, device=torch.device('cuda')):
        self._A, self._U, self._D = get_graph_params(filename=filename, nsize=nsize)
        self._A = [a.to(device) for a in self._A]
        self._U = [u.to(device) for u in self._U]
        self._D = [d.to(device) for d in self._D]
        self.num_downsampling = num_downsampling

        # load template vertices from SMPL and normalize them
        smpl = SMPL()
        ref_vertices = smpl.v_template
        center = 0.5*(ref_vertices.max(dim=0)[0] + ref_vertices.min(dim=0)[0])[None]
        ref_vertices -= center
        ref_vertices /= ref_vertices.abs().max().item()

        self._ref_vertices = ref_vertices.to(device)
        self.faces = smpl.faces.int().to(device)
Example #8
 def __init__(self, mesh, num_layers, num_channels, pretrained_checkpoint=None):
     super(CMR, self).__init__()
     self.graph_cnn = GraphCNN(mesh.adjmat, mesh.ref_vertices.t(),
                               num_layers, num_channels)
     self.smpl_param_regressor = SMPLParamRegressor()
     self.smpl = SMPL()
     self.mesh = mesh
     if pretrained_checkpoint is not None:
         checkpoint = torch.load(pretrained_checkpoint)
         try:
             self.graph_cnn.load_state_dict(checkpoint['graph_cnn'])
         except KeyError:
             print('Warning: graph_cnn was not found in checkpoint')
         try:
             self.smpl_param_regressor.load_state_dict(checkpoint['smpl_param_regressor'])
         except KeyError:
             print('Warning: smpl_param_regressor was not found in checkpoint')
Example #9
def run_evaluation(model, dataset_name, dataset, result_file,
                   batch_size=32, img_res=224, 
                   num_workers=32, shuffle=False, log_freq=50, options=None):
    """Run evaluation on the datasets and metrics we report in the paper. """

    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

    # Transfer model to the GPU
    model.to(device)

    # Load SMPL model
    smpl_neutral = SMPL(path_config.SMPL_MODEL_DIR,
                        create_transl=False).to(device)
    smpl_male = SMPL(path_config.SMPL_MODEL_DIR,
                     gender='male',
                     create_transl=False).to(device)
    smpl_female = SMPL(path_config.SMPL_MODEL_DIR,
                       gender='female',
                       create_transl=False).to(device)
    
    renderer = PartRenderer()
    
    # Regressor for H36m joints
    J_regressor = torch.from_numpy(np.load(path_config.JOINT_REGRESSOR_H36M)).float()
    
    save_results = result_file is not None
    # Disable shuffling if you want to save the results
    if save_results:
        shuffle = False
    # Create dataloader for the dataset
    data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle, num_workers=num_workers)

    fits_dict = None

    # Pose metrics
    # MPJPE and Reconstruction error for the non-parametric and parametric shapes
    mpjpe = np.zeros(len(dataset))
    recon_err = np.zeros(len(dataset))
    mpjpe_smpl = np.zeros(len(dataset))
    recon_err_smpl = np.zeros(len(dataset))

    # Store SMPL parameters
    smpl_pose = np.zeros((len(dataset), 72))
    smpl_betas = np.zeros((len(dataset), 10))
    smpl_camera = np.zeros((len(dataset), 3))
    pred_joints = np.zeros((len(dataset), 17, 3))

    # joint_mapper_coco = constants.H36M_TO_JCOCO
    joint_mapper_gt = constants.J24_TO_JCOCO

    focal_length = 5000

    num_joints = 17
    num_samples = len(dataset)
    print('dataset length: {}'.format(num_samples))
    all_preds = np.zeros(
        (num_samples, num_joints, 3),
        dtype=np.float32
    )
    all_boxes = np.zeros((num_samples, 6))
    image_path = []
    filenames = []
    imgnums = []
    idx = 0
    with torch.no_grad():
        for step, batch in enumerate(tqdm(data_loader, desc='Eval', total=len(data_loader))):
            if len(options.vis_imname) > 0:
                imgnames = [i_n.split('/')[-1] for i_n in batch['imgname']]
                name_hit = False
                for i_n in imgnames:
                    if options.vis_imname in i_n:
                        name_hit = True
                        print('vis: ' + i_n)
                if not name_hit:
                    continue

            images = batch['img'].to(device)

            scale = batch['scale'].numpy()
            center = batch['center'].numpy()

            num_images = images.size(0)

            gt_keypoints_2d = batch['keypoints']  # 2D keypoints
            # De-normalize 2D keypoints from [-1,1] to pixel space
            gt_keypoints_2d_orig = gt_keypoints_2d.clone()
            gt_keypoints_2d_orig[:, :, :-1] = 0.5 * img_res * (gt_keypoints_2d_orig[:, :, :-1] + 1)

            if options.regressor == 'hmr':
                pred_rotmat, pred_betas, pred_camera = model(images)
                # torch.Size([32, 24, 3, 3]) torch.Size([32, 10]) torch.Size([32, 3])
            elif options.regressor == 'pymaf_net':
                preds_dict, _ = model(images)
                pred_rotmat = preds_dict['smpl_out'][-1]['rotmat'].contiguous().view(-1, 24, 3, 3)
                pred_betas = preds_dict['smpl_out'][-1]['theta'][:, 3:13].contiguous()
                pred_camera = preds_dict['smpl_out'][-1]['theta'][:, :3].contiguous()

            pred_output = smpl_neutral(betas=pred_betas, body_pose=pred_rotmat[:, 1:],
                                        global_orient=pred_rotmat[:, 0].unsqueeze(1), pose2rot=False)

            # pred_vertices = pred_output.vertices
            pred_J24 = pred_output.joints[:, -24:]
            pred_JCOCO = pred_J24[:, constants.J24_TO_JCOCO]

            # Convert Weak Perspective Camera [s, tx, ty] to camera translation [tx, ty, tz] in 3D given the bounding box size
            # This camera translation can be used in a full perspective projection
            pred_cam_t = torch.stack([pred_camera[:, 1],
                                      pred_camera[:, 2],
                                      2 * constants.FOCAL_LENGTH / (img_res * pred_camera[:, 0] + 1e-9)], dim=-1)
            camera_center = torch.zeros(len(pred_JCOCO), 2, device=pred_camera.device)
            pred_keypoints_2d = perspective_projection(pred_JCOCO,
                                                        rotation=torch.eye(3, device=pred_camera.device).unsqueeze(0).expand(len(pred_JCOCO), -1, -1),
                                                        translation=pred_cam_t,
                                                        focal_length=constants.FOCAL_LENGTH,
                                                        camera_center=camera_center)

            coords = pred_keypoints_2d + (img_res / 2.)
            coords = coords.cpu().numpy()

            gt_keypoints_coco = gt_keypoints_2d_orig[:, -24:][:, constants.J24_TO_JCOCO]
            vert_errors_batch = []
            for i, (gt2d, pred2d) in enumerate(zip(gt_keypoints_coco.cpu().numpy(), coords.copy())):
                vert_error = np.sqrt(np.sum((gt2d[:, :2] - pred2d[:, :2]) ** 2, axis=1))
                vert_error *= gt2d[:, 2]
                vert_mean_error = np.sum(vert_error) / np.sum(gt2d[:, 2] > 0)
                vert_errors_batch.append(10 * vert_mean_error)

            if options.vis_demo:
                imgnames = [i_n.split('/')[-1] for i_n in batch['imgname']]

                if options.regressor == 'hmr':
                    iuv_pred = None

                images_vis = images * torch.tensor([0.229, 0.224, 0.225], device=images.device).reshape(1, 3, 1, 1)
                images_vis = images_vis + torch.tensor([0.485, 0.456, 0.406], device=images.device).reshape(1, 3, 1, 1)
                vis_smpl_iuv(images_vis.cpu().numpy(), pred_camera.cpu().numpy(), pred_output.vertices.cpu().numpy(),
                             smpl_neutral.faces, iuv_pred,
                             vert_errors_batch, imgnames, os.path.join('./notebooks/output/demo_results', dataset_name,
                                                                            options.checkpoint.split('/')[-3]), options)

            preds = coords.copy()

            scale_ = np.array([scale, scale]).transpose()

            # Transform back
            for i in range(coords.shape[0]):
                preds[i] = transform_preds(
                    coords[i], center[i], scale_[i], [img_res, img_res]
                )

            all_preds[idx:idx + num_images, :, 0:2] = preds[:, :, 0:2]
            all_preds[idx:idx + num_images, :, 2:3] = 1.
            all_boxes[idx:idx + num_images, 5] = 1.
            image_path.extend(batch['imgname'])

            idx += num_images

        if len(options.vis_imname) > 0:
            exit()

        if args.checkpoint is None or 'model_checkpoint.pt' in args.checkpoint:
            ckp_name = 'spin_model'
        else:
            ckp_name = args.checkpoint.split('/')
            ckp_name = ckp_name[2].split('_')[1] + '_' + ckp_name[-1].split('.')[0]
        name_values, perf_indicator = dataset.evaluate(
            cfg, all_preds, options.output_dir, all_boxes, image_path, ckp_name,
            filenames, imgnums
        )

        model_name = options.regressor
        if isinstance(name_values, list):
            for name_value in name_values:
                _print_name_value(name_value, model_name)
        else:
            _print_name_value(name_values, model_name)

    # Save reconstructions to a file for further processing
    if save_results:
        np.savez(result_file, pred_joints=pred_joints, pose=smpl_pose, betas=smpl_betas, camera=smpl_camera)
Example #10
    def init(self):
        
        # Create training and testing dataset
        self.train_ds = MGNDataset(self.options, split = 'train')
        self.test_ds  = MGNDataset(self.options, split = 'test')
        
        # The test data loader is fixed; shuffling is disabled since it is unnecessary.
        self.test_data_loader = CheckpointDataLoader( self.test_ds,
                    batch_size  = self.options.batch_size,
                    num_workers = self.options.num_workers,
                    pin_memory  = self.options.pin_memory,
                    shuffle     = False)

        # Create SMPL Mesh (graph) object for GCN
        self.mesh = Mesh(self.options, self.options.num_downsampling)
        self.faces = torch.cat( self.options.batch_size * [
                                self.mesh.faces.to(self.device)[None]
                                ], dim = 0 )
        self.faces = self.faces.type(torch.LongTensor).to(self.device)
        
        # Create SMPL blending model and edges
        self.smpl = SMPL(self.options.smpl_model_path, self.device)
        # self.smplEdge = torch.Tensor(np.load(self.options.smpl_edges_path)) \
        #                 .long().to(self.device)
        
        # create SMPL+D blending model
        self.smplD = Smpl( self.options.smpl_model_path ) 
        
        # read the SMPL .obj file to get uv coordinates
        _, self.smpl_tri_ind, uv_coord, tri_uv_ind = read_Obj(self.options.smpl_objfile_path)
        uv_coord[:, 1] = 1 - uv_coord[:, 1]
        expUV = uv_coord[tri_uv_ind.flatten()]
        unique, index = np.unique(self.smpl_tri_ind.flatten(), return_index = True)
        self.smpl_verts_uvs = torch.as_tensor(expUV[index,:]).float().to(self.device)
        self.smpl_tri_ind   = torch.as_tensor(self.smpl_tri_ind).to(self.device)
        
        # camera for projection 
        self.perspCam = perspCamera() 

        # mean and std of displacements
        self.dispPara = \
            torch.Tensor(np.load(self.options.MGN_offsMeanStd_path)).to(self.device)
        
        # load average pose and shape and convert to camera coordinates;
        # the average pose is determined by the image id used for training (0-11)
        avgPose_objCoord = np.load(self.options.MGN_avgPose_path)
        avgPose_objCoord[:3] = rotationMatrix_to_axisAngle(    # for 0,6, front only
            torch.tensor([[[1,  0, 0],
                           [0, -1, 0],
                           [0,  0,-1]]]))
        self.avgPose = \
            axisAngle_to_Rot6d(
                torch.Tensor(avgPose_objCoord[None]).reshape(-1, 3)
                    ).reshape(1, -1).to(self.device)
        self.avgBeta = \
            torch.Tensor(
                np.load(self.options.MGN_avgBeta_path)[None]).to(self.device)
        self.avgCam  = torch.Tensor([1.2755, 0, 0])[None].to(self.device)    # 1.2755 is for our settings
        
        self.model = frameVIBE(
            self.options.smpl_model_path, 
            self.mesh,                
            self.avgPose,
            self.avgBeta,
            self.avgCam,
            self.options.num_channels,
            self.options.num_layers ,  
            self.smpl_verts_uvs,
            self.smpl_tri_ind
            ).to(self.device)
            
        # Set up an optimizer for the models
        self.optimizer = torch.optim.Adam(
            params=list(self.model.parameters()),
            lr=self.options.lr,
            betas=(self.options.adam_beta1, 0.999),
            weight_decay=self.options.wd)
        
        self.criterion = VIBELoss(
            e_loss_weight=50,         # for 2D keypoints; helps estimate the camera and global orientation
            e_3d_loss_weight=1,       # for kp 3d, bvt
            e_pose_loss_weight=10,     # for body pose parameters
            e_shape_loss_weight=1,   # for body shape parameters
            e_disp_loss_weight=1,   # for displacements 
            e_tex_loss_weight=1,       # for uv image 
            d_motion_loss_weight=0
            )
        
        # Pack models and optimizers in a dict - necessary for checkpointing
        self.models_dict = {self.options.model: self.model}
        self.optimizers_dict = {'optimizer': self.optimizer}
Example #11
def run_evaluation(hmr_model, dataset, eval_size, batch_size=32, num_workers=32, log_freq=50):
    """Run evaluation on the datasets and metrics we report in the paper. """
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

    # focal length
    focal_length = constants.FOCAL_LENGTH

    # Transfer hmr_model to the GPU
    hmr_model.to(device)

    # Load SMPL models
    smpl_neutral = SMPL(config.SMPL_MODEL_DIR,
                        create_transl=False).to(device)
    smpl_male = SMPL(config.SMPL_MODEL_DIR,
                     gender='male',
                     create_transl=False).to(device)
    smpl_female = SMPL(config.SMPL_MODEL_DIR,
                       gender='female',
                       create_transl=False).to(device)

    # Regressor for H36m joints
    J_regressor = torch.from_numpy(np.load(config.JOINT_REGRESSOR_H36M)).float()

    # Create dataloader for the dataset
    data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)

    # Pose metrics
    # MPJPE and Reconstruction error for the non-parametric and parametric shapes
    mpjpe = np.zeros((len(dataset)))
    recon_err = np.zeros((len(dataset)))

    joint_mapper_h36m = constants.H36M_TO_J14

    # Iterate over the entire dataset
    for step, batch in enumerate(tqdm(data_loader, desc='Eval', total=len(data_loader))):
        # Get ground truth annotations from the batch
        gt_pose = batch['pose'].to(device)
        gt_betas = batch['betas'].to(device)

        gender = batch['gender'].to(device)
        images = batch['img_up']
        curr_batch_size = images.shape[0]

        with torch.no_grad():
            images = images.to(device)
            pred_rotmat, pred_betas, pred_camera, _ = hmr_model(images, scale=size_to_scale(eval_size))
            pred_output = smpl_neutral(betas=pred_betas, body_pose=pred_rotmat[:, 1:],
                                       global_orient=pred_rotmat[:, 0].unsqueeze(1), pose2rot=False)
            pred_vertices = pred_output.vertices

            J_regressor_batch = J_regressor[None, :].expand(pred_vertices.shape[0], -1, -1).to(device)

            gt_vertices = smpl_male(global_orient=gt_pose[:, :3], body_pose=gt_pose[:, 3:], betas=gt_betas).vertices
            gt_vertices_female = smpl_female(global_orient=gt_pose[:, :3], body_pose=gt_pose[:, 3:],
                                             betas=gt_betas).vertices
            gt_vertices[gender == 1, :, :] = gt_vertices_female[gender == 1, :, :]
            gt_keypoints_3d = torch.matmul(J_regressor_batch, gt_vertices)
            gt_pelvis = gt_keypoints_3d[:, [0], :].clone()
            gt_keypoints_3d = gt_keypoints_3d[:, joint_mapper_h36m, :]
            gt_keypoints_3d = gt_keypoints_3d - gt_pelvis

            pred_keypoints_3d = torch.matmul(J_regressor_batch, pred_vertices)
            pred_pelvis = pred_keypoints_3d[:, [0], :].clone()
            pred_keypoints_3d = pred_keypoints_3d[:, joint_mapper_h36m, :]
            pred_keypoints_3d = pred_keypoints_3d - pred_pelvis

            # Absolute error (MPJPE)
            error = torch.sqrt(((pred_keypoints_3d - gt_keypoints_3d) ** 2).sum(dim=-1)).mean(dim=-1).cpu().numpy()
            mpjpe[step * batch_size:step * batch_size + curr_batch_size] = error

            # Reconstruction error
            r_error = reconstruction_error(pred_keypoints_3d.cpu().numpy(), gt_keypoints_3d.cpu().numpy(), reduction=None)
            recon_err[step * batch_size:step * batch_size + curr_batch_size] = r_error

            # Print intermediate results during evaluation
            if step % log_freq == log_freq - 1:
                print('MPJPE: ' + str(1000 * mpjpe[:step * batch_size].mean()))
                print('Reconstruction Error: ' + str(1000 * recon_err[:step * batch_size].mean()))

    # Print final results during evaluation
    print('*** Final Results ***')
    print()
    print('MPJPE: {}'.format(1000 * mpjpe.mean()))
    print('Reconstruction Error: {}'.format(1000 * recon_err.mean()))
    print()
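reconstruction_error() is called above but not shown. A plausible NumPy sketch of what such a function computes, Procrustes-aligned MPJPE (PA-MPJPE); this is an assumption about its behaviour, not the repository's own implementation:

import numpy as np

def reconstruction_error(pred, gt, reduction='mean'):
    # pred, gt: (B, K, 3) joint arrays in the same units.
    errors = []
    for p, g in zip(pred, gt):
        # Center both joint sets.
        mu_p, mu_g = p.mean(axis=0), g.mean(axis=0)
        p0, g0 = p - mu_p, g - mu_g
        # Optimal similarity transform (rotation + scale) via SVD.
        K = p0.T @ g0
        U, _, Vt = np.linalg.svd(K)
        Z = np.eye(3)
        Z[-1, -1] = np.sign(np.linalg.det(U @ Vt))
        R = Vt.T @ Z @ U.T
        scale = np.trace(R @ K) / (p0 ** 2).sum()
        p_aligned = scale * (R @ p0.T).T + mu_g
        # Mean per-joint error after alignment.
        errors.append(np.sqrt(((p_aligned - g) ** 2).sum(axis=-1)).mean())
    errors = np.array(errors)
    return errors.mean() if reduction == 'mean' else errors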
Example #12
def main():
    """Main function"""
    args = parse_args()
    args.batch_size = 1

    cfg_from_file(args.cfg_file)

    cfg.DANET.REFINEMENT = EasyDict(cfg.DANET.REFINEMENT)
    cfg.MSRES_MODEL.EXTRA = EasyDict(cfg.MSRES_MODEL.EXTRA)

    device = torch.device(
        'cuda') if torch.cuda.is_available() else torch.device('cpu')

    if cfg.DANET.SMPL_MODEL_TYPE == 'male':
        smpl_male = SMPL(path_config.SMPL_MODEL_DIR,
                         gender='male',
                         create_transl=False).to(device)
        smpl = smpl_male
    elif cfg.DANET.SMPL_MODEL_TYPE == 'neutral':
        smpl_neutral = SMPL(path_config.SMPL_MODEL_DIR,
                            create_transl=False).to(device)
        smpl = smpl_neutral
    elif cfg.DANET.SMPL_MODEL_TYPE == 'female':
        smpl_female = SMPL(path_config.SMPL_MODEL_DIR,
                           gender='female',
                           create_transl=False).to(device)
        smpl = smpl_female

    if args.use_opendr:
        from utils.renderer import opendr_render
        dr_render = opendr_render()

    # IUV renderer
    iuv_renderer = IUV_Renderer()

    if not os.path.exists(args.out_dir):
        os.makedirs(args.out_dir)

    ### Model ###
    model = DaNet(args, path_config.SMPL_MEAN_PARAMS,
                  pretrained=False).to(device)

    checkpoint = torch.load(args.checkpoint)
    model.load_state_dict(checkpoint['model'], strict=False)
    model.eval()

    img_path_list = [
        os.path.join(args.img_dir, name) for name in os.listdir(args.img_dir)
        if name.endswith('.jpg')
    ]
    for i, path in enumerate(img_path_list):

        image = Image.open(path).convert('RGB')
        img_id = path.split('/')[-1][:-4]

        image_tensor = torchvision.transforms.ToTensor()(image).unsqueeze(
            0).cuda()

        # run inference
        pred_dict = model.infer_net(image_tensor)
        para_pred = pred_dict['para']
        camera_pred = para_pred[:, 0:3].contiguous()
        betas_pred = para_pred[:, 3:13].contiguous()
        rotmat_pred = para_pred[:, 13:].contiguous().view(-1, 24, 3, 3)

        # input image
        image_np = image_tensor[0].cpu().numpy()
        image_np = np.transpose(image_np, (1, 2, 0))

        ones_np = np.ones(image_np.shape[:2]) * 255
        ones_np = ones_np[:, :, None]

        image_in_rgba = np.concatenate((image_np, ones_np), axis=2)

        # estimated global IUV
        global_iuv = iuv_map2img(
            *pred_dict['visualization']['iuv_pred'])[0].cpu().numpy()
        global_iuv = np.transpose(global_iuv, (1, 2, 0))
        global_iuv = resize(global_iuv, image_np.shape[:2])
        global_iuv_rgba = np.concatenate((global_iuv, ones_np), axis=2)

        # estimated partial IUV
        part_iuv_pred = pred_dict['visualization']['part_iuv_pred'][0]
        p_iuv_vis = []
        for i in range(part_iuv_pred.size(0)):
            p_u_vis, p_v_vis, p_i_vis = [
                part_iuv_pred[i, iuv].unsqueeze(0) for iuv in range(3)
            ]
            if p_u_vis.size(1) == 25:
                p_iuv_vis_i = iuv_map2img(p_u_vis.detach(), p_v_vis.detach(),
                                          p_i_vis.detach())
            else:
                p_iuv_vis_i = iuv_map2img(p_u_vis.detach(),
                                          p_v_vis.detach(),
                                          p_i_vis.detach(),
                                          ind_mapping=[0] +
                                          model.img2iuv.dp2smpl_mapping[i])
            p_iuv_vis.append(p_iuv_vis_i)
        part_iuv = torch.cat(p_iuv_vis, dim=0)
        part_iuv = make_grid(part_iuv, nrow=6, padding=0).cpu().numpy()
        part_iuv = np.transpose(part_iuv, (1, 2, 0))
        part_iuv_rgba = np.concatenate(
            (part_iuv, np.ones(part_iuv.shape[:2])[:, :, None] * 255), axis=2)

        # rendered IUV of the predicted SMPL model
        smpl_output = smpl(betas=betas_pred,
                           body_pose=rotmat_pred[:, 1:],
                           global_orient=rotmat_pred[:, 0].unsqueeze(1),
                           pose2rot=False)
        verts_pred = smpl_output.vertices
        render_iuv = iuv_renderer.verts2uvimg(verts_pred, camera_pred)
        render_iuv = render_iuv[0].cpu().numpy()

        render_iuv = np.transpose(render_iuv, (1, 2, 0))
        render_iuv = resize(render_iuv, image_np.shape[:2])

        img_render_iuv = image_np.copy()
        img_render_iuv[render_iuv > 0] = render_iuv[render_iuv > 0]

        img_render_iuv_rgba = np.concatenate((img_render_iuv, ones_np), axis=2)

        img_vis_list = [
            image_in_rgba, global_iuv_rgba, part_iuv_rgba, img_render_iuv_rgba
        ]

        if args.use_opendr:
            # visualize the predicted SMPL model using the opendr renderer
            K = iuv_renderer.K[0].cpu().numpy()
            _, _, img_smpl, smpl_rgba = dr_render.render(
                image_tensor[0].cpu().numpy(), camera_pred[0].cpu().numpy(), K,
                verts_pred.cpu().numpy(), smpl_neutral.faces)

            img_smpl_rgba = np.concatenate((img_smpl, ones_np), axis=2)
            img_vis_list.extend([img_smpl_rgba, smpl_rgba])

        img_vis = np.concatenate(img_vis_list, axis=1)
        img_vis[img_vis < 0.0] = 0.0
        img_vis[img_vis > 1.0] = 1.0
        imsave(os.path.join(args.out_dir, img_id + '_result.png'), img_vis)

    print('Demo results have been saved in {}.'.format(args.out_dir))
Example #13
def run_evaluation(model,
                   dataset_name,
                   dataset,
                   result_file,
                   batch_size=32,
                   img_res=224,
                   num_workers=32,
                   shuffle=False,
                   log_freq=50):
    """Run evaluation on the datasets and metrics we report in the paper. """

    device = torch.device(
        'cuda') if torch.cuda.is_available() else torch.device('cpu')

    # Transfer model to the GPU
    model.to(device)

    # Load SMPL model
    smpl_neutral = SMPL(config.SMPL_MODEL_DIR, create_transl=False).to(device)
    smpl_male = SMPL(config.SMPL_MODEL_DIR, gender='male',
                     create_transl=False).to(device)
    smpl_female = SMPL(config.SMPL_MODEL_DIR,
                       gender='female',
                       create_transl=False).to(device)

    renderer = PartRenderer()

    # Regressor for H36m joints
    J_regressor = torch.from_numpy(np.load(
        config.JOINT_REGRESSOR_H36M)).float()

    save_results = result_file is not None
    # Disable shuffling if you want to save the results
    if save_results:
        shuffle = False
    # Create dataloader for the dataset
    data_loader = DataLoader(dataset,
                             batch_size=batch_size,
                             shuffle=shuffle,
                             num_workers=num_workers)

    # Pose metrics
    # MPJPE and Reconstruction error for the non-parametric and parametric shapes
    mpjpe = np.zeros(len(dataset))
    recon_err = np.zeros(len(dataset))
    mpjpe_smpl = np.zeros(len(dataset))
    recon_err_smpl = np.zeros(len(dataset))

    # Shape metrics
    # Mean per-vertex error
    shape_err = np.zeros(len(dataset))
    shape_err_smpl = np.zeros(len(dataset))

    # Store SMPL parameters
    smpl_pose = np.zeros((len(dataset), 72))
    smpl_betas = np.zeros((len(dataset), 10))
    smpl_camera = np.zeros((len(dataset), 3))
    pred_joints = np.zeros((len(dataset), 17, 3))

    eval_pose = False
    eval_masks = False
    eval_parts = False
    # Choose appropriate evaluation for each dataset
    if dataset_name == 'h36m-p1' or dataset_name == 'h36m-p2' or dataset_name == '3dpw' or dataset_name == 'mpi-inf-3dhp':
        eval_pose = True
    elif dataset_name == 'lsp':
        eval_masks = True
        eval_parts = True
        annot_path = config.DATASET_FOLDERS['upi-s1h']

    joint_mapper_h36m = constants.H36M_TO_J17 if dataset_name == 'mpi-inf-3dhp' else constants.H36M_TO_J14
    joint_mapper_gt = constants.J24_TO_J17 if dataset_name == 'mpi-inf-3dhp' else constants.J24_TO_J14
    # Iterate over the entire dataset
    for step, batch in enumerate(
            tqdm(data_loader, desc='Eval', total=len(data_loader))):
        # Get ground truth annotations from the batch
        gt_pose = batch['pose'].to(device)
        gt_betas = batch['betas'].to(device)
        gt_vertices = smpl_neutral(betas=gt_betas,
                                   body_pose=gt_pose[:, 3:],
                                   global_orient=gt_pose[:, :3]).vertices
        images = batch['img'].to(device)
        gender = batch['gender'].to(device)
        curr_batch_size = images.shape[0]

        with torch.no_grad():
            pred_rotmat, pred_betas, pred_camera = model(images)
            pred_output = smpl_neutral(
                betas=pred_betas,
                body_pose=pred_rotmat[:, 1:],
                global_orient=pred_rotmat[:, 0].unsqueeze(1),
                pose2rot=False)
            pred_vertices = pred_output.vertices

        if save_results:
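            # Pad each 3x3 rotation to a 3x4 matrix because tgm.rotation_matrix_to_angle_axis expects (N, 3, 4) input.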
            rot_pad = torch.tensor([0, 0, 1],
                                   dtype=torch.float32,
                                   device=device).view(1, 3, 1)
            rotmat = torch.cat((pred_rotmat.view(
                -1, 3, 3), rot_pad.expand(curr_batch_size * 24, -1, -1)),
                               dim=-1)
            pred_pose = tgm.rotation_matrix_to_angle_axis(
                rotmat).contiguous().view(-1, 72)
            smpl_pose[step * batch_size:step * batch_size +
                      curr_batch_size, :] = pred_pose.cpu().numpy()
            smpl_betas[step * batch_size:step * batch_size +
                       curr_batch_size, :] = pred_betas.cpu().numpy()
            smpl_camera[step * batch_size:step * batch_size +
                        curr_batch_size, :] = pred_camera.cpu().numpy()

        # 3D pose evaluation
        if eval_pose:
            # Regressor broadcasting
            J_regressor_batch = J_regressor[None, :].expand(
                pred_vertices.shape[0], -1, -1).to(device)
            # Get 14 ground truth joints
            if 'h36m' in dataset_name or 'mpi-inf' in dataset_name:
                gt_keypoints_3d = batch['pose_3d'].cuda()
                gt_keypoints_3d = gt_keypoints_3d[:, joint_mapper_gt, :-1]
            # For 3DPW get the 14 common joints from the rendered shape
            else:
                gt_vertices = smpl_male(global_orient=gt_pose[:, :3],
                                        body_pose=gt_pose[:, 3:],
                                        betas=gt_betas).vertices
                gt_vertices_female = smpl_female(global_orient=gt_pose[:, :3],
                                                 body_pose=gt_pose[:, 3:],
                                                 betas=gt_betas).vertices
                gt_vertices[gender == 1, :, :] = gt_vertices_female[gender ==
                                                                    1, :, :]
                gt_keypoints_3d = torch.matmul(J_regressor_batch, gt_vertices)
                gt_pelvis = gt_keypoints_3d[:, [0], :].clone()
                gt_keypoints_3d = gt_keypoints_3d[:, joint_mapper_h36m, :]
                gt_keypoints_3d = gt_keypoints_3d - gt_pelvis

            # Get 14 predicted joints from the mesh
            pred_keypoints_3d = torch.matmul(J_regressor_batch, pred_vertices)
            if save_results:
                pred_joints[
                    step * batch_size:step * batch_size +
                    curr_batch_size, :, :] = pred_keypoints_3d.cpu().numpy()
            pred_pelvis = pred_keypoints_3d[:, [0], :].clone()
            pred_keypoints_3d = pred_keypoints_3d[:, joint_mapper_h36m, :]
            pred_keypoints_3d = pred_keypoints_3d - pred_pelvis
            # Absolute error (MPJPE)
            error = torch.sqrt(
                ((pred_keypoints_3d -
                  gt_keypoints_3d)**2).sum(dim=-1)).mean(dim=-1).cpu().numpy()
            mpjpe[step * batch_size:step * batch_size +
                  curr_batch_size] = error

            # Reconstruction error
            r_error = reconstruction_error(pred_keypoints_3d.cpu().numpy(),
                                           gt_keypoints_3d.cpu().numpy(),
                                           reduction=None)
            recon_err[step * batch_size:step * batch_size +
                      curr_batch_size] = r_error

        # If mask or part evaluation, render the mask and part images
        if eval_masks or eval_parts:
            mask, parts = renderer(pred_vertices, pred_camera)

        # Print intermediate results during evaluation
        if step % log_freq == log_freq - 1:
            if eval_pose:
                print('MPJPE: ' + str(1000 * mpjpe[:step * batch_size].mean()))
                print('Reconstruction Error: ' +
                      str(1000 * recon_err[:step * batch_size].mean()))
                print()

    # Save reconstructions to a file for further processing
    if save_results:
        np.savez(result_file,
                 pred_joints=pred_joints,
                 pose=smpl_pose,
                 betas=smpl_betas,
                 camera=smpl_camera)
        np.savez('error.npz', mpjpe=mpjpe, recon_err=recon_err)
    # Print final results during evaluation
    print('*** Final Results ***')
    print()
    if eval_pose:
        print('MPJPE: ' + str(1000 * mpjpe.mean()))
        print('Reconstruction Error: ' + str(1000 * recon_err.mean()))
        print()
Example #14
    img = img.astype(np.float32) / 255.
    img = torch.from_numpy(img).permute(2, 0, 1)
    norm_img = normalize_img(img.clone())[None]
    return img, norm_img


if __name__ == '__main__':
    args = parser.parse_args()
    device = torch.device(
        'cuda') if torch.cuda.is_available() else torch.device('cpu')

    # Load trained model
    model = hmr(config.SMPL_MEAN_PARAMS).to(device)
    checkpoint = torch.load(args.trained_model)
    model.load_state_dict(checkpoint['model'], strict=False)
    smpl = SMPL(config.SMPL_MODEL_DIR, batch_size=1,
                create_transl=False).to(device)
    model.eval()
    # Generate rendered image
    renderer = Renderer(focal_length=constants.FOCAL_LENGTH,
                        img_res=constants.IMG_RES,
                        faces=smpl.faces)
    # Process the image and predict the parameters
    img, norm_img = process_image(args.test_image,
                                  args.bbox,
                                  input_res=constants.IMG_RES)
    with torch.no_grad():
        pred_rotmat, pred_betas, pred_camera = model(norm_img.to(device))
        pred_output = smpl(betas=pred_betas,
                           body_pose=pred_rotmat[:, 1:],
                           global_orient=pred_rotmat[:, 0].unsqueeze(1),
                           pose2rot=False)
Example #15
def run_evaluation(model,
                   dataset_name,
                   dataset,
                   mesh,
                   batch_size=32,
                   img_res=224,
                   num_workers=32,
                   shuffle=False,
                   log_freq=50):
    """Run evaluation on the datasets and metrics we report in the paper. """

    renderer = PartRenderer()

    # Create SMPL model
    smpl = SMPL().cuda()

    # Regressor for H36m joints
    J_regressor = torch.from_numpy(np.load(cfg.JOINT_REGRESSOR_H36M)).float()

    # Create dataloader for the dataset
    data_loader = DataLoader(dataset,
                             batch_size=batch_size,
                             shuffle=shuffle,
                             num_workers=num_workers)

    # Transfer model to the GPU
    device = torch.device(
        'cuda') if torch.cuda.is_available() else torch.device('cpu')
    model.to(device)
    model.eval()

    # Pose metrics
    # MPJPE and Reconstruction error for the non-parametric and parametric shapes
    mpjpe = np.zeros(len(dataset))
    recon_err = np.zeros(len(dataset))
    mpjpe_smpl = np.zeros(len(dataset))
    recon_err_smpl = np.zeros(len(dataset))

    # Shape metrics
    # Mean per-vertex error
    shape_err = np.zeros(len(dataset))
    shape_err_smpl = np.zeros(len(dataset))

    # Mask and part metrics
    # Accuracy
    accuracy = 0.
    parts_accuracy = 0.
    # True positive, false positive and false negative
    tp = np.zeros((2, 1))
    fp = np.zeros((2, 1))
    fn = np.zeros((2, 1))
    parts_tp = np.zeros((7, 1))
    parts_fp = np.zeros((7, 1))
    parts_fn = np.zeros((7, 1))
    # Pixel count accumulators
    pixel_count = 0
    parts_pixel_count = 0

    eval_pose = False
    eval_shape = False
    eval_masks = False
    eval_parts = False
    # Choose appropriate evaluation for each dataset
    if dataset_name == 'h36m-p1' or dataset_name == 'h36m-p2':
        eval_pose = True
    elif dataset_name == 'up-3d':
        eval_shape = True
    elif dataset_name == 'lsp':
        eval_masks = True
        eval_parts = True
        annot_path = cfg.DATASET_FOLDERS['upi-s1h']

    # Iterate over the entire dataset
    for step, batch in enumerate(
            tqdm(data_loader, desc='Eval', total=len(data_loader))):
        # Get ground truth annotations from the batch
        gt_pose = batch['pose'].to(device)
        gt_betas = batch['betas'].to(device)
        gt_vertices = smpl(gt_pose, gt_betas)
        images = batch['img'].to(device)
        curr_batch_size = images.shape[0]

        # Run inference
        with torch.no_grad():
            pred_vertices, pred_vertices_smpl, camera, pred_rotmat, pred_betas = model(
                images)

        # 3D pose evaluation
        if eval_pose:
            # Regressor broadcasting
            J_regressor_batch = J_regressor[None, :].expand(
                pred_vertices.shape[0], -1, -1).to(device)

            # Get 14 ground truth joints
            gt_keypoints_3d = batch['pose_3d'].cuda()
            gt_keypoints_3d = gt_keypoints_3d[:, cfg.J24_TO_J14, :-1]

            # Get 14 predicted joints from the non-parametic mesh
            pred_keypoints_3d = torch.matmul(J_regressor_batch, pred_vertices)
            pred_pelvis = pred_keypoints_3d[:, [0], :].clone()
            pred_keypoints_3d = pred_keypoints_3d[:, cfg.H36M_TO_J14, :]
            pred_keypoints_3d = pred_keypoints_3d - pred_pelvis
            # Get 14 predicted joints from the SMPL mesh
            pred_keypoints_3d_smpl = torch.matmul(J_regressor_batch,
                                                  pred_vertices_smpl)
            pred_pelvis_smpl = pred_keypoints_3d_smpl[:, [0], :].clone()
            pred_keypoints_3d_smpl = pred_keypoints_3d_smpl[:,
                                                            cfg.H36M_TO_J14, :]
            pred_keypoints_3d_smpl = pred_keypoints_3d_smpl - pred_pelvis_smpl

            # Compute error metrics

            # Absolute error (MPJPE)
            error = torch.sqrt(
                ((pred_keypoints_3d -
                  gt_keypoints_3d)**2).sum(dim=-1)).mean(dim=-1).cpu().numpy()
            error_smpl = torch.sqrt(
                ((pred_keypoints_3d_smpl -
                  gt_keypoints_3d)**2).sum(dim=-1)).mean(dim=-1).cpu().numpy()
            mpjpe[step * batch_size:step * batch_size +
                  curr_batch_size] = error
            mpjpe_smpl[step * batch_size:step * batch_size +
                       curr_batch_size] = error_smpl

            # Reconstruction error
            r_error = reconstruction_error(pred_keypoints_3d.cpu().numpy(),
                                           gt_keypoints_3d.cpu().numpy(),
                                           reduction=None)
            r_error_smpl = reconstruction_error(
                pred_keypoints_3d_smpl.cpu().numpy(),
                gt_keypoints_3d.cpu().numpy(),
                reduction=None)
            recon_err[step * batch_size:step * batch_size +
                      curr_batch_size] = r_error
            recon_err_smpl[step * batch_size:step * batch_size +
                           curr_batch_size] = r_error_smpl

        # Shape evaluation (Mean per-vertex error)
        if eval_shape:
            se = torch.sqrt(
                ((pred_vertices -
                  gt_vertices)**2).sum(dim=-1)).mean(dim=-1).cpu().numpy()
            se_smpl = torch.sqrt(
                ((pred_vertices_smpl -
                  gt_vertices)**2).sum(dim=-1)).mean(dim=-1).cpu().numpy()
            shape_err[step * batch_size:step * batch_size +
                      curr_batch_size] = se
            shape_err_smpl[step * batch_size:step * batch_size +
                           curr_batch_size] = se_smpl

        # If mask or part evaluation, render the mask and part images
        if eval_masks or eval_parts:
            mask, parts = renderer(pred_vertices, camera)

        # Mask evaluation (for LSP)
        if eval_masks:
            center = batch['center'].cpu().numpy()
            scale = batch['scale'].cpu().numpy()
            # Dimensions of original image
            orig_shape = batch['orig_shape'].cpu().numpy()
            for i in range(curr_batch_size):
                # After rendering, convert the image back to the original resolution
                pred_mask = uncrop(mask[i].cpu().numpy(), center[i], scale[i],
                                   orig_shape[i]) > 0
                # Load gt mask
                gt_mask = cv2.imread(
                    os.path.join(annot_path, batch['maskname'][i]), 0) > 0
                # Evaluation consistent with the original UP-3D code
                accuracy += (gt_mask == pred_mask).sum()
                pixel_count += np.prod(np.array(gt_mask.shape))
                for c in range(2):
                    cgt = gt_mask == c
                    cpred = pred_mask == c
                    tp[c] += (cgt & cpred).sum()
                    fp[c] += (~cgt & cpred).sum()
                    fn[c] += (cgt & ~cpred).sum()
                f1 = 2 * tp / (2 * tp + fp + fn)

        # Part evaluation (for LSP)
        if eval_parts:
            center = batch['center'].cpu().numpy()
            scale = batch['scale'].cpu().numpy()
            orig_shape = batch['orig_shape'].cpu().numpy()
            for i in range(curr_batch_size):
                pred_parts = uncrop(parts[i].cpu().numpy().astype(np.uint8),
                                    center[i], scale[i], orig_shape[i])
                # Load gt part segmentation
                gt_parts = cv2.imread(
                    os.path.join(annot_path, batch['partname'][i]), 0)
                # Evaluation consistent with the original UP-3D code
                # 6 parts + background
                for c in range(7):
                    cgt = gt_parts == c
                    cpred = pred_parts == c
                    cpred[gt_parts == 255] = 0
                    parts_tp[c] += (cgt & cpred).sum()
                    parts_fp[c] += (~cgt & cpred).sum()
                    parts_fn[c] += (cgt & ~cpred).sum()
                gt_parts[gt_parts == 255] = 0
                pred_parts[pred_parts == 255] = 0
                parts_f1 = 2 * parts_tp / (2 * parts_tp + parts_fp + parts_fn)
                parts_accuracy += (gt_parts == pred_parts).sum()
                parts_pixel_count += np.prod(np.array(gt_parts.shape))

        # Print intermediate results during evaluation
        if step % log_freq == log_freq - 1:
            if eval_pose:
                print('MPJPE (NonParam): ' +
                      str(1000 * mpjpe[:step * batch_size].mean()))
                print('Reconstruction Error (NonParam): ' +
                      str(1000 * recon_err[:step * batch_size].mean()))
                print('MPJPE (Param): ' +
                      str(1000 * mpjpe_smpl[:step * batch_size].mean()))
                print('Reconstruction Error (Param): ' +
                      str(1000 * recon_err_smpl[:step * batch_size].mean()))
                print()
            if eval_shape:
                print('Shape Error (NonParam): ' +
                      str(1000 * shape_err[:step * batch_size].mean()))
                print('Shape Error (Param): ' +
                      str(1000 * shape_err_smpl[:step * batch_size].mean()))
                print()
            if eval_masks:
                print('Accuracy: ', accuracy / pixel_count)
                print('F1: ', f1.mean())
                print()
            if eval_parts:
                print('Parts Accuracy: ', parts_accuracy / parts_pixel_count)
                print('Parts F1 (BG): ', parts_f1[[0, 1, 2, 3, 4, 5,
                                                   6]].mean())
                print()

    # Print final results during evaluation
    print('*** Final Results ***')
    print()
    if eval_pose:
        print('MPJPE (NonParam): ' + str(1000 * mpjpe.mean()))
        print('Reconstruction Error (NonParam): ' +
              str(1000 * recon_err.mean()))
        print('MPJPE (Param): ' + str(1000 * mpjpe_smpl.mean()))
        print('Reconstruction Error (Param): ' +
              str(1000 * recon_err_smpl.mean()))
        print()
    if eval_shape:
        print('Shape Error (NonParam): ' + str(1000 * shape_err.mean()))
        print('Shape Error (Param): ' + str(1000 * shape_err_smpl.mean()))
        print()
    if eval_masks:
        print('Accuracy: ', accuracy / pixel_count)
        print('F1: ', f1.mean())
        print()
    if eval_parts:
        print('Parts Accuracy: ', parts_accuracy / parts_pixel_count)
        print('Parts F1 (BG): ', parts_f1[[0, 1, 2, 3, 4, 5, 6]].mean())
        print()
Example #16
    args = parser.parse_args()
    img_path = args.img_path
    checkpoint_path = args.checkpoint

    normalize_img = Normalize(mean=constants.IMG_NORM_MEAN,
                              std=constants.IMG_NORM_STD)
    device = torch.device(
        'cuda') if torch.cuda.is_available() else torch.device('cpu')

    hmr_model = hmr(config.SMPL_MEAN_PARAMS)
    checkpoint = torch.load(checkpoint_path)
    hmr_model.load_state_dict(checkpoint, strict=False)
    hmr_model.eval()
    hmr_model.to(device)

    smpl_neutral = SMPL(config.SMPL_MODEL_DIR, create_transl=False).to(device)
    img_renderer = Renderer(focal_length=constants.FOCAL_LENGTH,
                            img_res=constants.IMG_RES,
                            faces=smpl_neutral.faces)

    img = imageio.imread(img_path)
    im_size = img.shape[0]
    im_scale = size_to_scale(im_size)
    img_up = scipy.misc.imresize(img, [224, 224])
    img_up = np.transpose(img_up.astype('float32'), (2, 0, 1)) / 255.0
    img_up = normalize_img(torch.from_numpy(img_up).float())
    images = img_up[None].to(device)

    with torch.no_grad():
        pred_rotmat, pred_betas, pred_camera, _ = hmr_model(images,
                                                            scale=im_scale)
Example #17
    def init_fn(self):
        # create training dataset
        self.train_ds = create_dataset(self.options.dataset,
                                       self.options,
                                       use_IUV=True)
        self.dp_res = int(self.options.img_res // (2**self.options.warp_level))

        self.CNet = DPNet(warp_lv=self.options.warp_level,
                          norm_type=self.options.norm_type).to(self.device)

        self.LNet = get_LNet(self.options).to(self.device)
        self.smpl = SMPL().to(self.device)
        self.female_smpl = SMPL(cfg.FEMALE_SMPL_FILE).to(self.device)
        self.male_smpl = SMPL(cfg.MALE_SMPL_FILE).to(self.device)

        uv_res = self.options.uv_res
        self.uv_type = self.options.uv_type
        self.sampler = Index_UV_Generator(UV_height=uv_res,
                                          UV_width=-1,
                                          uv_type=self.uv_type).to(self.device)

        weight_file = 'data/weight_p24_h{:04d}_w{:04d}_{}.npy'.format(
            uv_res, uv_res, self.uv_type)
        if not os.path.exists(weight_file):
            cal_uv_weight(self.sampler, weight_file)

        uv_weight = torch.from_numpy(np.load(weight_file)).to(
            self.device).float()
        uv_weight = uv_weight * self.sampler.mask.to(uv_weight.device).float()
        uv_weight = uv_weight / uv_weight.mean()
        self.uv_weight = uv_weight[None, :, :, None]
        self.tv_factor = (uv_res - 1) * (uv_res - 1)

        # Set up the optimizer
        if self.options.stage == 'dp':
            self.optimizer = torch.optim.Adam(
                params=list(self.CNet.parameters()),
                lr=self.options.lr,
                betas=(self.options.adam_beta1, 0.999),
                weight_decay=self.options.wd)
            self.models_dict = {'CNet': self.CNet}
            self.optimizers_dict = {'optimizer': self.optimizer}

        else:
            self.optimizer = torch.optim.Adam(
                params=list(self.LNet.parameters()) +
                list(self.CNet.parameters()),
                lr=self.options.lr,
                betas=(self.options.adam_beta1, 0.999),
                weight_decay=self.options.wd)
            self.models_dict = {'CNet': self.CNet, 'LNet': self.LNet}
            self.optimizers_dict = {'optimizer': self.optimizer}

        # Create loss functions
        self.criterion_shape = nn.L1Loss().to(self.device)
        self.criterion_uv = nn.L1Loss().to(self.device)
        self.criterion_keypoints = nn.MSELoss(reduction='none').to(self.device)
        self.criterion_keypoints_3d = nn.L1Loss(reduction='none').to(
            self.device)
        self.criterion_regr = nn.MSELoss().to(self.device)

        # LSP indices from full list of keypoints
        self.to_lsp = list(range(14))
        self.renderer = Renderer(faces=self.smpl.faces.cpu().numpy())

        # Optionally start training from a pretrained checkpoint
        # Note that this is different from resuming training
        # For the latter use --resume
        if self.options.pretrained_checkpoint is not None:
            self.load_pretrained(
                checkpoint_file=self.options.pretrained_checkpoint)
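
    # --- Hedged sketch (assumed, not from the original trainer) ---
    # One plausible way the per-texel weights and TV normalisation prepared in
    # init_fn might be consumed: a weighted L1 loss over a predicted UV location
    # map plus a total-variation regulariser. `pred_uv_map` / `gt_uv_map` are
    # hypothetical (B, H, W, 3) tensors, not names defined in this snippet.
    def uv_map_losses(self, pred_uv_map, gt_uv_map):
        # Weighted L1 over texels, emphasising valid (masked-in) regions
        loss_uv = (self.uv_weight * (pred_uv_map - gt_uv_map).abs()).mean()
        # Total-variation smoothness term, normalised by the number of texel pairs
        tv_h = (pred_uv_map[:, 1:, :, :] - pred_uv_map[:, :-1, :, :]).abs().sum()
        tv_w = (pred_uv_map[:, :, 1:, :] - pred_uv_map[:, :, :-1, :]).abs().sum()
        loss_tv = (tv_h + tv_w) / self.tv_factor
        return loss_uv, loss_tv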
Ejemplo n.º 18
0
def run_evaluation(model, dataset_name, dataset,
                   mesh, batch_size=32, img_res=224,
                   num_workers=32, shuffle=False, log_freq=50):
    """Run evaluation on the datasets and metrics we report in the paper. """
    
    renderer = PartRenderer()
    
    # Select the evaluation device once so the SMPL model and the network share it
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

    # Create SMPL model
    smpl = SMPL().to(device)

    # Create dataloader for the dataset
    data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle, num_workers=num_workers)

    # Transfer model to the device
    model.to(device)
    model.eval()
    
    # 2D pose metrics
    pose_2d_m = np.zeros(len(dataset))
    pose_2d_m_smpl = np.zeros(len(dataset))

    # Shape metrics
    # Mean per-vertex error
    shape_err = np.zeros(len(dataset))
    shape_err_smpl = np.zeros(len(dataset))

    eval_2d_pose = False
    eval_shape = False

    # Choose appropriate evaluation for each dataset
    if dataset_name == 'up-3d':
        eval_shape = True
    elif dataset_name == 'lsp':
        eval_2d_pose = True

    # Iterate over the entire dataset
    for step, batch in enumerate(tqdm(data_loader, desc='Eval', total=len(data_loader))):
        # Get ground truth annotations from the batch
        gt_pose = batch['pose'].to(device)
        gt_betas = batch['betas'].to(device)
        gt_vertices = smpl(gt_pose, gt_betas)
        images = batch['img'].to(device)
        curr_batch_size = images.shape[0]
        gt_keypoints_2d = batch['keypoints'].cpu().numpy()
        
        # Run inference
        with torch.no_grad():
            pred_vertices, pred_vertices_smpl, camera, pred_rotmat, pred_betas = model(images)

        # Render the predicted mask and part images (kept from the original
        # silhouette evaluation; they are not used further in this 2D-pose evaluation)
        if eval_2d_pose:
            mask, parts = renderer(pred_vertices, camera)

        # 2D pose evaluation (for LSP)
        if eval_2d_pose:
            # Get 3D and projected 2D keypoints from the regressed shape (once per batch)
            pred_keypoints_3d = smpl.get_joints(pred_vertices)
            pred_keypoints_2d = orthographic_projection(pred_keypoints_3d, camera)[:, :, :2]
            pred_keypoints_3d_smpl = smpl.get_joints(pred_vertices_smpl)
            pred_keypoints_2d_smpl = orthographic_projection(pred_keypoints_3d_smpl, camera.detach())[:, :, :2]
            for i in range(curr_batch_size):
                gt_kp = gt_keypoints_2d[i, list(range(14))]
                pred_kp = pred_keypoints_2d.cpu().numpy()[i, list(range(14))]
                pred_kp_smpl = pred_keypoints_2d_smpl.cpu().numpy()[i, list(range(14))]
                # Sum of squared differences over the 14 LSP joints
                loss_2d_pose = np.sum((gt_kp[:, :2] - pred_kp)**2)
                loss_2d_pose_smpl = np.sum((gt_kp[:, :2] - pred_kp_smpl)**2)

                pose_2d_m[step * batch_size + i] = loss_2d_pose
                pose_2d_m_smpl[step * batch_size + i] = loss_2d_pose_smpl

        # Shape evaluation (Mean per-vertex error)
        if eval_shape:
            se = torch.sqrt(((pred_vertices - gt_vertices) ** 2).sum(dim=-1)).mean(dim=-1).cpu().numpy()
            se_smpl = torch.sqrt(((pred_vertices_smpl - gt_vertices) ** 2).sum(dim=-1)).mean(dim=-1).cpu().numpy()
            shape_err[step * batch_size:step * batch_size + curr_batch_size] = se
            shape_err_smpl[step * batch_size:step * batch_size + curr_batch_size] = se_smpl

        # Print intermediate results during evaluation
        if step % log_freq == log_freq - 1:
            if eval_2d_pose:
                print('2D keypoints (NonParam): ' + str(1000 * pose_2d_m[:step * batch_size].mean()))
                print('2D keypoints (Param): ' + str(1000 * pose_2d_m_smpl[:step * batch_size].mean()))
                print()
            if eval_shape:
                print('Shape Error (NonParam): ' + str(1000 * shape_err[:step * batch_size].mean()))
                print('Shape Error (Param): ' + str(1000 * shape_err_smpl[:step * batch_size].mean()))
                print()

    # Print and store final results during evaluation
    print('*** Final Results ***')
    print()
    if eval_2d_pose:
        print('2D keypoints (NonParam): ' + str(1000 * pose_2d_m.mean()))
        print('2D keypoints (Param): ' + str(1000 * pose_2d_m_smpl.mean()))
        print()
        # store results
        #np.savez("../eval-output/CMR_no_extra_lsp_model_alt.npz", imgnames = dataset.imgname, kp_2d_err_graph = pose_2d_m, kp_2d_err_smpl = pose_2d_m_smpl)
    if eval_shape:
        print('Shape Error (NonParam): ' + str(1000 * shape_err.mean()))
        print('Shape Error (Param): ' + str(1000 * shape_err_smpl.mean()))
        print()
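# --- Hedged sketch (assumed, not the repository's own implementation) ---
# A weak-perspective projection consistent with how orthographic_projection(points,
# camera) is called above, with camera = [s, tx, ty]. The name carries a _sketch
# suffix to make clear it is an illustration, not the original helper.
def orthographic_projection_sketch(X, camera):
    """X: (B, N, 3) 3D points, camera: (B, 3) weak-perspective params -> (B, N, 2)."""
    camera = camera.view(-1, 1, 3)
    # Translate in the image plane, then scale by the per-sample factor s
    X_2d = camera[:, :, 0:1] * (X[:, :, :2] + camera[:, :, 1:])
    return X_2d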
Ejemplo n.º 19
0
def run_evaluation(model, dataset_name, dataset, result_file,
                   batch_size=32, img_res=224, 
                   num_workers=32, shuffle=False, log_freq=50):
    """Run evaluation on the datasets and metrics we report in the paper. """

    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

    # Transfer model to the GPU
    model.to(device)

    # Load SMPL model
    smpl_neutral = SMPL(config.SMPL_MODEL_DIR,
                        create_transl=False).to(device)
    smpl_male = SMPL(config.SMPL_MODEL_DIR,
                     gender='male',
                     create_transl=False).to(device)
    smpl_female = SMPL(config.SMPL_MODEL_DIR,
                       gender='female',
                       create_transl=False).to(device)
    
    renderer = PartRenderer()
    
    # Regressor for H36m joints
    J_regressor = torch.from_numpy(np.load(config.JOINT_REGRESSOR_H36M)).float()
    
    save_results = result_file is not None
    # Disable shuffling if you want to save the results
    if save_results:
        shuffle=False
    # Create dataloader for the dataset
    data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle, num_workers=num_workers)
    
    # Pose metrics
    # MPJPE and Reconstruction error for the non-parametric and parametric shapes
    mpjpe = np.zeros(len(dataset))
    recon_err = np.zeros(len(dataset))
    mpjpe_smpl = np.zeros(len(dataset))
    recon_err_smpl = np.zeros(len(dataset))

    # Shape metrics
    # Mean per-vertex error
    shape_err = np.zeros(len(dataset))
    shape_err_smpl = np.zeros(len(dataset))

    # Mask and part metrics
    # Accuracy
    accuracy = 0.
    parts_accuracy = 0.
    # True positive, false positive and false negative
    tp = np.zeros((2,1))
    fp = np.zeros((2,1))
    fn = np.zeros((2,1))
    parts_tp = np.zeros((7,1))
    parts_fp = np.zeros((7,1))
    parts_fn = np.zeros((7,1))
    # Pixel count accumulators
    pixel_count = 0
    parts_pixel_count = 0

    # Store SMPL parameters
    smpl_pose = np.zeros((len(dataset), 72))
    smpl_betas = np.zeros((len(dataset), 10))
    smpl_camera = np.zeros((len(dataset), 3))
    pred_joints = np.zeros((len(dataset), 17, 3))

    eval_pose = False
    eval_masks = False
    eval_parts = False
    # Choose appropriate evaluation for each dataset
    if dataset_name == 'h36m-p1' or dataset_name == 'h36m-p2' or dataset_name == '3dpw' or dataset_name == 'mpi-inf-3dhp':
        eval_pose = True
    elif dataset_name == 'lsp':
        eval_masks = True
        eval_parts = True
        annot_path = config.DATASET_FOLDERS['upi-s1h']

    joint_mapper_h36m = constants.H36M_TO_J17 if dataset_name == 'mpi-inf-3dhp' else constants.H36M_TO_J14
    joint_mapper_gt = constants.J24_TO_J17 if dataset_name == 'mpi-inf-3dhp' else constants.J24_TO_J14
    # Iterate over the entire dataset
    for step, batch in enumerate(tqdm(data_loader, desc='Eval', total=len(data_loader))):
        # Get ground truth annotations from the batch
        gt_pose = batch['pose'].to(device)
        gt_betas = batch['betas'].to(device)
        gt_vertices = smpl_neutral(betas=gt_betas, body_pose=gt_pose[:, 3:], global_orient=gt_pose[:, :3]).vertices
        images = batch['img'].to(device)
        gender = batch['gender'].to(device)
        curr_batch_size = images.shape[0]
        
        with torch.no_grad():
            pred_rotmat, pred_betas, pred_camera = model(images)
            pred_output = smpl_neutral(betas=pred_betas, body_pose=pred_rotmat[:,1:], global_orient=pred_rotmat[:,0].unsqueeze(1), pose2rot=False)
            pred_vertices = pred_output.vertices

        if save_results:
            rot_pad = torch.tensor([0,0,1], dtype=torch.float32, device=device).view(1,3,1)
            rotmat = torch.cat((pred_rotmat.view(-1, 3, 3), rot_pad.expand(curr_batch_size * 24, -1, -1)), dim=-1)
            pred_pose = tgm.rotation_matrix_to_angle_axis(rotmat).contiguous().view(-1, 72)
            smpl_pose[step * batch_size:step * batch_size + curr_batch_size, :] = pred_pose.cpu().numpy()
            smpl_betas[step * batch_size:step * batch_size + curr_batch_size, :]  = pred_betas.cpu().numpy()
            smpl_camera[step * batch_size:step * batch_size + curr_batch_size, :]  = pred_camera.cpu().numpy()
            
        # 3D pose evaluation
        if eval_pose:
            # Regressor broadcasting
            J_regressor_batch = J_regressor[None, :].expand(pred_vertices.shape[0], -1, -1).to(device)
            # Get 14 ground truth joints
            if 'h36m' in dataset_name or 'mpi-inf' in dataset_name:
                gt_keypoints_3d = batch['pose_3d'].cuda()
                gt_keypoints_3d = gt_keypoints_3d[:, joint_mapper_gt, :-1]
            # For 3DPW get the 14 common joints from the rendered shape
            else:
                gt_vertices = smpl_male(global_orient=gt_pose[:,:3], body_pose=gt_pose[:,3:], betas=gt_betas).vertices 
                gt_vertices_female = smpl_female(global_orient=gt_pose[:,:3], body_pose=gt_pose[:,3:], betas=gt_betas).vertices 
                gt_vertices[gender==1, :, :] = gt_vertices_female[gender==1, :, :]
                gt_keypoints_3d = torch.matmul(J_regressor_batch, gt_vertices)
                gt_pelvis = gt_keypoints_3d[:, [0],:].clone()
                gt_keypoints_3d = gt_keypoints_3d[:, joint_mapper_h36m, :]
                gt_keypoints_3d = gt_keypoints_3d - gt_pelvis 


            # Get 14 predicted joints from the mesh
            pred_keypoints_3d = torch.matmul(J_regressor_batch, pred_vertices)
            if save_results:
                pred_joints[step * batch_size:step * batch_size + curr_batch_size, :, :]  = pred_keypoints_3d.cpu().numpy()
            pred_pelvis = pred_keypoints_3d[:, [0],:].clone()
            pred_keypoints_3d = pred_keypoints_3d[:, joint_mapper_h36m, :]
            pred_keypoints_3d = pred_keypoints_3d - pred_pelvis 

            # Absolute error (MPJPE)
            error = torch.sqrt(((pred_keypoints_3d - gt_keypoints_3d) ** 2).sum(dim=-1)).mean(dim=-1).cpu().numpy()
            mpjpe[step * batch_size:step * batch_size + curr_batch_size] = error

            # Reconstruction error (Procrustes-aligned)
            r_error = reconstruction_error(pred_keypoints_3d.cpu().numpy(), gt_keypoints_3d.cpu().numpy(), reduction=None)
            recon_err[step * batch_size:step * batch_size + curr_batch_size] = r_error


        # If mask or part evaluation, render the mask and part images
        if eval_masks or eval_parts:
            mask, parts = renderer(pred_vertices, pred_camera)

        # Mask evaluation (for LSP)
        if eval_masks:
            center = batch['center'].cpu().numpy()
            scale = batch['scale'].cpu().numpy()
            # Dimensions of original image
            orig_shape = batch['orig_shape'].cpu().numpy()
            for i in range(curr_batch_size):
                # After rendering, convert the image back to the original resolution
                pred_mask = uncrop(mask[i].cpu().numpy(), center[i], scale[i], orig_shape[i]) > 0
                # Load gt mask
                gt_mask = cv2.imread(os.path.join(annot_path, batch['maskname'][i]), 0) > 0
                # Evaluation consistent with the original UP-3D code
                accuracy += (gt_mask == pred_mask).sum()
                pixel_count += np.prod(np.array(gt_mask.shape))
                for c in range(2):
                    cgt = gt_mask == c
                    cpred = pred_mask == c
                    tp[c] += (cgt & cpred).sum()
                    fp[c] +=  (~cgt & cpred).sum()
                    fn[c] +=  (cgt & ~cpred).sum()
                f1 = 2 * tp / (2 * tp + fp + fn)

        # Part evaluation (for LSP)
        if eval_parts:
            center = batch['center'].cpu().numpy()
            scale = batch['scale'].cpu().numpy()
            orig_shape = batch['orig_shape'].cpu().numpy()
            for i in range(curr_batch_size):
                pred_parts = uncrop(parts[i].cpu().numpy().astype(np.uint8), center[i], scale[i], orig_shape[i])
                # Load gt part segmentation
                gt_parts = cv2.imread(os.path.join(annot_path, batch['partname'][i]), 0)
                # Evaluation consistent with the original UP-3D code
                # 6 parts + background
                for c in range(7):
                    cgt = gt_parts == c
                    cpred = pred_parts == c
                    cpred[gt_parts == 255] = 0
                    parts_tp[c] += (cgt & cpred).sum()
                    parts_fp[c] += (~cgt & cpred).sum()
                    parts_fn[c] += (cgt & ~cpred).sum()
                gt_parts[gt_parts == 255] = 0
                pred_parts[pred_parts == 255] = 0
                parts_f1 = 2 * parts_tp / (2 * parts_tp + parts_fp + parts_fn)
                parts_accuracy += (gt_parts == pred_parts).sum()
                parts_pixel_count += np.prod(np.array(gt_parts.shape))

        # Print intermediate results during evaluation
        if step % log_freq == log_freq - 1:
            if eval_pose:
                print('MPJPE: ' + str(1000 * mpjpe[:step * batch_size].mean()))
                print('Reconstruction Error: ' + str(1000 * recon_err[:step * batch_size].mean()))
                print()
            if eval_masks:
                print('Accuracy: ', accuracy / pixel_count)
                print('F1: ', f1.mean())
                print()
            if eval_parts:
                print('Parts Accuracy: ', parts_accuracy / parts_pixel_count)
                print('Parts F1 (BG): ', parts_f1[[0,1,2,3,4,5,6]].mean())
                print()

    # Save reconstructions to a file for further processing
    if save_results:
        np.savez(result_file, pred_joints=pred_joints, pose=smpl_pose, betas=smpl_betas, camera=smpl_camera)
    # Print final results during evaluation
    print('*** Final Results ***')
    print()
    if eval_pose:
        print('MPJPE: ' + str(1000 * mpjpe.mean()))
        print('Reconstruction Error: ' + str(1000 * recon_err.mean()))
        print()
    if eval_masks:
        print('Accuracy: ', accuracy / pixel_count)
        print('F1: ', f1.mean())
        print()
    if eval_parts:
        print('Parts Accuracy: ', parts_accuracy / parts_pixel_count)
        print('Parts F1 (BG): ', parts_f1[[0,1,2,3,4,5,6]].mean())
        print()
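# --- Hedged sketch (assumed, not the repository's own implementation) ---
# The "Reconstruction Error" reported above is typically a Procrustes-aligned
# MPJPE: each predicted joint set is rigidly aligned (scale, rotation,
# translation) to the ground truth before the mean joint distance is taken.
# Relies on numpy imported as np, as in the snippets above.
def reconstruction_error_sketch(pred, gt):
    """pred, gt: (N, J, 3) numpy arrays; returns per-sample errors of shape (N,)."""
    errors = np.zeros(pred.shape[0])
    for i in range(pred.shape[0]):
        S1, S2 = pred[i].T, gt[i].T                      # (3, J)
        mu1 = S1.mean(axis=1, keepdims=True)
        mu2 = S2.mean(axis=1, keepdims=True)
        X1, X2 = S1 - mu1, S2 - mu2
        var1 = (X1 ** 2).sum()
        K = X1.dot(X2.T)
        U, s, Vt = np.linalg.svd(K)
        Z = np.eye(3)
        Z[-1, -1] = np.sign(np.linalg.det(U.dot(Vt)))    # guard against reflections
        R = Vt.T.dot(Z).dot(U.T)
        scale = np.trace(R.dot(K)) / var1
        t = mu2 - scale * R.dot(mu1)
        S1_hat = scale * R.dot(S1) + t                   # aligned prediction
        errors[i] = np.sqrt(((S1_hat - S2) ** 2).sum(axis=0)).mean()
    return errors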
Ejemplo n.º 20
0
                     extract_img=True)
        h36m_extract(cfg.H36M_ROOT_ORIGIN,
                     out_path,
                     protocol=2,
                     extract_img=False)

        # LSP dataset preprocessing (test set)
        lsp_dataset_extract(cfg.LSP_ROOT, out_path)

        # UP-3D dataset preprocessing (lsp_test set)
        up_3d_extract(cfg.UP_3D_ROOT, out_path, 'lsp_test')

    if args.gt_iuv:
        device = torch.device(
            'cuda') if torch.cuda.is_available() else torch.device('cpu')
        smpl = SMPL().to(device)
        uv_type = args.uv_type

        if uv_type == 'SMPL':
            data = objfile.read_obj_full(
                'data/uv_sampler/smpl_fbx_template.obj')
        elif uv_type == 'BF':
            data = objfile.read_obj_full(
                'data/uv_sampler/smpl_boundry_free_template.obj')

        vt = np.array(data['texcoords'])
        face = [f[0] for f in data['faces']]
        face = np.array(face) - 1
        vt_face = [f[2] for f in data['faces']]
        vt_face = np.array(vt_face) - 1
        renderer = UVRenderer(faces=face,
Ejemplo n.º 21
0
def run_evaluation(model, dataset_name, dataset, result_file,
                   batch_size=32, img_res=224,
                   num_workers=32, shuffle=False, log_freq=50, bVerbose=True):
    """Run evaluation on the datasets and metrics we report in the paper. """

    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    # # Transfer model to the GPU
    # model.to(device)

    # Load SMPL models (cached in module-level globals so repeated calls reuse them)
    global g_smpl_neutral, g_smpl_male, g_smpl_female
    if g_smpl_neutral is None:
        g_smpl_neutral = SMPL(config.SMPL_MODEL_DIR,
                              create_transl=False).to(device)
        g_smpl_male = SMPL(config.SMPL_MODEL_DIR,
                           gender='male',
                           create_transl=False).to(device)
        g_smpl_female = SMPL(config.SMPL_MODEL_DIR,
                             gender='female',
                             create_transl=False).to(device)

    smpl_neutral = g_smpl_neutral
    smpl_male = g_smpl_male
    smpl_female = g_smpl_female

    
    # renderer = PartRenderer()
    
    # Regressor for H36m joints
    J_regressor = torch.from_numpy(np.load(config.JOINT_REGRESSOR_H36M)).float()
    
    save_results = result_file is not None
    # Disable shuffling if you want to save the results
    if save_results:
        shuffle=False
    # Create dataloader for the dataset
    data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle, num_workers=num_workers)
    
    # Pose metrics
    # MPJPE and Reconstruction error for the non-parametric and parametric shapes
    quant_mpjpe = {}        # per-sequence MPJPE values
    quant_recon_err = {}    # per-sequence reconstruction errors
    mpjpe = np.zeros(len(dataset))
    recon_err = np.zeros(len(dataset))

    mpjpe_smpl = np.zeros(len(dataset))
    recon_err_smpl = np.zeros(len(dataset))

    # Shape metrics
    # Mean per-vertex error
    shape_err = np.zeros(len(dataset))
    shape_err_smpl = np.zeros(len(dataset))

    # Mask and part metrics
    # Accuracy
    accuracy = 0.
    parts_accuracy = 0.
    # True positive, false positive and false negative
    tp = np.zeros((2,1))
    fp = np.zeros((2,1))
    fn = np.zeros((2,1))
    parts_tp = np.zeros((7,1))
    parts_fp = np.zeros((7,1))
    parts_fn = np.zeros((7,1))
    # Pixel count accumulators
    pixel_count = 0
    parts_pixel_count = 0

    # Store SMPL parameters
    output_pred_pose = np.zeros((len(dataset), 72))
    output_pred_betas = np.zeros((len(dataset), 10))
    output_pred_camera = np.zeros((len(dataset), 3))
    output_pred_joints = np.zeros((len(dataset), 14, 3))

    output_gt_pose = np.zeros((len(dataset), 72))
    output_gt_betas = np.zeros((len(dataset), 10))
    output_gt_joints = np.zeros((len(dataset), 14, 3))

    output_error_MPJPE = np.zeros((len(dataset)))
    output_error_recon = np.zeros((len(dataset)))

    output_imgNames = []
    output_cropScale = np.zeros((len(dataset)))
    output_cropCenter = np.zeros((len(dataset), 2))
    outputStartPointer = 0


    eval_pose = False
    eval_masks = False
    eval_parts = False
    # Choose appropriate evaluation for each dataset
    if dataset_name == 'h36m-p1' or dataset_name == 'h36m-p2' or dataset_name == '3dpw' or dataset_name == 'mpi-inf-3dhp':
        eval_pose = True
    elif dataset_name == 'lsp':
        eval_masks = True
        eval_parts = True
        annot_path = config.DATASET_FOLDERS['upi-s1h']

    joint_mapper_h36m = constants.H36M_TO_J17 if dataset_name == 'mpi-inf-3dhp' else constants.H36M_TO_J14
    joint_mapper_gt = constants.J24_TO_J17 if dataset_name == 'mpi-inf-3dhp' else constants.J24_TO_J14
    # Iterate over the entire dataset
    for step, batch in enumerate(tqdm(data_loader, desc='Eval', total=len(data_loader))):
        # Get ground truth annotations from the batch

        imgName = batch['imgname'][0]
        seqName = os.path.basename(os.path.dirname(imgName))

        gt_pose = batch['pose'].to(device)
        gt_betas = batch['betas'].to(device)
        gt_vertices = smpl_neutral(betas=gt_betas, body_pose=gt_pose[:, 3:], global_orient=gt_pose[:, :3]).vertices
        images = batch['img'].to(device)
        gender = batch['gender'].to(device)
        curr_batch_size = images.shape[0]
        
        with torch.no_grad():
            pred_rotmat, pred_betas, pred_camera = model(images)
            pred_output = smpl_neutral(betas=pred_betas, body_pose=pred_rotmat[:,1:], global_orient=pred_rotmat[:,0].unsqueeze(1), pose2rot=False)
            pred_vertices = pred_output.vertices

        
    
        # 3D pose evaluation
        if eval_pose:
            # Regressor broadcasting
            J_regressor_batch = J_regressor[None, :].expand(pred_vertices.shape[0], -1, -1).to(device)
            # Get 14 ground truth joints
            if 'h36m' in dataset_name or 'mpi-inf' in dataset_name:
                gt_keypoints_3d = batch['pose_3d'].cuda()
                gt_keypoints_3d = gt_keypoints_3d[:, joint_mapper_gt, :-1]
            # For 3DPW get the 14 common joints from the rendered shape
            else:
                gt_vertices = smpl_male(global_orient=gt_pose[:,:3], body_pose=gt_pose[:,3:], betas=gt_betas).vertices 
                gt_vertices_female = smpl_female(global_orient=gt_pose[:,:3], body_pose=gt_pose[:,3:], betas=gt_betas).vertices 
                gt_vertices[gender==1, :, :] = gt_vertices_female[gender==1, :, :]
                gt_keypoints_3d = torch.matmul(J_regressor_batch, gt_vertices)
                gt_pelvis = gt_keypoints_3d[:, [0],:].clone()
                gt_keypoints_3d = gt_keypoints_3d[:, joint_mapper_h36m, :]
                gt_keypoints_3d = gt_keypoints_3d - gt_pelvis             

                if False:
                    from renderer import viewer2D
                    from renderer import glViewer
                    import humanModelViewer
                    batchNum = gt_pose.shape[0]
                    for i in range(batchNum):
                        smpl_face = humanModelViewer.GetSMPLFace()
                        meshes_gt = {'ver': gt_vertices[i].cpu().numpy()*100, 'f': smpl_face}
                        meshes_pred = {'ver': pred_vertices[i].cpu().numpy()*100, 'f': smpl_face}

                        glViewer.setMeshData([meshes_gt, meshes_pred], bComputeNormal= True)
                        glViewer.show(5)

            # Get 14 predicted joints from the mesh
            pred_keypoints_3d = torch.matmul(J_regressor_batch, pred_vertices)
            # if save_results:
            #     pred_joints[step * batch_size:step * batch_size + curr_batch_size, :, :]  = pred_keypoints_3d.cpu().numpy()
            pred_pelvis = pred_keypoints_3d[:, [0],:].clone()
            pred_keypoints_3d = pred_keypoints_3d[:, joint_mapper_h36m, :]
            pred_keypoints_3d = pred_keypoints_3d - pred_pelvis 

            #Visualize GT mesh and SPIN output mesh
            if False:
                from renderer import viewer2D
                from renderer import glViewer
                import humanModelViewer

                gt_keypoints_3d_vis = gt_keypoints_3d.cpu().numpy()
                gt_keypoints_3d_vis = np.reshape(gt_keypoints_3d_vis, (gt_keypoints_3d_vis.shape[0],-1))        #N,14x3
                gt_keypoints_3d_vis = np.swapaxes(gt_keypoints_3d_vis, 0,1) *100

                pred_keypoints_3d_vis = pred_keypoints_3d.cpu().numpy()
                pred_keypoints_3d_vis = np.reshape(pred_keypoints_3d_vis, (pred_keypoints_3d_vis.shape[0],-1))        #N,14x3
                pred_keypoints_3d_vis = np.swapaxes(pred_keypoints_3d_vis, 0,1) *100
                # output_sample = output_sample[ : , np.newaxis]*0.1
                # gt_sample = gt_sample[: , np.newaxis]*0.1
                # (skelNum, dim, frames)
                glViewer.setSkeleton( [gt_keypoints_3d_vis, pred_keypoints_3d_vis] ,jointType='smplcoco')#(skelNum, dim, frames)
                glViewer.show()
                

            # Absolute error (MPJPE)
            error = torch.sqrt(((pred_keypoints_3d - gt_keypoints_3d) ** 2).sum(dim=-1)).mean(dim=-1).cpu().numpy()
            # mpjpe[step * batch_size:step * batch_size + curr_batch_size] = error

            # Reconstruction error (Procrustes-aligned)
            r_error = reconstruction_error(pred_keypoints_3d.cpu().numpy(), gt_keypoints_3d.cpu().numpy(), reduction=None)
            # recon_err[step * batch_size:step * batch_size + curr_batch_size] = r_error

            for ii, p in enumerate(batch['imgname'][:len(r_error)]):
                seqName = os.path.basename( os.path.dirname(p))
                # quant_mpjpe[step * batch_size:step * batch_size + curr_batch_size] = error
                if seqName not in quant_mpjpe.keys():
                    quant_mpjpe[seqName] = []
                    quant_recon_err[seqName] = []
                
                quant_mpjpe[seqName].append(error[ii]) 
                quant_recon_err[seqName].append(r_error[ii])

            list_mpjpe = np.hstack([ quant_mpjpe[k] for k in quant_mpjpe])
            list_reconError = np.hstack([ quant_recon_err[k] for k in quant_recon_err])
            if bVerbose:
                print(">>> {} : MPJPE {:.02f} mm, error: {:.02f} mm | Total MPJPE {:.02f} mm, error {:.02f} mm".format(seqName, np.mean(error)*1000, np.mean(r_error)*1000, np.hstack(list_mpjpe).mean()*1000, np.hstack(list_reconError).mean()*1000) )

            # print("MPJPE {}, error: {}".format(np.mean(error)*100, np.mean(r_error)*100))

        # If mask or part evaluation, render the mask and part images.
        # NOTE: the PartRenderer is commented out above, so `mask` and `parts`
        # are undefined here; restore the renderer before running the LSP
        # mask/part evaluation below.
        # if eval_masks or eval_parts:
        #     mask, parts = renderer(pred_vertices, pred_camera)

        # Mask evaluation (for LSP)
        if eval_masks:
            center = batch['center'].cpu().numpy()
            scale = batch['scale'].cpu().numpy()
            # Dimensions of original image
            orig_shape = batch['orig_shape'].cpu().numpy()
            for i in range(curr_batch_size):
                # After rendering, convert the image back to the original resolution
                pred_mask = uncrop(mask[i].cpu().numpy(), center[i], scale[i], orig_shape[i]) > 0
                # Load gt mask
                gt_mask = cv2.imread(os.path.join(annot_path, batch['maskname'][i]), 0) > 0
                # Evaluation consistent with the original UP-3D code
                accuracy += (gt_mask == pred_mask).sum()
                pixel_count += np.prod(np.array(gt_mask.shape))
                for c in range(2):
                    cgt = gt_mask == c
                    cpred = pred_mask == c
                    tp[c] += (cgt & cpred).sum()
                    fp[c] +=  (~cgt & cpred).sum()
                    fn[c] +=  (cgt & ~cpred).sum()
                f1 = 2 * tp / (2 * tp + fp + fn)

        # Part evaluation (for LSP)
        if eval_parts:
            center = batch['center'].cpu().numpy()
            scale = batch['scale'].cpu().numpy()
            orig_shape = batch['orig_shape'].cpu().numpy()
            for i in range(curr_batch_size):
                pred_parts = uncrop(parts[i].cpu().numpy().astype(np.uint8), center[i], scale[i], orig_shape[i])
                # Load gt part segmentation
                gt_parts = cv2.imread(os.path.join(annot_path, batch['partname'][i]), 0)
                # Evaluation consistent with the original UP-3D code
                # 6 parts + background
                for c in range(7):
                    cgt = gt_parts == c
                    cpred = pred_parts == c
                    cpred[gt_parts == 255] = 0
                    parts_tp[c] += (cgt & cpred).sum()
                    parts_fp[c] += (~cgt & cpred).sum()
                    parts_fn[c] += (cgt & ~cpred).sum()
                gt_parts[gt_parts == 255] = 0
                pred_parts[pred_parts == 255] = 0
                parts_f1 = 2 * parts_tp / (2 * parts_tp + parts_fp + parts_fn)
                parts_accuracy += (gt_parts == pred_parts).sum()
                parts_pixel_count += np.prod(np.array(gt_parts.shape))

        # Print intermediate results during evaluation
        if bVerbose:
            if step % log_freq == log_freq - 1:
                if eval_pose:
                    print('MPJPE: ' + str(1000 * mpjpe[:step * batch_size].mean()))
                    print('Reconstruction Error: ' + str(1000 * recon_err[:step * batch_size].mean()))
                    print()
                if eval_masks:
                    print('Accuracy: ', accuracy / pixel_count)
                    print('F1: ', f1.mean())
                    print()
                if eval_parts:
                    print('Parts Accuracy: ', parts_accuracy / parts_pixel_count)
                    print('Parts F1 (BG): ', parts_f1[[0,1,2,3,4,5,6]].mean())
                    print()

        if save_results:
            rot_pad = torch.tensor([0,0,1], dtype=torch.float32, device=device).view(1,3,1)
            rotmat = torch.cat((pred_rotmat.view(-1, 3, 3), rot_pad.expand(curr_batch_size * 24, -1, -1)), dim=-1)
            pred_pose = tgm.rotation_matrix_to_angle_axis(rotmat).contiguous().view(-1, 72)

            output_pred_pose[outputStartPointer:outputStartPointer+curr_batch_size, :] = pred_pose.cpu().numpy()
            output_pred_betas[outputStartPointer:outputStartPointer+curr_batch_size, :] = pred_betas.cpu().numpy()
            output_pred_camera[outputStartPointer:outputStartPointer+curr_batch_size, :] = pred_camera.cpu().numpy()
            output_pred_joints[outputStartPointer:outputStartPointer+curr_batch_size, :] = pred_keypoints_3d.cpu().numpy()

            output_gt_pose[outputStartPointer:outputStartPointer+curr_batch_size, :]  = gt_pose.cpu().numpy()
            output_gt_betas[outputStartPointer:outputStartPointer+curr_batch_size, :] = gt_betas.cpu().numpy()
            output_gt_joints[outputStartPointer:outputStartPointer+curr_batch_size, :] = gt_keypoints_3d.cpu().numpy()

            output_error_MPJPE[outputStartPointer:outputStartPointer+curr_batch_size,]  =  error *1000
            output_error_recon[outputStartPointer:outputStartPointer+curr_batch_size] =  r_error*1000

            output_cropScale[outputStartPointer:outputStartPointer+curr_batch_size] = batch['scale'].cpu().numpy()
            output_cropCenter[outputStartPointer:outputStartPointer+curr_batch_size, :] = batch['center'].cpu().numpy()

            output_imgNames +=batch['imgname']

            outputStartPointer +=curr_batch_size

            # if outputStartPointer>100:     #Debug
            #         break


        
    # Trim the pre-allocated arrays to the number of images actually processed
    output = {}
    finalLen = len(output_imgNames)
    output['imageNames'] = output_imgNames
    output['pred_pose'] = output_pred_pose[:finalLen]
    output['pred_betas'] = output_pred_betas[:finalLen]
    output['pred_camera'] = output_pred_camera[:finalLen]
    output['pred_joints'] = output_pred_joints[:finalLen]

    output['gt_pose'] = output_gt_pose[:finalLen]
    output['gt_betas'] = output_gt_betas[:finalLen]
    output['gt_joints'] = output_gt_joints[:finalLen]

    output['error_MPJPE'] = output_error_MPJPE[:finalLen]
    output['error_recon'] = output_error_recon[:finalLen]

    output['cropScale']  = output_cropScale[:finalLen]
    output['cropCenter'] = output_cropCenter[:finalLen]


    # Save reconstructions to a file for further processing
    if save_results:
        import pickle
        with open(result_file, 'wb') as f:
            pickle.dump(output, f)
        print("Saved to: {}".format(result_file))
        
    # Print final results during evaluation

    if bVerbose:
        print('*** Final Results ***')
        print()
    if eval_pose:
        # if bVerbose:
        #     print('MPJPE: ' + str(1000 * mpjpe.mean()))
        #     print('Reconstruction Error: ' + str(1000 * recon_err.mean()))
        #     print()
        list_mpjpe = np.hstack([ quant_mpjpe[k] for k in quant_mpjpe])
        list_reconError = np.hstack([ quant_recon_err[k] for k in quant_recon_err])

        output_str ='SeqNames; '
        for seq in quant_mpjpe:
            output_str += seq + ';'
        output_str +='\n MPJPE; '
        quant_mpjpe_avg_mm = np.hstack(list_mpjpe).mean()*1000
        output_str += "Avg {:.02f} mm; ".format( quant_mpjpe_avg_mm)
        for seq in quant_mpjpe:
            output_str += '{:.02f}; '.format(1000 * np.hstack(quant_mpjpe[seq]).mean())

        output_str +='\n Recon Error; '
        quant_recon_error_avg_mm = np.hstack(list_reconError).mean()*1000
        output_str +="Avg {:.02f}mm; ".format( quant_recon_error_avg_mm )
        for seq in quant_recon_err:
            output_str += '{:.02f}; '.format(1000 * np.hstack(quant_recon_err[seq]).mean())
        if bVerbose:
            print(output_str)
        else:
            print(">>>  Test on 3DPW: MPJPE: {} | quant_recon_error_avg_mm: {}".format(quant_mpjpe_avg_mm, quant_recon_error_avg_mm) )

       
        return quant_mpjpe_avg_mm, quant_recon_error_avg_mm

    if bVerbose:
        if eval_masks:
            print('Accuracy: ', accuracy / pixel_count)
            print('F1: ', f1.mean())
            print()
        if eval_parts:
            print('Parts Accuracy: ', parts_accuracy / parts_pixel_count)
            print('Parts F1 (BG): ', parts_f1[[0,1,2,3,4,5,6]].mean())
            print()

    return -1  # no pose metrics were computed for this dataset; return a sentinel
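# --- Hedged usage sketch (assumed, not part of the original file): evaluating an
# already-constructed HMR/SPIN-style regressor on 3DPW with the function above.
# `BaseDataset(None, '3dpw', is_train=False)` mirrors how evaluation datasets are
# built in the other snippets and is an assumption here.
def evaluate_on_3dpw(model):
    dataset = BaseDataset(None, '3dpw', is_train=False)
    mpjpe_mm, pa_mpjpe_mm = run_evaluation(model, '3dpw', dataset, result_file=None,
                                           batch_size=32, bVerbose=False)
    print('3DPW MPJPE: {:.2f} mm | PA-MPJPE: {:.2f} mm'.format(mpjpe_mm, pa_mpjpe_mm))
    return mpjpe_mm, pa_mpjpe_mm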
Ejemplo n.º 22
0
def run_evaluation(model, dataset):
    """Run evaluation on the datasets and metrics we report in the paper. """

    shuffle = args.shuffle
    log_freq = args.log_freq
    batch_size = args.batch_size
    dataset_name = args.dataset
    result_file = args.result_file
    num_workers = args.num_workers
    device = torch.device('cuda') if torch.cuda.is_available() \
                                else torch.device('cpu')

    # Transfer model to the GPU
    model.to(device)

    # Load SMPL model
    smpl_neutral = SMPL(path_config.SMPL_MODEL_DIR,
                        create_transl=False).to(device)
    smpl_male = SMPL(path_config.SMPL_MODEL_DIR,
                     gender='male',
                     create_transl=False).to(device)
    smpl_female = SMPL(path_config.SMPL_MODEL_DIR,
                       gender='female',
                       create_transl=False).to(device)

    renderer = PartRenderer()

    # Regressor for H36m joints
    J_regressor = torch.from_numpy(np.load(
        path_config.JOINT_REGRESSOR_H36M)).float()

    save_results = result_file is not None
    # Disable shuffling if you want to save the results
    if save_results:
        shuffle = False
    # Create dataloader for the dataset
    data_loader = DataLoader(dataset,
                             batch_size=batch_size,
                             shuffle=shuffle,
                             num_workers=num_workers)

    # Pose metrics
    # MPJPE and Reconstruction error for the non-parametric and parametric shapes
    mpjpe = np.zeros(len(dataset))
    recon_err = np.zeros(len(dataset))
    mpjpe_smpl = np.zeros(len(dataset))
    recon_err_smpl = np.zeros(len(dataset))
    pve = np.zeros(len(dataset))

    # Shape metrics
    # Mean per-vertex error
    shape_err = np.zeros(len(dataset))
    shape_err_smpl = np.zeros(len(dataset))

    # Mask and part metrics
    # Accuracy
    accuracy = 0.
    parts_accuracy = 0.
    # True positive, false positive and false negative
    tp = np.zeros((2, 1))
    fp = np.zeros((2, 1))
    fn = np.zeros((2, 1))
    parts_tp = np.zeros((7, 1))
    parts_fp = np.zeros((7, 1))
    parts_fn = np.zeros((7, 1))
    # Pixel count accumulators
    pixel_count = 0
    parts_pixel_count = 0

    # Store SMPL parameters
    smpl_pose = np.zeros((len(dataset), 72))
    smpl_betas = np.zeros((len(dataset), 10))
    smpl_camera = np.zeros((len(dataset), 3))
    pred_joints = np.zeros((len(dataset), 17, 3))
    action_idxes = {}
    idx_counter = 0
    # for each action
    act_PVE = {}
    act_MPJPE = {}
    act_paMPJPE = {}

    eval_pose = False
    eval_masks = False
    eval_parts = False
    # Choose appropriate evaluation for each dataset
    if dataset_name == 'h36m-p1' or dataset_name == 'h36m-p2' or dataset_name == 'h36m-p2-mosh' \
       or dataset_name == '3dpw' or dataset_name == 'mpi-inf-3dhp' or dataset_name == '3doh50k':
        eval_pose = True
    elif dataset_name == 'lsp':
        eval_masks = True
        eval_parts = True
        annot_path = path_config.DATASET_FOLDERS['upi-s1h']

    joint_mapper_h36m = constants.H36M_TO_J17 if dataset_name == 'mpi-inf-3dhp' else constants.H36M_TO_J14
    joint_mapper_gt = constants.J24_TO_J17 if dataset_name == 'mpi-inf-3dhp' else constants.J24_TO_J14
    # Iterate over the entire dataset
    cnt = 0
    results_dict = {'id': [], 'pred': [], 'pred_pa': [], 'gt': []}
    for step, batch in enumerate(
            tqdm(data_loader, desc='Eval', total=len(data_loader))):
        # Get ground truth annotations from the batch
        gt_pose = batch['pose'].to(device)
        gt_betas = batch['betas'].to(device)
        gt_smpl_out = smpl_neutral(betas=gt_betas,
                                   body_pose=gt_pose[:, 3:],
                                   global_orient=gt_pose[:, :3])
        gt_vertices_nt = gt_smpl_out.vertices
        images = batch['img'].to(device)
        gender = batch['gender'].to(device)
        curr_batch_size = images.shape[0]

        if save_results:
            s_id = np.array(
                [int(item.split('/')[-3][-1])
                 for item in batch['imgname']]) * 10000
            s_id += np.array(
                [int(item.split('/')[-1][4:-4]) for item in batch['imgname']])
            results_dict['id'].append(s_id)

        if dataset_name == 'h36m-p2':
            action = [
                im_path.split('/')[-1].split('.')[0].split('_')[1]
                for im_path in batch['imgname']
            ]
            for act_i in range(len(action)):

                if action[act_i] in action_idxes:
                    action_idxes[action[act_i]].append(idx_counter + act_i)
                else:
                    action_idxes[action[act_i]] = [idx_counter + act_i]
            idx_counter += len(action)

        with torch.no_grad():
            if args.regressor == 'hmr':
                pred_rotmat, pred_betas, pred_camera = model(images)
                # torch.Size([32, 24, 3, 3]) torch.Size([32, 10]) torch.Size([32, 3])
            elif args.regressor == 'pymaf_net':
                preds_dict, _ = model(images)
                pred_rotmat = preds_dict['smpl_out'][-1]['rotmat'].contiguous(
                ).view(-1, 24, 3, 3)
                pred_betas = preds_dict['smpl_out'][-1][
                    'theta'][:, 3:13].contiguous()
                pred_camera = preds_dict['smpl_out'][-1][
                    'theta'][:, :3].contiguous()

            pred_output = smpl_neutral(
                betas=pred_betas,
                body_pose=pred_rotmat[:, 1:],
                global_orient=pred_rotmat[:, 0].unsqueeze(1),
                pose2rot=False)
            pred_vertices = pred_output.vertices

        if save_results:
            rot_pad = torch.tensor([0, 0, 1],
                                   dtype=torch.float32,
                                   device=device).view(1, 3, 1)
            rotmat = torch.cat((pred_rotmat.view(
                -1, 3, 3), rot_pad.expand(curr_batch_size * 24, -1, -1)),
                               dim=-1)
            pred_pose = tgm.rotation_matrix_to_angle_axis(
                rotmat).contiguous().view(-1, 72)
            smpl_pose[step * batch_size:step * batch_size +
                      curr_batch_size, :] = pred_pose.cpu().numpy()
            smpl_betas[step * batch_size:step * batch_size +
                       curr_batch_size, :] = pred_betas.cpu().numpy()
            smpl_camera[step * batch_size:step * batch_size +
                        curr_batch_size, :] = pred_camera.cpu().numpy()

        # 3D pose evaluation
        if eval_pose:
            # Regressor broadcasting
            J_regressor_batch = J_regressor[None, :].expand(
                pred_vertices.shape[0], -1, -1).to(device)
            # Get 14 ground truth joints
            if 'h36m' in dataset_name or 'mpi-inf' in dataset_name or '3doh50k' in dataset_name:
                gt_keypoints_3d = batch['pose_3d'].cuda()
                gt_keypoints_3d = gt_keypoints_3d[:, joint_mapper_gt, :-1]
            # For 3DPW get the 14 common joints from the rendered shape
            else:
                gt_vertices = smpl_male(global_orient=gt_pose[:, :3],
                                        body_pose=gt_pose[:, 3:],
                                        betas=gt_betas).vertices
                gt_vertices_female = smpl_female(global_orient=gt_pose[:, :3],
                                                 body_pose=gt_pose[:, 3:],
                                                 betas=gt_betas).vertices
                gt_vertices[gender == 1, :, :] = gt_vertices_female[gender ==
                                                                    1, :, :]
                gt_keypoints_3d = torch.matmul(J_regressor_batch, gt_vertices)
                gt_pelvis = gt_keypoints_3d[:, [0], :].clone()
                gt_keypoints_3d = gt_keypoints_3d[:, joint_mapper_h36m, :]
                gt_keypoints_3d = gt_keypoints_3d - gt_pelvis

            if '3dpw' in dataset_name:
                per_vertex_error = torch.sqrt(
                    ((pred_vertices -
                      gt_vertices)**2).sum(dim=-1)).mean(dim=-1).cpu().numpy()
            else:
                per_vertex_error = torch.sqrt(
                    ((pred_vertices - gt_vertices_nt)**2).sum(dim=-1)).mean(
                        dim=-1).cpu().numpy()
            pve[step * batch_size:step * batch_size +
                curr_batch_size] = per_vertex_error

            # Get 14 predicted joints from the mesh
            pred_keypoints_3d = torch.matmul(J_regressor_batch, pred_vertices)
            if save_results:
                pred_joints[
                    step * batch_size:step * batch_size +
                    curr_batch_size, :, :] = pred_keypoints_3d.cpu().numpy()
            pred_pelvis = pred_keypoints_3d[:, [0], :].clone()
            pred_keypoints_3d = pred_keypoints_3d[:, joint_mapper_h36m, :]
            pred_keypoints_3d = pred_keypoints_3d - pred_pelvis

            # Absolute error (MPJPE)
            error = torch.sqrt(
                ((pred_keypoints_3d -
                  gt_keypoints_3d)**2).sum(dim=-1)).mean(dim=-1).cpu().numpy()
            mpjpe[step * batch_size:step * batch_size +
                  curr_batch_size] = error

            # Reconstruction error (Procrustes-aligned)
            r_error, pred_keypoints_3d_pa = reconstruction_error(
                pred_keypoints_3d.cpu().numpy(),
                gt_keypoints_3d.cpu().numpy(),
                reduction=None)
            recon_err[step * batch_size:step * batch_size +
                      curr_batch_size] = r_error

            if save_results:
                results_dict['gt'].append(gt_keypoints_3d.cpu().numpy())
                results_dict['pred'].append(pred_keypoints_3d.cpu().numpy())
                results_dict['pred_pa'].append(pred_keypoints_3d_pa)

        if args.vis_demo:
            imgnames = [i_n.split('/')[-1] for i_n in batch['imgname']]

            if args.regressor == 'hmr':
                iuv_pred = None  # the HMR regressor produces no dense-pose (IUV) prediction

            images_vis = images * torch.tensor([0.229, 0.224, 0.225],
                                               device=images.device).reshape(
                                                   1, 3, 1, 1)
            images_vis = images_vis + torch.tensor(
                [0.485, 0.456, 0.406], device=images.device).reshape(
                    1, 3, 1, 1)
            vis_smpl_iuv(
                images_vis.cpu().numpy(),
                pred_camera.cpu().numpy(),
                pred_output.vertices.cpu().numpy(), smpl_neutral.faces,
                iuv_pred, 100 * per_vertex_error, imgnames,
                os.path.join('./notebooks/output/demo_results', dataset_name,
                             args.checkpoint.split('/')[-3]), args)

        # If mask or part evaluation, render the mask and part images
        if eval_masks or eval_parts:
            mask, parts = renderer(pred_vertices, pred_camera)
        # Mask evaluation (for LSP)
        if eval_masks:
            center = batch['center'].cpu().numpy()
            scale = batch['scale'].cpu().numpy()
            # Dimensions of original image
            orig_shape = batch['orig_shape'].cpu().numpy()
            for i in range(curr_batch_size):
                # After rendering, convert the image back to the original resolution
                pred_mask = uncrop(mask[i].cpu().numpy(), center[i], scale[i],
                                   orig_shape[i]) > 0
                # Load gt mask
                gt_mask = cv2.imread(
                    os.path.join(annot_path, batch['maskname'][i]), 0) > 0
                # Evaluation consistent with the original UP-3D code
                accuracy += (gt_mask == pred_mask).sum()
                pixel_count += np.prod(np.array(gt_mask.shape))
                for c in range(2):
                    cgt = gt_mask == c
                    cpred = pred_mask == c
                    tp[c] += (cgt & cpred).sum()
                    fp[c] += (~cgt & cpred).sum()
                    fn[c] += (cgt & ~cpred).sum()
                f1 = 2 * tp / (2 * tp + fp + fn)

        # Part evaluation (for LSP)
        if eval_parts:
            center = batch['center'].cpu().numpy()
            scale = batch['scale'].cpu().numpy()
            orig_shape = batch['orig_shape'].cpu().numpy()
            for i in range(curr_batch_size):
                pred_parts = uncrop(parts[i].cpu().numpy().astype(np.uint8),
                                    center[i], scale[i], orig_shape[i])
                # Load gt part segmentation
                gt_parts = cv2.imread(
                    os.path.join(annot_path, batch['partname'][i]), 0)
                # Evaluation consistent with the original UP-3D code
                # 6 parts + background
                for c in range(7):
                    cgt = gt_parts == c
                    cpred = pred_parts == c
                    cpred[gt_parts == 255] = 0
                    parts_tp[c] += (cgt & cpred).sum()
                    parts_fp[c] += (~cgt & cpred).sum()
                    parts_fn[c] += (cgt & ~cpred).sum()
                gt_parts[gt_parts == 255] = 0
                pred_parts[pred_parts == 255] = 0
                parts_f1 = 2 * parts_tp / (2 * parts_tp + parts_fp + parts_fn)
                parts_accuracy += (gt_parts == pred_parts).sum()
                parts_pixel_count += np.prod(np.array(gt_parts.shape))

        # Print intermediate results during evaluation
        if step % log_freq == log_freq - 1:
            if eval_pose:
                print('MPJPE: ' + str(1000 * mpjpe[:step * batch_size].mean()))
                print('Reconstruction Error: ' +
                      str(1000 * recon_err[:step * batch_size].mean()))
                print()
            if eval_masks:
                print('Accuracy: ', accuracy / pixel_count)
                print('F1: ', f1.mean())
                print()
            if eval_parts:
                print('Parts Accuracy: ', parts_accuracy / parts_pixel_count)
                print('Parts F1 (BG): ', parts_f1[[0, 1, 2, 3, 4, 5,
                                                   6]].mean())
                print()

    # Save reconstructions to a file for further processing
    if save_results:
        np.savez(result_file,
                 pred_joints=pred_joints,
                 pose=smpl_pose,
                 betas=smpl_betas,
                 camera=smpl_camera)
        for k in results_dict.keys():
            results_dict[k] = np.concatenate(results_dict[k])
            print(k, results_dict[k].shape)

        scipy.io.savemat(result_file + '.mat', results_dict)

    # Print final results during evaluation
    print('*** Final Results ***')
    try:
        # Best-effort printout of which checkpoint and dataset were evaluated
        print(args.checkpoint.split('/')[-3:], args.dataset)
    except Exception:
        pass
    if eval_pose:
        print('PVE: ' + str(1000 * pve.mean()))
        print('MPJPE: ' + str(1000 * mpjpe.mean()))
        print('Reconstruction Error: ' + str(1000 * recon_err.mean()))
        print()
    if eval_masks:
        print('Accuracy: ', accuracy / pixel_count)
        print('F1: ', f1.mean())
        print()
    if eval_parts:
        print('Parts Accuracy: ', parts_accuracy / parts_pixel_count)
        print('Parts F1 (BG): ', parts_f1[[0, 1, 2, 3, 4, 5, 6]].mean())
        print()

    if dataset_name == 'h36m-p2':
        print(
            'Note: PVE is not available for h36m-p2. To evaluate PVE, use h36m-p2-mosh instead.'
        )
        for act in action_idxes:
            act_idx = action_idxes[act]
            act_pve = [pve[i] for i in act_idx]
            act_errors = [mpjpe[i] for i in act_idx]
            act_errors_pa = [recon_err[i] for i in act_idx]

            act_errors_mean = np.mean(np.array(act_errors)) * 1000.
            act_errors_pa_mean = np.mean(np.array(act_errors_pa)) * 1000.
            act_pve_mean = np.mean(np.array(act_pve)) * 1000.
            act_MPJPE[act] = act_errors_mean
            act_paMPJPE[act] = act_errors_pa_mean
            act_PVE[act] = act_pve_mean

        act_err_info = ['action err']
        act_row = [str(act_paMPJPE[act])
                   for act in action_idxes] + [act for act in action_idxes]
        act_err_info.extend(act_row)
        print(act_err_info)
    else:
        act_row = None
Ejemplo n.º 23
0
    def init_fn(self):
        if self.options.rank == 0:
            self.summary_writer.add_text('command_args', print_args())

        if self.options.regressor == 'hmr':
            # HMR/SPIN model
            self.model = hmr(path_config.SMPL_MEAN_PARAMS, pretrained=True)
            self.smpl = SMPL(path_config.SMPL_MODEL_DIR,
                             batch_size=cfg.TRAIN.BATCH_SIZE,
                             create_transl=False).to(self.device)
        elif self.options.regressor == 'pymaf_net':
            # PyMAF model
            self.model = pymaf_net(path_config.SMPL_MEAN_PARAMS,
                                   pretrained=True)
            self.smpl = self.model.regressor[0].smpl

        if self.options.distributed:
            # For multiprocessing distributed, DistributedDataParallel constructor
            # should always set the single device scope, otherwise,
            # DistributedDataParallel will use all available devices.
            if self.options.gpu is not None:
                torch.cuda.set_device(self.options.gpu)
                self.model.cuda(self.options.gpu)
                # When using a single GPU per process and per
                # DistributedDataParallel, we need to divide the batch size
                # ourselves based on the total number of GPUs we have
                self.options.batch_size = int(self.options.batch_size /
                                              self.options.ngpus_per_node)
                self.options.workers = int(
                    (self.options.workers + self.options.ngpus_per_node - 1) /
                    self.options.ngpus_per_node)
                self.model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(
                    self.model)
                self.model = torch.nn.parallel.DistributedDataParallel(
                    self.model,
                    device_ids=[self.options.gpu],
                    output_device=self.options.gpu,
                    find_unused_parameters=True)
            else:
                self.model.cuda()
                # DistributedDataParallel will divide and allocate batch_size to all
                # available GPUs if device_ids are not set
                self.model = torch.nn.parallel.DistributedDataParallel(
                    self.model, find_unused_parameters=True)
            self.models_dict = {'model': self.model.module}
        else:
            self.model = self.model.to(self.device)
            self.models_dict = {'model': self.model}

        cudnn.benchmark = True

        # Per-vertex loss on the shape
        self.criterion_shape = nn.L1Loss().to(self.device)
        # Keypoint (2D and 3D) loss
        # No reduction because confidence weighting needs to be applied
        self.criterion_keypoints = nn.MSELoss(reduction='none').to(self.device)
        # Loss for SMPL parameter regression
        self.criterion_regr = nn.MSELoss().to(self.device)
        self.focal_length = constants.FOCAL_LENGTH

        if self.options.pretrained_checkpoint is not None:
            self.load_pretrained(
                checkpoint_file=self.options.pretrained_checkpoint)

        self.optimizer = torch.optim.Adam(params=self.model.parameters(),
                                          lr=cfg.SOLVER.BASE_LR,
                                          weight_decay=0)

        self.optimizers_dict = {'optimizer': self.optimizer}

        if self.options.single_dataset:
            self.train_ds = BaseDataset(self.options,
                                        self.options.single_dataname,
                                        is_train=True)
        else:
            self.train_ds = MixedDataset(self.options, is_train=True)

        self.valid_ds = BaseDataset(self.options,
                                    self.options.eval_dataset,
                                    is_train=False)

        if self.options.distributed:
            train_sampler = torch.utils.data.distributed.DistributedSampler(
                self.train_ds)
            val_sampler = None
        else:
            train_sampler = None
            val_sampler = None

        self.train_data_loader = DataLoader(self.train_ds,
                                            batch_size=self.options.batch_size,
                                            num_workers=self.options.workers,
                                            pin_memory=cfg.TRAIN.PIN_MEMORY,
                                            shuffle=(train_sampler is None),
                                            sampler=train_sampler)

        self.valid_loader = DataLoader(dataset=self.valid_ds,
                                       batch_size=cfg.TEST.BATCH_SIZE,
                                       shuffle=False,
                                       num_workers=cfg.TRAIN.NUM_WORKERS,
                                       pin_memory=cfg.TRAIN.PIN_MEMORY,
                                       sampler=val_sampler)

        # Load dictionary of fits
        self.fits_dict = FitsDict(self.options, self.train_ds)
        self.evaluation_accumulators = dict.fromkeys([
            'pred_j3d', 'target_j3d', 'target_theta', 'pred_verts',
            'target_verts'
        ])
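        # These accumulators are filled batch by batch during validation and later
        # concatenated to compute metrics (e.g. MPJPE, per-vertex error) over the
        # whole evaluation set.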

        # Create renderer
        try:
            self.renderer = OpenDRenderer()
        except Exception:
            print('No renderer available for visualization.')
            self.renderer = None

        if cfg.MODEL.PyMAF.AUX_SUPV_ON:
            self.iuv_maker = IUV_Renderer(
                output_size=cfg.MODEL.PyMAF.DP_HEATMAP_SIZE)
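            # The IUV renderer turns the fitted mesh into dense part-index/UV maps,
            # which serve as targets for PyMAF's auxiliary dense-correspondence loss.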

        self.decay_steps_ind = 1
        self.decay_epochs_ind = 1
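        # Assumption (not shown in this snippet): these indices presumably walk through
        # the cfg.SOLVER decay schedule, along the lines of
        #   if epoch == decay_epochs[self.decay_epochs_ind]:
        #       for pg in self.optimizer.param_groups:
        #           pg['lr'] *= 0.1
        #       self.decay_epochs_ind += 1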
Example No. 24
0
def run_evaluation(model,
                   dataset,
                   result_file,
                   batch_size=32,
                   img_res=224,
                   num_workers=32,
                   shuffle=False,
                   options=None):
    """Run evaluation on the datasets and metrics we report in the paper. """

    device = torch.device(
        'cuda') if torch.cuda.is_available() else torch.device('cpu')

    # Transfer model to the GPU
    model.to(device)

    # Load SMPL model
    smpl_neutral = SMPL(path_config.SMPL_MODEL_DIR,
                        create_transl=False).to(device)

    save_results = result_file is not None
    # Disable shuffling if you want to save the results
    if save_results:
        shuffle = False
    # Create dataloader for the dataset
    data_loader = DataLoader(dataset,
                             batch_size=batch_size,
                             shuffle=shuffle,
                             num_workers=num_workers)

    # Store SMPL parameters
    smpl_pose = np.zeros((len(dataset), 72))
    smpl_betas = np.zeros((len(dataset), 10))
    smpl_camera = np.zeros((len(dataset), 3))
    pred_joints = np.zeros((len(dataset), 17, 3))

    num_joints = 17

    num_samples = len(dataset)
    print('dataset length: {}'.format(num_samples))
    all_preds = np.zeros((num_samples, num_joints, 3), dtype=np.float32)
    all_boxes = np.zeros((num_samples, 6))
    image_path = []
    filenames = []
    imgnums = []
    idx = 0
    with torch.no_grad():
        end = time.time()

        for step, batch in enumerate(
                tqdm(data_loader, desc='Eval', total=len(data_loader))):
            images = batch['img'].to(device)
            scale = batch['scale'].numpy()
            center = batch['center'].numpy()

            num_images = images.size(0)

            gt_keypoints_2d = batch['keypoints']  # 2D keypoints
            # De-normalize 2D keypoints from [-1,1] to pixel space
            gt_keypoints_2d_orig = gt_keypoints_2d.clone()
            gt_keypoints_2d_orig[:, :, :-1] = 0.5 * img_res * (
                gt_keypoints_2d_orig[:, :, :-1] + 1)
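            # Mapping: x_pix = 0.5 * img_res * (x_norm + 1), i.e. [-1, 1] -> [0, img_res];
            # the last column (keypoint confidence) is left unchanged.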

            if options.regressor == 'hmr':
                pred_rotmat, pred_betas, pred_camera = model(images)
            elif options.regressor == 'danet':
                danet_pred_dict = model.infer_net(images)
                para_pred = danet_pred_dict['para']
                pred_camera = para_pred[:, 0:3].contiguous()
                pred_betas = para_pred[:, 3:13].contiguous()
                pred_rotmat = para_pred[:, 13:].contiguous().view(-1, 24, 3, 3)

            pred_output = smpl_neutral(
                betas=pred_betas,
                body_pose=pred_rotmat[:, 1:],
                global_orient=pred_rotmat[:, 0].unsqueeze(1),
                pose2rot=False)

            # pred_vertices = pred_output.vertices
            pred_J24 = pred_output.joints[:, -24:]
            pred_JCOCO = pred_J24[:, constants.J24_TO_JCOCO]

            # Convert Weak Perspective Camera [s, tx, ty] to camera translation [tx, ty, tz] in 3D given the bounding box size
            # This camera translation can be used in a full perspective projection
            pred_cam_t = torch.stack([
                pred_camera[:, 1],
                pred_camera[:, 2],
                2 * constants.FOCAL_LENGTH / (img_res * pred_camera[:, 0] + 1e-9)
            ], dim=-1)
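            # A weak-perspective camera [s, tx, ty] scales the projected mesh by s in the
            # normalized crop; equating s with full-perspective projection at focal length
            # f over an img_res crop gives the depth t_z = 2 * f / (img_res * s).
            # The 1e-9 term guards against division by zero.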

            camera_center = torch.zeros(len(pred_JCOCO),
                                        2,
                                        device=pred_camera.device)
            pred_keypoints_2d = perspective_projection(
                pred_JCOCO,
                rotation=torch.eye(
                    3, device=pred_camera.device).unsqueeze(0).expand(
                        len(pred_JCOCO), -1, -1),
                translation=pred_cam_t,
                focal_length=constants.FOCAL_LENGTH,
                camera_center=camera_center)

            coords = pred_keypoints_2d + (img_res / 2.)
            coords = coords.cpu().numpy()
            # Normalize keypoints to [-1,1]
            # pred_keypoints_2d = pred_keypoints_2d / (img_res / 2.)

            gt_keypoints_coco = gt_keypoints_2d_orig[:, -24:][:, constants.J24_TO_JCOCO]

            preds = coords.copy()

            scale_ = np.array([scale, scale]).transpose()

            # Transform back
            for i in range(coords.shape[0]):
                preds[i] = transform_preds(coords[i], center[i], scale_[i],
                                           [img_res, img_res])

            all_preds[idx:idx + num_images, :, 0:2] = preds[:, :, 0:2]
            all_preds[idx:idx + num_images, :, 2:3] = 1.
            # TODO: double-check the all_boxes layout (center, scale, area, score)
            all_boxes[idx:idx + num_images, 0:2] = center[:, 0:2]
            all_boxes[idx:idx + num_images, 2:4] = scale_[:, 0:2]
            all_boxes[idx:idx + num_images, 4] = np.prod(scale_ * 200, 1)
            all_boxes[idx:idx + num_images, 5] = 1.
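            # Assumption: following the common human-pose convention where 'scale' is the
            # person size divided by 200 px, scale_ * 200 recovers the box side lengths,
            # so column 4 stores an approximate box area and column 5 a detection score.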
            image_path.extend(batch['imgname'])

            idx += num_images

        ckp_name = options.regressor
        name_values, perf_indicator = dataset.evaluate(all_preds,
                                                       options.output_dir,
                                                       all_boxes, image_path,
                                                       ckp_name, filenames,
                                                       imgnums)

        model_name = options.regressor
        if isinstance(name_values, list):
            for name_value in name_values:
                _print_name_value(name_value, model_name)
        else:
            _print_name_value(name_values, model_name)

    # Save reconstructions to a file for further processing
    if save_results:
        np.savez(result_file,
                 pred_joints=pred_joints,
                 pose=smpl_pose,
                 betas=smpl_betas,
                 camera=smpl_camera)
Example No. 25
0
def run_evaluation(model, opt, options, dataset_name, log_freq=50):
    """Run evaluation on the datasets and metrics we report in the paper. """

    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    # Create SMPL model
    smpl = SMPL().to(device)
    if dataset_name == '3dpw' or dataset_name == 'surreal':
        smpl_male = SMPL(cfg.MALE_SMPL_FILE).to(device)
        smpl_female = SMPL(cfg.FEMALE_SMPL_FILE).to(device)

    batch_size = opt.batch_size

    # Create dataloader for the dataset
    if dataset_name == 'surreal':
        dataset = SurrealDataset(options, use_augmentation=False, is_train=False, use_IUV=False)
    else:
        dataset = BaseDataset(options, dataset_name, use_augmentation=False, is_train=False, use_IUV=False)

    data_loader = DataLoader(dataset, batch_size=opt.batch_size, shuffle=False,
                             num_workers=int(opt.num_workers), pin_memory=True)

    print('Data loader ready.')

    # Transfer model to the GPU (device was already set above)
    model.to(device)
    model.eval()

    # Pose metrics
    # MPJPE and Reconstruction error for the non-parametric and parametric shapes
    mpjpe = np.zeros(len(dataset))
    mpjpe_pa = np.zeros(len(dataset))

    # Shape metrics
    # Mean per-vertex error
    shape_err = np.zeros(len(dataset))

    # Mask and part metrics
    # Accuracy
    accuracy = 0.
    parts_accuracy = 0.
    # True positive, false positive and false negative
    tp = np.zeros((2, 1))
    fp = np.zeros((2, 1))
    fn = np.zeros((2, 1))
    parts_tp = np.zeros((7, 1))
    parts_fp = np.zeros((7, 1))
    parts_fn = np.zeros((7, 1))
    # Pixel count accumulators
    pixel_count = 0
    parts_pixel_count = 0

    eval_pose = False
    eval_shape = False
    eval_masks = False
    eval_parts = False
    joint_mapper = cfg.J24_TO_J17 if dataset_name == 'mpi-inf-3dhp' else cfg.J24_TO_J14
    # Choose appropriate evaluation for each dataset
    if 'h36m' in dataset_name or dataset_name == '3dpw' or dataset_name == 'mpi-inf-3dhp':
        eval_pose = True
    elif dataset_name in ['up-3d', 'surreal']:
        eval_shape = True
    elif dataset_name == 'lsp':
        eval_masks = True
        eval_parts = True
        annot_path = cfg.DATASET_FOLDERS['upi-s1h']

    if eval_parts or eval_masks:
        from utils.part_utils import PartRenderer
        renderer = PartRenderer()

    # Iterate over the entire dataset
    for step, batch in enumerate(tqdm(data_loader, desc='Eval', total=len(data_loader))):
        # Get ground truth annotations from the batch
        gt_pose = batch['pose'].to(device)
        gt_betas = batch['betas'].to(device)
        gt_vertices = smpl(gt_pose, gt_betas)
        images = batch['img'].to(device)

        curr_batch_size = images.shape[0]

        # Run inference
        with torch.no_grad():
            out_dict = model(images)

        pred_vertices = out_dict['pred_vertices']
        camera = out_dict['camera']
        # 3D pose evaluation
        if eval_pose:
            # Get 14 ground truth joints
            if 'h36m' in dataset_name or 'mpi-inf' in dataset_name:
                gt_keypoints_3d = batch['pose_3d'].cuda()
                gt_keypoints_3d = gt_keypoints_3d[:, joint_mapper, :-1]
                gt_pelvis = (gt_keypoints_3d[:, [2]] + gt_keypoints_3d[:, [3]]) / 2
                gt_keypoints_3d = gt_keypoints_3d - gt_pelvis
            else:
                gender = batch['gender'].to(device)
                gt_vertices = smpl_male(gt_pose, gt_betas)
                gt_vertices_female = smpl_female(gt_pose, gt_betas)
                gt_vertices[gender == 1, :, :] = gt_vertices_female[gender == 1, :, :]

                gt_keypoints_3d = smpl.get_train_joints(gt_vertices)[:, joint_mapper]
                # gt_keypoints_3d = smpl.get_lsp_joints(gt_vertices)    # joints_regressor used in cmr
                gt_pelvis = (gt_keypoints_3d[:, [2]] + gt_keypoints_3d[:, [3]]) / 2
                gt_keypoints_3d = gt_keypoints_3d - gt_pelvis

            # Get 14 predicted joints from the non-parametric mesh
            pred_keypoints_3d = smpl.get_train_joints(pred_vertices)[:, joint_mapper]
            # pred_keypoints_3d = smpl.get_lsp_joints(pred_vertices)    # joints_regressor used in cmr
            pred_pelvis = (pred_keypoints_3d[:, [2]] + pred_keypoints_3d[:, [3]]) / 2
            pred_keypoints_3d = pred_keypoints_3d - pred_pelvis
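            # Centering predictions and ground truth at the mid-pelvis removes the global
            # root translation, so MPJPE below measures only the relative pose error.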

            # Absolute error (MPJPE)
            error = torch.sqrt(((pred_keypoints_3d - gt_keypoints_3d) ** 2).sum(dim=-1)).mean(dim=-1).cpu().numpy()
            mpjpe[step * batch_size:step * batch_size + curr_batch_size] = error

            # Reconstruction error (Procrustes-aligned MPJPE)
            r_error = reconstruction_error(pred_keypoints_3d.cpu().numpy(), gt_keypoints_3d.cpu().numpy(),
                                           reduction=None)
            mpjpe_pa[step * batch_size:step * batch_size + curr_batch_size] = r_error
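            # reconstruction_error rigidly aligns the prediction to the ground truth
            # (Procrustes: rotation, translation, scale) before averaging the joint
            # distances, i.e. it reports PA-MPJPE.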

        # Shape evaluation (Mean per-vertex error)
        if eval_shape:
            if dataset_name == 'surreal':
                gender = batch['gender'].to(device)
                gt_vertices = smpl_male(gt_pose, gt_betas)
                gt_vertices_female = smpl_female(gt_pose, gt_betas)
                gt_vertices[gender == 1, :, :] = gt_vertices_female[gender == 1, :, :]

            gt_pelvis_mesh = smpl.get_eval_joints(gt_vertices)
            pred_pelvis_mesh = smpl.get_eval_joints(pred_vertices)
            gt_pelvis_mesh = (gt_pelvis_mesh[:, [2]] + gt_pelvis_mesh[:, [3]]) / 2
            pred_pelvis_mesh = (pred_pelvis_mesh[:, [2]] + pred_pelvis_mesh[:, [3]]) / 2

            # se = torch.sqrt(((pred_vertices - gt_vertices) ** 2).sum(dim=-1)).mean(dim=-1).cpu().numpy()
            se = torch.sqrt(((pred_vertices - pred_pelvis_mesh - gt_vertices + gt_pelvis_mesh) ** 2).sum(dim=-1)).mean(dim=-1).cpu().numpy()
            shape_err[step * batch_size:step * batch_size + curr_batch_size] = se

        # If mask or part evaluation, render the mask and part images
        if eval_masks or eval_parts:
            mask, parts = renderer(pred_vertices, camera)
        # Mask evaluation (for LSP)
        if eval_masks:
            center = batch['center'].cpu().numpy()
            scale = batch['scale'].cpu().numpy()
            # Dimensions of original image
            orig_shape = batch['orig_shape'].cpu().numpy()
            for i in range(curr_batch_size):
                # After rendering, convert the mask back to the original image resolution
                pred_mask = uncrop(mask[i].cpu().numpy(), center[i], scale[i], orig_shape[i]) > 0
                # Load gt mask
                gt_mask = cv2.imread(os.path.join(annot_path, batch['maskname'][i]), 0) > 0
                # Evaluation consistent with the original UP-3D code
                accuracy += (gt_mask == pred_mask).sum()
                pixel_count += np.prod(np.array(gt_mask.shape))
                for c in range(2):
                    cgt = gt_mask == c
                    cpred = pred_mask == c
                    tp[c] += (cgt & cpred).sum()
                    fp[c] += (~cgt & cpred).sum()
                    fn[c] += (cgt & ~cpred).sum()
                f1 = 2 * tp / (2 * tp + fp + fn)
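                # Per-class F1 over {background, foreground}: F1_c = 2*TP_c / (2*TP_c + FP_c + FN_c).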

        # Part evaluation (for LSP)
        if eval_parts:
            center = batch['center'].cpu().numpy()
            scale = batch['scale'].cpu().numpy()
            orig_shape = batch['orig_shape'].cpu().numpy()
            for i in range(curr_batch_size):
                pred_parts = uncrop(parts[i].cpu().numpy().astype(np.uint8), center[i], scale[i], orig_shape[i])
                # Load gt part segmentation
                gt_parts = cv2.imread(os.path.join(annot_path, batch['partname'][i]), 0)
                # Evaluation consistent with the original UP-3D code
                # 6 parts + background
                for c in range(7):
                    cgt = gt_parts == c
                    cpred = pred_parts == c
                    cpred[gt_parts == 255] = 0
                    parts_tp[c] += (cgt & cpred).sum()
                    parts_fp[c] += (~cgt & cpred).sum()
                    parts_fn[c] += (cgt & ~cpred).sum()
                gt_parts[gt_parts == 255] = 0
                pred_parts[pred_parts == 255] = 0
                parts_f1 = 2 * parts_tp / (2 * parts_tp + parts_fp + parts_fn)
                parts_accuracy += (gt_parts == pred_parts).sum()
                parts_pixel_count += np.prod(np.array(gt_parts.shape))

        # Print intermediate results during evaluation
        if step % log_freq == log_freq - 1:
            if eval_pose:
                print('MPJPE: ' + str(1000 * mpjpe[:step * batch_size].mean()))
                print('MPJPE-PA: ' + str(1000 * mpjpe_pa[:step * batch_size].mean()))
                print()
            if eval_shape:
                print('Shape Error: ' + str(1000 * shape_err[:step * batch_size].mean()))
                print()
            if eval_masks:
                print('Accuracy: ', accuracy / pixel_count)
                print('F1: ', f1.mean())
                print()
            if eval_parts:
                print('Parts Accuracy: ', parts_accuracy / parts_pixel_count)
                print('Parts F1 (BG): ', parts_f1[[0, 1, 2, 3, 4, 5, 6]].mean())
                print()

    # Print final results during evaluation
    print('*** Final Results ***')
    print()
    if eval_pose:
        print('MPJPE: ' + str(1000 * mpjpe.mean()))
        print('MPJPE-PA: ' + str(1000 * mpjpe_pa.mean()))
        print()
    if eval_shape:
        print('Shape Error: ' + str(1000 * shape_err.mean()))
        print()
    if eval_masks:
        print('Accuracy: ', accuracy / pixel_count)
        print('F1: ', f1.mean())
        print()
    if eval_parts:
        print('Parts Accuracy: ', parts_accuracy / parts_pixel_count)
        print('Parts F1 (BG): ', parts_f1[[0, 1, 2, 3, 4, 5, 6]].mean())
        print()

    # Save final results to .txt file
    txt_name = join(opt.save_root, dataset_name + '.txt')
    f = open(txt_name, 'w')
    f.write('*** Final Results ***')
    f.write('\n')
    if eval_pose:
        f.write('MPJPE: ' + str(1000 * mpjpe.mean()))
        f.write('\n')
        f.write('MPJPE-PA: ' + str(1000 * mpjpe_pa.mean()))
        f.write('\n')
    if eval_shape:
        f.write('Shape Error: ' + str(1000 * shape_err.mean()))
        f.write('\n')
    if eval_masks:
        f.write('Accuracy: ' + str(accuracy / pixel_count))
        f.write('\n')
        f.write('F1: ' + str(f1.mean()))
        f.write('\n')
    if eval_parts:
        f.write('Parts Accuracy: ' + str(parts_accuracy / parts_pixel_count))
        f.write('\n')
        f.write('Parts F1 (BG): ' + str(parts_f1[[0, 1, 2, 3, 4, 5, 6]].mean()))
        f.write('\n')
    f.close()
Example No. 26
0
def run_evaluation(model, args, dataset, mesh):
    """Run evaluation on the datasets and metrics we report in the paper. """

    # Create SMPL model
    smpl = SMPL().cuda()

    # Regressor for H36m joints
    J_regressor = torch.from_numpy(np.load(cfg.JOINT_REGRESSOR_H36M)).float()

    # Create dataloader for the dataset
    data_loader = DataLoader(dataset,
                             batch_size=args.batch_size,
                             shuffle=False,
                             num_workers=args.num_workers)

    # Transfer model to the GPU
    device = torch.device(
        'cuda') if torch.cuda.is_available() else torch.device('cpu')
    model.to(device)
    model.eval()

    # predictions
    all_kps = {}

    # Iterate over the entire dataset
    for step, batch in enumerate(
            tqdm(data_loader, desc='Eval', total=args.num_imgs)):

        # Get ground truth annotations from the batch
        images = batch['img'].to(device)
        curr_batch_size = images.shape[0]

        # Run inference
        with torch.no_grad():
            pred_vertices, pred_vertices_smpl, camera, pred_rotmat, pred_betas = model(
                images)
            pred_keypoints_3d_smpl = smpl.get_joints(pred_vertices_smpl)
            pred_keypoints_2d_smpl = orthographic_projection(
                pred_keypoints_3d_smpl,
                camera.detach())[:, :, :2].cpu().data.numpy()
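            # orthographic_projection applies a weak-perspective camera; under the usual
            # GraphCMR/SPIN convention (an assumption here), x_2d = s * X_xy + [tx, ty]
            # with camera = [s, tx, ty].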

        eval_part = np.zeros((1, 19, 2))

        # we use custom keypoints for evaluation: MPII + COCO face joints
        # see paper / supplementary for details
        eval_part[0, :14, :] = pred_keypoints_2d_smpl[0][:14]
        eval_part[0, 14:, :] = pred_keypoints_2d_smpl[0][19:]
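        # Assumption: the first 14 joints are the MPII-style body keypoints and
        # indices 19 onwards are the COCO face keypoints, giving the 19-point set
        # used for PCK evaluation below.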

        all_kps[step] = eval_part
        if args.write_imgs == 'True':
            renderer = Renderer(faces=smpl.faces.cpu().numpy())
            write_imgs(batch, pred_keypoints_2d_smpl, pred_vertices_smpl,
                       camera, args, renderer)

    if args.eval_pck == 'True':
        gt_kp_path = os.path.join(cfg.BASE_DATA_DIR, args.dataset,
                                  args.crop_setting, 'keypoints.pkl')
        log_dir = os.path.join(cfg.BASE_DATA_DIR, 'cmr_pck_results.txt')
        with open(gt_kp_path, 'rb') as f:
            gt = pkl.load(f)

        calc = CalcPCK(
            all_kps,
            gt,
            num_imgs=cfg.DATASET_SIZES[args.dataset][args.crop_setting],
            log_dir=log_dir,
            dataset=args.dataset,
            crop_setting=args.crop_setting,
            pck_eval_threshold=args.pck_eval_threshold)
        calc.eval()