Example #1
    def forward(self, output, target, target_weight):
        # soft-argmax converts the heatmaps into differentiable coordinates
        prediction, _ = soft_arg_max(output)
        # normalize the coordinates to 0-1
        prediction[:, :, 0] /= self.image_size[1]
        prediction[:, :, 1] /= self.image_size[0]
        target[:, :, 0] /= self.image_size[1]
        target[:, :, 1] /= self.image_size[0]
        diff = target - prediction
        diff_abs = diff.abs()
        loss = diff_abs.clone()

        idx_smaller = diff_abs < self.width
        idx_bigger = diff_abs >= self.width

        # Wing-loss-style piecewise penalty: logarithmic near zero,
        # linear (shifted by the constant C) for larger errors
        loss[idx_smaller] = self.width * torch.log(1 + diff_abs[idx_smaller] /
                                                   self.curvature)
        loss[idx_bigger] = loss[idx_bigger] - self.C
        loss = loss.mean()
        return loss
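
The soft_arg_max helper used in all three examples is not shown on this page. As a rough sketch only (not the repository's implementation), a soft-argmax layer typically applies a spatial softmax to each heatmap and returns the expected pixel coordinates; the function name, the beta temperature, and the meaning of the second return value are assumptions here.

import torch
import torch.nn.functional as F

def soft_arg_max_sketch(heatmaps, beta=100.0):
    # heatmaps: (N, K, H, W) -> coordinates of shape (N, K, 2) in (x, y) order
    n, k, h, w = heatmaps.shape
    flat = heatmaps.reshape(n, k, -1)
    # spatial softmax; beta sharpens the distribution (assumed hyper-parameter)
    probs = F.softmax(beta * flat, dim=-1).reshape(n, k, h, w)
    xs = torch.arange(w, dtype=heatmaps.dtype, device=heatmaps.device)
    ys = torch.arange(h, dtype=heatmaps.dtype, device=heatmaps.device)
    # expected x / y positions under each joint's probability map
    x = (probs.sum(dim=2) * xs).sum(dim=-1)
    y = (probs.sum(dim=3) * ys).sum(dim=-1)
    coords = torch.stack([x, y], dim=-1)
    max_vals = probs.reshape(n, k, -1).max(dim=-1).values
    return coords, max_vals

Because the expectation is differentiable, the coordinate losses above can back-propagate through the heatmaps, which a hard argmax would not allow.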
Example #2
    def forward(self, output, target, target_weight=None, meta=None):
        """
        Loss evaluation.
        Output is in the format of (heatmaps, coordinates), where the
        coordinates are optional.
        target refers to the ground-truth heatmaps.
        """
        if type(output) is tuple:
            heatmaps_pred, coordinates_pred = output
        else:
            heatmaps_pred, coordinates_pred = output, None
        total_loss = 0
        # each entry of self.comp_dict stores the weight of its loss
        # component at index 1
        if 'hm' in self.comp_dict:
            # some heatmaps may be produced by unlabeled data
            if len(heatmaps_pred) != len(target):
                heatmaps_pred = heatmaps_pred[:len(target)]
            total_loss += self.calc_hm_loss(heatmaps_pred,
                                            target) * self.comp_dict['hm'][1]
        if 'coor' in self.comp_dict:
            coordinates_gt = meta['transformed_joints'][:, :, :2].astype(
                np.float32)
            coordinates_gt = torch.from_numpy(coordinates_gt).cuda()
            if coordinates_pred is None:
                # fall back to soft-argmax on the predicted heatmaps and
                # normalize the coordinates to 0-1
                coordinates_pred, max_vals = soft_arg_max(heatmaps_pred)
                coordinates_pred[:, :, 0] /= self.hm_size[1]
                coordinates_pred[:, :, 1] /= self.hm_size[0]
            if len(coordinates_pred) != len(coordinates_gt):
                coordinates_pred_fs = coordinates_pred[:len(coordinates_gt)]
            else:
                coordinates_pred_fs = coordinates_pred
            total_loss += self.calc_coor_loss(
                coordinates_pred_fs,
                coordinates_gt) * self.comp_dict['coor'][1]
        if 'cr' in self.comp_dict and self.comp_dict['cr'][
                1] != "None" and self.apply_cr_loss:
            # cross-ratio loss with a mask computed from the predicted
            # coordinates
            cr_loss_mask = self.get_cr_mask(
                coordinates_pred.clone().detach().data.cpu().numpy(),
                self.cr_loss_thres)
            total_loss += self.calc_cross_ratio_loss(
                coordinates_pred, self.target_cr,
                cr_loss_mask) * self.comp_dict['cr'][1]
        return total_loss
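
For context, the indexing self.comp_dict['hm'][1] above implies that comp_dict maps each component name ('hm', 'coor', 'cr') to a tuple whose second element is that component's loss weight. The sketch below is only an assumed usage, not the repository's API: CompositeLoss and its constructor arguments are hypothetical placeholders, and the 'coor' branch additionally requires a CUDA device because of the .cuda() call.

import numpy as np
import torch

# assumed layout: component name -> (criterion, weight); only index 1 is
# actually read by the forward pass shown above
comp_dict = {'hm': (torch.nn.MSELoss(), 1.0),
             'coor': (torch.nn.L1Loss(), 0.1)}

heatmaps_pred = torch.rand(2, 17, 64, 64)                # (N, K, H, W) predicted heatmaps
target = torch.rand(2, 17, 64, 64)                       # ground-truth heatmaps
meta = {'transformed_joints': np.random.rand(2, 17, 3)}  # (N, K, 3) joint annotations

# criterion = CompositeLoss(comp_dict, hm_size=(64, 64))  # hypothetical constructor
# total_loss = criterion(heatmaps_pred, target, meta=meta)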
Example #3
def get_distance_src(output,
                     meta_data,
                     cfgs=None,
                     image_size=(256.0, 256.0),
                     arg_max='hard'):
    """
    From predicted heatmaps, obtain local coordinates (\phi_l in the paper) 
    and transform them back to the source images based on metadata. 
    Error is then evaluated on the source image for the screen coordinates 
    (\phi_g in the paper).
    """
    # the error is reported as distance in terms of pixels in the source image
    if type(output) is tuple:
        pred, max_vals = output[1].data.cpu().numpy(), None
    elif isinstance(output, np.ndarray) and arg_max == 'soft':
        pred, max_vals = lip.soft_arg_max_np(output)
    elif isinstance(output, torch.Tensor) and arg_max == 'soft': 
        pred, max_vals = lip.soft_arg_max(output)
    elif isinstance(output, (np.ndarray, torch.Tensor)) and arg_max == 'hard':
        if not isinstance(output, np.ndarray):
            output = output.data.cpu().numpy()        
        pred, max_vals = lip.get_max_preds(output)
    else:
        raise NotImplementedError
    image_size = image_size if cfgs is None else cfgs['heatmapModel']['input_size']
    width, height = image_size
    # convert predictions and confidences to numpy for the post-processing
    if not isinstance(pred, np.ndarray):
        pred = pred.data.cpu().numpy()
    if (max_vals is not None) and (not isinstance(max_vals, np.ndarray)):
        max_vals = max_vals.data.cpu().numpy()
    # the coordinates need to be rescaled for different cases
    if type(output) is tuple:
        # regressed coordinates: scale by the input width and height
        pred *= np.array(image_size).reshape(1, 1, 2)
    else:
        # heatmap coordinates: multiply by the down-sample ratio
        pred *= image_size[0] / output.shape[3]
    # inverse transform and compare pixel distance
    centers, scales = meta_data['center'], meta_data['scale']
    # some predictions are generated for unlabeled data
    if len(pred) != len(centers):
        pred_used = pred[:len(centers)]
    else:
        pred_used = pred
    if 'rotation' in meta_data:
        rots = meta_data['rotation']
    else:
        rots = [0. for i in range(len(centers))]
    joints_original_batch = meta_data['original_joints']
    distance_list = []
    correct_cnt_sum = np.zeros((len(PCK_THRES)))
    all_src_coordinates = []
    for sample_idx in range(len(pred_used)):
        trans_inv = lip.get_affine_transform(centers[sample_idx], 
                                             scales[sample_idx], 
                                             rots[sample_idx], 
                                             (height, width), 
                                             inv=1
                                             )
        joints_original = joints_original_batch[sample_idx]        
        pred_src_coordinates = lip.affine_transform_modified(pred_used[sample_idx], 
                                                             trans_inv
                                                             ) 
        all_src_coordinates.append(pred_src_coordinates.reshape(1, len(pred_src_coordinates), 2))
        distance_list += get_distance(joints_original, pred_src_coordinates)
        correct_cnt_sum += get_PCK(pred_src_coordinates, joints_original)
    cnt = len(distance_list)
    avg_acc = sum(distance_list) / cnt
    others = {
        'src_coord': np.concatenate(all_src_coordinates, axis=0), # screen coordinates
        'joints_pred': pred, # predicted local coordinates
        'max_vals': max_vals, 
        'correct_cnt': correct_cnt_sum,
        'PCK_batch': correct_cnt_sum / cnt
        }
    return avg_acc, cnt, others
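
The helpers get_distance and get_PCK are not included in this snippet. Judging only from how they are used above (get_distance returns a list of per-joint pixel distances, get_PCK returns one correct count per threshold in PCK_THRES), a minimal sketch could look as follows; the threshold values and the lack of visibility handling are assumptions.

import numpy as np

PCK_THRES = [5, 10, 15]  # pixel thresholds; illustrative values only

def get_distance(joints_gt, joints_pred):
    # per-joint Euclidean distance in source-image pixels, returned as a list
    diff = joints_gt[:, :2] - joints_pred[:, :2]
    return list(np.sqrt((diff ** 2).sum(axis=1)))

def get_PCK(joints_pred, joints_gt):
    # number of joints whose pixel error falls under each threshold
    dists = np.array(get_distance(joints_gt, joints_pred))
    return np.array([(dists < t).sum() for t in PCK_THRES], dtype=np.float64)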