def get_final_preds_by_softmaxed_aggregation(config, batch_heatmaps, center, scale, temperature=1.0):
    batch_size = batch_heatmaps.shape[0]
    num_joints = batch_heatmaps.shape[1]
    height = batch_heatmaps.shape[2]
    width = batch_heatmaps.shape[3]

    heatmaps_reshaped = batch_heatmaps.reshape((batch_size, num_joints, -1))
    maxvals = np.amax(heatmaps_reshaped, 2)
    maxvals = maxvals.reshape((batch_size, num_joints, 1))

    # sm_vals = softmax(heatmaps_reshaped, axis=2)
    sm_vals = gumbel_softmax(heatmaps_reshaped, axis=2, t=temperature)
    sm_vals = sm_vals.reshape((batch_size, num_joints, height, width))

    # Expected (x, y) coordinates under the softmaxed heatmap distribution
    hs = np.linspace(0, height - 1, height).reshape((height, 1))
    yvals = sm_vals * hs
    y = yvals.reshape((batch_size, num_joints, -1)).sum(axis=2)

    ws = np.linspace(0, width - 1, width).reshape((1, width))
    xvals = sm_vals * ws
    x = xvals.reshape((batch_size, num_joints, -1)).sum(axis=2)

    coords = np.stack([x, y], axis=2)
    preds = np.zeros_like(coords)

    # Transform back to the original image space
    for i in range(coords.shape[0]):
        preds[i] = transform_preds(coords[i], center[i], scale[i],
                                   [width, height])

    return preds, maxvals
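# `gumbel_softmax` is called above but not defined in this file. A minimal
# numpy sketch, assuming it is the standard temperature-scaled softmax with
# Gumbel(0, 1) noise added to the logits; the helper's exact behavior in the
# original codebase is an assumption.
def gumbel_softmax(logits, axis=-1, t=1.0, eps=1e-20):
    u = np.random.uniform(size=logits.shape)
    g = -np.log(-np.log(u + eps) + eps)          # Gumbel(0, 1) samples
    z = (logits + g) / t
    z = z - np.max(z, axis=axis, keepdims=True)  # numerical stability
    e = np.exp(z)
    return e / np.sum(e, axis=axis, keepdims=True)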
def decode_preds(output, center, scale, res):
    coords = get_preds(output)  # float type
    coords = coords.cpu()

    # pose-processing
    for n in range(coords.size(0)):
        for p in range(coords.size(1)):
            hm = output[n][p]
            px = int(math.floor(coords[n][p][0]))
            py = int(math.floor(coords[n][p][1]))
            if (px > 1) and (px < res[0]) and (py > 1) and (py < res[1]):
                diff = torch.Tensor([
                    hm[py - 1][px] - hm[py - 1][px - 2],
                    hm[py][px - 1] - hm[py - 2][px - 1]
                ])
                coords[n][p] += diff.sign() * .25
    coords += 0.5
    preds = coords.clone()

    # Transform back
    for i in range(coords.size(0)):
        preds[i] = transform_preds(coords[i], center[i], scale[i], res)

    if preds.dim() < 3:
        # unpack the size tuple; view(1, preds.size()) would raise a TypeError
        preds = preds.view(1, *preds.size())

    return preds
def get_final_preds(config, batch_heatmaps, center, scale):
    coords, maxvals = get_max_preds(batch_heatmaps)

    heatmap_height = batch_heatmaps.shape[2]
    heatmap_width = batch_heatmaps.shape[3]

    # post-processing: quarter-pixel shift toward the higher neighbor
    if config.TEST.POST_PROCESS:
        for n in range(coords.shape[0]):
            for p in range(coords.shape[1]):
                hm = batch_heatmaps[n][p]
                px = int(math.floor(coords[n][p][0] + 0.5))
                py = int(math.floor(coords[n][p][1] + 0.5))
                if 1 < px < heatmap_width - 1 and 1 < py < heatmap_height - 1:
                    diff = np.array([
                        hm[py][px + 1] - hm[py][px - 1],
                        hm[py + 1][px] - hm[py - 1][px]
                    ])
                    coords[n][p] += np.sign(diff) * .25

    preds = coords.copy()

    # Transform back
    for i in range(coords.shape[0]):
        preds[i] = transform_preds(
            coords[i], center[i], scale[i], [heatmap_width, heatmap_height]
        )

    return preds, maxvals
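# For reference, the standard Simple Baselines / HRNet `get_max_preds` helper
# that the decoders in this file call: hard argmax over each flattened heatmap,
# unraveled into (x, y), with non-positive peaks masked out.
def get_max_preds(batch_heatmaps):
    '''
    get predictions from score maps
    heatmaps: numpy.ndarray([batch_size, num_joints, height, width])
    '''
    assert isinstance(batch_heatmaps, np.ndarray), \
        'batch_heatmaps should be numpy.ndarray'
    assert batch_heatmaps.ndim == 4, 'batch_images should be 4-ndim'

    batch_size = batch_heatmaps.shape[0]
    num_joints = batch_heatmaps.shape[1]
    width = batch_heatmaps.shape[3]
    heatmaps_reshaped = batch_heatmaps.reshape((batch_size, num_joints, -1))
    idx = np.argmax(heatmaps_reshaped, 2)
    maxvals = np.amax(heatmaps_reshaped, 2)

    maxvals = maxvals.reshape((batch_size, num_joints, 1))
    idx = idx.reshape((batch_size, num_joints, 1))

    preds = np.tile(idx, (1, 1, 2)).astype(np.float32)

    preds[:, :, 0] = (preds[:, :, 0]) % width          # column -> x
    preds[:, :, 1] = np.floor((preds[:, :, 1]) / width)  # row -> y

    pred_mask = np.tile(np.greater(maxvals, 0.0), (1, 1, 2))
    pred_mask = pred_mask.astype(np.float32)

    preds *= pred_mask
    return preds, maxvals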
def get_final_preds_using_softargmax(config, batch_heatmaps, center, scale):
    soft_argmax = SoftArgmax2D(config.MODEL.HEATMAP_SIZE[1],
                               config.MODEL.HEATMAP_SIZE[0], beta=160)
    coords, maxvals = soft_argmax(batch_heatmaps)
    # move to numpy so the post-processing and .copy() below are valid
    coords = coords.detach().cpu().numpy()
    maxvals = maxvals.detach().cpu().numpy()

    heatmap_height = batch_heatmaps.shape[2]
    heatmap_width = batch_heatmaps.shape[3]
    batch_heatmaps = batch_heatmaps.cpu().numpy()

    # post-processing
    if config.TEST.POST_PROCESS:
        for n in range(coords.shape[0]):
            for p in range(coords.shape[1]):
                hm = batch_heatmaps[n][p]
                px = int(math.floor(coords[n][p][0] + 0.5))
                py = int(math.floor(coords[n][p][1] + 0.5))
                if 1 < px < heatmap_width - 1 and 1 < py < heatmap_height - 1:
                    diff = np.array([
                        hm[py][px + 1] - hm[py][px - 1],
                        hm[py + 1][px] - hm[py - 1][px]
                    ])
                    coords[n][p] += np.sign(diff) * .25

    preds = coords.copy()

    # Transform back
    for i in range(coords.shape[0]):
        preds[i] = transform_preds(coords[i], center[i], scale[i],
                                   [heatmap_width, heatmap_height])

    return preds, maxvals
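# `SoftArgmax2D` is not defined in this file. A minimal sketch matching the
# (height, width, beta) call above: a softmax over the flattened heatmap,
# sharpened by `beta`, followed by the expected (x, y) coordinate under that
# distribution. The module used by the original code may differ in detail.
class SoftArgmax2D(nn.Module):
    def __init__(self, height, width, beta=100.0):
        super().__init__()
        self.beta = beta
        ys, xs = torch.meshgrid(
            torch.arange(height, dtype=torch.float32),
            torch.arange(width, dtype=torch.float32), indexing='ij')
        self.register_buffer('xs', xs.reshape(-1))
        self.register_buffer('ys', ys.reshape(-1))

    def forward(self, heatmaps):
        n, c, h, w = heatmaps.shape
        flat = heatmaps.reshape(n, c, -1)
        probs = F.softmax(self.beta * flat, dim=-1)
        x = (probs * self.xs).sum(-1)   # expected column index
        y = (probs * self.ys).sum(-1)   # expected row index
        maxvals, _ = flat.max(dim=-1)
        return torch.stack([x, y], dim=-1), maxvals.unsqueeze(-1)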
def final_preds(output, center, scale, res):
    coords = get_preds(output)  # BxPx2, float type

    # pose-processing (slightly shifts unstable coordinates; gives a small
    # accuracy improvement)
    for b in range(coords.size(0)):
        for p in range(coords.size(1)):
            hm = output[b][p]  # 64x64
            px = int(math.floor(coords[b][p][0]))
            py = int(math.floor(coords[b][p][1]))
            if 1 < px < res[0] - 1 and 1 < py < res[1] - 1:
                # i.e. [hm[y][x+1] - hm[y][x-1], hm[y+1][x] - hm[y-1][x]]
                diff = torch.Tensor([
                    hm[py][px + 1] - hm[py][px - 1],
                    hm[py + 1][px] - hm[py - 1][px]
                ]).to(output.device)
                coords[b][p] += diff.sign() * .25  # shift by +0.25 or -0.25

    preds = coords.clone()

    # Transform back
    for i in range(coords.size(0)):  # batch
        preds[i] = transform_preds(coords[i], center[i], scale[i], res)  # BxPx2

    if preds.dim() < 3:
        # unpack the size tuple; view(1, preds.size()) would raise a TypeError
        preds = preds.view(1, *preds.size())

    return preds
def heatmap2coord(heatmap, center, scale, k=9):
    N, C, H, W = heatmap.shape
    # Take the top-k peaks per heatmap and blend them by their softmaxed scores
    score, index = heatmap.view(N, C, 1, H * W).topk(k, dim=-1)
    coord = torch.cat([index % W, index // W], dim=2).float()
    coord = (coord * F.softmax(score, dim=-1)).sum(-1)

    preds = coord.cpu().numpy()
    for i in range(len(coord)):
        preds[i] = transform_preds(preds[i], center[i], scale[i], [W, H])

    return preds, score[..., 0].cpu().numpy()
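# Hypothetical usage sketch for `heatmap2coord`. All shapes and values below
# are illustrative assumptions, and it presumes `transform_preds` (and its
# affine helpers) are importable:
#
#   heatmaps = torch.rand(2, 17, 64, 48)        # (N, C, H, W)
#   centers = np.tile([128.0, 128.0], (2, 1))   # per-sample bbox centers
#   scales = np.tile([1.0, 1.0], (2, 1))        # per-sample bbox scales
#   preds, scores = heatmap2coord(heatmaps, centers, scales, k=9)
#   # preds: (2, 17, 2) image-space coords; scores: (2, 17, 1) peak scores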
def get_final_preds(config, hm, center, scale, mode="DAEC"):
    """ this function calculates maximum coordinates of heatmap """
    mode = config.TEST.DECODE_MODE  # the config overrides the `mode` argument
    assert mode in ["STANDARD", "SHIFTING", "DARK", "DAEC"]

    coords, maxvals = get_max_preds(hm)
    heatmap_height = hm.shape[2]
    heatmap_width = hm.shape[3]

    # post-processing
    if mode in ["SHIFTING", "DARK", "DAEC"]:
        if mode == "SHIFTING":
            # Suppress the top peak, locate the second peak, then shift a
            # quarter pixel toward it.
            for n in range(coords.shape[0]):
                for p in range(coords.shape[1]):
                    y, x = coords[n, p]
                    hm[n, p, int(x), int(y)] = 1e-10
            coords_2nd, _ = get_max_preds(hm)
            for n in range(coords.shape[0]):
                for p in range(coords.shape[1]):
                    y, x = coords[n, p]
                    y2, x2 = coords_2nd[n, p]
                    dist = np.sqrt((y - y2) * (y - y2) + (x - x2) * (x - x2))
                    y = y + 0.25 * (y2 - y) / dist
                    x = x + 0.25 * (x2 - x) / dist
                    coords[n, p] = y, x

        if mode == "DARK":
            hm = gaussian_blur(hm, config.TEST.BLUR_KERNEL)
            hm = np.maximum(hm, 1e-10)
            hm = np.log(hm)
            for n in range(coords.shape[0]):
                for p in range(coords.shape[1]):
                    coords[n, p] = taylor(hm[n][p], coords[n][p])

        if mode == "DAEC":
            hm = np.maximum(hm, 1e-10)
            for n in range(coords.shape[0]):
                for p in range(coords.shape[1]):
                    x, y = coords[n, p]
                    heat = hm[n, p]
                    x, y = calibrate_coord_with_DAEC(int(x), int(y), heat, config)
                    coords[n, p] = x, y

    preds = coords.copy()

    # Transform back
    for i in range(coords.shape[0]):
        preds[i] = transform_preds(coords[i], center[i], scale[i],
                                   [heatmap_width, heatmap_height])

    return preds, maxvals
def get_final_points(coords, center, scale, rot, scoremap_width, scoremap_height):
    preds = coords.copy()
    # scoremaps are at 1/4 of the input resolution, hence the factor of 4
    for i in range(coords.shape[0]):
        preds[i] = transform_preds(coords[i], center[i], scale[i], rot,
                                   [scoremap_width * 4, scoremap_height * 4])
    return preds
def get_original_gts(config, output, center, scale):
    heatmap_height = config.MODEL.HEATMAP_SIZE[1]
    heatmap_width = config.MODEL.HEATMAP_SIZE[0]

    coords, maxvals = output, 1
    gts = coords.copy()

    # Transform back
    for i in range(coords.shape[0]):
        gts[i] = transform_preds(coords[i], center[i], scale[i],
                                 [heatmap_width, heatmap_height])

    return gts
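# `transform_preds` is used throughout this file but not defined in it. A
# sketch of the canonical HRNet / Simple Baselines version: map heatmap
# coordinates back to the original image through the inverse of the crop
# affine transform. `get_affine_transform` (not reproduced here) builds the
# 2x3 matrix from the bbox center, scale, and rotation; note that
# get_final_points above calls a rotation-aware variant with an extra `rot`
# argument, which may differ from this sketch.
def transform_preds(coords, center, scale, output_size):
    target_coords = np.zeros(coords.shape)
    trans = get_affine_transform(center, scale, 0, output_size, inv=1)
    for p in range(coords.shape[0]):
        target_coords[p, 0:2] = affine_transform(coords[p, 0:2], trans)
    return target_coords


def affine_transform(pt, t):
    new_pt = np.array([pt[0], pt[1], 1.]).T
    new_pt = np.dot(t, new_pt)
    return new_pt[:2]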
def get_final_preds(config, batch_heatmaps, center, scale):
    coords, maxvals = get_max_preds(batch_heatmaps)

    heatmap_height = batch_heatmaps.shape[2]
    heatmap_width = batch_heatmaps.shape[3]

    # post-processing
    if config.TEST.POST_PROCESS:
        for n in range(coords.shape[0]):
            for p in range(coords.shape[1]):
                hm = batch_heatmaps[n][p]
                px = int(math.floor(coords[n][p][0] + 0.5))
                py = int(math.floor(coords[n][p][1] + 0.5))
                if 1 < px < heatmap_width - 1 and 1 < py < heatmap_height - 1:
                    diff = np.array([
                        hm[py][px + 1] - hm[py][px - 1],
                        hm[py + 1][px] - hm[py - 1][px]
                    ])
                    coords[n][p] += np.sign(diff) * .25

    preds = coords.copy()

    # center/scale may arrive as per-view lists (multiview) or as tensors
    if not isinstance(center, (list, tuple)):
        center = center.cpu().numpy()
        scale = scale.cpu().numpy()

    # Transform back
    for i in range(coords.shape[0]):
        preds[i] = transform_preds(coords[i], center[i], scale[i],
                                   [heatmap_width, heatmap_height])

    return preds, maxvals
def get_final_preds(config, output, center, scale, coord_heatmaps=None):
    heatmap_height = config.MODEL.HEATMAP_SIZE[1]
    heatmap_width = config.MODEL.HEATMAP_SIZE[0]

    if config.MODEL.TARGET_TYPE == 'gaussian':
        batch_heatmaps = output
        coords, maxvals = get_max_preds(batch_heatmaps)
        # post-processing
        if config.TEST.POST_PROCESS:
            for n in range(coords.shape[0]):
                for p in range(coords.shape[1]):
                    hm = batch_heatmaps[n][p]
                    px = int(math.floor(coords[n][p][0] + 0.5))
                    py = int(math.floor(coords[n][p][1] + 0.5))
                    if 1 < px < heatmap_width - 1 and 1 < py < heatmap_height - 1:
                        diff = np.array([
                            hm[py][px + 1] - hm[py][px - 1],
                            hm[py + 1][px] - hm[py - 1][px]
                        ])
                        coords[n][p] += np.sign(diff) * .25
    elif config.MODEL.TARGET_TYPE == 'coordinate':
        coords = output
        batch_size, num_kpoints, _ = coords.shape
        # Read confidences off the heatmaps at the rounded predicted coords
        # (np.int is deprecated; use the builtin int)
        idx = np.round(coords.reshape(-1, 2)).astype(int)
        coord_heatmaps = coord_heatmaps.reshape(-1, heatmap_height, heatmap_width)
        maxvals = []
        for i, heatmap in enumerate(coord_heatmaps):
            maxvals.append(heatmap[idx[i][1], idx[i][0]])
        maxvals = np.array(maxvals).reshape(batch_size, num_kpoints, 1)

    preds = coords.copy()

    # Transform back
    for i in range(coords.shape[0]):
        preds[i] = transform_preds(coords[i], center[i], scale[i],
                                   [heatmap_width, heatmap_height])

    return preds, maxvals
def get_final_preds(config, hm, center, scale):
    coords, maxvals = get_max_preds(hm)
    heatmap_height = hm.shape[2]
    heatmap_width = hm.shape[3]

    # post-processing (DARK): blur, take the log, then refine each peak with
    # a second-order Taylor expansion
    hm = gaussian_blur(hm, config.TEST.BLUR_KERNEL)
    hm = np.maximum(hm, 1e-10)
    hm = np.log(hm)
    for n in range(coords.shape[0]):
        for p in range(coords.shape[1]):
            coords[n, p] = taylor(hm[n][p], coords[n][p])

    preds = coords.copy()

    # Transform back
    for i in range(coords.shape[0]):
        preds[i] = transform_preds(coords[i], center[i], scale[i],
                                   [heatmap_width, heatmap_height])

    return preds, maxvals
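# `taylor` is the DARK sub-pixel refinement step: around the integer peak of
# the log-heatmap, solve for the offset that zeroes the gradient of a
# second-order Taylor expansion, i.e. offset = -H^{-1} d. A sketch following
# the reference DARK implementation, with finite differences for the
# derivative d and Hessian H:
def taylor(hm, coord):
    heatmap_height = hm.shape[0]
    heatmap_width = hm.shape[1]
    px = int(coord[0])
    py = int(coord[1])
    if 1 < px < heatmap_width - 2 and 1 < py < heatmap_height - 2:
        dx = 0.5 * (hm[py][px + 1] - hm[py][px - 1])
        dy = 0.5 * (hm[py + 1][px] - hm[py - 1][px])
        dxx = 0.25 * (hm[py][px + 2] - 2 * hm[py][px] + hm[py][px - 2])
        dxy = 0.25 * (hm[py + 1][px + 1] - hm[py - 1][px + 1]
                      - hm[py + 1][px - 1] + hm[py - 1][px - 1])
        dyy = 0.25 * (hm[py + 2][px] - 2 * hm[py][px] + hm[py - 2][px])
        derivative = np.array([[dx], [dy]])
        hessian = np.array([[dxx, dxy], [dxy, dyy]])
        if dxx * dyy - dxy ** 2 != 0:  # Hessian must be invertible
            offset = -np.linalg.solve(hessian, derivative)
            coord += np.squeeze(offset.T, axis=0)
    return coord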
def get_final_preds_match(config, outputs, center, scale, flip_pairs=None):
    pred_logits = outputs['pred_logits'].detach().cpu()
    pred_coords = outputs['pred_coords'].detach().cpu()

    num_joints = pred_logits.shape[-1] - 1

    if config.TEST.INCLUDE_BG_LOGIT:
        prob = F.softmax(pred_logits, dim=-1)[..., :-1]
    else:
        prob = F.softmax(pred_logits[..., :-1], dim=-1)

    score_holder = []
    coord_holder = []
    orig_coord = []
    for b, C in enumerate(prob):
        # Hungarian matching on the cost matrix: [17, N]
        _, query_ind = linear_sum_assignment(-C.transpose(0, 1))
        score = prob[b, query_ind, list(np.arange(num_joints))][..., None].numpy()
        pred_raw = pred_coords[b, query_ind].numpy()

        if flip_pairs is not None:
            pred_raw, score = fliplr_joints(pred_raw, score, 1, flip_pairs,
                                            pixel_align=False, is_vis_logit=True)

        # scale to the whole patch
        pred_raw *= np.array(config.MODEL.IMAGE_SIZE)

        # transform back w.r.t. the entire img
        pred = transform_preds(pred_raw, center[b], scale[b],
                               config.MODEL.IMAGE_SIZE)
        orig_coord.append(pred_raw)
        score_holder.append(score)
        coord_holder.append(pred)

    matched_score = np.stack(score_holder)
    matched_coord = np.stack(coord_holder)

    return matched_coord, matched_score, np.stack(orig_coord)
def get_final_preds(config, batch_heatmaps, center, scale):
    heatmap_height = batch_heatmaps.shape[2]
    heatmap_width = batch_heatmaps.shape[3]

    if config.MODEL.TARGET_TYPE == 'gaussian':
        coords, maxvals = get_max_preds(batch_heatmaps)
        if config.TEST.POST_PROCESS:
            coords = post(coords, batch_heatmaps)
    elif config.MODEL.TARGET_TYPE == 'offset':
        net_output = batch_heatmaps.copy()
        kps_pos_distance_x = config.LOSS.KPD
        kps_pos_distance_y = config.LOSS.KPD
        batch_heatmaps = net_output[:, ::3, :]
        offset_x = net_output[:, 1::3, :] * kps_pos_distance_x
        offset_y = net_output[:, 2::3, :] * kps_pos_distance_y
        for i in range(batch_heatmaps.shape[0]):
            for j in range(batch_heatmaps.shape[1]):
                batch_heatmaps[i, j, :, :] = cv2.GaussianBlur(
                    batch_heatmaps[i, j, :, :], (15, 15), 0)
                offset_x[i, j, :, :] = cv2.GaussianBlur(
                    offset_x[i, j, :, :], (7, 7), 0)
                offset_y[i, j, :, :] = cv2.GaussianBlur(
                    offset_y[i, j, :, :], (7, 7), 0)
        coords, maxvals = get_max_preds(batch_heatmaps)
        # Add the regressed sub-pixel offsets at each argmax location
        for n in range(coords.shape[0]):
            for p in range(coords.shape[1]):
                px = int(coords[n][p][0])
                py = int(coords[n][p][1])
                coords[n][p][0] += offset_x[n, p, py, px]
                coords[n][p][1] += offset_y[n, p, py, px]

    preds = coords.copy()
    preds_in_input_space = preds.copy()
    # Map heatmap coordinates to input-image coordinates (4x upsampling)
    preds_in_input_space[:, :, 0] = \
        preds_in_input_space[:, :, 0] / (heatmap_width - 1.0) * (4 * heatmap_width - 1.0)
    preds_in_input_space[:, :, 1] = \
        preds_in_input_space[:, :, 1] / (heatmap_height - 1.0) * (4 * heatmap_height - 1.0)

    # Transform back
    for i in range(coords.shape[0]):
        preds[i] = transform_preds(
            coords[i], center[i], scale[i], [heatmap_width, heatmap_height]
        )

    return preds, maxvals, preds_in_input_space
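# Channel layout assumed by the 'offset' branch above: the network emits three
# channels per keypoint k in an (N, 3*K, H, W) tensor,
#   channel 3k   -> heatmap,
#   channel 3k+1 -> x-offset, normalized by LOSS.KPD,
#   channel 3k+2 -> y-offset, normalized by LOSS.KPD,
# which is why the slices ::3, 1::3, and 2::3 are taken before decoding.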
def get_final_preds(config, batch_heatmaps, center, scale):
    heatmap_height = batch_heatmaps.shape[2]
    heatmap_width = batch_heatmaps.shape[3]

    # No quarter-pixel post-processing in this variant: take the hard argmax
    # unconditionally so preds/maxval are always defined.
    preds, maxval = get_max_preds(batch_heatmaps)

    # Transform back
    for i in range(preds.shape[0]):
        preds[i] = transform_preds(preds[i], center[i], scale[i],
                                   [heatmap_width, heatmap_height])

    return preds, maxval
def run_evaluation(model, dataset_name, dataset, result_file,
                   batch_size=32, img_res=224, num_workers=32,
                   shuffle=False, log_freq=50, options=None):
    """Run evaluation on the datasets and metrics we report in the paper. """

    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

    # Transfer model to the GPU
    model.to(device)

    # Load SMPL model
    smpl_neutral = SMPL(path_config.SMPL_MODEL_DIR,
                        create_transl=False).to(device)
    smpl_male = SMPL(path_config.SMPL_MODEL_DIR,
                     gender='male',
                     create_transl=False).to(device)
    smpl_female = SMPL(path_config.SMPL_MODEL_DIR,
                       gender='female',
                       create_transl=False).to(device)

    renderer = PartRenderer()

    # Regressor for H36m joints
    J_regressor = torch.from_numpy(np.load(path_config.JOINT_REGRESSOR_H36M)).float()

    save_results = result_file is not None
    # Disable shuffling if you want to save the results
    if save_results:
        shuffle = False

    # Create dataloader for the dataset
    data_loader = DataLoader(dataset, batch_size=batch_size,
                             shuffle=shuffle, num_workers=num_workers)

    fits_dict = None

    # Pose metrics
    # MPJPE and Reconstruction error for the non-parametric and parametric shapes
    mpjpe = np.zeros(len(dataset))
    recon_err = np.zeros(len(dataset))
    mpjpe_smpl = np.zeros(len(dataset))
    recon_err_smpl = np.zeros(len(dataset))

    # Store SMPL parameters
    smpl_pose = np.zeros((len(dataset), 72))
    smpl_betas = np.zeros((len(dataset), 10))
    smpl_camera = np.zeros((len(dataset), 3))
    pred_joints = np.zeros((len(dataset), 17, 3))

    # joint_mapper_coco = constants.H36M_TO_JCOCO
    joint_mapper_gt = constants.J24_TO_JCOCO

    focal_length = 5000
    num_joints = 17
    num_samples = len(dataset)
    print('dataset length: {}'.format(num_samples))
    all_preds = np.zeros((num_samples, num_joints, 3), dtype=np.float32)
    all_boxes = np.zeros((num_samples, 6))
    image_path = []
    filenames = []
    imgnums = []
    idx = 0

    with torch.no_grad():
        for step, batch in enumerate(tqdm(data_loader, desc='Eval', total=len(data_loader))):
            if len(options.vis_imname) > 0:
                imgnames = [i_n.split('/')[-1] for i_n in batch['imgname']]
                name_hit = False
                for i_n in imgnames:
                    if options.vis_imname in i_n:
                        name_hit = True
                        print('vis: ' + i_n)
                if not name_hit:
                    continue

            images = batch['img'].to(device)

            scale = batch['scale'].numpy()
            center = batch['center'].numpy()

            num_images = images.size(0)

            gt_keypoints_2d = batch['keypoints']  # 2D keypoints
            # De-normalize 2D keypoints from [-1,1] to pixel space
            gt_keypoints_2d_orig = gt_keypoints_2d.clone()
            gt_keypoints_2d_orig[:, :, :-1] = \
                0.5 * img_res * (gt_keypoints_2d_orig[:, :, :-1] + 1)

            if options.regressor == 'hmr':
                pred_rotmat, pred_betas, pred_camera = model(images)
                # torch.Size([32, 24, 3, 3]) torch.Size([32, 10]) torch.Size([32, 3])
            elif options.regressor == 'pymaf_net':
                preds_dict, _ = model(images)
                pred_rotmat = preds_dict['smpl_out'][-1]['rotmat'].contiguous().view(-1, 24, 3, 3)
                pred_betas = preds_dict['smpl_out'][-1]['theta'][:, 3:13].contiguous()
                pred_camera = preds_dict['smpl_out'][-1]['theta'][:, :3].contiguous()

            pred_output = smpl_neutral(betas=pred_betas,
                                       body_pose=pred_rotmat[:, 1:],
                                       global_orient=pred_rotmat[:, 0].unsqueeze(1),
                                       pose2rot=False)
            # pred_vertices = pred_output.vertices
            pred_J24 = pred_output.joints[:, -24:]
            pred_JCOCO = pred_J24[:, constants.J24_TO_JCOCO]

            # Convert Weak Perspective Camera [s, tx, ty] to camera translation
            # [tx, ty, tz] in 3D given the bounding box size.
            # This camera translation can be used in a full perspective projection
            pred_cam_t = torch.stack([
                pred_camera[:, 1], pred_camera[:, 2],
                2 * constants.FOCAL_LENGTH / (img_res * pred_camera[:, 0] + 1e-9)
            ], dim=-1)

            camera_center = torch.zeros(len(pred_JCOCO), 2, device=pred_camera.device)
            pred_keypoints_2d = perspective_projection(
                pred_JCOCO,
                rotation=torch.eye(3, device=pred_camera.device).unsqueeze(0).expand(len(pred_JCOCO), -1, -1),
                translation=pred_cam_t,
                focal_length=constants.FOCAL_LENGTH,
                camera_center=camera_center)

            coords = pred_keypoints_2d + (img_res / 2.)
            coords = coords.cpu().numpy()

            gt_keypoints_coco = gt_keypoints_2d_orig[:, -24:][:, constants.J24_TO_JCOCO]
            vert_errors_batch = []
            for i, (gt2d, pred2d) in enumerate(zip(gt_keypoints_coco.cpu().numpy(), coords.copy())):
                vert_error = np.sqrt(np.sum((gt2d[:, :2] - pred2d[:, :2]) ** 2, axis=1))
                vert_error *= gt2d[:, 2]
                vert_mean_error = np.sum(vert_error) / np.sum(gt2d[:, 2] > 0)
                vert_errors_batch.append(10 * vert_mean_error)

            if options.vis_demo:
                imgnames = [i_n.split('/')[-1] for i_n in batch['imgname']]

                if options.regressor == 'hmr':
                    iuv_pred = None

                images_vis = images * torch.tensor([0.229, 0.224, 0.225],
                                                   device=images.device).reshape(1, 3, 1, 1)
                images_vis = images_vis + torch.tensor([0.485, 0.456, 0.406],
                                                       device=images.device).reshape(1, 3, 1, 1)
                vis_smpl_iuv(images_vis.cpu().numpy(),
                             pred_camera.cpu().numpy(),
                             pred_output.vertices.cpu().numpy(),
                             smpl_neutral.faces,
                             iuv_pred,
                             vert_errors_batch,
                             imgnames,
                             os.path.join('./notebooks/output/demo_results',
                                          dataset_name,
                                          options.checkpoint.split('/')[-3]),
                             options)

            preds = coords.copy()
            scale_ = np.array([scale, scale]).transpose()

            # Transform back
            for i in range(coords.shape[0]):
                preds[i] = transform_preds(coords[i], center[i], scale_[i],
                                           [img_res, img_res])

            all_preds[idx:idx + num_images, :, 0:2] = preds[:, :, 0:2]
            all_preds[idx:idx + num_images, :, 2:3] = 1.
            all_boxes[idx:idx + num_images, 5] = 1.
            image_path.extend(batch['imgname'])

            idx += num_images

    if len(options.vis_imname) > 0:
        exit()

    if args.checkpoint is None or 'model_checkpoint.pt' in args.checkpoint:
        ckp_name = 'spin_model'
    else:
        ckp_name = args.checkpoint.split('/')
        ckp_name = ckp_name[2].split('_')[1] + '_' + ckp_name[-1].split('.')[0]
    name_values, perf_indicator = dataset.evaluate(cfg, all_preds,
                                                   options.output_dir,
                                                   all_boxes, image_path,
                                                   ckp_name, filenames, imgnums)

    model_name = options.regressor
    if isinstance(name_values, list):
        for name_value in name_values:
            _print_name_value(name_value, model_name)
    else:
        _print_name_value(name_values, model_name)

    # Save reconstructions to a file for further processing
    if save_results:
        np.savez(result_file, pred_joints=pred_joints,
                 pose=smpl_pose, betas=smpl_betas, camera=smpl_camera)
# Decode integral (soft-argmax) predictions and evaluate on the test set
integral_preds = np.stack((w_coordinates, h_coordinates), axis=2)  # [8860, 16, 2]

test_dataset = eval('dataset.' + config.DATASET.TEST_DATASET)(
    config, config.DATASET.TEST_SUBSET, False)

# get center and scale
center = []
scale = []
for items in test_dataset.grouping:
    for item in items:
        center.append(np.array(test_dataset.db[item]['center']))
        scale.append(np.array(test_dataset.db[item]['scale']))
assert len(center) == len(integral_preds)

all_preds = np.zeros_like(integral_preds)  # [8860, 16, 2]

# Transform back
for i in range(all_preds.shape[0]):
    all_preds[i] = transform_preds(integral_preds[i], center[i], scale[i],
                                   [heatmaps.shape[3], heatmaps.shape[2]])

name_value, perf_indicator = test_dataset.evaluate(all_preds, None)

names = name_value.keys()
values = name_value.values()
num_values = len(name_value)
print('| Arch ' + ' '.join(['| {}'.format(name) for name in names]) + ' |')
print('|---' * (num_values + 1) + '|')
print('| ' + 'multiview_pose_resnet50X256' + ' ' +
      ' '.join(['| {:.3f}'.format(value) for value in values]) + ' |')
def run_evaluation(model, dataset, result_file,
                   batch_size=32, img_res=224, num_workers=32,
                   shuffle=False, options=None):
    """Run evaluation on the datasets and metrics we report in the paper. """

    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

    # Transfer model to the GPU
    model.to(device)

    # Load SMPL model
    smpl_neutral = SMPL(path_config.SMPL_MODEL_DIR,
                        create_transl=False).to(device)

    save_results = result_file is not None
    # Disable shuffling if you want to save the results
    if save_results:
        shuffle = False

    # Create dataloader for the dataset
    data_loader = DataLoader(dataset, batch_size=batch_size,
                             shuffle=shuffle, num_workers=num_workers)

    # Store SMPL parameters
    smpl_pose = np.zeros((len(dataset), 72))
    smpl_betas = np.zeros((len(dataset), 10))
    smpl_camera = np.zeros((len(dataset), 3))
    pred_joints = np.zeros((len(dataset), 17, 3))

    num_joints = 17
    num_samples = len(dataset)
    print('dataset length: {}'.format(num_samples))
    all_preds = np.zeros((num_samples, num_joints, 3), dtype=np.float32)
    all_boxes = np.zeros((num_samples, 6))
    image_path = []
    filenames = []
    imgnums = []
    idx = 0

    with torch.no_grad():
        end = time.time()
        for step, batch in enumerate(tqdm(data_loader, desc='Eval', total=len(data_loader))):
            images = batch['img'].to(device)

            scale = batch['scale'].numpy()
            center = batch['center'].numpy()

            num_images = images.size(0)

            gt_keypoints_2d = batch['keypoints']  # 2D keypoints
            # De-normalize 2D keypoints from [-1,1] to pixel space
            gt_keypoints_2d_orig = gt_keypoints_2d.clone()
            gt_keypoints_2d_orig[:, :, :-1] = \
                0.5 * img_res * (gt_keypoints_2d_orig[:, :, :-1] + 1)

            if options.regressor == 'hmr':
                pred_rotmat, pred_betas, pred_camera = model(images)
            elif options.regressor == 'danet':
                danet_pred_dict = model.infer_net(images)
                para_pred = danet_pred_dict['para']
                pred_camera = para_pred[:, 0:3].contiguous()
                pred_betas = para_pred[:, 3:13].contiguous()
                pred_rotmat = para_pred[:, 13:].contiguous().view(-1, 24, 3, 3)

            pred_output = smpl_neutral(betas=pred_betas,
                                       body_pose=pred_rotmat[:, 1:],
                                       global_orient=pred_rotmat[:, 0].unsqueeze(1),
                                       pose2rot=False)
            # pred_vertices = pred_output.vertices
            pred_J24 = pred_output.joints[:, -24:]
            pred_JCOCO = pred_J24[:, constants.J24_TO_JCOCO]

            # Convert Weak Perspective Camera [s, tx, ty] to camera translation
            # [tx, ty, tz] in 3D given the bounding box size.
            # This camera translation can be used in a full perspective projection
            pred_cam_t = torch.stack([
                pred_camera[:, 1], pred_camera[:, 2],
                2 * constants.FOCAL_LENGTH / (img_res * pred_camera[:, 0] + 1e-9)
            ], dim=-1)

            camera_center = torch.zeros(len(pred_JCOCO), 2, device=pred_camera.device)
            pred_keypoints_2d = perspective_projection(
                pred_JCOCO,
                rotation=torch.eye(3, device=pred_camera.device).unsqueeze(0).expand(len(pred_JCOCO), -1, -1),
                translation=pred_cam_t,
                focal_length=constants.FOCAL_LENGTH,
                camera_center=camera_center)

            coords = pred_keypoints_2d + (img_res / 2.)
            coords = coords.cpu().numpy()

            # Normalize keypoints to [-1,1]
            # pred_keypoints_2d = pred_keypoints_2d / (img_res / 2.)

            gt_keypoints_coco = gt_keypoints_2d_orig[:, -24:][:, constants.J24_TO_JCOCO]

            preds = coords.copy()
            scale_ = np.array([scale, scale]).transpose()

            # Transform back
            for i in range(coords.shape[0]):
                preds[i] = transform_preds(coords[i], center[i], scale_[i],
                                           [img_res, img_res])

            all_preds[idx:idx + num_images, :, 0:2] = preds[:, :, 0:2]
            all_preds[idx:idx + num_images, :, 2:3] = 1.
            # double check this all_boxes parts
            all_boxes[idx:idx + num_images, 0:2] = center[:, 0:2]
            all_boxes[idx:idx + num_images, 2:4] = scale_[:, 0:2]
            all_boxes[idx:idx + num_images, 4] = np.prod(scale_ * 200, 1)
            all_boxes[idx:idx + num_images, 5] = 1.
            image_path.extend(batch['imgname'])

            idx += num_images

    ckp_name = options.regressor
    name_values, perf_indicator = dataset.evaluate(all_preds, options.output_dir,
                                                   all_boxes, image_path,
                                                   ckp_name, filenames, imgnums)

    model_name = options.regressor
    if isinstance(name_values, list):
        for name_value in name_values:
            _print_name_value(name_value, model_name)
    else:
        _print_name_value(name_values, model_name)

    # Save reconstructions to a file for further processing
    if save_results:
        np.savez(result_file, pred_joints=pred_joints,
                 pose=smpl_pose, betas=smpl_betas, camera=smpl_camera)