def __init__(self, prior_weight=1, mask_weight=1, step_size=1e-2,
             num_iters=50, use_mask=False, renderer=None,
             device=torch.device('cpu')):
    """
    Set up the optimizer: store hyperparameters and load the bird model.

    Args:
        prior_weight: value stored as ``self.prior_weight``.
        mask_weight: value stored as ``self.mask_weight``.
        step_size: value stored as ``self.step_size``.
        num_iters: value stored as ``self.num_iters``.
        use_mask: whether masks are used; the renderer is only kept
            when this is true.
        renderer: renderer object kept as ``self.renderer`` when
            ``use_mask`` is true (presumably a silhouette renderer --
            confirm against the caller).
        device: device handed to ``bird_model`` and stored on the instance.
    """
    # Store optimization hyperparameters
    self.device = device
    self.step_size = step_size
    self.num_iters = num_iters
    self.prior_weight = prior_weight
    self.mask_weight = mask_weight
    self.use_mask = use_mask

    # Always define the attribute so that instances built without masks
    # expose ``renderer`` as None instead of lacking the attribute entirely.
    self.renderer = renderer if use_mask else None

    # Load Bird Mesh Model and prior
    self.bird = bird_model(device=device)
    self.faces = torch.tensor(self.bird.dd['F'])
    self.p_m = self.bird.p_m
    self.b_m = self.bird.b_m
    # The inverses of the model's p_cov / b_cov are computed once here.
    self.p_cov_in = self.bird.p_cov.inverse()
    self.b_cov_in = self.bird.b_cov.inverse()
def __init__(self, lim_weight=1, prior_weight=1, bone_weight=1,
             step_size=1e-2, num_iters=100, device=torch.device('cpu'),
             mesh='bird_eccv.json'):
    """
    Record the optimization settings and load the bird mesh model.

    Args:
        lim_weight: value stored as ``self.lim_weight``.
        prior_weight: value stored as ``self.prior_weight``.
        bone_weight: value stored as ``self.bone_weight``.
        step_size: value stored as ``self.step_size``.
        num_iters: value stored as ``self.num_iters``.
        device: device handed to ``bird_model`` and stored on the instance.
        mesh: mesh argument forwarded to ``bird_model``.
    """
    # Term weights
    self.lim_weight = lim_weight
    self.prior_weight = prior_weight
    self.bone_weight = bone_weight

    # Solver settings
    self.step_size = step_size
    self.num_iters = num_iters
    self.device = device

    # Bird mesh model and its face array (entry 'F' of the model's dict)
    bird = bird_model(device=device, mesh=mesh)
    self.bird = bird
    self.faces = torch.tensor(bird.dd['F'])
def evaluate_crossview(device, use_mask=False):
    """
    Evaluate single-view reconstruction through cross-view validation.

    For every sample of the multiview dataset, each view is cropped,
    passed through the detector and regressor, and refined with the
    single-view optimizer. The reconstruction from each source view is then
    rigidly aligned and reprojected, and IOU / PCK are averaged over all
    non-source views. The overall averages are printed.

    Args:
        device: device used for the detector, regressor and optimizer
            (a string such as 'cpu' / 'cuda' or a ``torch.device``).
        use_mask: whether masks are used in the single-view optimization.

    Raises:
        FileNotFoundError: if an image of a sample cannot be read.
    """
    # Models and optimizer
    bird = bird_model()
    predictor = load_detector().to(device)
    regressor = load_regressor().to(device)

    # Use the function argument rather than the global ``args`` so that the
    # function also works when it is called from outside the CLI entry point.
    if use_mask:
        # Normalise through torch.device: a torch.device instance may not
        # compare equal to the plain string 'cpu'.
        if torch.device(device).type == 'cpu':
            print('Warning: using mask during optimization without GPU acceleration is very slow!')
        silhouette_renderer = base_renderer(size=256, focal=2167, device=device)
        optimizer = OptimizeSV(num_iters=100, prior_weight=1, mask_weight=1,
                               use_mask=True, renderer=silhouette_renderer,
                               device=device)
        print('Using mask for single view optimization')
    else:
        optimizer = OptimizeSV(num_iters=100, prior_weight=10, mask_weight=1,
                               use_mask=False, device=device)

    # Dataset to run on
    normalize = T.Compose([
        T.ToTensor(),
        T.Normalize(mean=[0.406, 0.456, 0.485], std=[0.225, 0.224, 0.229])
    ])
    multiview_data = Multiview_Dataset()
    renderer_list = mutils.get_renderer_list(bird.dd['F'])

    IOU = []
    PCK05 = []
    PCK10 = []

    # Run singleview reconstruction
    for i, sample in enumerate(multiview_data):
        print('Running on sample:', i + 1)
        frames = sample["frames"]
        img_filenames = sample["imgpaths"]
        keypoints = sample["keypoints"]
        masks = sample['masks']
        bboxes = sample["bboxes"]
        dilated_bboxes = dialate_boxes(bboxes)

        # Crop every view to its dilated box and resize to the network input.
        # NOTE(review): assumes one image path and one box per frame.
        imgs = []
        for img_path, box in zip(img_filenames, dilated_bboxes):
            img = cv2.imread(img_path)
            if img is None:
                # cv2.imread signals failure by returning None, not by raising.
                raise FileNotFoundError(
                    'Could not read image: {}'.format(img_path))
            img = img[box[1]:box[1] + box[3], box[0]:box[0] + box[2]]
            img = cv2.resize(img, dsize=(256, 256))
            imgs.append(normalize(img))
        imgs = torch.stack(imgs)

        with torch.no_grad():
            # Prediction
            output = predictor(imgs.to(device))
            pred_kpts, pred_mask = postprocess(output)

            # Regression
            kpts_in = pred_kpts.reshape(pred_kpts.shape[0], -1)
            mask_in = pred_mask
            p_est, b_est = regressor(kpts_in, mask_in)
            pose, tran, bone = regressor.postprocess(p_est, b_est)

        # Optimization (kept outside no_grad). Keypoints whose third channel
        # is below 0.3 are zeroed before being passed to the optimizer.
        ignored = pred_kpts[:, :, 2] < 0.3
        opt_kpts = pred_kpts.clone()
        opt_kpts[ignored] = 0
        pose_op, bone_op, _, _ = optimizer(
            pose, bone, tran,
            focal_length=2167, camera_center=128,
            keypoints=opt_kpts, masks=mask_in.squeeze(1))

        # Global rigid alignment with reconstruction from each view
        for j in range(len(frames)):
            vertex_posed, mesh_keypoints = multiview.multiview_rigid_alignment(
                bird, pose_op[[j], 3:], bone_op[[j]], keypoints, frames,
                num_iters=100, device='cpu')

            proj_masks = multiview.reproject_masks(vertex_posed, renderer_list, frames)
            proj_kpts = multiview.reproject_keypoints(mesh_keypoints, frames)

            iou = evaluate_iou(proj_masks, masks)
            pck05, pck10 = evaluate_pck(proj_kpts, keypoints, bboxes)

            # Removed jth sample to accord with the published metrics:
            # "average across all non-source views"
            iou.pop(j)
            pck05.pop(j)
            pck10.pop(j)

            IOU.append(torch.stack(iou).mean())
            PCK05.append(torch.stack(pck05).mean())
            PCK10.append(torch.stack(pck10).mean())

    avg_PCK05 = torch.mean(torch.stack(PCK05))
    avg_PCK10 = torch.mean(torch.stack(PCK10))
    avg_IOU = torch.mean(torch.stack(IOU))

    print('Average PCK05:', avg_PCK05)
    print('Average PCK10:', avg_PCK10)
    print('Average IOU:', avg_IOU)
def evaluate_singleview(root, annfile, device, use_mask=False):
    """
    Evaluate single-view reconstruction.

    Runs detector, regressor and single-view optimization over the
    dataset in batches, poses the bird model with the optimized parameters,
    reprojects keypoints and renders masks, and prints the average
    PCK05 / PCK10 / IOU against the ground truth.

    Args:
        root: dataset root forwarded to ``Cowbird_Dataset``.
        annfile: annotation file forwarded to ``Cowbird_Dataset``.
        device: device used for the detector, regressor and optimizer
            (a string such as 'cpu' / 'cuda' or a ``torch.device``).
        use_mask: whether masks are used in the single-view optimization.
    """
    # Models and optimizer
    bird = bird_model()
    predictor = load_detector().to(device)
    regressor = load_regressor().to(device)

    # Use the function argument rather than the global ``args`` so that the
    # function also works when it is called from outside the CLI entry point.
    if use_mask:
        # Normalise through torch.device: a torch.device instance may not
        # compare equal to the plain string 'cpu'.
        if torch.device(device).type == 'cpu':
            print('Warning: using mask during optimization without GPU acceleration is very slow!')
        silhouette_renderer = base_renderer(size=256, focal=2167, device=device)
        optimizer = OptimizeSV(num_iters=100, prior_weight=1, mask_weight=1,
                               use_mask=True, renderer=silhouette_renderer,
                               device=device)
        print('Using mask for single view optimization')
    else:
        optimizer = OptimizeSV(num_iters=100, prior_weight=1, mask_weight=1,
                               use_mask=False, device=device)

    # Dataset to run on
    normalize = T.Compose([
        T.ToTensor(),
        T.Normalize(mean=[0.406, 0.456, 0.485], std=[0.225, 0.224, 0.229])
    ])
    dataset = Cowbird_Dataset(root=root, annfile=annfile, scale_factor=0.25,
                              transform=normalize)
    loader = torch.utils.data.DataLoader(dataset, batch_size=30)

    # Per-batch results, concatenated after the loop
    Pose_, Tran_, Bone_ = [], [], []
    GT_kpts, GT_masks, Sizes = [], [], []

    # Run reconstruction
    for batch_no, (imgs, gt_kpts, gt_masks, meta) in enumerate(loader, start=1):
        print('Running on batch:', batch_no)

        with torch.no_grad():
            # Prediction
            output = predictor(imgs.to(device))
            pred_kpts, pred_mask = postprocess(output)

            # Regression
            kpts_in = pred_kpts.reshape(pred_kpts.shape[0], -1)
            mask_in = pred_mask
            p_est, b_est = regressor(kpts_in, mask_in)
            pose, tran, bone = regressor.postprocess(p_est, b_est)

        # Optimization (kept outside no_grad). Keypoints whose third channel
        # is below 0.3 are zeroed before being passed to the optimizer.
        ignored = pred_kpts[:, :, 2] < 0.3
        opt_kpts = pred_kpts.clone()
        opt_kpts[ignored] = 0
        pose_op, bone_op, tran_op, _ = optimizer(
            pose, bone, tran,
            focal_length=2167, camera_center=128,
            keypoints=opt_kpts, masks=mask_in.squeeze(1))

        Pose_.append(pose_op)
        Tran_.append(tran_op)
        Bone_.append(bone_op)
        GT_kpts.append(gt_kpts)
        GT_masks.append(gt_masks)
        Sizes.append(meta['size'])

    Pose_ = torch.cat(Pose_)
    Tran_ = torch.cat(Tran_)
    Bone_ = torch.cat(Bone_)
    GT_kpts = torch.cat(GT_kpts)
    GT_masks = torch.cat(GT_masks)
    Sizes = torch.cat(Sizes)

    # Render reprojected kpts and masks
    # The first three pose columns and the remaining ones are passed to
    # pose_bird as separate arguments.
    kpts_3d, vertices = pose_bird(bird, Pose_[:, :3], Pose_[:, 3:], Bone_, Tran_,
                                  pose2rot=True)
    kpts_2d = perspective_projection(kpts_3d, None, None,
                                     focal_length=2167, camera_center=128)

    faces = torch.tensor(bird.dd['F'])
    mask_renderer = Silhouette_Renderer(focal_length=2167, center=(128, 128),
                                        img_w=256, img_h=256)
    masks = [mask_renderer(verts, faces) for verts in vertices]
    masks = torch.tensor(np.stack(masks)).long()

    # Evaluation (only the first 12 projected keypoints are scored)
    PCK05, PCK10 = evaluate_pck(kpts_2d[:, :12, :], GT_kpts, size=Sizes)
    IOU = evaluate_iou(masks, GT_masks)

    avg_PCK05 = torch.mean(torch.stack(PCK05))
    avg_PCK10 = torch.mean(torch.stack(PCK10))
    avg_IOU = torch.mean(torch.stack(IOU))

    print('Average PCK05:', avg_PCK05)
    print('Average PCK10:', avg_PCK10)
    print('Average IOU:', avg_IOU)
default=0, help='Index in the dataset for example reconstruction') parser.add_argument('--use_mask', action='store_true', help='Whether masks are used in optimization') parser.add_argument('--outdir', type=str, default='examples', help='Folder for output images') if __name__ == '__main__': args = parser.parse_args() # Load model and optimizer device = 'cuda' if torch.cuda.is_available() else 'cpu' bird = bird_model() predictor = load_detector().to(device) regressor = load_regressor().to(device) print('Device used:', device) if args.use_mask: if device == 'cpu': print( 'Warning: using mask during optimization without GPU acceleration is very slow!' ) silhouette_renderer = base_renderer(size=256, focal=2167, device=device) optimizer = OptimizeSV(num_iters=100, prior_weight=1, mask_weight=1,