def main(opt):
    model = build_model(opt)
    model = model.cuda()
    weights = torch.load(opt.load_weights_path, map_location='cpu')['model_state_dict']
    utils.safe_load_weights(model, weights)
    model = model.eval()

    img_a = imageio.imread('./sample_data/imgs/cathedral_1.jpg', pilmode='RGB')
    img_b = imageio.imread('./sample_data/imgs/cathedral_2.jpg', pilmode='RGB')

    engine = SparseEngine(model, 32, mode='tile')
    t0 = time.time()
    # Match at 4 scales from 1/2 down to 1/16 resolution, keeping only
    # cycle-consistent correspondences.
    corrs = engine.cotr_corr_multiscale_with_cycle_consistency(
        img_a, img_b, np.linspace(0.5, 0.0625, 4), 1,
        max_corrs=opt.max_corrs, queries_a=None)
    t1 = time.time()

    utils.visualize_corrs(img_a, img_b, corrs)
    print(f'spent {t1 - t0} seconds for {opt.max_corrs} correspondences.')

    # Densify the sparse matches by triangulation, then warp img_b onto img_a.
    # cv2.remap samples img_b at (map_x, map_y) for every output pixel.
    dense = triangulate_corr(corrs, img_a.shape, img_b.shape)
    warped = cv2.remap(img_b,
                       dense[..., 0].astype(np.float32),
                       dense[..., 1].astype(np.float32),
                       interpolation=cv2.INTER_LINEAR,
                       borderMode=cv2.BORDER_CONSTANT)
    plt.imshow(warped / 255 * 0.5 + img_a / 255 * 0.5)
    plt.show()
def main(opt):
    model = build_model(opt)
    model = model.cuda()
    weights = torch.load(opt.load_weights_path, map_location='cpu')['model_state_dict']
    utils.safe_load_weights(model, weights)
    model = model.eval()

    img_a = imageio.imread('./sample_data/imgs/face_1.png', pilmode='RGB')
    img_b = imageio.imread('./sample_data/imgs/face_2.png', pilmode='RGB')
    queries = np.load('./sample_data/face_landmarks.npy')[0]

    # Transfer the annotated landmarks from the reference face to the target face.
    engine = SparseEngine(model, 32, mode='stretching')
    corrs = engine.cotr_corr_multiscale(img_a, img_b,
                                        np.linspace(0.5, 0.0625, 4), 1,
                                        queries_a=queries, force=False)

    f, axarr = plt.subplots(1, 2)
    axarr[0].imshow(img_a)
    axarr[0].scatter(*queries.T, s=1)
    axarr[0].title.set_text('Reference Face')
    axarr[0].axis('off')
    axarr[1].imshow(img_b)
    axarr[1].scatter(*corrs[:, 2:].T, s=1)
    axarr[1].title.set_text('Target Face')
    axarr[1].axis('off')
    plt.show()
def main(opt):
    model = build_model(opt)
    model = model.cuda()
    weights = torch.load(opt.load_weights_path, map_location='cpu')['model_state_dict']
    utils.safe_load_weights(model, weights)
    model = model.eval()

    img_a = imageio.imread('./sample_data/imgs/21526113_4379776807.jpg')
    img_b = imageio.imread('./sample_data/imgs/21126421_4537535153.jpg')
    # Precomputed DISK keypoints for each image; COTR does the matching.
    kp_a = np.load('./sample_data/21526113_4379776807.jpg.disk.kpts.npy')
    kp_b = np.load('./sample_data/21126421_4537535153.jpg.disk.kpts.npy')

    if opt.faster_infer:
        engine = FasterSparseEngine(model, 32, mode='tile')
    else:
        engine = SparseEngine(model, 32, mode='tile')

    t0 = time.time()
    corrs_a_b = engine.cotr_corr_multiscale(img_a, img_b,
                                            np.linspace(0.5, 0.0625, 4), 1,
                                            max_corrs=kp_a.shape[0],
                                            queries_a=kp_a, force=True)
    corrs_b_a = engine.cotr_corr_multiscale(img_b, img_a,
                                            np.linspace(0.5, 0.0625, 4), 1,
                                            max_corrs=kp_b.shape[0],
                                            queries_a=kp_b, force=True)
    t1 = time.time()
    print(f'COTR spent {t1 - t0} seconds.')

    # Snap each predicted location to its nearest detected keypoint.
    inds_a_b = np.argmin(distance_matrix(corrs_a_b[:, 2:], kp_b), axis=1)
    matched_a_b = np.stack([np.arange(kp_a.shape[0]), inds_a_b]).T
    inds_b_a = np.argmin(distance_matrix(corrs_b_a[:, 2:], kp_a), axis=1)
    matched_b_a = np.stack([np.arange(kp_b.shape[0]), inds_b_a]).T

    # Keep only mutual (cycle-consistent) matches.
    good = 0
    final_matches = []
    for m_ab in matched_a_b:
        for m_ba in matched_b_a:
            if (m_ab == m_ba[::-1]).all():
                good += 1
                final_matches.append(m_ab)
                break
    final_matches = np.array(final_matches)
    final_corrs = np.concatenate([kp_a[final_matches[:, 0]],
                                  kp_b[final_matches[:, 1]]], axis=1)

    # Reject outliers with a RANSAC fundamental-matrix fit.
    _, mask = cv2.findFundamentalMat(final_corrs[:, :2], final_corrs[:, 2:],
                                     cv2.FM_RANSAC,
                                     ransacReprojThreshold=5,
                                     confidence=0.999999)
    utils.visualize_corrs(img_a, img_b, final_corrs[np.where(mask[:, 0])])
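# Aside: the nested mutual-match loop above is O(N^2) in the number of
# keypoints. A hedged, vectorized sketch of the same cycle-consistency test,
# assuming the `inds_a_b` / `inds_b_a` arrays computed above (this helper is
# illustrative, not part of the repo):
def mutual_matches(inds_a_b, inds_b_a):
    idx_a = np.arange(inds_a_b.shape[0])
    # i and j are mutual iff a->b maps i to j and b->a maps j back to i.
    mutual = inds_b_a[inds_a_b] == idx_a
    return np.stack([idx_a[mutual], inds_a_b[mutual]], axis=1)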
def main(opt):
    model = build_model(opt)
    model = model.cuda()
    weights = torch.load(opt.load_weights_path, map_location='cpu')['model_state_dict']
    utils.safe_load_weights(model, weights)
    model = model.eval()

    img_a = imageio.imread('./sample_data/imgs/img_0.jpg', pilmode='RGB')
    img_b = imageio.imread('./sample_data/imgs/img_1.jpg', pilmode='RGB')

    if opt.faster_infer:
        engine = FasterSparseEngine(model, 32, mode='tile')
    else:
        engine = SparseEngine(model, 32, mode='tile')

    t0 = time.time()
    corrs = engine.cotr_corr_multiscale_with_cycle_consistency(
        img_a, img_b, np.linspace(0.5, 0.0625, 4), 1,
        max_corrs=opt.max_corrs, queries_a=None)
    t1 = time.time()
    print(f'spent {t1 - t0} seconds for {opt.max_corrs} correspondences.')

    # Back-project each correspondence to a ray through its camera center...
    camera_a = np.load('./sample_data/camera_0.npy', allow_pickle=True).item()
    camera_b = np.load('./sample_data/camera_1.npy', allow_pickle=True).item()
    center_a = camera_a['cam_center']
    center_b = camera_b['cam_center']
    rays_a = pcd_projector.PointCloudProjector.pcd_2d_to_pcd_3d_np(
        corrs[:, :2], np.ones([corrs.shape[0], 1]) * 2,
        camera_a['intrinsic'], motion=camera_a['c2w'])
    rays_b = pcd_projector.PointCloudProjector.pcd_2d_to_pcd_3d_np(
        corrs[:, 2:], np.ones([corrs.shape[0], 1]) * 2,
        camera_b['intrinsic'], motion=camera_b['c2w'])
    dir_a = rays_a - center_a
    dir_b = rays_b - center_b
    center_a = np.array([center_a] * corrs.shape[0])
    center_b = np.array([center_b] * corrs.shape[0])
    # ...then triangulate the two ray bundles into a 3D point cloud.
    points = triangulate_rays_to_pcd(center_a, dir_a, center_b, dir_b)

    # Color each point with the average of the two image samples; note the
    # (x, y) -> (row, col) flip when indexing the images.
    colors = (img_a[tuple(np.floor(corrs[:, :2]).astype(int)[:, ::-1].T)] / 255 +
              img_b[tuple(np.floor(corrs[:, 2:]).astype(int)[:, ::-1].T)] / 255) / 2
    colors = np.array(colors)

    pcd = o3d.geometry.PointCloud()
    pcd.points = o3d.utility.Vector3dVector(points)
    pcd.colors = o3d.utility.Vector3dVector(colors)
    o3d.visualization.draw_geometries([pcd])
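# Aside: `triangulate_rays_to_pcd` is the repo's helper. As a rough sketch of
# what such a routine can compute, the midpoint of the shortest segment
# between each ray pair has the standard closed form below (illustrative
# only, not the repo's implementation):
def midpoint_triangulate(c1, d1, c2, d2):
    # All inputs are (N, 3); directions need not be unit length on entry.
    d1 = d1 / np.linalg.norm(d1, axis=1, keepdims=True)
    d2 = d2 / np.linalg.norm(d2, axis=1, keepdims=True)
    w = c1 - c2
    b = np.einsum('ij,ij->i', d1, d2)   # cosine of the angle between rays
    d = np.einsum('ij,ij->i', d1, w)
    e = np.einsum('ij,ij->i', d2, w)
    denom = 1.0 - b ** 2                # -> 0 for (near-)parallel rays
    t1 = (b * e - d) / denom
    t2 = (e - b * d) / denom
    p1 = c1 + t1[:, None] * d1
    p2 = c2 + t2[:, None] * d2
    return (p1 + p2) / 2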
def main(opt):
    model = build_model(opt)
    model = model.cuda()
    weights = torch.load(opt.load_weights_path, map_location='cpu')['model_state_dict']
    utils.safe_load_weights(model, weights)
    model = model.eval()

    img_a = imageio.imread('./sample_data/imgs/paint_1.JPG', pilmode='RGB')
    img_b = imageio.imread('./sample_data/imgs/paint_2.jpg', pilmode='RGB')
    rep_img = imageio.imread('./sample_data/imgs/Meisje_met_de_parel.jpg', pilmode='RGB')
    rep_mask = np.ones(rep_img.shape[:2])

    # Four corners of the painting, annotated in img_a.
    lu_corner = [932, 1025]
    ru_corner = [2469, 901]
    lb_corner = [908, 2927]
    rb_corner = [2436, 3080]
    queries = np.array([lu_corner, ru_corner, lb_corner, rb_corner]).astype(np.float32)
    rep_coord = np.array([[0, 0],
                          [rep_img.shape[1], 0],
                          [0, rep_img.shape[0]],
                          [rep_img.shape[1], rep_img.shape[0]]]).astype(np.float32)

    # Transfer the four corners into img_b, then warp the replacement
    # painting into place with the induced homography.
    engine = SparseEngine(model, 32, mode='stretching')
    corrs = engine.cotr_corr_multiscale(img_a, img_b,
                                        np.linspace(0.5, 0.0625, 4), 1,
                                        queries_a=queries, force=True)
    T = cv2.getPerspectiveTransform(rep_coord, corrs[:, 2:].astype(np.float32))
    vmask = cv2.warpPerspective(rep_mask, T, (img_b.shape[1], img_b.shape[0])) > 0
    warped = cv2.warpPerspective(rep_img, T, (img_b.shape[1], img_b.shape[0]))
    out = warped * vmask[..., None] + img_b * (~vmask[..., None])

    f, axarr = plt.subplots(1, 4)
    axarr[0].imshow(rep_img)
    axarr[0].title.set_text('Virtual Paint')
    axarr[0].axis('off')
    axarr[1].imshow(img_a)
    axarr[1].title.set_text('Annotated Frame')
    axarr[1].axis('off')
    axarr[2].imshow(img_b)
    axarr[2].title.set_text('Target Frame')
    axarr[2].axis('off')
    axarr[3].imshow(out)
    axarr[3].title.set_text('Overlay')
    axarr[3].axis('off')
    plt.show()
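# Note: with exactly four correspondences, cv2.getPerspectiveTransform yields
# an exact homography. If more corner queries were used, the usual substitute
# would be a robust fit, e.g.:
#   T, _ = cv2.findHomography(rep_coord, corrs[:, 2:].astype(np.float32),
#                             cv2.RANSAC, 5.0)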
def __init__(self, args):
    super().__init__()
    if isinstance(args, dict):
        args = Namespace(**args)
    self.imsize = args.imsize
    self.match_threshold = args.match_threshold
    self.batch_size = args.batch_size
    self.max_corrs = args.max_corrs
    args.dim_feedforward = args.backbone_layer_dims[args.layer]
    self.model = build_model(args)
    self.model.load_state_dict(
        torch.load(args.ckpt, map_location='cpu')['model_state_dict'])
    # `self.device` is expected to be provided by the parent class.
    self.model = self.model.eval().to(self.device)
    self.name = 'COTR'
    print(f'Initialized {self.name}')
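# Hypothetical usage sketch (the key names are the ones this __init__ reads;
# the values shown are placeholders, not the repo's defaults):
#   args = {'imsize': 1024, 'match_threshold': 0.02, 'batch_size': 32,
#           'max_corrs': 1000, 'layer': 'layer3',
#           'backbone_layer_dims': {'layer3': 256},
#           'ckpt': './out/default/checkpoint.pth.tar', ...}
#   matcher = COTR(args)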
def main(opt):
    model = build_model(opt)
    model = model.cuda()
    weights = torch.load(opt.load_weights_path, map_location='cpu')['model_state_dict']
    utils.safe_load_weights(model, weights)
    model = model.eval()

    img_a = imageio.imread('./sample_data/imgs/petrzin_01.png')
    img_b = imageio.imread('./sample_data/imgs/petrzin_02.png')
    img_a_area = 1.0
    img_b_area = 1.0
    gt_corrs = np.loadtxt('./sample_data/petrzin_pts.txt')
    kp_a = gt_corrs[:, :2]
    kp_b = gt_corrs[:, 2:]

    engine = SparseEngine(model, 32, mode='tile')
    t0 = time.time()
    corrs = engine.cotr_corr_multiscale(img_a, img_b,
                                        np.linspace(0.75, 0.1, 4), 1,
                                        max_corrs=kp_a.shape[0],
                                        queries_a=kp_a, force=True,
                                        areas=[img_a_area, img_b_area])
    t1 = time.time()
    print(f'COTR spent {t1 - t0} seconds.')

    utils.visualize_corrs(img_a, img_b, corrs)
    # Overlay ground-truth points and predictions; red segments show the error.
    plt.imshow(img_b)
    plt.scatter(kp_b[:, 0], kp_b[:, 1])
    plt.scatter(corrs[:, 2], corrs[:, 3])
    plt.plot(np.stack([kp_b[:, 0], corrs[:, 2]], axis=1).T,
             np.stack([kp_b[:, 1], corrs[:, 3]], axis=1).T,
             color=[1, 0, 0])
    plt.show()
def train(opt):
    # Log the environment and GPU status for debugging cluster runs.
    pprint.pprint(dict(os.environ), width=1)
    result = subprocess.Popen(['nvidia-smi'], stdout=subprocess.PIPE)
    print(result.stdout.read().decode())
    device = torch.cuda.current_device()
    print(f'can see {torch.cuda.device_count()} gpus')
    print(f'currently using gpu {device} -- {torch.cuda.get_device_name(device)}')
    # dummy = torch.rand(3758725612).to(device)
    # del dummy
    torch.cuda.empty_cache()

    model = build_model(opt)
    model = model.to(device)

    if opt.enable_zoom:
        train_dset = cotr_dataset.COTRZoomDataset(opt, 'train')
        val_dset = cotr_dataset.COTRZoomDataset(opt, 'val')
    else:
        train_dset = cotr_dataset.COTRDataset(opt, 'train')
        val_dset = cotr_dataset.COTRDataset(opt, 'val')
    train_loader = DataLoader(train_dset, batch_size=opt.batch_size,
                              shuffle=opt.shuffle_data, num_workers=opt.workers,
                              worker_init_fn=utils.worker_init_fn,
                              pin_memory=True)
    val_loader = DataLoader(val_dset, batch_size=opt.batch_size,
                            shuffle=opt.shuffle_data, num_workers=opt.workers,
                            drop_last=True,
                            worker_init_fn=utils.worker_init_fn,
                            pin_memory=True)

    # Per-module learning rates; the backbone is only trained when
    # opt.lr_backbone > 0.
    optim_list = [
        {"params": model.transformer.parameters(), "lr": opt.learning_rate},
        {"params": model.corr_embed.parameters(), "lr": opt.learning_rate},
        {"params": model.query_proj.parameters(), "lr": opt.learning_rate},
        {"params": model.input_proj.parameters(), "lr": opt.learning_rate},
    ]
    if opt.lr_backbone > 0:
        optim_list.append({"params": model.backbone.parameters(),
                           "lr": opt.lr_backbone})
    optim = torch.optim.Adam(optim_list)

    trainer = COTRTrainer(opt, model, optim, None, train_loader, val_loader)
    trainer.train()