def __init__(self, image_path, image_list=None, device='cpu', superpoint_config={}):
    print('Using SuperPoint dataset')
    self.DEBUG = False
    self.image_path = image_path
    self.device = device

    # Get image names
    if image_list is not None:
        with open(image_list) as f:
            self.image_names = f.read().splitlines()
    else:
        self.image_names = [
            name for name in os.listdir(image_path)
            if name.endswith('jpg') or name.endswith('png')
        ]

    # Load SuperPoint model
    self.superpoint = SuperPoint(superpoint_config)
    self.superpoint.to(device)
def __init__(self, typ="viewpoint", device='cuda', superpoint_config={}): print('Using SuperPoint dataset') self.DEBUG = False self.device = device self.typ = typ self.max_keypoints = 1024 if self.typ == "illumination": self.folders = glob.glob( "/nfs/WD_DS/Datasets/ImageMatching/hpatches-sequences-release/i_*" ) elif self.typ == "viewpoint": self.folders = glob.glob( "/nfs/WD_DS/Datasets/ImageMatching/hpatches-sequences-release/v_*" ) else: assert 1 == 2, "not implemented" self.pairs = self.make_pairs(self.folders) # Load SuperPoint model self.superpoint = SuperPoint(superpoint_config) self.superpoint.to(device)
def __init__(self, train_path, nfeatures):
    self.files = []
    self.files += [train_path + f for f in os.listdir(train_path)]
    self.nfeatures = nfeatures
    self.superpoint = SuperPoint({'max_keypoints': nfeatures}).cuda()
    self.matcher = cv2.BFMatcher_create(cv2.NORM_L1, crossCheck=False)
def __init__(self, train_path, nfeatures):
    self.device = 'cuda:7'
    self.files = []
    self.files += [train_path + f for f in os.listdir(train_path)]
    self.nfeatures = nfeatures
    self.superpoint = SuperPoint({
        'max_keypoints': nfeatures
    }).eval().to(self.device)
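# A minimal usage sketch (an assumption, not from the source): datasets like
# the SparseDataset variants above return variable-length keypoint sets, so
# they are typically wrapped in a DataLoader with batch_size=1 or a custom
# collate_fn. The path below is a placeholder.
import torch

train_set = SparseDataset('/path/to/images/', nfeatures=1024)
train_loader = torch.utils.data.DataLoader(
    dataset=train_set,
    shuffle=False,
    batch_size=1,  # variable keypoint counts make larger batches non-trivial
    drop_last=True)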
def __init__(self, _input='./assets/input_img/', _output=None, scene_mode='outdoor',
             skip=1, img_glob=['*.png', '*.jpg', '*.jpeg'], max_length=1000000):
    super(Arg, self).__init__()
    self.set_config()
    self.input_dir = _input
    self.output_dir = _output
    self.skip = skip
    self.img_glob = img_glob
    self.max_length = max_length
    self.data = {}
    self.superpoint = SuperPoint(self.config.get('superpoint', {}))
def __init__(self, args: DictConfig):
    super().__init__()
    self.hparams = args  # Will be logged to mlflow

    # make sure the flags are properly used
    assert not (args.exp.opencv_display and not args.exp.viz), \
        'Must use --viz with --opencv_display'
    assert not (args.exp.opencv_display and not args.exp.fast_viz), \
        'Cannot use --opencv_display without --fast_viz'
    assert not (args.exp.fast_viz and not args.exp.viz), \
        'Must use --viz with --fast_viz'
    assert not (args.exp.fast_viz and args.exp.viz_extension == 'pdf'), \
        'Cannot use pdf extension with --fast_viz'

    # store viz results
    # eval_output_dir = Path(f'{ROOT_PATH}/' + args.data.eval_output_dir)
    # eval_output_dir.mkdir(exist_ok=True, parents=True)
    # print('Will write visualization images to directory \"{}\"'.format(eval_output_dir))

    self.superglue = SuperGlue(args.model.superglue)
    self.superpoint = SuperPoint(args.model.superpoint)
    self.lr = None
def __init__(self,
             nfeatures=1024,
             scene_list_path='megadepth_utils/train_scenes.txt',
             scene_info_path='/local/dataset/megadepth/scene_info',
             base_path='/local/dataset/megadepth',
             train=True,
             preprocessing=None,
             min_overlap_ratio=0.1,
             max_overlap_ratio=0.7,
             max_scale_ratio=np.inf,
             pairs_per_scene=200,
             image_size=256):
    self.scenes = []
    with open(scene_list_path, 'r') as f:
        lines = f.readlines()
        for line in lines:
            self.scenes.append(line.strip('\n'))
    self.scene_info_path = scene_info_path
    self.base_path = base_path
    self.train = train
    self.preprocessing = preprocessing
    self.min_overlap_ratio = min_overlap_ratio
    self.max_overlap_ratio = max_overlap_ratio
    self.max_scale_ratio = max_scale_ratio
    self.pairs_per_scene = pairs_per_scene
    self.image_size = image_size
    self.dataset = []
    self.nfeatures = nfeatures
    self.sift = cv2.SIFT_create(nfeatures=self.nfeatures)
    self.superpoint = SuperPoint({'max_keypoints': nfeatures}).cuda()
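# A minimal instantiation sketch (the class name `MegaDepthDataset` is an
# assumption). Because this dataset holds a CUDA-resident SuperPoint model,
# DataLoader worker processes cannot fork it safely; num_workers=0 keeps
# extraction in the main process.
dataset = MegaDepthDataset(nfeatures=1024, train=True)
loader = torch.utils.data.DataLoader(dataset, batch_size=1, num_workers=0)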
class HPatches(Dataset):

    def __init__(self, typ="viewpoint", device='cuda', superpoint_config={}):
        print('Using SuperPoint dataset')
        self.DEBUG = False
        self.device = device
        self.typ = typ
        self.max_keypoints = 1024
        if self.typ == "illumination":
            self.folders = glob.glob(
                "/nfs/WD_DS/Datasets/ImageMatching/hpatches-sequences-release/i_*")
        elif self.typ == "viewpoint":
            self.folders = glob.glob(
                "/nfs/WD_DS/Datasets/ImageMatching/hpatches-sequences-release/v_*")
        else:
            raise NotImplementedError(f'Unknown HPatches split: {self.typ}')
        self.pairs = self.make_pairs(self.folders)

        # Load SuperPoint model
        self.superpoint = SuperPoint(superpoint_config)
        self.superpoint.to(device)

    def make_pairs(self, folders):
        '''
        1.ppm pairs with the rest: 2.ppm, 3.ppm, ..., 6.ppm
        return: [(1.ppm, 2.ppm, H_1_2), (1.ppm, 3.ppm, H_1_3), ..., (1.ppm, 6.ppm, H_1_6)]
        '''
        all_pairs = []
        for f in folders:
            im1 = os.path.join(f, '1.ppm')
            for j in range(2, 7):
                H = np.loadtxt(os.path.join(f, f"H_1_{j}"))
                im2 = os.path.join(f, f"{j}.ppm")
                all_pairs.append([im1, im2, H])
        return all_pairs

    def __len__(self):
        return len(self.pairs)

    def __getitem__(self, idx):
        img1, img2, H = self.pairs[idx]

        # Read the reference image
        image = cv2.imread(img1, cv2.IMREAD_GRAYSCALE)
        height, width = image.shape[:2]
        min_size = min(height, width)

        # Extract keypoints
        data = frame2tensor(image, self.device)
        pred0 = self.superpoint({'image': data})
        kps0 = pred0['keypoints'][0]
        desc0 = pred0['descriptors'][0]
        scores0 = pred0['scores'][0]
        if self.DEBUG:
            print(f'Original keypoints: {kps0.shape}, descriptors: {desc0.shape}, scores: {scores0.shape}')

        # filter keypoints: keep the top max_keypoints by score (the .copy()
        # calls make the reversed index array contiguous for torch indexing)
        idxs = np.argsort(scores0.data.cpu().numpy())[::-1][:self.max_keypoints]
        scores0 = scores0[idxs.copy()]
        kps0 = kps0[idxs.copy(), :]
        desc0 = desc0[:, idxs.copy()]

        image_warped = cv2.imread(img2, cv2.IMREAD_GRAYSCALE)

        # Extract keypoints
        data_warped = frame2tensor(image_warped, self.device)
        pred1 = self.superpoint({'image': data_warped})
        kps1 = pred1['keypoints'][0]
        desc1 = pred1['descriptors'][0]
        scores1 = pred1['scores'][0]
        if self.DEBUG:
            print(f'Original keypoints: {kps1.shape}, descriptors: {desc1.shape}, scores: {scores1.shape}')

        # filter keypoints
        idxs = np.argsort(scores1.data.cpu().numpy())[::-1][:self.max_keypoints]
        scores1 = scores1[idxs.copy()]
        kps1 = kps1[idxs.copy(), :]
        desc1 = desc1[:, idxs.copy()]

        # Draw keypoints and matches
        if self.DEBUG:
            kps0cv = [cv2.KeyPoint(k[0], k[1], 8) for k in kps0.cpu().numpy().squeeze()]
            kps1cv = [cv2.KeyPoint(k[0], k[1], 8) for k in kps1.cpu().numpy().squeeze()]
            outimg = None
            outimg = cv2.drawKeypoints(image, kps0cv, outimg)
            cv2.imwrite('keypoints0.jpg', outimg)
            outimg = cv2.drawKeypoints(image_warped, kps1cv, outimg)
            cv2.imwrite('keypoints1.jpg', outimg)

        return {
            'keypoints0': kps0,
            'keypoints1': kps1,
            'descriptors0': desc0,
            'descriptors1': desc1,
            'scores0': scores0,
            'scores1': scores1,
            'image0': data.squeeze(0),
            'image1': data_warped.squeeze(0),
            'file_name': img2,
            'homography': H
        }
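# frame2tensor is used throughout these snippets but never defined here. A
# minimal sketch consistent with SuperGlue's demo utilities (an assumption,
# not the verbatim upstream helper):
import torch

def frame2tensor(frame, device='cpu'):
    # grayscale uint8 HxW image -> float tensor of shape (1, 1, H, W) in [0, 1]
    return torch.from_numpy(frame / 255.).float()[None, None].to(device)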
class SuperPointDataset(Dataset):

    def __init__(self, image_path, image_list=None, device='cpu', superpoint_config={},
                 image_size=(640, 480), max_keypoints=1024):
        print('Using SuperPoint dataset')
        self.DEBUG = False
        self.image_path = image_path
        self.device = device
        self.image_size = image_size
        self.max_keypoints = max_keypoints

        # Get image names
        if image_list is not None:
            with open(image_list) as f:
                self.image_names = f.read().splitlines()
        else:
            self.image_names = [
                name for name in os.listdir(image_path)
                if name.endswith('jpg') or name.endswith('png')
            ]
        # Drop images OpenCV cannot decode
        self.image_names = [
            i for i in self.image_names
            if cv2.imread(os.path.join(self.image_path, i), cv2.IMREAD_GRAYSCALE) is not None
        ]

        # Load SuperPoint model
        self.superpoint = SuperPoint(superpoint_config)
        self.superpoint.to(device)

    def __len__(self):
        return 50000  # len(self.image_names)

    def __getitem__(self, idx):
        idx = idx % len(self.image_names)

        # Read image
        image = cv2.imread(os.path.join(self.image_path, self.image_names[idx]),
                           cv2.IMREAD_GRAYSCALE)
        image = cv2.resize(image, self.image_size[::-1])
        height, width = image.shape[:2]
        min_size = min(height, width)

        # Transform image with a random homography
        corners = np.array([[0, 0], [0, height], [width, 0], [width, height]],
                           dtype=np.float32)
        warp = np.random.randint(-min_size / 4, min_size / 4, size=(4, 2)).astype(np.float32)
        M = cv2.getPerspectiveTransform(corners, corners + warp)
        image_warped = cv2.warpPerspective(image, M, (width, height))
        if self.DEBUG:
            print(f'Image size: {image.shape} -> {image_warped.shape}')

        with torch.no_grad():
            # Extract keypoints
            data = frame2tensor(image, self.device)
            pred0 = self.superpoint({'image': data})
            kps0 = pred0['keypoints'][0]
            desc0 = pred0['descriptors'][0]
            scores0 = pred0['scores'][0]

            # filter keypoints: keep the top max_keypoints by score
            idxs = np.argsort(scores0.data.cpu().numpy())[::-1][:self.max_keypoints]
            scores0 = scores0[idxs.copy()]
            kps0 = kps0[idxs.copy(), :]
            desc0 = desc0[:, idxs.copy()]
            if self.DEBUG:
                print(f'Original keypoints: {kps0.shape}, descriptors: {desc0.shape}, scores: {scores0.shape}')

            # Transform keypoints into the warped image
            kps1 = cv2.perspectiveTransform(kps0.cpu().numpy()[None], M)

            # Filter keypoints that land in the border region
            matches = [[], []]
            kps1_filtered = []
            border = self.superpoint.config.get('remove_borders', 4)
            for i, k in enumerate(kps1.squeeze()):
                if k[0] < border or k[0] >= width - border:
                    continue
                if k[1] < border or k[1] >= height - border:
                    continue
                kps1_filtered.append(k)
                matches[0].append(i)
                matches[1].append(len(matches[1]))
            all_matches = [torch.tensor(ms) for ms in matches]
            kps1_filtered = array2tensor(np.array(kps1_filtered), self.device)

            # Compute descriptors & scores at the projected keypoints
            data_warped = frame2tensor(image_warped, self.device)
            desc1, scores1 = self.superpoint.computeDescriptorsAndScores({
                'image': data_warped,
                'keypoints': kps1_filtered
            })
            if self.DEBUG:
                print(f'Transformed keypoints: {kps1_filtered.shape}, descriptor: {desc1[0].shape}, scores: {scores1[0].shape}')

        # Draw keypoints and matches
        if self.DEBUG:
            kps0cv = [cv2.KeyPoint(k[0], k[1], 8) for k in kps0.cpu().numpy().squeeze()]
            kps1cv = [cv2.KeyPoint(k[0], k[1], 8) for k in kps1_filtered.cpu().numpy().squeeze()]
            matchescv = [cv2.DMatch(k0, k1, 0) for k0, k1 in zip(matches[0], matches[1])]
            outimg = None
            outimg = cv2.drawMatches(image, kps0cv, image_warped, kps1cv, matchescv, outimg)
            cv2.imwrite('matches.jpg', outimg)
            outimg = cv2.drawKeypoints(image, kps0cv, outimg)
            cv2.imwrite('keypoints0.jpg', outimg)
            outimg = cv2.drawKeypoints(image_warped, kps1cv, outimg)
            cv2.imwrite('keypoints1.jpg', outimg)

        return {
            'keypoints0': kps0,
            'keypoints1': kps1_filtered[0],
            'descriptors0': desc0,
            'descriptors1': desc1[0],
            'scores0': scores0,
            'scores1': scores1[0],
            'image0': data.squeeze(0),
            'image1': data_warped.squeeze(0),
            'all_matches': all_matches,
            'file_name': self.image_names[idx],
            'homography': M
        }
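# array2tensor, like frame2tensor, is assumed from the surrounding project and
# is not defined in these snippets. A plausible sketch (an assumption) that
# matches how it is used above: it must produce a batched float keypoint
# tensor of shape (1, N, 2) on the requested device.
def array2tensor(arr, device='cpu'):
    return torch.from_numpy(arr).float()[None].to(device)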
class SuperPointDatasetTest(Dataset):

    def __init__(self, image_path, image_list=None, device='cpu', superpoint_config={}):
        print('Using SuperPoint dataset')
        self.DEBUG = False
        self.image_path = image_path
        self.device = device

        # Get image names
        if image_list is not None:
            with open(image_list) as f:
                self.image_names = f.read().splitlines()
        else:
            self.image_names = [
                name for name in os.listdir(image_path)
                if name.endswith('jpg') or name.endswith('png')
            ]
        # Drop images OpenCV cannot decode
        self.image_names = [
            i for i in self.image_names
            if cv2.imread(os.path.join(self.image_path, i), cv2.IMREAD_GRAYSCALE) is not None
        ]

        # Load SuperPoint model
        self.superpoint = SuperPoint(superpoint_config)
        self.superpoint.to(device)

    def __len__(self):
        return len(self.image_names)

    def __getitem__(self, idx):
        # Read image
        image = cv2.imread(os.path.join(self.image_path, self.image_names[idx]),
                           cv2.IMREAD_GRAYSCALE)
        height, width = image.shape[:2]
        min_size = min(height, width)

        # Transform image with a random homography
        corners = np.array([[0, 0], [0, height], [width, 0], [width, height]],
                           dtype=np.float32)
        warp = np.random.randint(-min_size / 4, min_size / 4, size=(4, 2)).astype(np.float32)
        M = cv2.getPerspectiveTransform(corners, corners + warp)
        image_warped = cv2.warpPerspective(image, M, (width, height))
        if self.DEBUG:
            print(f'Image size: {image.shape} -> {image_warped.shape}')

        # Extract keypoints on the original image
        data = frame2tensor(image, self.device)
        pred0 = self.superpoint({'image': data})
        kps0 = pred0['keypoints'][0]
        desc0 = pred0['descriptors'][0]
        scores0 = pred0['scores'][0]
        if self.DEBUG:
            print(f'Original keypoints: {kps0.shape}, descriptors: {desc0.shape}, scores: {scores0.shape}')

        # Extract keypoints on the warped image
        data_warped = frame2tensor(image_warped, self.device)
        pred1 = self.superpoint({'image': data_warped})
        kps1 = pred1['keypoints'][0]
        desc1 = pred1['descriptors'][0]
        scores1 = pred1['scores'][0]
        if self.DEBUG:
            print(f'Original keypoints: {kps1.shape}, descriptors: {desc1.shape}, scores: {scores1.shape}')

        # Draw keypoints and matches
        if self.DEBUG:
            kps0cv = [cv2.KeyPoint(k[0], k[1], 8) for k in kps0.cpu().numpy().squeeze()]
            kps1cv = [cv2.KeyPoint(k[0], k[1], 8) for k in kps1.cpu().numpy().squeeze()]
            outimg = None
            outimg = cv2.drawKeypoints(image, kps0cv, outimg)
            cv2.imwrite('keypoints0.jpg', outimg)
            outimg = cv2.drawKeypoints(image_warped, kps1cv, outimg)
            cv2.imwrite('keypoints1.jpg', outimg)

        return {
            'keypoints0': kps0,
            'keypoints1': kps1,
            'descriptors0': desc0,
            'descriptors1': desc1,
            'scores0': scores0,
            'scores1': scores1,
            'image0': data.squeeze(0),
            'image1': data_warped.squeeze(0),
            'file_name': self.image_names[idx],
            'homography': M
        }
    raise ValueError('Cannot specify more than two integers for --resize')

image_list = Path(opt.image_dir).rglob(f'*.{opt.image_glob}')
image_list = [p.relative_to(opt.image_dir) for p in image_list]
assert len(image_list) > 0

device = 'cuda' if torch.cuda.is_available() else 'cpu'
print('Running inference on device {}'.format(device))
config = {
    'max_keypoints': opt.max_keypoints,
    'keypoint_threshold': opt.keypoint_threshold,
    'nms_radius': opt.nms_radius,
    'refinement_radius': opt.refinement_radius,
    'do_quadratic_refinement': opt.quadratic_refinement,
}
frontend = SuperPoint(config).eval().to(device)

results_dir = Path(opt.results_dir)
results_dir.mkdir(exist_ok=True, parents=True)
if opt.hdf5:
    hfile = h5py.File(str(results_dir / opt.hdf5), 'w')

for name in tqdm(image_list):
    image, inp, scales = read_image(opt.image_dir / name, device, opt.resize, 0, True,
                                    resize_force=not opt.no_resize_force,
                                    interp=cv2.INTER_CUBIC)
device = 'cuda'
print('Running inference on device \"{}\"'.format(device))
config = {
    'superpoint': {
        'nms_radius': opt.nms_radius,
        'keypoint_threshold': opt.keypoint_threshold,
        'max_keypoints': opt.max_keypoints
    },
    'superglue': {
        'weights': opt.superglue,
        'sinkhorn_iterations': opt.sinkhorn_iterations,
        'match_threshold': opt.match_threshold,
    }
}
matching = Matching(config).eval().to(device)
superpoint = SuperPoint(config.get('superpoint', {})).eval().to(device)

# Create the output directories if they do not exist already.
input_dir = Path(opt.input_dir)
print('Looking for data in directory \"{}\"'.format(input_dir))
output_dir = Path(opt.output_dir)
output_dir.mkdir(exist_ok=True, parents=True)
print('Will write matches to directory \"{}\"'.format(output_dir))
if opt.viz:
    print('Will write visualization images to',
          'directory \"{}\"'.format(output_dir))

image_tensor = {}
keypoints_tensor = {}
response = {}
matches_mvg = {}
timer = AverageTimer(newline=True)
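# A hedged sketch of the per-pair loop that typically follows this setup,
# modeled on SuperGlue's match_pairs demo (an assumption, not the verbatim
# continuation; `pairs` and `read_image` come from that demo's utilities):
for name0, name1 in pairs:
    image0, inp0, scales0 = read_image(input_dir / name0, device, opt.resize, 0, opt.resize_float)
    image1, inp1, scales1 = read_image(input_dir / name1, device, opt.resize, 0, opt.resize_float)
    pred = matching({'image0': inp0, 'image1': inp1})
    pred = {k: v[0].cpu().numpy() for k, v in pred.items()}
    matches, conf = pred['matches0'], pred['matching_scores0']
    timer.update('matcher')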
        'max_keypoints': opt.max_keypoints
    },
    'superglue': {
        'weights': opt.superglue,
        'sinkhorn_iterations': opt.sinkhorn_iterations,
        'match_threshold': opt.match_threshold,
    }
}

train_set = SparseDataset(opt.train_path, opt.nfeatures)
train_loader = torch.utils.data.DataLoader(dataset=train_set,
                                           shuffle=False,
                                           batch_size=opt.batch_size,
                                           drop_last=True)

superpoint = SuperPoint(config.get('superpoint', {}))
superglue = SuperGlue(config.get('superglue', {}))
if torch.cuda.is_available():
    superpoint.cuda()
    superglue.cuda()
else:
    print("### CUDA not available ###")

optimizer = torch.optim.Adam(superglue.parameters(), lr=opt.learning_rate)
mean_loss = []
for epoch in range(1, opt.epoch + 1):
    epoch_loss = 0
    superglue.double().train()
    # train_loader = tqdm(train_loader)
    for i, pred in enumerate(train_loader):
        for k in pred:
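# The training fragment above is truncated at `for k in pred:`. A minimal
# sketch of how such an inner loop commonly continues in SuperGlue training
# scripts (an assumption, not the original source): batch tensors are moved
# to the GPU, the matcher returns a loss, and the optimizer steps.
for i, pred in enumerate(train_loader):
    for k in pred:
        if isinstance(pred[k], torch.Tensor):
            pred[k] = pred[k].cuda()
    data = superglue(pred)  # assumed to return a dict containing 'loss'
    loss = data['loss']
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    epoch_loss += loss.item()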
def __getitem__(self, idx):
    (image1, depth1, intrinsics1, pose1, bbox1,
     image2, depth2, intrinsics2, pose2, bbox2) = self.recover_pair(self.dataset[idx])

    # SIFT
    # kp1, descs1 = self.sift.detectAndCompute(cv2.cvtColor(image1, cv2.COLOR_BGR2GRAY), None)
    # kp2, descs2 = self.sift.detectAndCompute(cv2.cvtColor(image2, cv2.COLOR_BGR2GRAY), None)

    # SuperPoint
    SuperPoint = self.superpoint
    kp1, descs1 = self.parse_superpoint_result(
        SuperPoint({'image': frame2tensor(cv2.cvtColor(image1, cv2.COLOR_BGR2GRAY))}))
    kp2, descs2 = self.parse_superpoint_result(
        SuperPoint({'image': frame2tensor(cv2.cvtColor(image2, cv2.COLOR_BGR2GRAY))}))

    # im_with_keypoints1 = cv2.drawKeypoints(image1, kp1, np.array([]), (255,0,0))
    # im_with_keypoints2 = cv2.drawKeypoints(image2, kp2, np.array([]), (255,0,0))
    # cv2.imwrite('match_000_kp1.png', im_with_keypoints1)
    # cv2.imwrite('match_000_kp2.png', im_with_keypoints2)

    # limit the number of keypoints; skip pairs with too few detections
    kp1_num = min(self.nfeatures, len(kp1))
    kp2_num = min(self.nfeatures, len(kp2))
    if kp1_num < 10 or kp2_num < 10:
        return {
            'keypoints0': torch.zeros([0, 0, 2], dtype=torch.double),
            'keypoints1': torch.zeros([0, 0, 2], dtype=torch.double),
            'descriptors0': torch.zeros([0, 2], dtype=torch.double),
            'descriptors1': torch.zeros([0, 2], dtype=torch.double),
            'image0': image1,
            'image1': image2,
            'file_name': ''
        }
    kp1 = kp1[:kp1_num]
    kp2 = kp2[:kp2_num]
    descs1 = descs1[:kp1_num, :]
    descs2 = descs2[:kp2_num, :]

    # kp1_np = np.array([[kp.pt[0], kp.pt[1]] for kp in kp1])
    # kp2_np = np.array([[kp.pt[0], kp.pt[1]] for kp in kp2])
    kp1_np = np.array([(kp[0], kp[1]) for kp in kp1])
    kp2_np = np.array([(kp[0], kp[1]) for kp in kp2])
    KP1 = kp1_np
    KP2 = kp2_np

    # confidence of each keypoint
    # scores1_np = np.array([kp.response for kp in kp1])
    # scores2_np = np.array([kp.response for kp in kp2])
    scores1_np = np.array([kp[2] for kp in kp1])
    scores2_np = np.array([kp[2] for kp in kp2])

    kp1_np = kp1_np[:kp1_num, :]
    kp2_np = kp2_np[:kp2_num, :]
    descs1 = descs1[:kp1_num, :]
    descs2 = descs2[:kp2_num, :]

    kp1_np = kp1_np.reshape((1, -1, 2))
    kp2_np = kp2_np.reshape((1, -1, 2))
    # descs1 = np.transpose(descs1 / 256.)
    # descs2 = np.transpose(descs2 / 256.)
    descs1 = np.transpose(descs1)
    descs2 = np.transpose(descs2)

    image1_o = image1
    image2_o = image2
    image1 = torch.from_numpy(cv2.cvtColor(image1, cv2.COLOR_BGR2GRAY) / 255.).double()[None].cuda()
    image2 = torch.from_numpy(cv2.cvtColor(image2, cv2.COLOR_BGR2GRAY) / 255.).double()[None].cuda()

    try:
        all_matches = self.compute_all_matches(KP1, image1_o, depth1, intrinsics1, pose1, bbox1,
                                               KP2, image2_o, depth2, intrinsics2, pose2, bbox2)
    except EmptyTensorError:
        return {
            'keypoints0': torch.zeros([0, 0, 2], dtype=torch.double),
            'keypoints1': torch.zeros([0, 0, 2], dtype=torch.double),
            'descriptors0': torch.zeros([0, 2], dtype=torch.double),
            'descriptors1': torch.zeros([0, 2], dtype=torch.double),
            'image0': image1,
            'image1': image2,
            'file_name': ''
        }

    return {
        'keypoints0': list(kp1_np),
        'keypoints1': list(kp2_np),
        'descriptors0': list(descs1),
        'descriptors1': list(descs2),
        'scores0': list(scores1_np),
        'scores1': list(scores2_np),
        'image0': image1,
        'image1': image2,
        'all_matches': all_matches,
        'file_name': ''
    }
def set_superpoint(self, config):
    self.superpoint = SuperPoint(config.get('superpoint', {}))
class DataBuilder(object):

    def __init__(self, config, save_path_warped, save_path_sp, numProcess=1):
        # self.sift = cv2.xfeatures2d.SIFT_create(nfeatures=self.max_keypoints)
        # self.superpointFrontend = superpoint.SuperPointFrontend(weights_path=config['weights_path'], cuda=1, device_id=0)
        self.superpoint = SuperPoint(config).cuda().eval()
        self.superpoint.load_state_dict(torch.load(config['weights_path']))
        self.feature_dim = config['feature_dim']
        self.max_keypoints = config['max_keypoints']
        self.keypoint_threshold = config['keypoint_threshold']
        self.nms_radius = config['nms_radius']
        self.save_path_warped = save_path_warped
        self.save_path_sp = save_path_sp
        if not os.path.isdir(save_path_warped):
            os.mkdir(save_path_warped)
        if not os.path.isdir(save_path_sp):
            os.mkdir(save_path_sp)
        self.pool = multiprocessing.Pool(numProcess)

    def extractSP(self, imageList):
        N = len(imageList)
        kpmax = self.max_keypoints
        data = np.stack(imageList, axis=0)
        data = torch.from_numpy(data[:, np.newaxis, :, :])
        data = data.cuda()
        with torch.no_grad():
            pred = self.superpoint({"image": data})
        spPackList = []
        for i in range(N):
            # zero-pad keypoints, descriptors and scores up to max_keypoints
            kp1_np = np.zeros([self.max_keypoints, 2])
            descs1 = np.zeros([self.feature_dim, self.max_keypoints])
            scores1_np = np.zeros([self.max_keypoints])
            n1 = len(pred['keypoints'][i])
            kp1_np[:n1] = pred['keypoints'][i].cpu().numpy()
            descs1[:, :n1] = pred['descriptors'][i].cpu().numpy()
            scores1_np[:n1] = pred['scores'][i].cpu().numpy()
            spPackList.append((n1, kp1_np, descs1, scores1_np))
        return spPackList

    def match(self, sp_pack1, sp_pack2, mapW, isNegSample, image, warped, handMask, debug=0):
        nf = self.max_keypoints
        n1, kp1_np, descs1, scores1_np = sp_pack1
        n2, kp2_np, descs2, scores2_np = sp_pack2
        if isNegSample or n1 == 0 or n2 == 0:
            numMatch = 0
            numMiss = nf
            MN2 = np.vstack([np.arange(nf), nf * np.ones(nf, dtype=np.int64)])  # excluded kp1 --> last index in matrix
            MN3 = np.vstack([nf * np.ones(nf, dtype=np.int64), np.arange(nf)])
            all_matches = np.hstack([MN2, MN3]).T
            if debug:
                cv2.imshow('image', image)
                cv2.imshow('warped', warped)
                cv2.waitKey()
            return all_matches, numMatch, numMiss

        # reverse project by non-linear reverse map
        indices = kp2_np[:n2].astype(np.int32).T
        kp2_projected = mapW[indices[1], indices[0]]
        kp2_projected = np.vstack([kp2_projected, np.zeros((nf - n2, 2))])
        dists = cdist(kp1_np[:n1], kp2_projected[:n2])
        dists[np.isnan(dists)] = np.inf
        min1 = np.argmin(dists, axis=0)  # kp1 which is closest from range(len(kp2))
        min2 = np.argmin(dists, axis=1)  # kp2 which is closest from range(len(kp1))
        min2v = np.min(dists, axis=0)    # closest distance of range(len(kp2))
        kp2_mutual_close = min2[min1] == np.arange(n2)
        if handMask is not None:
            kp2_blocked = ~handMask[indices[1], indices[0]]  # kp2, not covered by hand
            kp2_mutual_close = np.logical_and(kp2_mutual_close, kp2_blocked)
        matches2 = np.where(np.logical_and(kp2_mutual_close, min2v < 3))[0]  # kp2
        matches1 = min1[matches2]
        missing1 = np.setdiff1d(np.arange(nf), matches1)  # kp1 which are excluded
        missing2 = np.setdiff1d(np.arange(nf), matches2)  # kp2 which are excluded

        if debug:
            # visualize
            matches_dmatch = []
            if not isNegSample:
                for i, idx2 in enumerate(matches2):
                    idx1 = matches1[i]
                    dmatch = cv2.DMatch(min1[idx2], idx2, 0.0)
                    matches_dmatch.append(dmatch)
            kp1 = [cv2.KeyPoint(p[0], p[1], 0) for p in kp1_np]
            # kp1 = [cv2.KeyPoint(p[0], p[1], 0) for p in kp2_projected]
            kp2 = [cv2.KeyPoint(p[0], p[1], 0) for p in kp2_np]
            out = cv2.drawMatches((image * 255).astype(np.uint8), kp1,
                                  (warped * 255).astype(np.uint8), kp2,
                                  matches_dmatch, None)
            cv2.imshow('a', out)
            cv2.imshow('image', image)
            cv2.imshow('warped', warped)
            cv2.waitKey()

        numMatch = len(matches1)
        numMiss = nf - numMatch
        lossWeightMiss = 0.2
        MN = np.vstack([matches1, matches2])
        MN2 = np.vstack([missing1, nf * np.ones(numMiss, dtype=np.int64)])  # excluded kp1 --> last index in matrix
        MN3 = np.vstack([nf * np.ones(numMiss, dtype=np.int64), missing2])
        MN4 = np.zeros([2, numMatch], dtype=np.int64)  # zero-pad for batch training
        all_matches = np.hstack([MN, MN2, MN3, MN4]).T
        return all_matches, numMatch, numMiss

    def build(self, idxList, fileNameList, handFileList, saveFlag=False, debug=1):
        W, H = 320, 240
        batchSize = len(idxList)
        if handFileList is not None:
            handFiles = [handFileList[i] for i in np.random.randint(0, len(handFileList), (batchSize))]
        else:
            handFiles = [None] * batchSize
        # even idx as negative sample, force zero match
        taskList = [(idx, fileNameList[idx], fileNameList[idx - 1], fileNameList[idx - 2],
                     handFiles[i], idx % 2 == 0, W, H, self.save_path_warped, saveFlag)
                    for i, idx in enumerate(idxList)]
        resultList = self.pool.map(mp_warp, taskList)

        imageList = []
        mapWList = []
        negList = []
        handMaskList = []
        for image_f, warped_f, mapW, handMask, isNegSample in resultList:
            imageList += [image_f, warped_f]
            mapWList.append(mapW)
            handMaskList.append(handMask)
            negList.append(isNegSample)

        spPackList = self.extractSP(imageList)

        dict1List = []
        for i, idx in enumerate(idxList):
            spPack1 = spPackList[i * 2]
            spPack2 = spPackList[i * 2 + 1]
            mapW = mapWList[i]
            isNegSample = negList[i]
            handMask = handMaskList[i]
            n1, kp1_np, descs1, scores1_np = spPack1
            n2, kp2_np, descs2, scores2_np = spPack2
            all_matches, numMatch, numMiss = self.match(spPack1, spPack2, mapW, isNegSample,
                                                        imageList[i * 2], imageList[i * 2 + 1],
                                                        handMask, debug=debug)
            image_file = fileNameList[idx]
            warped_file = self.save_path_warped + str(idx) + ".jpg"
            dict1 = {
                'keypoints0': torch.Tensor(kp1_np),
                'keypoints1': torch.Tensor(kp2_np),
                'descriptors0': torch.Tensor(descs1),
                'descriptors1': torch.Tensor(descs2),
                'scores0': torch.Tensor(scores1_np),
                'scores1': torch.Tensor(scores2_np),
                'image_file': image_file,
                'warped_file': warped_file,
                'shape0': torch.Tensor([H, W]),
                'shape1': torch.Tensor([H, W]),
                'all_matches': all_matches,
                'num_match_list': torch.Tensor([numMatch + 2 * numMiss])
            }
            if saveFlag:
                with open(self.save_path_sp + str(idx) + '.pkl', 'wb') as f:
                    pickle.dump(dict1, f)
            dict1List.append(dict1)
        return dict1List

    def buildAll(self, train_path, hand_path="", batchSizeMax=64, saveFlag=False, debug=1):
        # with open("/home/pallas/PROJECTS/cv-book-image-recognition/resources/alpha-book-list.txt", "r") as f:
        #     lines = f.readlines()
        #     lines = [line.strip().split("\t") for line in lines]
        #     bookCodeList = [line[2] for line in lines if "小学" in line[1]]
        #     files = [train_path + bookCode + "/" + x for bookCode in bookCodeList for x in os.listdir(train_path+bookCode)]
        files = [root + "/" + name
                 for root, dirs, files in os.walk(train_path, topdown=False)
                 for name in files if name[-4:] == ".jpg"]
        hands = None
        if len(hand_path) > 0:
            hands = [hand_path + name for name in os.listdir(hand_path)]
        # files = files[:360]
        N = len(files)
        print("{} images in fileList".format(N))
        idxList = np.arange(N)
        i = 0
        while i < N - 1:
            batchSize = min(batchSizeMax, N - i)
            self.build(idxList[i:i + batchSize], files, hands, saveFlag=saveFlag, debug=debug)
            i += batchSize
            print("building training set {}/{}".format(i, N))
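# A minimal driver sketch for DataBuilder (an assumption: the config keys
# mirror those read in __init__; the weights file, max_keypoints value, and
# directory paths are placeholders, though 256 is SuperPoint's descriptor dim):
config = {
    'weights_path': 'superpoint_v1.pth',
    'feature_dim': 256,
    'max_keypoints': 400,
    'keypoint_threshold': 0.005,
    'nms_radius': 4,
}
builder = DataBuilder(config, save_path_warped='warped/', save_path_sp='sp/', numProcess=4)
builder.buildAll('train_images/', saveFlag=True, debug=0)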
def __getitem__(self, idx):
    before_getitem_time = time.time()
    file_name = self.files[idx]
    image = cv2.imread(file_name, cv2.IMREAD_GRAYSCALE)
    SuperPoint = self.superpoint

    # NOTE: image.shape is (rows, cols) = (height, width); the original
    # unpacked it as `width, height`, which breaks on non-square images.
    height, width = image.shape[:2]
    corners = np.array([[0, 0], [0, height], [width, 0], [width, height]],
                       dtype=np.float32)
    warp = np.random.randint(-224, 224, size=(4, 2)).astype(np.float32)

    # get the corresponding warped image
    M = cv2.getPerspectiveTransform(corners, corners + warp)
    warped = cv2.warpPerspective(src=image, M=M,
                                 dsize=(image.shape[1], image.shape[0]))  # returns an image

    # extract keypoints of the image pair using SuperPoint
    # before_superpoint_time = time.time()
    kp1, descs1 = self.parse_superpoint_result(SuperPoint({'image': frame2tensor(image)}))
    kp2, descs2 = self.parse_superpoint_result(SuperPoint({'image': frame2tensor(warped)}))
    print(kp1.shape, descs1.shape)
    # print('SuperPoint time:', time.time() - before_superpoint_time)

    # limit the number of keypoints
    kp1_num = min(self.nfeatures, len(kp1))
    kp2_num = min(self.nfeatures, len(kp2))
    kp1 = kp1[:kp1_num]
    kp2 = kp2[:kp2_num]
    kp1_np = np.array([(kp[0], kp[1]) for kp in kp1])
    kp2_np = np.array([(kp[0], kp[1]) for kp in kp2])

    # skip this image pair if no keypoints detected in image
    if len(kp1) < 1 or len(kp2) < 1:
        return {
            'keypoints0': torch.zeros([0, 0, 2], dtype=torch.double),
            'keypoints1': torch.zeros([0, 0, 2], dtype=torch.double),
            'descriptors0': torch.zeros([0, 2], dtype=torch.double),
            'descriptors1': torch.zeros([0, 2], dtype=torch.double),
            'image0': image,
            'image1': warped,
            'file_name': file_name
        }

    # confidence of each keypoint
    scores1_np = np.array([kp[2] for kp in kp1])
    scores2_np = np.array([kp[2] for kp in kp2])

    kp1_np = kp1_np[:kp1_num, :]
    kp2_np = kp2_np[:kp2_num, :]
    descs1 = descs1[:kp1_num, :]
    descs2 = descs2[:kp2_num, :]

    # obtain the matching matrix of the image pair
    # before_match_time = time.time()
    matched = self.matcher.match(descs1, descs2)
    # print('match time:', time.time() - before_match_time)

    # build ground-truth matches: project kp1 with the homography and keep
    # mutual nearest neighbours within a 3-pixel radius
    kp1_projected = cv2.perspectiveTransform(kp1_np.reshape((1, -1, 2)), M)[0, :, :]
    dists = cdist(kp1_projected, kp2_np)
    min1 = np.argmin(dists, axis=0)
    min2 = np.argmin(dists, axis=1)
    min1v = np.min(dists, axis=1)
    min1f = min2[min1v < 3]
    xx = np.where(min2[min1] == np.arange(min1.shape[0]))[0]
    matches = np.intersect1d(min1f, xx)
    print(matches)

    missing1 = np.setdiff1d(np.arange(kp1_np.shape[0]), min1[matches])
    missing2 = np.setdiff1d(np.arange(kp2_np.shape[0]), matches)
    MN = np.concatenate([min1[matches][np.newaxis, :], matches[np.newaxis, :]])
    MN2 = np.concatenate([missing1[np.newaxis, :],
                          (len(kp2)) * np.ones((1, len(missing1)), dtype=np.int64)])
    MN3 = np.concatenate([(len(kp1)) * np.ones((1, len(missing2)), dtype=np.int64),
                          missing2[np.newaxis, :]])
    all_matches = np.concatenate([MN, MN2, MN3], axis=1)

    kp1_np = kp1_np.reshape((1, -1, 2))
    kp2_np = kp2_np.reshape((1, -1, 2))
    descs1 = np.transpose(descs1 / 256.)
    descs2 = np.transpose(descs2 / 256.)
    image = torch.from_numpy(image / 255.).double()[None].cuda()
    warped = torch.from_numpy(warped / 255.).double()[None].cuda()
    print('get item time:', time.time() - before_getitem_time)
    print('kp1_size, kp2_size:', kp1_np.size, kp2_np.size)
    print('all_matches:', all_matches)

    return {
        'keypoints0': list(kp1_np),
        'keypoints1': list(kp2_np),
        'descriptors0': list(descs1),
        'descriptors1': list(descs2),
        'scores0': list(scores1_np),
        'scores1': list(scores2_np),
        'image0': image,
        'image1': warped,
        'all_matches': list(all_matches),
        'file_name': file_name
    }
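# A toy check of the mutual-nearest-neighbour logic above, with made-up
# coordinates: kp1[0] projects next to kp2[1] and vice versa, so that pair is
# the single ground-truth match.
import numpy as np
from scipy.spatial.distance import cdist

kp1_projected = np.array([[10., 10.], [50., 50.]])
kp2_np = np.array([[40., 40.], [11., 10.]])
dists = cdist(kp1_projected, kp2_np)
min1 = np.argmin(dists, axis=0)   # nearest kp1 for each kp2
min2 = np.argmin(dists, axis=1)   # nearest kp2 for each kp1
min1v = np.min(dists, axis=1)     # distance to nearest kp2 for each kp1
min1f = min2[min1v < 3]           # kp2 indices whose kp1 is within 3 px
xx = np.where(min2[min1] == np.arange(min1.shape[0]))[0]  # mutual-NN kp2 indices
matches = np.intersect1d(min1f, xx)
print(matches)                    # -> [1]: kp1[0] <-> kp2[1]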
def __getitem__(self, idx):
    (image1, depth1, intrinsics1, pose1, bbox1,
     image2, depth2, intrinsics2, pose2, bbox2) = self.recover_pair(self.dataset[idx])

    image1 = preprocess_image(image1, preprocessing=self.preprocessing)  # yields a BGR image, no mean-centering
    image2 = preprocess_image(image2, preprocessing=self.preprocessing)  # yields a BGR image, no mean-centering
    original_image1 = image1
    original_image2 = image2
    '''
    return {
        'image1': torch.from_numpy(image1.astype(np.float32)),
        'depth1': torch.from_numpy(depth1.astype(np.float32)),
        'intrinsics1': torch.from_numpy(intrinsics1.astype(np.float32)),
        'pose1': torch.from_numpy(pose1.astype(np.float32)),
        'bbox1': torch.from_numpy(bbox1.astype(np.float32)),
        'image2': torch.from_numpy(image2.astype(np.float32)),
        'depth2': torch.from_numpy(depth2.astype(np.float32)),
        'intrinsics2': torch.from_numpy(intrinsics2.astype(np.float32)),
        'pose2': torch.from_numpy(pose2.astype(np.float32)),
        'bbox2': torch.from_numpy(bbox2.astype(np.float32))
    }
    '''

    # Run SuperPoint on both images to get keypoints, descriptors and scores
    SuperPoint = self.superpoint
    image1 = np.transpose(image1, (1, 2, 0))
    image1 = Image.fromarray(np.uint8(image1))
    image1 = image1.convert('L')
    image1 = np.array(image1)
    image2 = np.transpose(image2, (1, 2, 0))
    image2 = Image.fromarray(np.uint8(image2))
    image2 = image2.convert('L')
    image2 = np.array(image2)
    kp1, descs1 = self.parse_superpoint_result(SuperPoint({'image': frame2tensor(image1)}))
    kp2, descs2 = self.parse_superpoint_result(SuperPoint({'image': frame2tensor(image2)}))

    # limit the number of keypoints
    kp1_num = min(self.nfeatures, len(kp1))
    kp2_num = min(self.nfeatures, len(kp2))
    kp1 = kp1[:kp1_num]
    kp2 = kp2[:kp2_num]
    kp1_np = np.array([(kp[0], kp[1]) for kp in kp1])
    kp2_np = np.array([(kp[0], kp[1]) for kp in kp2])

    # skip this image pair if fewer than 10 keypoints were detected in either image
    if len(kp1) < 10 or len(kp2) < 10:
        return {
            'keypoints0': torch.zeros([0, 0, 2], dtype=torch.double),
            'keypoints1': torch.zeros([0, 0, 2], dtype=torch.double),
            'descriptors0': torch.zeros([0, 2], dtype=torch.double),
            'descriptors1': torch.zeros([0, 2], dtype=torch.double),
            'image0': image1,
            'image1': image2,
            'file_name': ''
        }

    # confidence of each keypoint
    scores1_np = np.array([kp[2] for kp in kp1])
    scores2_np = np.array([kp[2] for kp in kp2])

    kp1_np = kp1_np[:kp1_num, :]
    kp2_np = kp2_np[:kp2_num, :]
    descs1 = descs1[:kp1_num, :]
    descs2 = descs2[:kp2_num, :]

    kp1_np = kp1_np.reshape((1, -1, 2))
    kp2_np = kp2_np.reshape((1, -1, 2))
    descs1 = np.transpose(descs1 / 256.)
    descs2 = np.transpose(descs2 / 256.)

    image1 = torch.from_numpy(image1 / 255.).double()[None].cuda()
    image2 = torch.from_numpy(image2 / 255.).double()[None].cuda()
    # print(image1.shape, image2.shape, depth1.shape, depth2.shape)

    # From the 10-tuple and the keypoints, compute all ground-truth matches and
    # return them in the format SuperGlue expects.
    # image1, depth1, intrinsics1, pose1, bbox1
    # image2, depth2, intrinsics2, pose2, bbox2
    # depth: (256, 256), intrinsics: (3, 3), pose: (4, 4), bbox: (2)
    # example: all_matches = list(np.array([[0], [0]]))
    try:
        all_matches = self.compute_all_matches(kp1_np, original_image1, depth1, intrinsics1, pose1, bbox1,
                                               kp2_np, original_image2, depth2, intrinsics2, pose2, bbox2)
    except EmptyTensorError:
        return {
            'keypoints0': torch.zeros([0, 0, 2], dtype=torch.double),
            'keypoints1': torch.zeros([0, 0, 2], dtype=torch.double),
            'descriptors0': torch.zeros([0, 2], dtype=torch.double),
            'descriptors1': torch.zeros([0, 2], dtype=torch.double),
            'image0': image1,
            'image1': image2,
            'file_name': ''
        }

    # print(kp1_np.shape, kp2_np.shape, len(all_matches[0]))
    # return value in the layout SuperGlue expects
    return {
        'keypoints0': list(kp1_np),
        'keypoints1': list(kp2_np),
        'descriptors0': list(descs1),
        'descriptors1': list(descs2),
        'scores0': list(scores1_np),
        'scores1': list(scores2_np),
        'image0': image1,
        'image1': image2,
        'all_matches': all_matches,
        'file_name': ''
    }
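# A hedged end-to-end sketch tying the MegaDepth pieces together. The class
# name `MegaDepthSuperPointDataset` and the `build_dataset()` pair-sampling
# step are assumptions based on the snippets above, not the original API.
dataset = MegaDepthSuperPointDataset(nfeatures=1024)
dataset.build_dataset()  # assumed: samples pairs_per_scene pairs per scene
sample = dataset[0]
print(len(sample['keypoints0']), sample['all_matches'])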