def estimate_pose(self, query_image_info, candidate_image_info):
    query_features = query_image_info["features"]
    candidate_features = candidate_image_info["features"]
    keypoints0 = torch.stack(candidate_features["keypoints"])
    keypoints1 = torch.stack(query_features["keypoints"])
    data = {
        "descriptors0": torch.stack(candidate_features["descriptors"]).to(self.device_),
        "keypoints0": keypoints0.to(self.device_),
        "scores0": torch.stack(candidate_features["scores"]).to(self.device_),
        "descriptors1": torch.stack(query_features["descriptors"]).to(self.device_),
        "keypoints1": keypoints1.to(self.device_),
        "scores1": torch.stack(query_features["scores"]).to(self.device_),
        "image_shape": (1, 1, self.resolution_, self.resolution_),
    }
    with torch.no_grad():
        matching_result = self.superglue_(data)
    kpts0 = keypoints0[0].cpu().numpy()
    kpts1 = keypoints1[0].cpu().numpy()
    matches = matching_result['matches0'][0].cpu().numpy()
    valid = matches > -1
    mkpts0 = kpts0[valid]
    mkpts1 = kpts1[matches[valid]]
    target_kpts_in_meters = pts_from_pixel_to_meter(mkpts0, self.meters_per_pixel_)
    source_kpts_in_meters = pts_from_pixel_to_meter(mkpts1, self.meters_per_pixel_)
    T_target_source, score = compute_relative_pose_with_ransac(
        target_kpts_in_meters, source_kpts_in_meters)
    return T_target_source, score
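# NOTE (editor): pts_from_pixel_to_meter / pts_from_meter_to_pixel are imported
# from elsewhere in the repository. Minimal sketches of what they might look like
# are given below, assuming a 100 m x 100 m bird's-eye view centred on the sensor;
# this is consistent with `kpts * meters_per_pixel - 50` in verify() and
# `resolution = int(100 / meters_per_pixel)` in pipeline_test(), but the actual
# helpers may differ. The underscored names are illustrative only.
def _pts_from_pixel_to_meter_sketch(pts_in_pixel, meters_per_pixel, half_extent=50.0):
    """Convert (u, v) pixel keypoints to metric coordinates in the sensor frame."""
    return pts_in_pixel * meters_per_pixel - half_extent


def _pts_from_meter_to_pixel_sketch(pts_in_meter, meters_per_pixel, half_extent=50.0):
    """Inverse conversion: metric coordinates in the sensor frame back to pixels."""
    return (pts_in_meter + half_extent) / meters_per_pixel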
def verify():
    """
    Verify that the keypoints from SuperPoint + SuperGlue are correctly
    labelled by the ground-truth relative pose.
    """
    images_dir = os.path.join(args.dataset_dir, args.sequence)
    images_info = make_images_info(
        struct_filename=os.path.join(args.dataset_dir,
                                     'struct_file_' + args.sequence + '.txt'))
    dataset = SuperglueDataset(
        images_info=images_info,
        images_dir=images_dir,
        positive_search_radius=args.positive_search_radius,
        meters_per_pixel=args.meters_per_pixel)
    data_loader = DataLoader(dataset, batch_size=1, shuffle=True)
    saved_model_file = os.path.join(args.saved_model_path,
                                    'superglue-lidar-birdview.pth.tar')
    config = {
        'superpoint': {
            'nms_radius': 4,
            'keypoint_threshold': 0.005,
            'max_keypoints': 200,
        },
        'Superglue': {
            'weights': 'outdoor',
            'sinkhorn_iterations': 100,
            'match_threshold': 0.2,
        }
    }
    model = Matching(config)
    model_checkpoint = torch.load(saved_model_file,
                                  map_location=lambda storage, loc: storage)
    model.load_state_dict(model_checkpoint)
    print("Loaded model checkpoints from '{}'.".format(saved_model_file))
    device = torch.device(
        'cuda' if torch.cuda.is_available() and args.use_gpu else 'cpu')
    model.to(device)

    torch.set_grad_enabled(False)
    for target, source, T_target_source in data_loader:
        assert (source.shape == target.shape)
        B, C, W, H = source.shape
        target = target.to(device)
        source = source.to(device)
        pred = model({'image0': target, 'image1': source})
        target_kpts = pred['keypoints0'][0].cpu()
        source_kpts = pred['keypoints1'][0].cpu()
        if len(target_kpts) == 0 or len(source_kpts) == 0:
            continue
        # in superglue/numpy/tensor the coordinates are (i,j), which correspond
        # to (v,u) in PIL Image / opencv
        target_kpts_in_meters = target_kpts * args.meters_per_pixel - 50
        source_kpts_in_meters = source_kpts * args.meters_per_pixel - 50
        match_mask_ground_truth = make_ground_truth_matrix(
            target_kpts_in_meters, source_kpts_in_meters, T_target_source[0],
            args.tolerance_in_meters)

        target_image_raw = target[0][0].cpu().numpy()
        source_image_raw = source[0][0].cpu().numpy()
        target_image_raw = np.stack([target_image_raw] * 3, -1) * 30
        source_image_raw = np.stack([source_image_raw] * 3, -1) * 30
        cv2.imshow('target_image_raw', target_image_raw)

        # project source keypoints into the target image with the ground-truth
        # relative pose
        T_target_source = T_target_source[0].numpy()
        source_kpts = source_kpts.numpy()
        source_kpts_in_meters = pts_from_pixel_to_meter(source_kpts,
                                                        args.meters_per_pixel)
        print('T_target_source:\n', T_target_source)
        source_kpts_in_meters_in_target_img = [
            (T_target_source[:3, :3] @ np.array([source_kpt[0], source_kpt[1], 0])
             + T_target_source[:3, 3])[:2]
            for source_kpt in source_kpts_in_meters
        ]
        source_kpts_in_meters_in_target_img = np.array(
            source_kpts_in_meters_in_target_img)
        source_kpts_in_target_img = pts_from_meter_to_pixel(
            source_kpts_in_meters_in_target_img, args.meters_per_pixel)
        source_kpts = np.round(source_kpts).astype(int)
        source_kpts_in_target_img = np.round(source_kpts_in_target_img).astype(int)

        # display the source keypoints and their ground-truth projections as circles
        target_image_poi = target_image_raw.copy()
        source_image_poi = source_image_raw.copy()
        for (x0, y0), (x1, y1) in zip(source_kpts, source_kpts_in_target_img):
            cv2.circle(target_image_poi, (x1, y1), 2, (0, 255, 0), 1,
                       lineType=cv2.LINE_AA)
            cv2.circle(source_image_poi, (x0, y0), 2, (255, 0, 0), 1,
                       lineType=cv2.LINE_AA)

        cv2.imshow('target_image', target_image_poi)
        cv2.imshow('source_image', source_image_poi)
        cv2.waitKey(0)
    torch.set_grad_enabled(True)
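# NOTE (editor): make_ground_truth_matrix is defined elsewhere in the repository.
# The sketch below is one plausible implementation, assuming the function returns
# an (M+1) x (N+1) assignment mask whose last row/column are the SuperGlue
# dustbins: entry (i, j) is 1 when target keypoint i and source keypoint j
# (projected into the target frame with T_target_source) are closer than the
# tolerance, and keypoints without a partner are assigned to the dustbin. It
# relies on torch being imported at module level; the repository's implementation
# may differ.
def _make_ground_truth_matrix_sketch(target_kpts_in_meters, source_kpts_in_meters,
                                     T_target_source, tolerance_in_meters):
    target = torch.as_tensor(target_kpts_in_meters, dtype=torch.float32)
    source = torch.as_tensor(source_kpts_in_meters, dtype=torch.float32)
    T = torch.as_tensor(T_target_source, dtype=torch.float32)
    # lift 2D source keypoints to z = 0 and project them into the target frame
    source_h = torch.cat([source, torch.zeros(len(source), 1)], dim=1)
    source_in_target = source_h @ T[:3, :3].T + T[:3, 3]
    dists = torch.cdist(target, source_in_target[:, :2])  # (M, N) distances in meters
    M, N = dists.shape
    mask = torch.zeros(M + 1, N + 1)
    mask[:M, :N] = (dists < tolerance_in_meters).float()
    mask[:M, N] = (mask[:M, :N].sum(dim=1) == 0).float()  # unmatched target -> dustbin
    mask[M, :N] = (mask[:M, :N].sum(dim=0) == 0).float()  # unmatched source -> dustbin
    return mask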
def visualize_matching_all():
    """
    Visualize the full feature-point matching pipeline.
    """
    images_dir = os.path.join(args.dataset_dir, args.sequence)
    images_info = make_images_info(
        struct_filename=os.path.join(args.dataset_dir,
                                     'struct_file_' + args.sequence + '.txt'))
    dataset = SuperglueDataset(
        images_info=images_info,
        images_dir=images_dir,
        positive_search_radius=args.positive_search_radius,
        meters_per_pixel=args.meters_per_pixel,
        return_filename=True)
    data_loader = DataLoader(dataset, batch_size=1, shuffle=False)
    saved_model_file = os.path.join(args.saved_model_path,
                                    'spsg-lidar-birdview.pth.tar')
    config = {
        'superpoint': {
            'nms_radius': 4,
            'keypoint_threshold': 0.005,
            'max_keypoints': 200,
        },
        'Superglue': {
            'weights': 'outdoor',
            'sinkhorn_iterations': 100,
            'match_threshold': 0.1,
        }
    }
    model = Matching(config)
    model_checkpoint = torch.load(saved_model_file,
                                  map_location=lambda storage, loc: storage)
    model.load_state_dict(model_checkpoint)
    print("Loaded model checkpoints from '{}'.".format(saved_model_file))
    device = torch.device(
        'cuda' if torch.cuda.is_available() and args.use_gpu else 'cpu')
    model.to(device)

    torch.set_grad_enabled(False)
    for target, source, T_target_source, target_filename, source_filename in data_loader:
        assert (source.shape == target.shape)
        print(target_filename[0])
        print(source_filename[0])
        B, C, W, H = source.shape
        target = target.to(device)
        source = source.to(device)
        pred = model({'image0': target, 'image1': source})
        target_kpts = pred['keypoints0'][0].cpu()
        source_kpts = pred['keypoints1'][0].cpu()
        if len(target_kpts) == 0 or len(source_kpts) == 0:
            continue
        # in superglue/numpy/tensor the coordinates are (i,j), which correspond
        # to (v,u) in PIL Image / opencv
        target_kpts_in_meters = target_kpts * args.meters_per_pixel - 50
        source_kpts_in_meters = source_kpts * args.meters_per_pixel - 50
        match_mask_ground_truth = make_ground_truth_matrix(
            target_kpts_in_meters, source_kpts_in_meters, T_target_source[0],
            args.tolerance_in_meters)

        target_image_raw = target[0][0].cpu().numpy()
        source_image_raw = source[0][0].cpu().numpy()
        target_image_raw = np.stack([target_image_raw] * 3, -1) * 10
        source_image_raw = np.stack([source_image_raw] * 3, -1) * 10
        cv2.imshow('target_image_raw', target_image_raw)
        cv2.imshow('source_image_raw', source_image_raw)

        # project source keypoints into the target image with the ground-truth
        # relative pose
        T_target_source = T_target_source[0].numpy()
        source_kpts = source_kpts.numpy()
        target_kpts = target_kpts.numpy()
        source_kpts_in_meters = pts_from_pixel_to_meter(source_kpts,
                                                        args.meters_per_pixel)
        print('T_target_source:\n', T_target_source)
        source_kpts_in_meters_in_target_img = [
            (T_target_source[:3, :3] @ np.array([source_kpt[0], source_kpt[1], 0])
             + T_target_source[:3, 3])[:2]
            for source_kpt in source_kpts_in_meters
        ]
        source_kpts_in_meters_in_target_img = np.array(
            source_kpts_in_meters_in_target_img)
        source_kpts_in_target_img = pts_from_meter_to_pixel(
            source_kpts_in_meters_in_target_img, args.meters_per_pixel)
        source_kpts = np.round(source_kpts).astype(int)
        source_kpts_in_target_img = np.round(source_kpts_in_target_img).astype(int)

        # keypoint visualization
        target_image_poi = visualize_poi(target_image_raw.copy(), target_kpts,
                                         (0, 1, 0))
        source_image_poi = visualize_poi(source_image_raw.copy(), source_kpts,
                                         (1, 0, 0))
        cv2.imshow('target_image_poi', target_image_poi)
        cv2.imshow('source_image_poi', source_image_poi)

        matches = pred['matches0'][0].cpu().numpy()
        valid = matches > -1
        target_kpts_matched = target_kpts[valid]
        source_kpts_matched = source_kpts[matches[valid]]

        # matching visualization
        match_image = visualize_matching(target_image_poi, source_image_poi,
                                         target_kpts_matched,
                                         source_kpts_matched)

        # compose all images into a single window
        W, H = 480, 460
        h_margin = 10
        v_margin = 10
        window_image = np.ones((2 * H + 2 * v_margin, 2 * W + h_margin, 3))
        window_image[:H, :W] = cv2.resize(target_image_raw, (W, H),
                                          interpolation=cv2.INTER_NEAREST)
        window_image[:H, -W:] = cv2.resize(source_image_raw, (W, H),
                                           interpolation=cv2.INTER_NEAREST)
        window_image[H + v_margin:, :] = cv2.resize(
            match_image, (2 * W + h_margin, H + v_margin),
            interpolation=cv2.INTER_NEAREST)
        cv2.imshow('match_image', match_image)
        cv2.imshow('window_image', window_image)
        cv2.waitKey(0)
    torch.set_grad_enabled(True)
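# NOTE (editor): visualize_poi / visualize_matching come from the project's
# visualization utilities and are not shown in this file. The sketches below
# follow the drawing code used in verify() above (circles for keypoints; target
# on the left, source on the right, matches drawn as lines). The real helpers
# may take extra arguments (e.g. a match list and an inlier threshold, as used
# in pipeline_test()); these underscored versions are illustrative only.
def _visualize_poi_sketch(image, kpts, color):
    """Draw keypoints as small circles on a copy of a 3-channel image."""
    out = image.copy()
    for x, y in np.round(np.asarray(kpts)).astype(int):
        cv2.circle(out, (int(x), int(y)), 4, color, 1, lineType=cv2.LINE_AA)
    return out


def _visualize_matching_sketch(target_image, source_image, target_kpts,
                               source_kpts, margin=10):
    """Place target and source side by side and connect matched keypoints."""
    H, W = target_image.shape[:2]
    out = np.ones((H, 2 * W + margin, 3))
    out[:, :W] = target_image
    out[:, W + margin:] = source_image
    for (x0, y0), (x1, y1) in zip(np.round(np.asarray(target_kpts)).astype(int),
                                  np.round(np.asarray(source_kpts)).astype(int)):
        cv2.line(out, (int(x0), int(y0)), (int(x1) + margin + W, int(y1)),
                 color=(0.9, 0.9, 0), thickness=1, lineType=cv2.LINE_AA)
    return out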
def pipeline_test():
    torch.set_grad_enabled(False)

    # Define model for embedding
    base_model = BaseModel(300, 300)
    net_vlad = NetVLAD(num_clusters=args.num_clusters, dim=256, alpha=1.0,
                       outdim=args.final_dim)
    model = EmbedNet(base_model, net_vlad)

    saved_model_file_bevnet = os.path.join(args.saved_model_path,
                                           'model-to-check-top1.pth.tar')
    model_checkpoint = torch.load(saved_model_file_bevnet,
                                  map_location=lambda storage, loc: storage)
    model.load_state_dict(model_checkpoint)
    print("Loaded bevnet checkpoints from '{}'.".format(saved_model_file_bevnet))

    database_images_dir = os.path.join(args.dataset_dir, args.sequence)
    query_images_dir = os.path.join(args.dataset_dir, args.sequence)
    database_images_info = query_images_info = make_images_info(
        struct_filename=os.path.join(args.dataset_dir,
                                     'struct_file_' + args.sequence + '.txt'))
    if args.use_different_sequence:
        database_images_info = make_images_info(
            struct_filename=os.path.join(
                args.dataset_dir,
                'struct_file_' + args.sequence_database + '.txt'))
        query_images_info = make_images_info(
            struct_filename=os.path.join(
                args.dataset_dir,
                'struct_file_' + args.sequence_query + '.txt'))
        database_images_dir = os.path.join(args.dataset_dir,
                                           args.sequence_database)
        query_images_dir = os.path.join(args.dataset_dir, args.sequence_query)

    image_database = ImageDatabase(images_info=database_images_info,
                                   images_dir=database_images_dir,
                                   model=model,
                                   generate_database=True,
                                   transforms=input_transforms())
    config = {
        'superpoint': {
            'nms_radius': 4,
            'keypoint_threshold': 0.005,
            'max_keypoints': -1
        },
        'Superglue': {
            'weights': 'indoor',
            'sinkhorn_iterations': 100,
            'match_threshold': 0.2,
        }
    }
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    matching = Matching(config).eval().to(device)
    saved_model_file_superglue = os.path.join(
        args.saved_model_path, 'superglue-lidar-rotation-invariant.pth.tar')
    model_checkpoint = torch.load(saved_model_file_superglue,
                                  map_location=lambda storage, loc: storage)
    matching.load_state_dict(model_checkpoint)
    print("Loaded superglue checkpoints from '{}'.".format(
        saved_model_file_superglue))

    translation_errors = []
    rotation_errors = []
    success_records = []
    accumulated_distance = 0
    last_T_w_source_gt = None
    true_count = 0
    for query_image_info in tqdm(query_images_info):
        query_results = image_database.query_image(
            image_filename=os.path.join(query_images_dir,
                                        query_image_info['image_file']),
            num_results=args.top_k + 1)
        if args.use_different_sequence:
            query_results = query_results[:args.top_k]
        else:
            # skip the first result, which is the query image itself
            query_results = query_results[1:args.top_k + 1]

        best_score = -1
        T_w_source_best = None
        min_inliers = 20
        max_inliers = 30
        resolution = int(100 / args.meters_per_pixel)
        for query_result in query_results:
            target_image = Image.open(
                os.path.join(database_images_dir, query_result['image_file']))
            source_image = Image.open(
                os.path.join(query_images_dir, query_image_info['image_file']))
            target_kpts, source_kpts = superglue_match(target_image,
                                                       source_image,
                                                       resolution, matching)
            target_kpts_in_meters = pts_from_pixel_to_meter(
                target_kpts, args.meters_per_pixel)
            source_kpts_in_meters = pts_from_pixel_to_meter(
                source_kpts, args.meters_per_pixel)
            T_target_source, score = compute_relative_pose_with_ransac_test(
                target_kpts_in_meters, source_kpts_in_meters)
            # T_target_source, score = compute_relative_pose_with_ransac(
            #     target_kpts_in_meters, source_kpts_in_meters)
            if score is None:
                continue
            if score > best_score and score > min_inliers:
                best_score = score
                # TODO: the way we handle the se3 may be inappropriate
                T_target_source = np.array(
                    [[T_target_source[0, 0], T_target_source[0, 1], 0, T_target_source[0, 2]],
                     [T_target_source[1, 0], T_target_source[1, 1], 0, T_target_source[1, 2]],
                     [0, 0, 1, 0],
                     [0, 0, 0, 1]])
                T_w_target = np.hstack([
                    R.from_quat(query_result['orientation'][[1, 2, 3, 0]]).as_matrix(),
                    query_result['position'].reshape(3, 1)
                ])
                T_w_target = np.vstack([T_w_target, np.array([0, 0, 0, 1])])
                T_w_source_best = T_w_target @ T_target_source

            INVERSE_AUGMENTATION = False
            if INVERSE_AUGMENTATION:
                target_image_inv = TF.rotate(target_image, 180)
                target_kpts_inv, source_kpts = superglue_match(
                    target_image_inv, source_image, resolution, matching)
                target_kpts_in_meters_inv = pts_from_pixel_to_meter(
                    target_kpts_inv, args.meters_per_pixel)
                source_kpts_in_meters = pts_from_pixel_to_meter(
                    source_kpts, args.meters_per_pixel)
                T_target_inv_source, score = compute_relative_pose_with_ransac(
                    target_kpts_in_meters_inv, source_kpts_in_meters)
                if score is None:
                    continue
                if score > best_score and score > min_inliers:
                    best_score = score
                    # the target image was rotated by 180 degrees, so the 2D
                    # rotation and translation recovered against it are negated
                    T_target_source = np.array(
                        [[-T_target_inv_source[0, 0], -T_target_inv_source[0, 1], 0, -T_target_inv_source[0, 2]],
                         [-T_target_inv_source[1, 0], -T_target_inv_source[1, 1], 0, -T_target_inv_source[1, 2]],
                         [0, 0, 1, 0],
                         [0, 0, 0, 1]])
                    T_w_target = np.hstack([
                        R.from_quat(query_result['orientation'][[1, 2, 3, 0]]).as_matrix(),
                        query_result['position'].reshape(3, 1)
                    ])
                    T_w_target = np.vstack([T_w_target, np.array([0, 0, 0, 1])])
                    T_w_source_best = T_w_target @ T_target_source

            if best_score > max_inliers:
                break

        # ground truth pose
        T_w_source_gt = np.hstack([
            R.from_quat(query_image_info['orientation'][[1, 2, 3, 0]]).as_matrix(),
            query_image_info['position'].reshape(3, 1)
        ])
        T_w_source_gt = np.vstack([T_w_source_gt, np.array([0, 0, 0, 1])])

        # record travelled distance
        if last_T_w_source_gt is not None:
            T_last_current = np.linalg.inv(last_T_w_source_gt) @ T_w_source_gt
            accumulated_distance += np.sqrt(
                T_last_current[:3, 3] @ T_last_current[:3, 3])
        last_T_w_source_gt = T_w_source_gt

        if T_w_source_best is not None:
            delta_T_w_source = np.linalg.inv(T_w_source_best) @ T_w_source_gt
            delta_translation = np.sqrt(
                delta_T_w_source[:3, 3] @ delta_T_w_source[:3, 3])
            delta_degree = np.arccos(
                min(1, 0.5 * (np.trace(delta_T_w_source[:3, :3]) - 1))) / np.pi * 180
            print('Translation error: {}'.format(delta_translation))
            print('Rotation error: {}'.format(delta_degree))
            translation_errors.append(delta_translation)
            rotation_errors.append(delta_degree)
            success_records.append((accumulated_distance, True))
        else:
            print('Global localization failed.')
            success_records.append((accumulated_distance, False))
        # translation_errors.append(float('nan'))
        # print('accumulated_distance', accumulated_distance)

    translation_errors = np.array(translation_errors)
    rotation_errors = np.array(rotation_errors)
    print('Mean translation error: {}'.format(translation_errors.mean()))
    for r in [0.1, 0.2, 0.3, 0.5, 0.8, 1.0, 2, 3, 4, 5, 6, 7, 8, 9, 10]:
        print('Percentage of translation errors under {} m: {}'.format(
            r, (translation_errors < r).sum() / len(translation_errors)))
    for theta in [1.0, 2, 3, 4, 5, 6, 7, 8, 9, 10]:
        print('Percentage of rotation errors under {} degrees: {}'.format(
            theta, (rotation_errors < theta).sum() / len(rotation_errors)))

    plt.scatter(np.linspace(0, 50, num=len(translation_errors)),
                np.array(translation_errors))
    plt.show()

    travelled_distances = [
        0.2, 0.4, 0.6, 0.8, 1.0, 1.5, 2, 3, 4, 5, 6, 8, 10, 15, 20, 25, 30, 35,
        40, 45, 50
    ]
    probabilities = []
    for thres_distance in travelled_distances:
        probabilities.append(
            localization_probability(accumulated_distance,
                                     np.array(success_records),
                                     thres_distance))
    plt.plot(travelled_distances, probabilities, lw=1)
    plt.xlabel("travelled distance")
    plt.ylabel("probabilities")
    plt.show()

    translation_errors = translation_errors[~np.isnan(translation_errors)]
    rotation_errors = rotation_errors[~np.isnan(rotation_errors)]
    trans_err_avg = translation_errors.mean()
    trans_err_std = translation_errors - trans_err_avg
    trans_err_std = np.sqrt((trans_err_std * trans_err_std).mean())
    print("average translation error: {}".format(trans_err_avg))
    print("standard deviation of translation error: {}".format(trans_err_std))
    rotation_err_avg = rotation_errors.mean()
    rotation_err_std = rotation_errors - rotation_err_avg
    rotation_err_std = np.sqrt((rotation_err_std * rotation_err_std).mean())
    print("average rotation error: {}".format(rotation_err_avg))
    print("standard deviation of rotation error: {}".format(rotation_err_std))
    print("recall: {}".format(len(translation_errors) / len(query_images_info)))
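# NOTE (editor): compute_relative_pose_with_ransac(_test) are imported from the
# project's pose-estimation utilities. The sketch below illustrates the idea,
# assuming the inputs are matched (N, 2) metric keypoints ordered pairwise and
# that the returned matrix stores a 2D transform in its top two rows
# (indexed as T_target_source[0:2, 0:2] and T_target_source[0:2, 2] above) with
# the inlier count as the score. It uses OpenCV's partial affine estimator, so
# unlike a strictly rigid solver it also allows scale; the repository's
# implementation may differ and may return additional outputs (e.g. matches).
def _compute_relative_pose_with_ransac_sketch(target_kpts_in_meters,
                                              source_kpts_in_meters,
                                              inlier_threshold=0.5):
    src = np.asarray(source_kpts_in_meters, dtype=np.float64)
    dst = np.asarray(target_kpts_in_meters, dtype=np.float64)
    if len(src) < 2:
        return None, None
    # robustly estimate rotation + translation (+ scale) with RANSAC
    T, inlier_mask = cv2.estimateAffinePartial2D(
        src, dst, method=cv2.RANSAC, ransacReprojThreshold=inlier_threshold)
    if T is None:
        return None, None
    return T, int(inlier_mask.sum())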
def validate(epoch, model, data_loader, viz_validate=None):
    torch.set_grad_enabled(False)
    iteration = (epoch - 1) * len(data_loader)
    accum_accuracy = 0
    accum_recall = 0
    accum_precision = 0
    accum_true_pairs = 0
    count_accumulate = 0

    overall_detection = 0
    overall_recall = 0
    overall_precision = 0
    overall_true_pairs = 0
    overall_count = 0

    device = torch.device("cuda" if args.use_gpu else "cpu")
    with tqdm(data_loader) as tq:
        for target, source, T_target_source in tq:
            iteration += 1
            assert (target.shape == source.shape)
            B, C, W, H = target.shape
            target = target.to(device)
            source = source.to(device)
            pred = model({'image0': target, 'image1': source})
            target_kpts = pred['keypoints0'][0].cpu()
            source_kpts = pred['keypoints1'][0].cpu()
            if len(target_kpts) == 0 or len(source_kpts) == 0:
                continue
            # in superglue/numpy/tensor the coordinates are (i,j), which
            # correspond to (v,u) in PIL Image / opencv
            target_kpts_in_meters = pts_from_pixel_to_meter(
                target_kpts, args.meters_per_pixel)
            source_kpts_in_meters = pts_from_pixel_to_meter(
                source_kpts, args.meters_per_pixel)
            match_mask_ground_truth = make_ground_truth_matrix(
                target_kpts_in_meters, source_kpts_in_meters,
                T_target_source[0], args.tolerance_in_meters)

            if (match_mask_ground_truth[:-1, :-1].sum() > 0
                    and (pred['matches0'] > 0).sum() > 0
                    and (pred['matches1'] > 0).sum() > 0):
                metrics = compute_metrics(pred['matches0'], pred['matches1'],
                                          match_mask_ground_truth)
                accum_accuracy += float(metrics['matches0_acc'])
                accum_recall += float(metrics['matches0_recall'])
                accum_precision += float(metrics['matches0_precision'])
                accum_true_pairs += match_mask_ground_truth[:-1, :-1].sum()
                count_accumulate += 1

                overall_recall += float(metrics['matches0_recall'])
                overall_precision += float(metrics['matches0_precision'])
                overall_true_pairs += match_mask_ground_truth[:-1, :-1].sum()
                overall_detection += (len(target_kpts) + len(source_kpts)) / 2
                overall_count += 1

            if iteration % 50 == 0:
                print("accuracy: {}".format(accum_accuracy / 50))
                print("precision: {}".format(accum_precision / 50))
                print("recall: {}".format(accum_recall / 50))
                print("true pairs: {}".format(accum_true_pairs / 50))
                if viz_validate is not None:
                    viz_validate['viz'].scatter(
                        X=np.array([[iteration,
                                     accum_precision / count_accumulate]]),
                        name="validate-precision",
                        win=viz_validate['validate_precision'],
                        update="append")
                    viz_validate['viz'].scatter(
                        X=np.array([[iteration,
                                     accum_recall / count_accumulate]]),
                        name="validate-recall",
                        win=viz_validate['validate_recall'],
                        update="append")
                    viz_validate['viz'].scatter(
                        X=np.array([[iteration,
                                     accum_true_pairs / count_accumulate]]),
                        name="validate-true-pairs",
                        win=viz_validate['validate_true_pairs'],
                        update="append")
                accum_accuracy = 0
                accum_recall = 0
                accum_precision = 0
                accum_true_pairs = 0
                count_accumulate = 0

            del target, source
            torch.cuda.empty_cache()

    torch.set_grad_enabled(True)
    print("average recall: {}".format(overall_recall / overall_count))
    print("average precision: {}".format(overall_precision / overall_count))
    print("average true pairs: {}".format(overall_true_pairs / overall_count))
    print("average detected points: {}".format(overall_detection / overall_count))
def train(epoch, model, optimizer, data_loader, viz_train=None):
    print("Processing epoch {} ......".format(epoch))
    accum_loss = 0
    accum_accuracy = 0
    accum_recall = 0
    accum_precision = 0
    accum_true_pairs = 0
    print_results_period = 20
    count_accumulate = 0
    iteration = (epoch - 1) * len(data_loader)

    model.train()
    device = torch.device("cuda" if args.use_gpu else "cpu")
    model.to(device)
    with tqdm(data_loader) as tq:
        for targets, sources, T_target_sources in tq:
            iteration += 1
            optimizer.zero_grad()
            batch_loss = None
            for target, source, T_target_source in zip(targets, sources,
                                                       T_target_sources):
                assert (target.shape == source.shape)
                C, W, H = target.shape
                target = target[None, ...].to(device)
                source = source[None, ...].to(device)
                pred = model({'image0': target, 'image1': source})
                target_kpts = pred['keypoints0'][0].cpu()
                source_kpts = pred['keypoints1'][0].cpu()
                if len(target_kpts) == 0 or len(source_kpts) == 0:
                    continue
                # in superglue/numpy/tensor the coordinates are (u,v), which
                # correspond to (y,x)
                target_kpts_in_meters = pts_from_pixel_to_meter(
                    target_kpts, args.meters_per_pixel)
                source_kpts_in_meters = pts_from_pixel_to_meter(
                    source_kpts, args.meters_per_pixel)
                match_mask_ground_truth = make_ground_truth_matrix(
                    target_kpts_in_meters, source_kpts_in_meters,
                    T_target_source, args.tolerance_in_meters)

                # negative log-likelihood of the ground-truth assignment under
                # the predicted (log-)score matrix
                loss = -pred['scores'][0] * match_mask_ground_truth.to(device)
                loss = loss.sum()
                if batch_loss is None:
                    batch_loss = loss
                else:
                    batch_loss += loss

                # record training metrics
                if (match_mask_ground_truth[:-1, :-1].sum() > 0
                        and (pred['matches0'] > 0).sum() > 0
                        and (pred['matches1'] > 0).sum() > 0):
                    metrics = compute_metrics(pred['matches0'],
                                              pred['matches1'],
                                              match_mask_ground_truth)
                    accum_accuracy += float(metrics['matches0_acc'])
                    accum_recall += float(metrics['matches0_recall'])
                    accum_precision += float(metrics['matches0_precision'])
                    accum_true_pairs += match_mask_ground_truth[:-1, :-1].sum()
                    count_accumulate += 1

            if batch_loss is None:
                # every sample in this batch was skipped (no keypoints detected)
                continue
            batch_loss.backward()
            optimizer.step()
            accum_loss += batch_loss.item()

            if iteration % print_results_period == 0:
                print("loss: {}".format(accum_loss / print_results_period /
                                        args.batch_size))
                print("accuracy: {}".format(accum_accuracy / count_accumulate))
                print("precision: {}".format(accum_precision / count_accumulate))
                print("recall: {}".format(accum_recall / count_accumulate))
                print("true pairs: {}".format(accum_true_pairs / count_accumulate))
                if viz_train is not None:
                    viz_train['viz'].scatter(
                        X=np.array([[iteration,
                                     float(accum_loss / print_results_period /
                                           args.batch_size)]]),
                        name="train-loss",
                        win=viz_train['train_loss'],
                        update="append")
                    viz_train['viz'].scatter(
                        X=np.array([[iteration,
                                     accum_precision / count_accumulate]]),
                        name="train-precision",
                        win=viz_train['train_precision'],
                        update="append")
                    viz_train['viz'].scatter(
                        X=np.array([[iteration,
                                     accum_recall / count_accumulate]]),
                        name="train-recall",
                        win=viz_train['train_recall'],
                        update="append")
                    viz_train['viz'].scatter(
                        X=np.array([[iteration,
                                     accum_true_pairs / count_accumulate]]),
                        name="train-true-pairs",
                        win=viz_train['train_true_pairs'],
                        update="append")
                accum_loss = 0
                accum_accuracy = 0
                accum_recall = 0
                accum_precision = 0
                accum_true_pairs = 0
                count_accumulate = 0

            del target, source
            torch.cuda.empty_cache()
def validate_detector(detector, data_loader):
    accum_accuracy = 0
    accum_recall = 0
    accum_precision = 0
    accum_true_pairs = 0
    count_accumulate = 0

    overall_recall = 0
    overall_precision = 0
    overall_true_pairs = 0
    overall_count = 0

    device = torch.device("cuda" if args.use_gpu else "cpu")
    with tqdm(data_loader) as tq:
        for target, source, T_target_source in tq:
            assert (target.shape == source.shape)
            B, C, W, H = target.shape
            assert (B == 1 and C == 1)
            target = (target.squeeze().numpy() * 255).astype("uint8")
            source = (source.squeeze().numpy() * 255).astype("uint8")
            target_kpts, target_descs = detector.detectAndCompute(target, None)
            source_kpts, source_descs = detector.detectAndCompute(source, None)
            target_kpts = torch.Tensor(np.array([kp.pt for kp in target_kpts]))
            source_kpts = torch.Tensor(np.array([kp.pt for kp in source_kpts]))
            if len(target_kpts) == 0 or len(source_kpts) == 0:
                continue

            # brute-force matching on binary descriptors
            # (a knnMatch + ratio test could be used instead)
            bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=False)
            matches = bf.match(source_descs, target_descs)
            good = [[m.trainIdx, m.queryIdx] for m in matches]
            good = np.array(good)

            # in superglue/numpy/tensor the coordinates are (i,j), which
            # correspond to (v,u) in PIL Image / opencv
            target_kpts_in_meters = pts_from_pixel_to_meter(
                target_kpts, args.meters_per_pixel)
            source_kpts_in_meters = pts_from_pixel_to_meter(
                source_kpts, args.meters_per_pixel)
            match_mask_ground_truth = make_ground_truth_matrix(
                target_kpts_in_meters, source_kpts_in_meters,
                T_target_source[0], args.tolerance_in_meters)
            if len(good) == 0 or match_mask_ground_truth[:-1, :-1].sum() == 0:
                continue

            def compute_metrics(matches, ground_truth_mask):
                TP = 0
                for target_id, source_id in matches:
                    if ground_truth_mask[target_id, source_id] > 0:
                        TP += 1
                precision = TP / len(matches)
                recall = TP / match_mask_ground_truth[:-1, :-1].sum()
                return precision, recall

            precision, recall = compute_metrics(good, match_mask_ground_truth)
            print("precision: {}".format(precision))
            print("recall: {}".format(recall))
            print("true pairs: {}".format(
                match_mask_ground_truth[:-1, :-1].sum()))

            overall_recall += recall
            overall_precision += precision
            overall_true_pairs += match_mask_ground_truth[:-1, :-1].sum()
            overall_count += 1

    print("average precision: {}".format(overall_precision / overall_count))
    print("average recall: {}".format(overall_recall / overall_count))
    print("average true pairs: {}".format(overall_true_pairs / overall_count))
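# NOTE (editor): validate_detector expects an OpenCV detector that produces
# binary descriptors, since it matches with cv2.NORM_HAMMING. A hypothetical
# usage sketch (the dataset/loader construction mirrors verify() above; the
# names below are illustrative, not the repository's actual entry point):
#
#     detector = cv2.ORB_create(nfeatures=200)
#     dataset = SuperglueDataset(images_info=images_info, images_dir=images_dir,
#                                positive_search_radius=args.positive_search_radius,
#                                meters_per_pixel=args.meters_per_pixel)
#     validate_detector(detector, DataLoader(dataset, batch_size=1, shuffle=False))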
def pipeline_test():
    torch.set_grad_enabled(False)

    # Define model for embedding
    base_model = BaseModel(300, 300)
    net_vlad = NetVLAD(num_clusters=args.num_clusters, dim=256, alpha=1.0,
                       outdim=args.final_dim)
    model = EmbedNet(base_model, net_vlad)

    saved_model_file_spinetvlad = os.path.join(args.saved_model_path,
                                               'model-to-check-top1.pth.tar')
    model_checkpoint = torch.load(saved_model_file_spinetvlad,
                                  map_location=lambda storage, loc: storage)
    model.load_state_dict(model_checkpoint)
    print("Loaded spinetvlad checkpoints from '{}'.".format(
        saved_model_file_spinetvlad))

    database_images_dir = os.path.join(args.dataset_dir, args.sequence)
    query_images_dir = os.path.join(args.dataset_dir, args.sequence)
    database_images_info = query_images_info = make_images_info(
        struct_filename=os.path.join(args.dataset_dir,
                                     'struct_file_' + args.sequence + '.txt'))
    if args.use_different_sequence:
        database_images_info = make_images_info(struct_filename=os.path.join(
            args.dataset_dir,
            'struct_file_' + args.sequence_database + '.txt'))
        query_images_info = make_images_info(struct_filename=os.path.join(
            args.dataset_dir, 'struct_file_' + args.sequence_query + '.txt'))
        database_images_dir = os.path.join(args.dataset_dir,
                                           args.sequence_database)
        query_images_dir = os.path.join(args.dataset_dir, args.sequence_query)

    image_database = ImageDatabase(images_info=database_images_info,
                                   images_dir=database_images_dir,
                                   model=model,
                                   generate_database=True,
                                   transforms=input_transforms())
    config = {
        'superpoint': {
            'nms_radius': 4,
            'keypoint_threshold': 0.005,
            'max_keypoints': -1
        },
        'Superglue': {
            'weights': 'indoor',
            'sinkhorn_iterations': 100,
            'match_threshold': 0.2,
        }
    }
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    matching = Matching(config).eval().to(device)
    saved_model_file_superglue = os.path.join(
        args.saved_model_path, 'spsg-rotation-invariant.pth.tar')
    # saved_model_file_superglue = os.path.join(args.saved_model_path, 'superglue-juxin.pth.tar')
    model_checkpoint = torch.load(saved_model_file_superglue,
                                  map_location=lambda storage, loc: storage)
    matching.load_state_dict(model_checkpoint)
    print("Loaded superglue checkpoints from '{}'.".format(
        saved_model_file_superglue))

    translation_errors = []
    rotation_errors = []
    success_records = []
    accumulated_distance = 0
    last_T_w_source_gt = None
    true_count = 0

    rospy.init_node('global_localization', anonymous=False)
    pose_publisher = rospy.Publisher('query_spi_pose', PoseStamped,
                                     queue_size=10)
    gt_pose_publisher = rospy.Publisher('gt_query_spi_pose', PoseStamped,
                                        queue_size=10)
    position_offset = np.array([851, -332, 1204])
    T_offset_w = None
    path_estimation_publisher = rospy.Publisher('query_spi_path', Path,
                                                queue_size=10)
    path_gt_publisher = rospy.Publisher('gt_spi_path', Path, queue_size=10)
    path_estimated = Path()
    path_gt = Path()
    path_estimated.header.frame_id = 'velodyne'
    path_gt.header.frame_id = 'velodyne'

    for query_image_info in tqdm(query_images_info):
        query_results = image_database.query_image(
            image_filename=os.path.join(query_images_dir,
                                        query_image_info['image_file']),
            num_results=args.top_k + 1)
        if args.use_different_sequence:
            query_results = query_results[:args.top_k]
        else:
            # skip the first result, which is the query image itself
            query_results = query_results[1:args.top_k + 1]

        best_score = -1
        T_w_source_best = None
        target_image_best = None
        min_inliers = 20
        max_inliers = 30
        resolution = int(100 / args.meters_per_pixel)
        source_image = Image.open(
            os.path.join(query_images_dir, query_image_info['image_file']))
        for query_result in query_results:
            target_image = Image.open(
                os.path.join(database_images_dir, query_result['image_file']))
            target_kpts, source_kpts, raw_target_kpts, raw_source_kpts = superglue_match(
                target_image, source_image, resolution, matching)
            target_kpts_in_meters = pts_from_pixel_to_meter(
                target_kpts, args.meters_per_pixel)
            source_kpts_in_meters = pts_from_pixel_to_meter(
                source_kpts, args.meters_per_pixel)
            T_target_source, score, matches = compute_relative_pose_with_ransac_test(
                target_kpts_in_meters, source_kpts_in_meters,
                output_matches=True)
            # T_target_source, score = compute_relative_pose_with_ransac(
            #     target_kpts_in_meters, source_kpts_in_meters)
            if score is None:
                continue
            if score > best_score:
                target_image_best = target_image
                best_target_kpts, best_source_kpts, best_raw_target_kpts, best_raw_source_kpts = \
                    target_kpts, source_kpts, raw_target_kpts, raw_source_kpts
            if score > best_score and score > min_inliers and best_score < max_inliers:
                best_score = score
                # TODO: the way we handle the se3 may be inappropriate
                T_target_source = np.array(
                    [[T_target_source[0, 0], T_target_source[0, 1], 0, T_target_source[0, 2]],
                     [T_target_source[1, 0], T_target_source[1, 1], 0, T_target_source[1, 2]],
                     [0, 0, 1, 0],
                     [0, 0, 0, 1]])
                T_w_target = np.hstack([
                    R.from_quat(query_result['orientation'][[1, 2, 3, 0]]).as_matrix(),
                    query_result['position'].reshape(3, 1)
                ])
                T_w_target = np.vstack([T_w_target, np.array([0, 0, 0, 1])])
                T_w_source_best = T_w_target @ T_target_source

            INVERSE_AUGMENTATION = False
            if INVERSE_AUGMENTATION:
                target_image_inv = TF.rotate(target_image, 180)
                target_kpts_inv, source_kpts, _, _ = superglue_match(
                    target_image_inv, source_image, resolution, matching)
                target_kpts_in_meters_inv = pts_from_pixel_to_meter(
                    target_kpts_inv, args.meters_per_pixel)
                source_kpts_in_meters = pts_from_pixel_to_meter(
                    source_kpts, args.meters_per_pixel)
                T_target_inv_source, score = compute_relative_pose_with_ransac(
                    target_kpts_in_meters_inv, source_kpts_in_meters)
                if score is None:
                    continue
                if score > best_score and score > min_inliers:
                    best_score = score
                    # Since the target image was rotated by 180 degrees, the 2D
                    # rotation and translation recovered against it are negated
                    T_target_source = np.array(
                        [[-T_target_inv_source[0, 0], -T_target_inv_source[0, 1], 0, -T_target_inv_source[0, 2]],
                         [-T_target_inv_source[1, 0], -T_target_inv_source[1, 1], 0, -T_target_inv_source[1, 2]],
                         [0, 0, 1, 0],
                         [0, 0, 0, 1]])
                    T_w_target = np.hstack([
                        R.from_quat(query_result['orientation'][[1, 2, 3, 0]]).as_matrix(),
                        query_result['position'].reshape(3, 1)
                    ])
                    T_w_target = np.vstack([T_w_target, np.array([0, 0, 0, 1])])
                    T_w_source_best = T_w_target @ T_target_source

            if best_score > max_inliers:
                break

        # raw query SPI
        query_image = np.array(source_image) / 255 * 10
        query_image = cv2.resize(query_image, (resolution, resolution),
                                 interpolation=cv2.INTER_NEAREST)
        # raw candidate SPI
        candidate_image = np.array(target_image_best) / 255 * 10
        candidate_image = cv2.resize(candidate_image, (resolution, resolution),
                                     interpolation=cv2.INTER_NEAREST)
        query_image = np.stack([query_image] * 3, -1)
        candidate_image = np.stack([candidate_image] * 3, -1)

        # query / candidate SPI with features
        query_image_with_poi = visualize_poi(query_image.copy(),
                                             best_raw_source_kpts,
                                             color=(255, 0, 0))
        candidate_image_with_poi = visualize_poi(candidate_image.copy(),
                                                 best_raw_target_kpts,
                                                 color=(0, 255, 0))
        # matching image
        match_image = visualize_matching(query_image_with_poi,
                                         candidate_image_with_poi,
                                         best_source_kpts, best_target_kpts,
                                         matches, threshold=min_inliers)

        # compose all images into a single window
        W, H = 480, 460
        h_margin = 10
        v_margin = 10
        window_image = np.ones((2 * H + 2 * v_margin, 2 * W + h_margin, 3))
        window_image[:H, :W] = cv2.resize(query_image, (W, H),
                                          interpolation=cv2.INTER_NEAREST)
        window_image[:H, -W:] = cv2.resize(candidate_image, (W, H),
                                           interpolation=cv2.INTER_NEAREST)
        window_image[H + v_margin:, :] = cv2.resize(
            match_image, (2 * W + h_margin, H + v_margin),
            interpolation=cv2.INTER_NEAREST)
        cv2.imshow("LiDAR global localization using SPI", window_image)
        cv2.waitKey(1)

        # ground truth pose
        T_w_source_gt = np.hstack([
            R.from_quat(query_image_info['orientation'][[1, 2, 3, 0]]).as_matrix(),
            query_image_info['position'].reshape(3, 1)
        ])
        T_w_source_gt = np.vstack([T_w_source_gt, np.array([0, 0, 0, 1])])
        if T_offset_w is None:
            T_offset_w = np.linalg.inv(T_w_source_gt)

        # record travelled distance
        if last_T_w_source_gt is not None:
            T_last_current = np.linalg.inv(last_T_w_source_gt) @ T_w_source_gt
            accumulated_distance += np.sqrt(
                T_last_current[:3, 3] @ T_last_current[:3, 3])
        last_T_w_source_gt = T_w_source_gt

        if T_w_source_best is not None:
            delta_T_w_source = np.linalg.inv(T_w_source_best) @ T_w_source_gt
            delta_translation = np.sqrt(
                delta_T_w_source[:3, 3] @ delta_T_w_source[:3, 3])
            delta_degree = np.arccos(
                min(1, 0.5 * (np.trace(delta_T_w_source[:3, :3]) - 1))) / np.pi * 180
            print('Translation error: {}'.format(delta_translation))
            print('Rotation error: {}'.format(delta_degree))
            translation_errors.append(delta_translation)
            rotation_errors.append(delta_degree)
            success_records.append((accumulated_distance, True))

            # publish estimated pose and path
            msg = PoseStamped()
            msg.header.stamp = rospy.Time.now()
            msg.header.frame_id = 'velodyne'
            T_offset_source_best = T_offset_w @ T_w_source_best
            msg.pose.position.x, msg.pose.position.y, msg.pose.position.z = \
                T_offset_source_best[:3, 3]
            quaternion = R.from_matrix(T_offset_source_best[:3, :3]).as_quat()
            msg.pose.orientation.x, msg.pose.orientation.y, \
                msg.pose.orientation.z, msg.pose.orientation.w = quaternion
            pose_publisher.publish(msg)

            path_estimated.poses.append(msg)
            path_estimated.header.stamp = rospy.Time.now()
            path_estimation_publisher.publish(path_estimated)
        else:
            print('Global localization failed.')
            success_records.append((accumulated_distance, False))

        # publish ground truth pose and path
        gt_msg = PoseStamped()
        gt_msg.header.stamp = rospy.Time.now()
        gt_msg.header.frame_id = 'velodyne'
        T_offset_source_gt = T_offset_w @ T_w_source_gt
        gt_msg.pose.position.x, gt_msg.pose.position.y, gt_msg.pose.position.z = \
            T_offset_source_gt[:3, 3]
        quaternion = R.from_matrix(T_offset_source_gt[:3, :3]).as_quat()
        gt_msg.pose.orientation.x, gt_msg.pose.orientation.y, \
            gt_msg.pose.orientation.z, gt_msg.pose.orientation.w = quaternion
        gt_pose_publisher.publish(gt_msg)

        path_gt.poses.append(gt_msg)
        path_gt.header.stamp = rospy.Time.now()
        path_gt_publisher.publish(path_gt)
        # translation_errors.append(float('nan'))
        # print('accumulated_distance', accumulated_distance)

    translation_errors = np.array(translation_errors)
    rotation_errors = np.array(rotation_errors)
    print('Mean translation error: {}'.format(translation_errors.mean()))
    for r in [0.1, 0.2, 0.3, 0.5, 0.8, 1.0, 2, 3, 4, 5, 6, 7, 8, 9, 10]:
        print('Percentage of translation errors under {} m: {}'.format(
            r, (translation_errors < r).sum() / len(translation_errors)))
    for theta in [1.0, 2, 3, 4, 5, 6, 7, 8, 9, 10]:
        print('Percentage of rotation errors under {} degrees: {}'.format(
            theta, (rotation_errors < theta).sum() / len(rotation_errors)))

    plt.scatter(
        np.linspace(0, len(translation_errors), num=len(translation_errors)),
        np.array(translation_errors))
    plt.xlabel("SPI id")
    plt.ylabel("translation error")
    plt.show()

    travelled_distances = [
        0.2, 0.4, 0.6, 0.8, 1.0, 1.5, 2, 3, 4, 5, 6, 8, 10, 15, 20, 25, 30, 35,
        40, 45, 50
    ]
    probabilities = []
    for thres_distance in travelled_distances:
        probabilities.append(
            localization_probability(accumulated_distance,
                                     np.array(success_records),
                                     thres_distance))
    plt.plot(travelled_distances, probabilities, lw=1)
    plt.xlabel("travelled distance")
    plt.ylabel("probabilities")
    # plt.show()

    translation_errors = translation_errors[~np.isnan(translation_errors)]
    rotation_errors = rotation_errors[~np.isnan(rotation_errors)]
    trans_err_avg = translation_errors.mean()
    trans_err_std = translation_errors - trans_err_avg
    trans_err_std = np.sqrt((trans_err_std * trans_err_std).mean())
    print("average translation error: {}".format(trans_err_avg))
    print("standard deviation of translation error: {}".format(trans_err_std))
    rotation_err_avg = rotation_errors.mean()
    rotation_err_std = rotation_errors - rotation_err_avg
    rotation_err_std = np.sqrt((rotation_err_std * rotation_err_std).mean())
    print("average rotation error: {}".format(rotation_err_avg))
    print("standard deviation of rotation error: {}".format(rotation_err_std))
    print("recall: {}".format(len(translation_errors) / len(query_images_info)))
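# NOTE (editor): localization_probability is defined elsewhere in the repository
# and its exact definition is not shown here. The sketch below is only a guess
# at the intended semantics: slide a window of length `thres_distance` along the
# travelled-distance axis and report the fraction of windows that contain at
# least one successful localization, given `success_records` as an array of
# (accumulated_distance, success_flag) pairs.
def _localization_probability_sketch(total_distance, success_records,
                                     thres_distance, step=0.1):
    distances = success_records[:, 0].astype(float)
    successes = success_records[:, 1].astype(bool)
    starts = np.arange(0.0, max(total_distance - thres_distance, step), step)
    if len(starts) == 0:
        return float(successes.any())
    hits = [
        successes[(distances >= s) & (distances <= s + thres_distance)].any()
        for s in starts
    ]
    return float(np.mean(hits))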