def main():
    args = parser.parse_args()
    from kitti_eval.exp_mask_utils import test_framework_KITTI as test_framework
    weights = torch.load(args.pretrained_posenet, map_location=lambda storage, loc: storage)
    seq_length = int(weights['state_dict']['conv1.0.weight'].size(1) / 3)
    pose_net = PoseExpNet(nb_ref_imgs=seq_length - 1, output_exp=True).to(device)
    pose_net.load_state_dict(weights['state_dict'], strict=False)

    dataset_dir = Path(args.dataset_dir)
    framework = test_framework(dataset_dir, args.sequences, seq_length)

    print('{} snippets to test'.format(len(framework)))
    errors = np.zeros((len(framework), 2), np.float32)
    if args.output_dir is not None:
        output_dir = Path(args.output_dir)
        output_dir.makedirs_p()
        predictions_array = np.zeros((len(framework), seq_length, 3, 4))

    for j, sample in enumerate(tqdm(framework)):
        imgs = sample['imgs']

        h, w, _ = imgs[0].shape
        if (not args.no_resize) and (h != args.img_height or w != args.img_width):
            imgs = [imresize(img, (args.img_height, args.img_width)).astype(np.float32) for img in imgs]

        imgs = [np.transpose(img, (2, 0, 1)) for img in imgs]

        ref_imgs = []
        for i, img in enumerate(imgs):
            img = torch.from_numpy(img).unsqueeze(0)
            img = ((img / 255 - 0.5) / 0.5).to(device)
            if i == len(imgs) // 2:
                tgt_img = img
            else:
                ref_imgs.append(img)

        exp_mask, poses = pose_net(tgt_img, ref_imgs)
        # print('This is the maskp')
        print(exp_mask.data.size())

        args.output_disp = True
        if args.output_disp:
            # disp_ = exp_mask.data[:,2,:,:].reshape(1,128,416)
            # print(disp_)
            # disp = (255*tensor2array(disp_, max_value=10, colormap='bone')).astype(np.uint8)
            max_value = exp_mask.data.max().item()
            array_1 = exp_mask.data[:, 0, :, :].squeeze().numpy()
            print(array_1)
            array_1 = (255 * array_1).astype(np.uint8)
            print(array_1)
            print(np.min(array_1))
            imsave(output_dir / '{}_disp{}'.format(j, '.png'), array_1)
def main():
    output_file = Path(args.output_file)
    tracker_file = Path(args.tracker_file)
    pretrained_weights = Path(args.pretrained_weights)
    dataset_dir = Path(args.dataset_dir)

    # Load pretrained model
    weights = torch.load(pretrained_weights, map_location=device)
    seq_length = int(weights['state_dict']['conv1.0.weight'].size(1) / 3)
    pose_net = PoseExpNet(nb_ref_imgs=seq_length - 1, output_exp=False).to(device)
    pose_net.load_state_dict(weights['state_dict'], strict=False)

    # Set up the KITTI framework for sequence 09 with 5-frame snippets.
    from kitti_eval.pose_evaluation_utils import test_framework_KITTI as test_framework
    framework = test_framework(dataset_dir, ['09'], 5)

    attacker = Attacker(framework, pose_net)
    noise_mask = np.load(tracker_file)
    perturbation = attacker.generate(noise_mask, 691, 731)
    np.save(output_file, perturbation)
    print('Attacked!')
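# Several of these scripts recover the snippet length from the checkpoint alone via
# weights['state_dict']['conv1.0.weight'].size(1) / 3. A minimal sketch of why that works,
# assuming (as in PoseExpNet) that the first layer sees the target and reference RGB frames
# stacked along the channel axis; the layer below is illustrative only, not the real model.
import torch.nn as nn

# Hypothetical first PoseExpNet block for 5-frame snippets: in_channels = 3 * 5 = 15.
conv1 = nn.Sequential(nn.Conv2d(15, 16, kernel_size=7, padding=3, stride=2), nn.ReLU(inplace=True))

# PyTorch stores conv weights as (out_channels, in_channels, kH, kW), so dimension 1 is the
# input channel count, and dividing by 3 (RGB) recovers the number of frames per snippet.
seq_length = conv1[0].weight.size(1) // 3   # == 5
nb_ref_imgs = seq_length - 1                # == 4 reference frames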
def main():
    args = parser.parse_args()
    if args.gt_type == 'KITTI':
        from kitti_eval.depth_evaluation_utils import test_framework_KITTI as test_framework
    elif args.gt_type == 'stillbox':
        from stillbox_eval.depth_evaluation_utils import test_framework_stillbox as test_framework

    disp_net = DispNetS().to(device)
    weights = torch.load(args.pretrained_dispnet)
    disp_net.load_state_dict(weights['state_dict'])
    disp_net.eval()

    if args.pretrained_posenet is None:
        print('no PoseNet specified, scale_factor will be determined by median ratio, '
              'which is kiiinda cheating (but consistent with original paper)')
        seq_length = 0
    else:
        weights = torch.load(args.pretrained_posenet)
        seq_length = int(weights['state_dict']['conv1.0.weight'].size(1) / 3)
        pose_net = PoseExpNet(nb_ref_imgs=seq_length - 1, output_exp=False).to(device)
        pose_net.load_state_dict(weights['state_dict'], strict=False)

    dataset_dir = Path(args.dataset_dir)
    if args.dataset_list is not None:
        with open(args.dataset_list, 'r') as f:
            test_files = list(f.read().splitlines())
    else:
        test_files = [file.relpathto(dataset_dir) for file in
                      sum([dataset_dir.files('*.{}'.format(ext)) for ext in args.img_exts], [])]

    framework = test_framework(dataset_dir, test_files, seq_length, args.min_depth, args.max_depth)

    print('{} files to test'.format(len(test_files)))
    errors = np.zeros((2, 7, len(test_files)), np.float32)
    if args.output_dir is not None:
        output_dir = Path(args.output_dir)
        output_dir.makedirs_p()

    for j, sample in enumerate(tqdm(framework)):
        tgt_img = sample['tgt']
        ref_imgs = sample['ref']

        h, w, _ = tgt_img.shape
        if (not args.no_resize) and (h != args.img_height or w != args.img_width):
            tgt_img = imresize(tgt_img, (args.img_height, args.img_width)).astype(np.float32)
            ref_imgs = [imresize(img, (args.img_height, args.img_width)).astype(np.float32) for img in ref_imgs]

        tgt_img = np.transpose(tgt_img, (2, 0, 1))
        ref_imgs = [np.transpose(img, (2, 0, 1)) for img in ref_imgs]

        tgt_img = torch.from_numpy(tgt_img).unsqueeze(0)
        tgt_img = ((tgt_img / 255 - 0.5) / 0.5).to(device)

        for i, img in enumerate(ref_imgs):
            img = torch.from_numpy(img).unsqueeze(0)
            img = ((img / 255 - 0.5) / 0.5).to(device)
            ref_imgs[i] = img

        pred_disp = disp_net(tgt_img).cpu().numpy()[0, 0]

        if args.output_dir is not None:
            if j == 0:
                predictions = np.zeros((len(test_files), *pred_disp.shape))
            predictions[j] = 1 / pred_disp

        gt_depth = sample['gt_depth']

        pred_depth = 1 / pred_disp
        pred_depth_zoomed = zoom(pred_depth,
                                 (gt_depth.shape[0] / pred_depth.shape[0],
                                  gt_depth.shape[1] / pred_depth.shape[1])
                                 ).clip(args.min_depth, args.max_depth)
        if sample['mask'] is not None:
            pred_depth_zoomed = pred_depth_zoomed[sample['mask']]
            gt_depth = gt_depth[sample['mask']]

        if seq_length > 0:
            # Reorganize ref_imgs : tgt is middle frame but not necessarily the one used in DispNetS
            # (in case sample to test was in end or beginning of the image sequence)
            middle_index = seq_length // 2
            tgt = ref_imgs[middle_index]
            reorganized_refs = ref_imgs[:middle_index] + ref_imgs[middle_index + 1:]
            _, poses = pose_net(tgt, reorganized_refs)
            mean_displacement_magnitude = poses[0, :, :3].norm(2, 1).mean().item()

            scale_factor = sample['displacement'] / mean_displacement_magnitude
            errors[0, :, j] = compute_errors(gt_depth, pred_depth_zoomed * scale_factor)

        scale_factor = np.median(gt_depth) / np.median(pred_depth_zoomed)
        errors[1, :, j] = compute_errors(gt_depth, pred_depth_zoomed * scale_factor)

    mean_errors = errors.mean(2)
    error_names = ['abs_rel', 'sq_rel', 'rms', 'log_rms', 'a1', 'a2', 'a3']
    if args.pretrained_posenet:
        print("Results with scale factor determined by PoseNet : ")
        print("{:>10}, {:>10}, {:>10}, {:>10}, {:>10}, {:>10}, {:>10}".format(*error_names))
        print("{:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}".format(*mean_errors[0]))

    print("Results with scale factor determined by GT/prediction ratio (like the original paper) : ")
    print("{:>10}, {:>10}, {:>10}, {:>10}, {:>10}, {:>10}, {:>10}".format(*error_names))
    print("{:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}".format(*mean_errors[1]))

    if args.output_dir is not None:
        np.save(output_dir / 'predictions.npy', predictions)
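# compute_errors is imported from the depth evaluation utilities and not shown in this file.
# Below is a hedged sketch of the seven standard monocular-depth metrics it is expected to
# return, in the same order as error_names above (abs_rel, sq_rel, rms, log_rms, a1, a2, a3).
# This is the usual Eigen-style formulation, not necessarily the project's exact code.
import numpy as np

def compute_errors_sketch(gt, pred):
    """Standard depth metrics over valid (masked) pixels."""
    thresh = np.maximum(gt / pred, pred / gt)
    a1 = (thresh < 1.25).mean()
    a2 = (thresh < 1.25 ** 2).mean()
    a3 = (thresh < 1.25 ** 3).mean()

    rms = np.sqrt(((gt - pred) ** 2).mean())
    log_rms = np.sqrt(((np.log(gt) - np.log(pred)) ** 2).mean())

    abs_rel = np.mean(np.abs(gt - pred) / gt)
    sq_rel = np.mean(((gt - pred) ** 2) / gt)

    return abs_rel, sq_rel, rms, log_rms, a1, a2, a3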
def main():
    global tgt_img, disp_net
    args = parser.parse_args()

    '''Load the trained model'''
    weights = torch.load(args.pretrained_posenet)
    seq_length = int(weights['state_dict']['conv1.0.weight'].size(1) / 3)
    pose_net = PoseExpNet(nb_ref_imgs=seq_length - 1, output_exp=False).to(device)
    pose_net.load_state_dict(weights['state_dict'], strict=False)

    # MD5 ID of the network model
    net_ID = MD5_ID(args.pretrained_posenet)
    # LiDAR-to-camera (L2C) transform, used to align the input poses to the LiDAR frame
    Transform_matrix_L2C = np.identity(4)

    '''KITTI switch'''
    if args.isKitti:
        if not args.isDynamic:
            from kitti_eval.pose_evaluation_utils import test_framework_KITTI as test_framework
        else:
            from kitti_eval.pose_evaluation_utils_forDynamicTest import test_framework_KITTI as test_framework
        save_dir = os.path.join(args.output_dir, "kitti", args.sequences[0], 'net_' + net_ID)
        if args.trainedOnMydataset:
            downsample_img_height = args.img_height
            downsample_img_width = args.img_width
        else:
            # trained on the KITTI train set
            downsample_img_height = 128
            downsample_img_width = 416
        Transform_matrix_L2C[:3, :3] = np.array(
            [[7.533745e-03, -9.999714e-01, -6.166020e-04],
             [1.480249e-02, 7.280733e-04, -9.998902e-01],
             [9.998621e-01, 7.523790e-03, 1.480755e-02]])
        Transform_matrix_L2C[:3, -1:] = np.array(
            [-4.069766e-03, -7.631618e-02, -2.717806e-01]).reshape(3, 1)
    else:
        from mydataset_eval.pose_evaluation_utils import test_framework_MYDATASET as test_framework
        save_dir = os.path.join(args.output_dir, "mydataset", args.sequences[0], 'net_' + net_ID)
        if args.trainedOnMydataset:
            downsample_img_height = args.img_height
            downsample_img_width = args.img_width
        else:
            # trained on the KITTI train set
            downsample_img_height = 128
            downsample_img_width = 416
        Transform_matrix_L2C[:3, :3] = np.array(
            [[-1.51482698e-02, -9.99886648e-01, 5.36310553e-03],
             [-4.65337018e-03, -5.36307196e-03, -9.99969412e-01],
             [9.99870070e-01, -1.56647995e-02, -4.48880010e-03]])
        Transform_matrix_L2C[:3, -1:] = np.array(
            [4.29029924e-03, -6.08539196e-02, -9.20346161e-02]).reshape(3, 1)
    Transform_matrix_L2C = GramSchmidtHelper(Transform_matrix_L2C)
    Transform_matrix_C2L = np.linalg.inv(Transform_matrix_L2C)

    # ************************* optional, can be removed *********************************
    # For mask evaluation in dynamic scenes, the disparity network is needed here
    if args.isDynamic:
        from models import DispNetS
        disp_net = DispNetS().to(device)
        weights = torch.load(args.pretrained_dispnet)
        disp_net.load_state_dict(weights['state_dict'])
        disp_net.eval()

    # normalize = custom_transforms.Normalize(mean=[0.5, 0.5, 0.5],
    #                                         std=[0.5, 0.5, 0.5])
    # valid_transform = custom_transforms.Compose([custom_transforms.ArrayToTensor(), normalize])
    # from datasets.sequence_folders import SequenceFolder
    # val_set = SequenceFolder(
    #     '/home/sda/mydataset/preprocessing/formatted/data/',
    #     transform=valid_transform,
    #     seed=0,
    #     train=False,
    #     sequence_length=3,
    # )
    # val_loader = torch.utils.data.DataLoader(
    #     val_set, batch_size=1, shuffle=False,
    #     num_workers=4, pin_memory=True)
    #
    # intrinsics = None
    # for i, (tgt_img, ref_imgs, intrinsics, intrinsics_inv) in enumerate(val_loader):
    #     intrinsics = intrinsics.to(device)
    #     break
    # *************************************************************************

    '''Load the test dataset'''
    dataset_dir = Path(args.dataset_dir)
    framework = test_framework(dataset_dir, args.sequences, seq_length)
    print('{} snippets to test'.format(len(framework)))
    errors = np.zeros((len(framework), 2), np.float32)

    '''Data written to the output folder'''
    num_poses = len(framework) - (seq_length - 2)
    predictions_array = np.zeros((len(framework), seq_length, 3, 4))
    processing_time = np.zeros((num_poses - 1, 1))

    # output folder
    save_dir = Path(save_dir)
    print('Output files will be saved in: ' + save_dir)
    if not os.path.exists(save_dir):
        save_dir.makedirs_p()

    # Pose Graph Manager (for back-end optimization) initialization
    PGM = PoseGraphManager()
    PGM.addPriorFactor()

    # Result saver
    num_frames = len(framework)
    ResultSaver = PoseGraphResultSaver(init_pose=PGM.curr_se3,
                                       save_gap=args.save_gap,
                                       num_frames=num_frames,
                                       seq_idx=args.sequences[0],
                                       save_dir=save_dir)

    # for saving the results as a video
    fig_idx = 1
    fig = plt.figure(fig_idx)
    writer = FFMpegWriter(fps=15)
    video_path = save_dir + '/' + args.sequences[0] + ".mp4"
    num_frames_to_skip_to_show = 5
    num_frames_to_save = np.floor(num_frames / num_frames_to_skip_to_show)

    with writer.saving(fig, video_path, num_frames_to_save):  # this video saving part is optional
        for j, sample in enumerate(tqdm(framework)):
            '''
            VO part
            '''
            imgs = sample['imgs']
            w, h = imgs[0].size
            if (not args.no_resize) and (h != downsample_img_height or w != downsample_img_width):
                imgs = [imresize(img, (downsample_img_height, downsample_img_width)).astype(np.float32)
                        for img in imgs]
            imgs = [np.transpose(img, (2, 0, 1)) for img in imgs]

            ref_imgs = []
            for i, img in enumerate(imgs):
                img = torch.from_numpy(img).unsqueeze(0)
                img = ((img / 255 - 0.5) / 0.5).to(device)
                if i == len(imgs) // 2:
                    tgt_img = img
                else:
                    ref_imgs.append(img)

            startTimeVO = time.time()
            _, poses = pose_net(tgt_img, ref_imgs)
            processing_time[j] = (time.time() - startTimeVO) / (seq_length - 1)

            # ************************** optional, can be removed ********************************
            if args.isDynamic:
                '''Evaluate the effect of the photometric mask'''
                if args.isKitti:
                    intrinsics = [[2.416744631239935472e+02, 0.000000000000000000e+00, 2.041680103059581199e+02],
                                  [0.000000000000000000e+00, 2.462848682666666491e+02, 5.900083200000000261e+01],
                                  [0.000000000000000000e+00, 0.000000000000000000e+00, 1.000000000000000000e+00]]
                else:
                    intrinsics = [[279.1911, 0.0000, 210.8265],
                                  [0.0000, 279.3980, 172.3114],
                                  [0.0000, 0.0000, 1.0000]]
                PhotoMask_Output(_, disp_net, intrinsics, j, poses, ref_imgs, save_dir)
            # ***************************************************************

            final_poses = pose2tf_mat(args.rotation_mode, imgs, poses)
            predictions_array[j] = final_poses
            # rel_VO_pose takes the second entry of final_poses, i.e. the sequence T10, T21, T32, ...
            rel_VO_pose = np.identity(4)
            rel_VO_pose[:3, :] = final_poses[1]
            # Apply a scale factor to correct the monocular VO pose, then align it to the LiDAR frame
            scale_factor = 7
            rel_VO_pose[:3, -1:] = rel_VO_pose[:3, -1:] * scale_factor
            rel_VO_pose = Transform_matrix_C2L @ rel_VO_pose @ np.linalg.inv(Transform_matrix_C2L)
            rel_VO_pose = GramSchmidtHelper(rel_VO_pose)
            ResultSaver.saveRelativePose(rel_VO_pose)

            PGM.curr_node_idx = j + 1
            PGM.curr_se3 = np.matmul(PGM.curr_se3, rel_VO_pose)
            PGM.addOdometryFactor(rel_VO_pose)
            PGM.prev_node_idx = PGM.curr_node_idx
            ResultSaver.saveUnoptimizedPoseGraphResult(PGM.curr_se3, PGM.curr_node_idx)
            # if (j % num_frames_to_skip_to_show == 0):
            #     ResultSaver.vizCurrentTrajectory(fig_idx=fig_idx)
            #     writer.grab_frame()

            if args.isKitti:
                ATE, RE = compute_pose_error(sample['poses'], final_poses)
                errors[j] = ATE, RE

    '''save output files'''
    if save_dir is not None:
        # np.save(save_dir / 'predictions.npy', predictions_array)
        ResultSaver.saveFinalPoseGraphResult(filename='abs_VO_poses.txt')
        ResultSaver.saveRelativePosesResult(filename='rel_VO_poses.txt')
        np.savetxt(save_dir / 'processing_time.txt', processing_time)
        if args.isKitti:
            np.savetxt(save_dir / 'errors.txt', errors)

    mean_errors = errors.mean(0)
    std_errors = errors.std(0)
    error_names = ['ATE', 'RE']
    print('')
    print("Results")
    print("\t {:>10}, {:>10}".format(*error_names))
    print("mean \t {:10.4f}, {:10.4f}".format(*mean_errors))
    print("std \t {:10.4f}, {:10.4f}".format(*std_errors))
def main(): from utils.UtilsMyData import test_framework_MyData as test_framework weights = torch.load(args.pretrained_posenet) seq_length = int(weights['state_dict']['conv1.0.weight'].size(1) / 3) pose_net = PoseExpNet(nb_ref_imgs=seq_length - 1, output_exp=False).to(device) pose_net.load_state_dict(weights['state_dict'], strict=False) dataset_dir = Path(args.dataset_dir) sequences = [args.sequence_idx] framework = test_framework(dataset_dir, sequences, seq_length) print('{} snippets to test'.format(len(framework))) errors = np.zeros((len(framework), 2), np.float32) optimized_errors = np.zeros((len(framework), 2), np.float32) ICP_iterations = np.zeros(len(framework)) VO_processing_time = np.zeros(len(framework)) ICP_iteration_time = np.zeros(len(framework)) if args.output_dir is not None: output_dir = Path(args.output_dir) output_dir.makedirs_p() predictions_array = np.zeros((len(framework), seq_length, 3, 4)) '''绝对位姿列表初始化''' # 对齐到雷达坐标系,VO模型输出的带有尺度的绝对位姿 abs_VO_poses = np.zeros((len(framework), 12)) # 对齐到雷达坐标系,LO模型输出的带有尺度的绝对位姿 abs_LO_poses = np.zeros((len(framework), 12)) # 位姿估计值,对齐到相机坐标系下,和真值直接比较(仅适用于有相机坐标系下的真值) est_poses = np.zeros((len(framework), 12)) est_poses[0] = np.identity(4)[:3, :].reshape(-1, 12)[0] '''帧间位姿列表初始化''' # 对齐到相机坐标系,VO模型输出的带有尺度的帧间位姿 cur_VO_poses_C = np.zeros((len(framework), 12)) # 对齐到雷达坐标系,VO模型输出的带有尺度的帧间位姿 cur_VO_poses = np.zeros((len(framework), 6)) # 对齐到雷达坐标系,VO模型输出的带有尺度的帧间位姿 cur_LO_poses = np.zeros((len(framework), 12)) '''尺度因子''' scale_factors = np.zeros((len(framework), 1)) abs_VO_pose = np.identity(4)[:3, :] '''循环周期所需变量初始化''' # 用来估计尺度因子 last_pose = np.identity(4) last_gap2_pose = np.identity(4) last_VO_pose = np.identity(4) # 周期中涉及到的点云,当前,前一个,前前一个 curr_pts = None last_pts = None sec_last_pts = None # 当前节点和前一个,前前一个节点的帧间预估 gap1_VO_pose = np.identity(4) gap2_VO_pose = np.identity(4) last_gap2_VO_pose = np.identity(4) # L和C的转换矩阵,对齐输入位姿到雷达坐标系 Transform_matrix_L2C = np.identity(4) Transform_matrix_L2C[:3, :3] = np.array([[-1.51482698e-02, -9.99886648e-01, 5.36310553e-03], [-4.65337018e-03, -5.36307196e-03, -9.99969412e-01], [9.99870070e-01, -1.56647995e-02, -4.48880010e-03]]) Transform_matrix_L2C[:3, -1:] = np.array([4.29029924e-03, -6.08539196e-02, -9.20346161e-02]).reshape(3, 1) Transform_matrix_L2C = GramSchmidtHelper(Transform_matrix_L2C) Transform_matrix_C2L = np.linalg.inv(Transform_matrix_L2C) pointClouds = loadPointCloud(args.dataset_dir + "/sequences/" + args.sequence_idx + "/velodyne") '''Pose Graph Manager (for back-end optimization) initialization''' PGM = PoseGraphManager() PGM.addPriorFactor() num_frames = len(tqdm(framework)) save_dir = "result/" + args.sequence_idx if not os.path.exists(save_dir): os.makedirs(save_dir) ResultSaver = PoseGraphResultSaver(init_pose=PGM.curr_se3, save_gap=args.save_gap, num_frames=num_frames, seq_idx=args.sequence_idx, save_dir=save_dir) '''Scan Context Manager (for loop detection) initialization''' SCM = ScanContextManager(shape=[args.num_rings, args.num_sectors], num_candidates=args.num_candidates, threshold=args.loop_threshold) '''Mapping initialzation''' if args.mapping is True: Map = MappingManager() # for save the result as a video fig_idx = 1 fig = plt.figure(fig_idx) writer = FFMpegWriter(fps=15) video_name = args.sequence_idx + "_" + str(args.num_icp_points) + "_prop@" + str( args.proposal) + "_tolerance@" + str( args.tolerance) + "_scm@" + str(args.scm_type) + "_thresh@" + str(args.loop_threshold) + ".mp4" num_frames_to_skip_to_show = 5 num_frames_to_save = np.floor(num_frames / num_frames_to_skip_to_show) with 
writer.saving(fig, video_name, num_frames_to_save): # this video saving part is optional for j, sample in enumerate(tqdm(framework)): ''' ***************************************VO部分******************************************* ''' imgs = sample['imgs'] w, h = imgs[0].size if (not args.no_resize) and (h != args.img_height or w != args.img_width): imgs = [(np.array(img.resize((args.img_width, args.img_height)))).astype(np.float32) for img in imgs] imgs = [np.transpose(img, (2, 0, 1)) for img in imgs] ref_imgs = [] for i, img in enumerate(imgs): img = torch.from_numpy(img).unsqueeze(0) img = ((img / 255 - 0.5) / 0.5).to(device) if i == len(imgs) // 2: tgt_img = img else: ref_imgs.append(img) startTimeVO = time.time() _, poses = pose_net(tgt_img, ref_imgs) VO_processing_time[j] = time.time() - startTimeVO poses = poses.cpu()[0] poses = torch.cat([poses[:len(imgs) // 2], torch.zeros(1, 6).float(), poses[len(imgs) // 2:]]) inv_transform_matrices = pose_vec2mat(poses, rotation_mode=args.rotation_mode).numpy().astype(np.float64) rot_matrices = np.linalg.inv(inv_transform_matrices[:, :, :3]) tr_vectors = -rot_matrices @ inv_transform_matrices[:, :, -1:] transform_matrices = np.concatenate([rot_matrices, tr_vectors], axis=-1) # print('**********DeepVO result: time_cost {:.3} s'.format(timeCostVO / (len(imgs) - 1))) # 将对[0 1 2]中间1的转换矩阵变成对0的位姿转换 first_inv_transform = inv_transform_matrices[0] final_poses = first_inv_transform[:, :3] @ transform_matrices final_poses[:, :, -1:] += first_inv_transform[:, -1:] # gap2_VO_pose取final poses的第3项(0-2) gap2_VO_pose[:3, :] = final_poses[2] # cur_VO_pose取final poses的第2项(0-1) gap1_VO_pose[:3, :] = final_poses[1] # 尺度因子的确定:采用上一帧的LO输出位姿和VO输出位姿的尺度比值作为当前帧的尺度因子,初始尺度为1 if j == 0: scale_factor = 7 else: scale_factor = math.sqrt(np.sum(last_pose[:3, -1] ** 2) / np.sum(last_VO_pose[:3, -1] ** 2)) scale_factors[j] = scale_factor last_VO_pose = copy.deepcopy(gap1_VO_pose) # 注意深拷贝 # 先尺度修正,再对齐坐标系,施密特正交化避免病态矩阵 gap2_VO_pose[:3, -1:] = gap2_VO_pose[:3, -1:] * scale_factor gap2_VO_pose = Transform_matrix_C2L @ gap2_VO_pose @ np.linalg.inv(Transform_matrix_C2L) gap2_VO_pose = GramSchmidtHelper(gap2_VO_pose) gap1_VO_pose[:3, -1:] = gap1_VO_pose[:3, -1:] * scale_factor cur_VO_poses_C[j] = gap1_VO_pose[:3, :].reshape(-1, 12)[0] gap1_VO_pose = Transform_matrix_C2L @ gap1_VO_pose @ np.linalg.inv(Transform_matrix_C2L) gap1_VO_pose = GramSchmidtHelper(gap1_VO_pose) '''*************************LO部分******************************************''' # 初始化 if j == 0: last_pts = random_sampling(pointClouds[j], args.num_icp_points) sec_last_pts = last_pts SCM.addNode(j, last_pts) if args.mapping is True: Map.updateMap(curr_se3=PGM.curr_se3,curr_local_ptcloud=last_pts) curr_pts = random_sampling(pointClouds[j + 1], args.num_icp_points) from modules.ICP import icp # 选择LO的初值预估,分别是无预估,上一帧位姿,VO位姿 if args.proposal == 0: init_pose_1 = None init_pose_2 = None elif args.proposal == 1: init_pose_1 = last_pose init_pose_2 = last_gap2_pose elif args.proposal == 2: init_pose_1 = gap1_VO_pose init_pose_2 = last_gap2_VO_pose print("init_pose_1 ") print(init_pose_1) print("init_pose_2 ") print(init_pose_2) startTime = time.time() icp_odom_transform_1, distacnces, iterations = icp(curr_pts, last_pts, init_pose=init_pose_1, tolerance=args.tolerance, max_iterations=50) if j > 0: icp_odom_transform_2, distacnces_2, iterations_2 = icp(last_pts, sec_last_pts, init_pose=init_pose_2, tolerance=args.tolerance, max_iterations=50) else: icp_odom_transform_2 = icp_odom_transform_1 distacnces_2 = distacnces iterations_2 = 
iterations ICP_iteration_time[j] = time.time() - startTime ICP_iterations[j] = (iterations + iterations_2) / 2 print("last_pose ") print(last_pose) '''更新指针''' # 点云 sec_last_pts = last_pts last_pts = curr_pts # gap2预估位姿 last_gap2_VO_pose = copy.deepcopy(gap2_VO_pose) # icp 输出位姿 last_pose = icp_odom_transform_1 last_gap2_pose = icp_odom_transform_2 print("LO优化后的位姿,mean_dis: ", np.asarray(distacnces).mean()) print(icp_odom_transform_1) print("icp_odom_transform_2") print(icp_odom_transform_2) '''Update loop detection nodes''' SCM.addNode(j + 1, curr_pts) '''Update the edges and nodes of pose graph''' PGM.curr_node_idx = j + 1 PGM.curr_se3 = np.matmul(PGM.curr_se3, icp_odom_transform_1) PGM.addOdometryFactor(icp_odom_transform_1, -1) PGM.addOdometryFactor(icp_odom_transform_2, -2) PGM.sec_prev_node_idx = PGM.prev_node_idx PGM.prev_node_idx = PGM.curr_node_idx # 建图更新 if args.mapping is True: Map.updateMap(curr_se3=PGM.curr_se3,curr_local_ptcloud=curr_pts) # loop detection and optimize the graph if (PGM.curr_node_idx > 1 and PGM.curr_node_idx % args.try_gap_loop_detection == 0): # 1/ loop detection loop_idx, loop_dist, yaw_diff_deg = SCM.detectLoop() if (loop_idx == None): # NOT FOUND pass else: print("Loop event detected: ", PGM.curr_node_idx, loop_idx, loop_dist) # 2-1/ add the loop factor loop_scan_down_pts = SCM.getPtcloud(loop_idx) loop_transform, _, _ = icp(curr_pts, loop_scan_down_pts, init_pose=yawdeg2se3(yaw_diff_deg), max_iterations=20) PGM.addLoopFactor(loop_transform, loop_idx) # 2-2/ graph optimization PGM.optimizePoseGraph() # 2-2/ save optimized poses ResultSaver.saveOptimizedPoseGraphResult(PGM.curr_node_idx, PGM.graph_optimized) # 2-3/ updateMap if args.mapping is True: Map.optimizeGlobalMap(PGM.graph_optimized, PGM.curr_node_idx) # 定时进行位姿图优化 if (PGM.curr_node_idx > 1 and PGM.curr_node_idx % args.optimization_period == 0): # 2-2/ graph optimization PGM.optimizePoseGraph() # 2-2/ save optimized poses ResultSaver.saveOptimizedPoseGraphResult(PGM.curr_node_idx, PGM.graph_optimized) # 2-3/ updateMap if args.mapping is True: Map.optimizeGlobalMap(PGM.graph_optimized, PGM.curr_node_idx) # save the ICP odometry pose result (no loop closure) ResultSaver.saveUnoptimizedPoseGraphResult(PGM.curr_se3, PGM.curr_node_idx) if (j % num_frames_to_skip_to_show == 0): ResultSaver.vizCurrentTrajectory(fig_idx=fig_idx) writer.grab_frame() if args.vizmapping is True: Map.vizMapWithOpen3D() # 对齐到雷达坐标系下,VO输出的绝对位姿 abs_VO_pose[:, :3] = gap1_VO_pose[:3, :3] @ abs_VO_pose[:, :3] abs_VO_pose[:, -1:] += gap1_VO_pose[:3, -1:] if args.output_dir is not None: predictions_array[j] = final_poses # cur_VO_poses[j]=cur_VO_pose[:3, :].reshape(-1, 12)[0] cur_LO_poses[j] = icp_odom_transform_1[:3, :].reshape(-1, 12)[0] abs_VO_poses[j] = abs_VO_pose[:3, :].reshape(-1, 12)[0] abs_LO_poses[j] = PGM.curr_se3[:3, :].reshape(-1, 12)[0] est_pose = Transform_matrix_L2C @ PGM.curr_se3 @ np.linalg.inv(Transform_matrix_L2C) est_poses[j + 1] = est_pose[:3, :].reshape(-1, 12)[0] if args.mapping is True: Map.saveMap2File('map_' + args.sequence_idx + "_" + str(args.num_icp_points) + "_prop@" + str( args.proposal) + "_tolerance@" + str(args.tolerance) + "_scm@" + str(args.scm_type) + "_thresh@" + str(args.loop_threshold) + '.pcd') if args.output_dir is not None: # np.save(output_dir / 'predictions.npy', predictions_array) np.savetxt(output_dir / 'scale_factors.txt', scale_factors) np.savetxt(output_dir / 'cur_VO_poses_C.txt', cur_VO_poses_C) np.savetxt(output_dir / 'cur_VO_poses.txt', cur_VO_poses) np.savetxt(output_dir / 
'abs_VO_poses.txt', abs_VO_poses) np.savetxt(output_dir / 'cur_LO_poses.txt', cur_LO_poses) np.savetxt(output_dir / 'abs_LO_poses.txt', abs_LO_poses) np.savetxt(output_dir / 'iterations.txt', ICP_iterations) np.savetxt(output_dir / 'est_kitti_{0}_poses.txt'.format(args.sequence_idx), est_poses) # VO输出位姿的精度指标 mean_errors = errors.mean(0) std_errors = errors.std(0) error_names = ['ATE', 'RE'] print('') print("VO_Results") print("\t {:>10}, {:>10}".format(*error_names)) print("mean \t {:10.4f}, {:10.4f}".format(*mean_errors)) print("std \t {:10.4f}, {:10.4f}".format(*std_errors)) # LO二次优化后的精度指标 optimized_mean_errors = optimized_errors.mean(0) optimized_std_errors = optimized_errors.std(0) optimized_error_names = ['optimized_ATE', 'optimized_RE'] print('') print("LO_optimized_Results") print("\t {:>10}, {:>10}".format(*optimized_error_names)) print("mean \t {:10.4f}, {:10.4f}".format(*optimized_mean_errors)) print("std \t {:10.4f}, {:10.4f}".format(*optimized_std_errors)) # 迭代次数 mean_iterations = ICP_iterations.mean() std_iterations = ICP_iterations.std() _names = ['iteration'] print('') print("LO迭代次数") print("\t {:>10}".format(*_names)) print("mean \t {:10.4f}".format(mean_iterations)) print("std \t {:10.4f}".format(std_iterations)) # 迭代时间 mean_iter_time = ICP_iteration_time.mean() std_iter_time = ICP_iteration_time.std() _names = ['iter_time'] print('') print("LO迭代时间:单位/s") print("\t {:>10}".format(*_names)) print("mean \t {:10.4f}".format(mean_iter_time)) print("std \t {:10.4f}".format(std_iter_time))
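# random_sampling is used above to downsample each LiDAR scan to args.num_icp_points before
# ICP; its implementation is imported elsewhere. A minimal sketch, assuming each point cloud
# is an (N, d) numpy array:
import numpy as np

def random_sampling_sketch(points, num_points):
    """Randomly keep num_points rows of an (N, d) point cloud."""
    if points.shape[0] <= num_points:
        return points
    idx = np.random.choice(points.shape[0], num_points, replace=False)
    return points[idx]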
def main():
    args = parser.parse_args()
    from kitti_eval.pose_evaluation_utils import test_framework_KITTI as test_framework
    weights = torch.load(args.pretrained_posenet)
    seq_length = int(weights['state_dict']['conv1.0.weight'].size(1) / 3)
    pose_net = PoseExpNet(nb_ref_imgs=seq_length - 1, output_exp=False).to(device)
    pose_net.load_state_dict(weights['state_dict'], strict=False)

    dataset_dir = Path(args.dataset_dir)
    framework = test_framework(dataset_dir, args.sequences, seq_length)

    print('{} snippets to test'.format(len(framework)))
    errors = np.zeros((len(framework), 2), np.float32)
    if args.output_dir is not None:
        output_dir = Path(args.output_dir)
        output_dir.makedirs_p()
        predictions_array = np.zeros((len(framework), seq_length, 3, 4))

    for j, sample in enumerate(tqdm(framework)):
        imgs = sample['imgs']

        h, w, _ = imgs[0].shape
        if (not args.no_resize) and (h != args.img_height or w != args.img_width):
            imgs = [imresize(img, (args.img_height, args.img_width)).astype(np.float32) for img in imgs]

        imgs = [np.transpose(img, (2, 0, 1)) for img in imgs]

        ref_imgs = []
        for i, img in enumerate(imgs):
            img = torch.from_numpy(img).unsqueeze(0)
            img = ((img / 255 - 0.5) / 0.5).to(device)
            if i == len(imgs) // 2:
                tgt_img = img
            else:
                ref_imgs.append(img)

        _, poses = pose_net(tgt_img, ref_imgs)

        poses = poses.cpu()[0]
        poses = torch.cat([poses[:len(imgs) // 2], torch.zeros(1, 6).float(), poses[len(imgs) // 2:]])

        inv_transform_matrices = pose_vec2mat(poses, rotation_mode=args.rotation_mode).numpy().astype(np.float64)

        rot_matrices = np.linalg.inv(inv_transform_matrices[:, :, :3])
        tr_vectors = -rot_matrices @ inv_transform_matrices[:, :, -1:]

        transform_matrices = np.concatenate([rot_matrices, tr_vectors], axis=-1)

        first_inv_transform = inv_transform_matrices[0]
        final_poses = first_inv_transform[:, :3] @ transform_matrices
        final_poses[:, :, -1:] += first_inv_transform[:, -1:]

        if args.output_dir is not None:
            predictions_array[j] = final_poses

        ATE, RE = compute_pose_error(sample['poses'], final_poses)
        errors[j] = ATE, RE

    mean_errors = errors.mean(0)
    std_errors = errors.std(0)
    error_names = ['ATE', 'RE']
    print('')
    print("Results")
    print("\t {:>10}, {:>10}".format(*error_names))
    print("mean \t {:10.4f}, {:10.4f}".format(*mean_errors))
    print("std \t {:10.4f}, {:10.4f}".format(*std_errors))

    if args.output_dir is not None:
        np.save(output_dir / 'predictions.npy', predictions_array)
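# compute_pose_error returns the per-snippet ATE and RE averaged above. Only its call site
# appears in these scripts; the sketch below follows the SfmLearner-Pytorch evaluation
# (least-squares scale alignment of translations, mean angle of the residual rotation) and
# is provided for reference rather than as the project's exact code.
import numpy as np

def compute_pose_error_sketch(gt, pred):
    """ATE / RE for one snippet of 3x4 poses, both of shape [seq_length, 3, 4]."""
    RE = 0
    snippet_length = gt.shape[0]
    # Least-squares scale factor aligning predicted translations with ground truth.
    scale_factor = np.sum(gt[:, :, -1] * pred[:, :, -1]) / np.sum(pred[:, :, -1] ** 2)
    ATE = np.linalg.norm((gt[:, :, -1] - scale_factor * pred[:, :, -1]).reshape(-1))
    for gt_pose, pred_pose in zip(gt, pred):
        # Residual rotation between ground truth and prediction.
        R = gt_pose[:, :3] @ np.linalg.inv(pred_pose[:, :3])
        s = np.linalg.norm([R[0, 1] - R[1, 0],
                            R[1, 2] - R[2, 1],
                            R[0, 2] - R[2, 0]])
        c = np.trace(R) - 1
        # s and c are 2*sin and 2*cos of the rotation angle; arctan2 is scale-invariant.
        RE += np.arctan2(s, c)
    return ATE / snippet_length, RE / snippet_length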
def main():
    global tgt_pc, tgt_img
    args = parser.parse_args()
    from kitti_eval.VOLO_data_utils import test_framework_KITTI as test_framework

    weights = torch.load(args.pretrained_posenet)
    seq_length = int(weights['state_dict']['conv1.0.weight'].size(1) / 3)
    pose_net = PoseExpNet(nb_ref_imgs=seq_length - 1, output_exp=False).to(device)
    pose_net.load_state_dict(weights['state_dict'], strict=False)

    dataset_dir = Path(args.dataset_dir)
    framework = test_framework(dataset_dir, args.sequences, seq_length)

    print('{} snippets to test'.format(len(framework)))
    errors = np.zeros((len(framework), 2), np.float32)
    optimized_errors = np.zeros((len(framework), 2), np.float32)
    if args.output_dir is not None:
        output_dir = Path(args.output_dir)
        output_dir.makedirs_p()
        predictions_array = np.zeros((len(framework), seq_length, 3, 4))

    # f = open('//')  # leftover debug statement: unused and would fail, kept only for reference

    for j, sample in enumerate(tqdm(framework)):
        '''
        VO part: predict the snippet poses and compute their error w.r.t. ground truth
        '''
        imgs = sample['imgs']

        h, w, _ = imgs[0].shape
        if (not args.no_resize) and (h != args.img_height or w != args.img_width):
            imgs = [imresize(img, (args.img_height, args.img_width)).astype(np.float32) for img in imgs]

        imgs = [np.transpose(img, (2, 0, 1)) for img in imgs]

        ref_imgs = []
        for i, img in enumerate(imgs):
            img = torch.from_numpy(img).unsqueeze(0)
            img = ((img / 255 - 0.5) / 0.5).to(device)
            if i == len(imgs) // 2:
                tgt_img = img
            else:
                ref_imgs.append(img)

        timeCostVO = 0
        startTimeVO = time.time()
        _, poses = pose_net(tgt_img, ref_imgs)
        timeCostVO = time.time() - startTimeVO

        poses = poses.cpu()[0]
        poses = torch.cat([poses[:len(imgs) // 2], torch.zeros(1, 6).float(), poses[len(imgs) // 2:]])

        inv_transform_matrices = pose_vec2mat(poses, rotation_mode=args.rotation_mode).numpy().astype(np.float64)

        rot_matrices = np.linalg.inv(inv_transform_matrices[:, :, :3])
        tr_vectors = -rot_matrices @ inv_transform_matrices[:, :, -1:]

        transform_matrices = np.concatenate([rot_matrices, tr_vectors], axis=-1)
        print('**********DeepVO result: time_cost {:.3} s'.format(timeCostVO / (len(imgs) - 1)))
        # print(transform_matrices)

        first_inv_transform = inv_transform_matrices[0]
        final_poses = first_inv_transform[:, :3] @ transform_matrices
        final_poses[:, :, -1:] += first_inv_transform[:, -1:]
        # print('first')
        # print(first_inv_transform)
        print('poses')
        print(final_poses)

        if args.output_dir is not None:
            predictions_array[j] = final_poses

        ATE, RE = compute_pose_error(sample['poses'], final_poses)
        errors[j] = ATE, RE

        '''
        LO part: use the VO result as the initial estimate and compute the error w.r.t. ground truth
        '''
        pointclouds = sample['pointclouds']
        from VOLO import LO
        # the point clouds can be processed directly
        for i, pc in enumerate(pointclouds):
            if i == len(pointclouds) // 2:
                tgt_pc = pointclouds[i]

        optimized_transform_matrices = []

        timeCostLO = 0
        startTimeLO = time.time()
        totalIterations = 0
        for i, pc in enumerate(pointclouds):
            pose_proposal = np.identity(4)
            pose_proposal[:3, :] = transform_matrices[i]
            print('======pose proposal for LO=====')
            print(pose_proposal)

            T, distances, iterations = LO(pc, tgt_pc, init_pose=pose_proposal,
                                          max_iterations=50, tolerance=0.001, LO='icp')
            optimized_transform_matrices.append(T)
            totalIterations += iterations
            print('iterations:\n')
            print(iterations)
        timeCostLO = time.time() - startTimeLO
        optimized_transform_matrices = np.asarray(optimized_transform_matrices)

        print('*****LO result: time_cost {:.3} s'.format(timeCostLO / (len(pointclouds) - 1))
              + ' average iterations: {}'.format(totalIterations / (len(pointclouds) - 1)))
        # print(optimized_transform_matrices)

        # TODO: complete the VO-LO pipeline: align the transform-matrix formats; evaluate how much the
        #       VO proposal helps LO in efficiency and accuracy; visualize the evaluation process
        # TODO: use a dataset that provides matching images, point clouds and ground-truth poses (KITTI odometry)

        inv_optimized_rot_matrices = np.linalg.inv(optimized_transform_matrices[:, :3, :3])
        inv_optimized_tr_vectors = -inv_optimized_rot_matrices @ optimized_transform_matrices[:, :3, -1:]
        inv_optimized_transform_matrices = np.concatenate([inv_optimized_rot_matrices, inv_optimized_tr_vectors], axis=-1)

        first_inv_optimized_transform = inv_optimized_transform_matrices[0]
        final_optimized_poses = first_inv_optimized_transform[:, :3] @ optimized_transform_matrices[:, :3, :]
        final_optimized_poses[:, :, -1:] += first_inv_optimized_transform[:, -1:]
        # print('first')
        # print(first_inv_optimized_transform)
        print('poses')
        print(final_optimized_poses)

        if args.output_dir is not None:
            predictions_array[j] = final_poses

        optimized_ATE, optimized_RE = compute_pose_error(sample['poses'], final_optimized_poses)
        optimized_errors[j] = optimized_ATE, optimized_RE
        print('==============\n===============\n')

    mean_errors = errors.mean(0)
    std_errors = errors.std(0)
    error_names = ['ATE', 'RE']
    print('')
    print("Results")
    print("\t {:>10}, {:>10}".format(*error_names))
    print("mean \t {:10.4f}, {:10.4f}".format(*mean_errors))
    print("std \t {:10.4f}, {:10.4f}".format(*std_errors))

    optimized_mean_errors = optimized_errors.mean(0)
    optimized_std_errors = optimized_errors.std(0)
    optimized_error_names = ['optimized_ATE', 'optimized_RE']
    print('')
    print("optimized_Results")
    print("\t {:>10}, {:>10}".format(*optimized_error_names))
    print("mean \t {:10.4f}, {:10.4f}".format(*optimized_mean_errors))
    print("std \t {:10.4f}, {:10.4f}".format(*optimized_std_errors))

    if args.output_dir is not None:
        np.save(output_dir / 'predictions.npy', predictions_array)
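# The rot_matrices / tr_vectors lines above (and their inv_optimized_* counterparts) implement
# the closed-form inverse of a rigid [R | t] transform. A small worked example with an
# illustrative helper name:
import numpy as np

def invert_rigid(T):
    """Invert a rigid 3x4 transform: [R | t]^-1 = [R^T | -R^T t]."""
    R, t = T[:, :3], T[:, 3:]
    R_inv = R.T                # for a rotation matrix, the inverse is the transpose
    t_inv = -R_inv @ t
    return np.concatenate([R_inv, t_inv], axis=-1)

T = np.array([[1.0, 0.0, 0.0, 2.0],
              [0.0, 0.0, -1.0, 0.5],
              [0.0, 1.0, 0.0, -1.0]])
T_inv = invert_rigid(T)
# Composing the two recovers the identity (up to numerical noise).
assert np.allclose(T[:, :3] @ T_inv[:, :3], np.eye(3))
assert np.allclose(T[:, :3] @ T_inv[:, 3:] + T[:, 3:], 0)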
def main():
    args = parser.parse_args()
    if args.gt_type == 'KITTI':
        from kitti_eval.depth_evaluation_utils import test_framework_KITTI as test_framework

    disp_net = DispNetS().cuda()
    weights = torch.load(args.pretrained_dispnet)
    disp_net.load_state_dict(weights['state_dict'])
    disp_net.eval()

    if args.pretrained_posenet is None:
        print('no PoseNet specified, scale_factor will be determined by median ratio, '
              'which is kiiinda cheating (but consistent with original paper)')
        seq_length = 0
    else:
        weights = torch.load(args.pretrained_posenet)
        seq_length = int(weights['state_dict']['conv1.0.weight'].size(1) / 3)
        pose_net = PoseExpNet(nb_ref_imgs=seq_length - 1, output_exp=False).cuda()
        pose_net.load_state_dict(weights['state_dict'], strict=False)

    dataset_dir = Path(args.dataset_dir)
    if args.dataset_list is not None:
        with open(args.dataset_list, 'r') as f:
            test_files = list(f.read().splitlines())
    else:
        test_files = [file.relpathto(dataset_dir) for file in
                      sum([dataset_dir.files('*.{}'.format(ext)) for ext in args.img_exts], [])]

    framework = test_framework(dataset_dir, test_files, seq_length, args.min_depth, args.max_depth)

    output_dir = Path(args.output_dir)
    output_dir.makedirs_p()

    print('{} files to test'.format(len(test_files)))
    errors = np.zeros((2, 7, len(test_files)), np.float32)

    for j, sample in enumerate(tqdm(framework)):
        tgt_img = sample['tgt']
        ref_imgs = sample['ref']

        h, w, _ = tgt_img.shape
        if (not args.no_resize) and (h != args.img_height or w != args.img_width):
            tgt_img = imresize(tgt_img, (args.img_height, args.img_width)).astype(np.float32)
            ref_imgs = [imresize(img, (args.img_height, args.img_width)).astype(np.float32) for img in ref_imgs]

        tgt_img = np.transpose(tgt_img, (2, 0, 1))
        ref_imgs = [np.transpose(img, (2, 0, 1)) for img in ref_imgs]

        tgt_img = torch.from_numpy(tgt_img).unsqueeze(0)
        tgt_img = ((tgt_img / 255 - 0.5) / 0.2).cuda()
        tgt_img_var = Variable(tgt_img, volatile=True)

        ref_imgs_var = []
        for i, img in enumerate(ref_imgs):
            img = torch.from_numpy(img).unsqueeze(0)
            img = ((img / 255 - 0.5) / 0.2).cuda()
            ref_imgs_var.append(Variable(img, volatile=True))

        pred_disp = disp_net(tgt_img_var).data.cpu().numpy()[0, 0]

        gt_depth = sample['gt_depth']

        pred_depth = 1 / pred_disp
        pred_depth_zoomed = zoom(pred_depth,
                                 (gt_depth.shape[0] / pred_depth.shape[0],
                                  gt_depth.shape[1] / pred_depth.shape[1])
                                 ).clip(args.min_depth, args.max_depth)
        if sample['mask'] is not None:
            pred_depth_zoomed = pred_depth_zoomed[sample['mask']]
            gt_depth = gt_depth[sample['mask']]

        if seq_length > 0:
            _, poses = pose_net(tgt_img_var, ref_imgs_var)
            displacements = poses[0, :, :3].norm(2, 1).cpu().data.numpy()  # shape [seq_length - 1]

            scale_factors = (sample['displacements'] / displacements)[sample['displacements'] > 0]
            scale_factors = [s1 / s2 for s1, s2 in zip(sample['displacements'], displacements) if s1 > 0]
            scale_factor = np.mean(scale_factors) if len(scale_factors) > 0 else 0
            if len(scale_factors) == 0:
                print('not good ! ', sample['path'], sample['displacements'])
            errors[0, :, j] = compute_errors(gt_depth, pred_depth_zoomed * scale_factor)

        scale_factor = np.median(gt_depth) / np.median(pred_depth_zoomed)
        errors[1, :, j] = compute_errors(gt_depth, pred_depth_zoomed * scale_factor)

    mean_errors = errors.mean(2)
    error_names = ['abs_rel', 'sq_rel', 'rms', 'log_rms', 'a1', 'a2', 'a3']
    if args.pretrained_posenet:
        print("Results with scale factor determined by PoseNet : ")
        print("{:>10}, {:>10}, {:>10}, {:>10}, {:>10}, {:>10}, {:>10}".format(*error_names))
        print("{:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}".format(*mean_errors[0]))

    print("Results with scale factor determined by GT/prediction ratio (like the original paper) : ")
    print("{:>10}, {:>10}, {:>10}, {:>10}, {:>10}, {:>10}, {:>10}".format(*error_names))
    print("{:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}, {:10.4f}".format(*mean_errors[1]))
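# This variant scales the predicted depth with the ratio of ground-truth to PoseNet displacement
# per snippet, and separately reports the median GT/prediction ratio used by the original paper.
# A compact sketch of both strategies, with illustrative function names:
import numpy as np

def scale_from_displacement(gt_displacements, pred_displacements):
    """Odometry-based scale: mean ratio of GT to predicted camera displacement."""
    ratios = [s1 / s2 for s1, s2 in zip(gt_displacements, pred_displacements) if s1 > 0]
    return np.mean(ratios) if len(ratios) > 0 else 0

def scale_from_median(gt_depth, pred_depth):
    """Depth-statistics scale: median GT depth over median predicted depth."""
    return np.median(gt_depth) / np.median(pred_depth)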
def main():
    args = parser.parse_args()
    from kitti_eval.pose_evaluation_utils import test_framework_KITTI as test_framework

    # net init
    weights = torch.load(args.pretrained_posenet)  # load the weights; returns an OrderedDict
    seq_length = int(weights['state_dict']['conv1.0.weight'].size(1) / 3)
    # conv1.0.weight.shape = (16, 15, 7, 7). The network defines in_planes = 15, out_planes = 16,
    # kernel_size = (7, 7); PyTorch stores conv weights as (out_channels, in_channels, kH, kW),
    # so dimension 1 is the input channel count. seq_length == 5 because of how the model was trained.
    pose_net = PoseExpNet(nb_ref_imgs=seq_length - 1, output_exp=False).to(device)
    pose_net.load_state_dict(weights['state_dict'], strict=False)  # load the model parameters

    dataset_dir = Path(args.dataset_dir)
    framework = test_framework(dataset_dir, args.sequences, seq_length)

    print('{} snippets to test'.format(len(framework)))
    errors = np.zeros((len(framework), 2), np.float32)
    if args.output_dir is not None:
        output_dir = Path(args.output_dir)
        output_dir.makedirs_p()
        predictions_array = np.zeros((len(framework), seq_length, 3, 4))

    # main cycle
    for j, sample in enumerate(tqdm(framework)):  # j from 0 to 1591; tqdm(obj) calls __iter__
        if j > 100:
            break
        imgs = sample['imgs']  # [375, 1242, 3]

        h, w, _ = imgs[0].shape
        if (not args.no_resize) and (h != args.img_height or w != args.img_width):
            imgs = [imresize(img, (args.img_height, args.img_width)).astype(np.float32) for img in imgs]  # [128, 416, 3]

        imgs = [np.transpose(img, (2, 0, 1)) for img in imgs]  # [3, 128, 416]; (2, 0, 1) moves channels first

        ref_imgs = []
        for i, img in enumerate(imgs):
            img = torch.from_numpy(img).unsqueeze(0)
            img = ((img / 255 - 0.5) / 0.5).to(device)
            if i == len(imgs) // 2:
                tgt_img = img
            else:
                ref_imgs.append(img)

        # pose prediction
        # tgt_img size [1, 3, h, w]
        # ref_imgs: list of [1, 3, h, w], length seq_length - 1
        _, poses = pose_net(tgt_img, ref_imgs)  # returns exp_mask, pose
        # the leading 1 is the batch dimension needed during training
        # pose tensor size = (1, num_ref_imgs (4), 6)
        poses = poses.cpu()[0]  # (4, 6)
        poses = torch.cat([poses[:len(imgs) // 2], torch.zeros(1, 6).float(), poses[len(imgs) // 2:]])
        # an all-zero row is inserted in the middle for the key frame:
        # its motion relative to itself is zero; shape [5, 6]

        inv_transform_matrices = pose_vec2mat(poses, rotation_mode=args.rotation_mode).numpy().astype(np.float64)  # shape = (5, 3, 4)

        rot_matrices = np.linalg.inv(inv_transform_matrices[:, :, :3])
        tr_vectors = -rot_matrices @ inv_transform_matrices[:, :, -1:]

        transform_matrices = np.concatenate([rot_matrices, tr_vectors], axis=-1)

        first_inv_transform = inv_transform_matrices[0]
        final_poses = first_inv_transform[:, :3] @ transform_matrices
        final_poses[:, :, -1:] += first_inv_transform[:, -1:]  # (5, 3, 4)

        if args.output_dir is not None:  # record the forward-pass results for output
            predictions_array[j] = final_poses

        ATE, RE = compute_pose_error(sample['poses'], final_poses)
        errors[j] = ATE, RE

    mean_errors = errors.mean(0)
    std_errors = errors.std(0)
    error_names = ['ATE', 'RE']
    print('')
    print("Results")
    print("\t {:>10}, {:>10}".format(*error_names))
    print("mean \t {:10.4f}, {:10.4f}".format(*mean_errors))
    print("std \t {:10.4f}, {:10.4f}".format(*std_errors))

    if args.output_dir is not None:
        np.save(output_dir / 'predictions.npy', predictions_array)
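# pose_vec2mat turns each 6-DoF vector (tx, ty, tz, rx, ry, rz) predicted by PoseExpNet into a
# 3x4 [R | t] matrix. The real implementation works on batched torch tensors and supports both
# 'euler' and 'quat' rotation modes; below is a simplified single-pose numpy sketch for the Euler
# case (the rotation composition order here is a convention choice, not necessarily the project's).
import numpy as np

def pose_vec2mat_sketch(vec):
    """6-DoF vector [tx, ty, tz, rx, ry, rz] -> 3x4 [R | t]."""
    t = np.asarray(vec[:3], dtype=np.float64).reshape(3, 1)
    rx, ry, rz = vec[3:]
    cx, sx = np.cos(rx), np.sin(rx)
    cy, sy = np.cos(ry), np.sin(ry)
    cz, sz = np.cos(rz), np.sin(rz)
    Rx = np.array([[1, 0, 0], [0, cx, -sx], [0, sx, cx]])
    Ry = np.array([[cy, 0, sy], [0, 1, 0], [-sy, 0, cy]])
    Rz = np.array([[cz, -sz, 0], [sz, cz, 0], [0, 0, 1]])
    R = Rx @ Ry @ Rz
    return np.concatenate([R, t], axis=-1)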
def main(): '''加载训练后的模型''' weights = torch.load(args.pretrained_posenet) seq_length = int(weights['state_dict']['conv1.0.weight'].size(1) / 3) pose_net = PoseExpNet(nb_ref_imgs=seq_length - 1, output_exp=False).to(device) pose_net.load_state_dict(weights['state_dict'], strict=False) # 网络模型的MD5 ID net_ID = MD5_ID(args.pretrained_posenet) # L和C的转换矩阵,对齐输入位姿到雷达坐标系 Transform_matrix_L2C = np.identity(4) '''Kitti switch''' if args.isKitti: from utils.UtilsKitti import test_framework_KITTI as test_framework save_dir = os.path.join(args.output_dir, "kitti", args.sequence_idx, 'net_' + net_ID) downsample_img_height = 128 downsample_img_width = 416 Transform_matrix_L2C[:3, :3] = np.array( [[7.533745e-03, -9.999714e-01, -6.166020e-04], [1.480249e-02, 7.280733e-04, -9.998902e-01], [9.998621e-01, 7.523790e-03, 1.480755e-02]]) Transform_matrix_L2C[:3, -1:] = np.array( [-4.069766e-03, -7.631618e-02, -2.717806e-01]).reshape(3, 1) else: from utils.UtilsMyData import test_framework_MyData as test_framework save_dir = os.path.join(args.output_dir, "mydataset", args.sequence_idx, 'net_' + net_ID) downsample_img_height = args.img_height downsample_img_width = args.img_width Transform_matrix_L2C[:3, :3] = np.array( [[-1.51482698e-02, -9.99886648e-01, 5.36310553e-03], [-4.65337018e-03, -5.36307196e-03, -9.99969412e-01], [9.99870070e-01, -1.56647995e-02, -4.48880010e-03]]) Transform_matrix_L2C[:3, -1:] = np.array( [4.29029924e-03, -6.08539196e-02, -9.20346161e-02]).reshape(3, 1) Transform_matrix_L2C = GramSchmidtHelper(Transform_matrix_L2C) Transform_matrix_C2L = np.linalg.inv(Transform_matrix_L2C) '''载入测试数据:图像和点云''' dataset_dir = Path(args.dataset_dir) sequences = [args.sequence_idx] framework = test_framework(dataset_dir, sequences, seq_length) pointcloud_dir = os.path.join(args.dataset_dir, 'sequences', args.sequence_idx, 'velodyne') pointClouds = loadPointCloud(pointcloud_dir) print('{} snippets to test'.format(len(framework))) '''误差初始化''' errors = np.zeros((len(framework), 2), np.float32) optimized_errors = np.zeros((len(framework), 2), np.float32) ############################################################## '''输出到文件中的数据初始化''' num_poses = len(framework) - (seq_length - 2) '''效率''' VO_processing_time = np.zeros(num_poses - 1) ICP_iterations = np.zeros(num_poses - 1) ICP_fitness = np.zeros(num_poses - 1) ICP_iteration_time = np.zeros(num_poses - 1) '''绝对位姿列表初始化''' # 对齐到雷达坐标系,VO模型输出的带有尺度的绝对位姿 abs_VO_poses = np.zeros((num_poses, 12)) abs_VO_poses[0] = np.identity(4)[:3, :].reshape(12) abs_VO_pose = np.identity(4) # 位姿估计值,对齐到相机坐标系下,和真值直接比较(仅适用于有相机坐标系下的真值) est_poses = np.zeros((num_poses, 12)) est_poses[0] = np.identity(4)[:3, :].reshape(12) '''帧间位姿列表初始化''' # 对齐到雷达坐标系,VO模型输出的带有尺度的帧间位姿 rel_VO_poses = np.zeros((num_poses - 1, 12)) '''尺度因子''' scale_factors = np.zeros((num_poses - 1, 1)) # 用来估计尺度因子 last_rel_LO_pose = np.identity(4) last_rel_VO_pose = np.identity(4) ############################################################## '''创建输出文件夹位置及文件后缀''' save_dir = Path(save_dir) print('Output files wiil be saved in: ' + save_dir) if not os.path.exists(save_dir): save_dir.makedirs_p() suffix = "pts@" + str(args.num_icp_points) + \ "_prop@" + str(args.proposal) + \ "_tolerance@" + str(args.tolerance) + \ "_scm@" + str(args.scm_type) + \ "_thresh@" + str(args.loop_threshold) if args.scan2submap: suffix = suffix + '_scan2map@True' '''Pose Graph Manager (for back-end optimization) initialization''' PGM = PoseGraphManager() PGM.addPriorFactor() '''Saver初始化''' num_frames = len(tqdm(framework)) ResultSaver = 
PoseGraphResultSaver(init_pose=PGM.curr_se3, save_gap=args.save_gap, num_frames=num_frames, seq_idx=args.sequence_idx, save_dir=save_dir) if args.loop: '''Scan Context Manager (for loop detection) initialization''' SCM = ScanContextManager(shape=[args.num_rings, args.num_sectors], num_candidates=args.num_candidates, threshold=args.loop_threshold) '''Mapping initialzation''' if args.mapping is True: Map = MappingManager(k=args.k) # for save the result as a video fig_idx = 1 fig = plt.figure(fig_idx) writer = FFMpegWriter(fps=15) vedio_name = suffix + ".mp4" vedio_path = os.path.join(save_dir, vedio_name) num_frames_to_skip_to_show = 5 num_frames_to_save = np.floor(num_frames / num_frames_to_skip_to_show) with writer.saving( fig, vedio_path, num_frames_to_save): # this video saving part is optional for j, sample in enumerate(tqdm(framework)): ''' ***********************************VO部分********************************* ''' # 图像降采样 imgs = sample['imgs'] w, h = imgs[0].size if (not args.no_resize) and (h != downsample_img_height or w != downsample_img_width): imgs = [(np.array( img.resize((downsample_img_width, downsample_img_height)))).astype(np.float32) for img in imgs] imgs = [np.transpose(img, (2, 0, 1)) for img in imgs] # numpy array 转troch tensor ref_imgs = [] for i, img in enumerate(imgs): img = torch.from_numpy(img).unsqueeze(0) img = ((img / 255 - 0.5) / 0.5).to(device) if i == len(imgs) // 2: tgt_img = img else: ref_imgs.append(img) startTimeVO = time.time() _, poses = pose_net(tgt_img, ref_imgs) VO_processing_time[j] = time.time() - startTimeVO final_poses = pose2tf_mat(args.rotation_mode, imgs, poses) # cur_VO_pose取final poses的第2项,则是取T10,T21,T32。。。 rel_VO_pose = np.identity(4) rel_VO_pose[:3, :] = final_poses[1] # 尺度因子的确定:采用上一帧的LO输出位姿和VO输出位姿的尺度比值作为当前帧的尺度因子,初始尺度为1 # version1.0 if args.isKitti: if j == 0: scale_factor = 7 else: scale_factor = math.sqrt( np.sum(last_rel_LO_pose[:3, -1]**2) / np.sum(last_rel_VO_pose[:3, -1]**2)) else: scale_factor = 7 # version2.0 固定模型的尺度因子 # if args.isKitti: # scale_factor = 35 # else: # scale_factor = 7 scale_factors[j] = scale_factor last_rel_VO_pose = copy.deepcopy(rel_VO_pose) # 注意深拷贝 # 先尺度修正,再对齐坐标系,施密特正交化避免病态矩阵 rel_VO_pose[:3, -1:] = rel_VO_pose[:3, -1:] * scale_factor rel_VO_pose = Transform_matrix_C2L @ rel_VO_pose @ np.linalg.inv( Transform_matrix_C2L) rel_VO_pose = GramSchmidtHelper(rel_VO_pose) rel_VO_poses[j] = rel_VO_pose[:3, :].reshape(12) abs_VO_pose = np.matmul(abs_VO_pose, rel_VO_pose) abs_VO_poses[j + 1] = abs_VO_pose[:3, :].reshape(12) '''*************************LO部分******************************************''' # 初始化 if j == 0: last_pts = pointClouds[j] if args.loop: down_points = random_sampling( last_pts, num_points=args.num_icp_points) SCM.addNode(j, down_points) if args.mapping is True: Map.updateMap(curr_se3=PGM.curr_se3, curr_local_ptcloud=last_pts, down_points=args.map_down_points, submap_points=args.num_icp_points) curr_pts = pointClouds[j + 1] # 选择LO的初值预估,分别是无预估,上一帧位姿,VO位姿 if args.proposal == 0: init_pose = np.identity(4) elif args.proposal == 1: init_pose = last_rel_LO_pose elif args.proposal == 2: init_pose = rel_VO_pose # print('init_pose') # print(init_pose) '''icp 类型选择2*2=4''' startTime = time.time() if args.scan2submap: submap = Map.getSubMap() if args.icp_version == 0: curr_pts = random_sampling(curr_pts, args.num_icp_points) rel_LO_pose, distacnces, iterations = icp( curr_pts, submap, init_pose=init_pose, tolerance=args.tolerance, max_iterations=50) elif args.icp_version == 1: if args.isKitti: curr_pts = 
random_sampling(curr_pts, args.num_icp_points) rel_LO_pose, fitness, inlier_rmse = p2l_icp( curr_pts, submap, trans_init=init_pose, threshold=0.05) else: if args.icp_version == 0: curr_pts = random_sampling(curr_pts, args.num_icp_points) last_pts = random_sampling(last_pts, args.num_icp_points) rel_LO_pose, distacnces, iterations = icp( curr_pts, last_pts, init_pose=init_pose, tolerance=args.tolerance, max_iterations=50) elif args.icp_version == 1: if args.isKitti: curr_pts = random_sampling(curr_pts, args.num_icp_points) last_pts = random_sampling(last_pts, args.num_icp_points) rel_LO_pose, fitness, inlier_rmse = p2l_icp( curr_pts, last_pts, trans_init=init_pose, threshold=0.05) ICP_iteration_time[j] = time.time() - startTime # print('rel_LO_pose1') # print(rel_LO_pose) if args.icp_version == 0: ICP_iterations[j] = iterations elif args.icp_version == 1: ICP_fitness[j] = fitness # 开始精匹配 if args.fine_matching == 1: submap = Map.getSubMap() if args.icp_version == 0: rel_LO_pose, distacnces, iterations = icp( curr_pts, submap, init_pose=rel_LO_pose, tolerance=args.tolerance, max_iterations=50) elif args.icp_version == 1: rel_LO_pose, fitness, inlier_rmse = p2l_icp( curr_pts, submap, trans_init=rel_LO_pose, threshold=0.05) # print('rel_LO_pose2') # print(rel_LO_pose) ResultSaver.saveRelativePose(rel_LO_pose) '''更新变量''' last_pts = curr_pts last_rel_LO_pose = rel_LO_pose if args.loop: '''Update loop detection nodes''' down_points = random_sampling(curr_pts, num_points=args.num_icp_points) SCM.addNode(j + 1, down_points) '''Update the edges and nodes of pose graph''' PGM.curr_node_idx = j + 1 PGM.curr_se3 = np.matmul(PGM.curr_se3, rel_LO_pose) PGM.addOdometryFactor(rel_LO_pose) PGM.prev_node_idx = PGM.curr_node_idx est_pose = Transform_matrix_L2C @ PGM.curr_se3 @ np.linalg.inv( Transform_matrix_L2C) est_poses[j + 1] = est_pose[:3, :].reshape(12) # 建图更新 if args.mapping is True: Map.updateMap(curr_se3=PGM.curr_se3, curr_local_ptcloud=curr_pts, down_points=args.map_down_points, submap_points=args.num_icp_points) # loop detection and optimize the graph if (args.loop and PGM.curr_node_idx > 1 and PGM.curr_node_idx % args.try_gap_loop_detection == 0): # 1/ loop detection loop_idx, loop_dist, yaw_diff_deg = SCM.detectLoop() if (loop_idx == None): # NOT FOUND pass else: print("Loop event detected: ", PGM.curr_node_idx, loop_idx, loop_dist) # 2-1/ add the loop factor loop_scan_down_pts = SCM.getPtcloud(loop_idx) if args.icp_version == 0: loop_transform, _, _ = icp( curr_pts, loop_scan_down_pts, init_pose=yawdeg2se3(yaw_diff_deg), tolerance=args.tolerance, max_iterations=50) elif args.icp_version == 1: loop_transform, _, _ = p2l_icp( curr_pts, loop_scan_down_pts, trans_init=yawdeg2se3(yaw_diff_deg), threshold=0.05) PGM.addLoopFactor(loop_transform, loop_idx) # 2-2/ graph optimization PGM.optimizePoseGraph() # 2-2/ save optimized poses ResultSaver.saveOptimizedPoseGraphResult( PGM.curr_node_idx, PGM.graph_optimized) # 2-3/ updateMap if args.mapping is True: Map.optimizeGlobalMap(PGM.graph_optimized, PGM.curr_node_idx) # save the ICP odometry pose result (no loop closure) ResultSaver.saveUnoptimizedPoseGraphResult(PGM.curr_se3, PGM.curr_node_idx) # if (j % num_frames_to_skip_to_show == 0): # ResultSaver.vizCurrentTrajectory(fig_idx=fig_idx) # writer.grab_frame() if args.vizmapping is True: Map.vizMapWithOpen3D() if args.mapping is True: map_name = suffix + '.pcd' map_path = os.path.join(save_dir, map_name) Map.saveMap2File(map_path) if args.output_dir is not None: # np.save(output_dir / 'predictions.npy', 
predictions_array) np.savetxt(save_dir / 'scale_factors_' + suffix + '.txt', scale_factors) np.savetxt(save_dir / 'rel_VO_poses.txt', rel_VO_poses) np.savetxt(save_dir / 'abs_VO_poses.txt', abs_VO_poses) rel_LO_poses_file = 'rel_LO_poses_' + suffix + '.txt' abs_LO_poses_file = 'abs_LO_poses_' + suffix + '.txt' ResultSaver.saveRelativePosesResult(rel_LO_poses_file) ResultSaver.saveFinalPoseGraphResult(abs_LO_poses_file) if args.icp_version == 0: np.savetxt(save_dir / 'iterations_' + suffix + '.txt', ICP_iterations) elif args.icp_version == 1: np.savetxt(save_dir / 'fitness_' + suffix + '.txt', ICP_fitness) np.savetxt(save_dir / 'iteration_time_' + suffix + '.txt', ICP_iteration_time) np.savetxt(save_dir / 'VO_processing_time.txt', VO_processing_time) if args.isKitti: np.savetxt( save_dir / 'est_poses_' + suffix + '.txt'.format(args.sequence_idx), est_poses) # VO输出位姿的精度指标 mean_errors = errors.mean(0) std_errors = errors.std(0) error_names = ['ATE', 'RE'] print('') print("VO_Results") print("\t {:>10}, {:>10}".format(*error_names)) print("mean \t {:10.4f}, {:10.4f}".format(*mean_errors)) print("std \t {:10.4f}, {:10.4f}".format(*std_errors)) # LO二次优化后的精度指标 optimized_mean_errors = optimized_errors.mean(0) optimized_std_errors = optimized_errors.std(0) optimized_error_names = ['optimized_ATE', 'optimized_RE'] print('') print("LO_optimized_Results") print("\t {:>10}, {:>10}".format(*optimized_error_names)) print("mean \t {:10.4f}, {:10.4f}".format(*optimized_mean_errors)) print("std \t {:10.4f}, {:10.4f}".format(*optimized_std_errors)) # 存储优化前后误差精度指标 if args.isKitti: err_statics = np.array([ mean_errors, std_errors, optimized_mean_errors, optimized_std_errors ]) np.savetxt( save_dir / 'err_statics_' + suffix + '.txt'.format(args.sequence_idx), err_statics) # 迭代次数 mean_iterations = ICP_iterations.mean() std_iterations = ICP_iterations.std() _names = ['iteration'] print('') print("LO迭代次数") print("\t {:>10}".format(*_names)) print("mean \t {:10.4f}".format(mean_iterations)) print("std \t {:10.4f}".format(std_iterations)) # 迭代时间 mean_iter_time = ICP_iteration_time.mean() std_iter_time = ICP_iteration_time.std() _names = ['iter_time'] print('') print("LO迭代时间:单位/s") print("\t {:>10}".format(*_names)) print("mean \t {:10.4f}".format(mean_iter_time)) print("std \t {:10.4f}".format(std_iter_time))
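# Several of these scripts recover the monocular scale on the fly as the ratio between the
# previous frame's LO and VO translation magnitudes, with a fixed bootstrap value for the first
# frame. A small sketch of that update, with illustrative names:
import numpy as np

def update_scale_factor(last_rel_LO_pose, last_rel_VO_pose, first_frame, bootstrap=7.0):
    """scale = ||t_LO|| / ||t_VO|| from the previous frame; fixed value at start-up."""
    if first_frame:
        return bootstrap
    t_lo = last_rel_LO_pose[:3, -1]
    t_vo = last_rel_VO_pose[:3, -1]
    return np.sqrt(np.sum(t_lo ** 2) / np.sum(t_vo ** 2))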
def main():
    args = parser.parse_args()
    # from kitti_eval.VOLO_data_utils import test_framework_KITTI as test_framework
    from kitti_eval.pose_evaluation_utils import test_framework_KITTI as test_framework
    weights = torch.load(args.pretrained_posenet)
    seq_length = int(weights['state_dict']['conv1.0.weight'].size(1) / 3)
    pose_net = PoseExpNet(nb_ref_imgs=seq_length - 1, output_exp=False).to(device)
    pose_net.load_state_dict(weights['state_dict'], strict=False)

    dataset_dir = Path(args.dataset_dir)
    sequences = [args.sequence_idx]
    framework = test_framework(dataset_dir, sequences, seq_length)
    print('{} snippets to test'.format(len(framework)))

    errors = np.zeros((len(framework), 2), np.float32)
    optimized_errors = np.zeros((len(framework), 2), np.float32)
    iteration_arr = np.zeros(len(framework))
    LO_iter_times = np.zeros(len(framework))

    if args.output_dir is not None:
        output_dir = Path(args.output_dir)
        output_dir.makedirs_p()
        predictions_array = np.zeros((len(framework), seq_length, 3, 4))
        abs_VO_poses = np.zeros((len(framework), 12))

    abs_VO_pose = np.identity(4)
    last_pose = np.identity(4)
    last_VO_pose = np.identity(4)

    # Transformation between the LiDAR (L) and camera (C) frames, used to align
    # the input poses to the LiDAR coordinate frame
    Transform_matrix_L2C = np.identity(4)
    Transform_matrix_L2C[:3, :3] = np.array([[7.533745e-03, -9.999714e-01, -6.166020e-04],
                                             [1.480249e-02, 7.280733e-04, -9.998902e-01],
                                             [9.998621e-01, 7.523790e-03, 1.480755e-02]])
    Transform_matrix_L2C[:3, -1:] = np.array([-4.069766e-03, -7.631618e-02, -2.717806e-01]).reshape(3, 1)
    Transform_matrix_C2L = np.linalg.inv(Transform_matrix_L2C)

    pointClouds = loadPointCloud(args.dataset_dir + "/sequences/" + args.sequence_idx + "/velodyne")

    # ************* Visualization setup ***********************
    num_frames = len(framework)
    # Pose Graph Manager (for back-end optimization) initialization
    PGM = PoseGraphManager()
    PGM.addPriorFactor()
    # Result saver
    save_dir = "result/" + args.sequence_idx
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    ResultSaver = PoseGraphResultSaver(init_pose=PGM.curr_se3,
                                       save_gap=args.save_gap,
                                       num_frames=num_frames,
                                       seq_idx=args.sequence_idx,
                                       save_dir=save_dir)
    # Scan Context Manager (for loop detection) initialization
    SCM = ScanContextManager(shape=[args.num_rings, args.num_sectors],
                             num_candidates=args.num_candidates,
                             threshold=args.loop_threshold)
    # for saving the results as a video
    fig_idx = 1
    fig = plt.figure(fig_idx)
    writer = FFMpegWriter(fps=15)
    video_name = args.sequence_idx + "_" + str(args.num_icp_points) + "_prop@" + str(args.proposal) \
        + "_tol@" + str(args.tolerance) + ".mp4"
    num_frames_to_skip_to_show = 5
    num_frames_to_save = np.floor(num_frames / num_frames_to_skip_to_show)

    with writer.saving(fig, video_name, num_frames_to_save):  # this video saving part is optional
        for j, sample in enumerate(tqdm(framework)):
            '''
            *************************************** VO part *******************************************
            '''
            imgs = sample['imgs']
            h, w, _ = imgs[0].shape
            if (not args.no_resize) and (h != args.img_height or w != args.img_width):
                imgs = [imresize(img, (args.img_height, args.img_width)).astype(np.float32) for img in imgs]
            imgs = [np.transpose(img, (2, 0, 1)) for img in imgs]

            ref_imgs = []
            for i, img in enumerate(imgs):
                img = torch.from_numpy(img).unsqueeze(0)
                img = ((img / 255 - 0.5) / 0.5).to(device)
                if i == len(imgs) // 2:
                    tgt_img = img
                else:
                    ref_imgs.append(img)

            startTimeVO = time.time()
            _, poses = pose_net(tgt_img, ref_imgs)
            timeCostVO = time.time() - startTimeVO

            poses = poses.cpu()[0]
            poses = torch.cat([poses[:len(imgs) // 2], torch.zeros(1, 6).float(), poses[len(imgs) // 2:]])
            inv_transform_matrices = pose_vec2mat(poses, rotation_mode=args.rotation_mode).numpy().astype(np.float64)

            rot_matrices = np.linalg.inv(inv_transform_matrices[:, :, :3])
            tr_vectors = -rot_matrices @ inv_transform_matrices[:, :, -1:]
            transform_matrices = np.concatenate([rot_matrices, tr_vectors], axis=-1)
            print('**********DeepVO result: time_cost {:.3} s'.format(timeCostVO / (len(imgs) - 1)))
            # print(transform_matrices)

            # Convert the transforms, which are relative to the middle frame (index 1 of [0 1 2]),
            # into poses relative to frame 0
            first_inv_transform = inv_transform_matrices[0]
            final_poses = first_inv_transform[:, :3] @ transform_matrices
            final_poses[:, :, -1:] += first_inv_transform[:, -1:]
            # print('first')
            # print(first_inv_transform)
            print('poses')
            print(final_poses)

            # cur_VO_pose takes the second entry of final_poses, i.e. the inter-frame
            # transforms T10, T21, T32, ...
            cur_VO_pose = np.identity(4)
            cur_VO_pose[:3, :] = final_poses[1]
            print("Inter-frame pose before scale correction and alignment")
            print(cur_VO_pose)
            print("last_pose")
            print(last_pose)
            print("last_VO_pose")
            print(last_VO_pose)

            # Scale factor: the ratio between the previous frame's LO pose and VO pose
            # (translation magnitudes) is used as the current frame's scale factor;
            # the initial scale is 1
            if j == 0:
                scale_factor = 7
            else:
                scale_factor = math.sqrt(np.sum(last_pose[:3, -1] ** 2) / np.sum(last_VO_pose[:3, -1] ** 2))
                print("numerator", np.sum(last_pose[:3, -1] ** 2))
                print("denominator", np.sum(last_VO_pose[:3, -1] ** 2))
            last_VO_pose = copy.deepcopy(cur_VO_pose)  # note: deep copy
            print("scale factor:", scale_factor)

            # First apply the scale correction, then align to the LiDAR frame
            cur_VO_pose[:3, -1:] = cur_VO_pose[:3, -1:] * scale_factor
            print("After scale correction...")
            print(cur_VO_pose)
            cur_VO_pose = Transform_matrix_C2L @ cur_VO_pose @ np.linalg.inv(Transform_matrix_C2L)
            print("Inter-frame pose aligned to the LiDAR coordinate frame")
            print(cur_VO_pose)

            '''
            ************************* LO part ******************************************
            '''
            tgt_pc = random_sampling(pointClouds[j], 5000)
            pc = random_sampling(pointClouds[j + 1], 5000)

            from point_cloud_processing.icpImpl import icp
            if args.proposal == 0:
                init_pose = None
            elif args.proposal == 1:
                init_pose = last_pose
            elif args.proposal == 2:
                init_pose = cur_VO_pose

            startTimeLO = time.time()
            odom_transform, distances, iterations = icp(pc, tgt_pc, init_pose=init_pose,
                                                        tolerance=args.tolerance, max_iterations=50)
            iter_time = time.time() - startTimeLO
            LO_iter_times[j] = iter_time
            iteration_arr[j] = iterations

            last_pose = odom_transform
            print("Pose after LO refinement, mean_dis: ", np.asarray(distances).mean())
            print(odom_transform)
            print("LO iteration count:", iterations)

            PGM.curr_node_idx = j  # make start with 0
            if (PGM.curr_node_idx == 0):
                PGM.prev_node_idx = PGM.curr_node_idx
                continue
            # update the current (moved) pose
            PGM.curr_se3 = np.matmul(PGM.curr_se3, odom_transform)
            # add the odometry factor to the graph
            # PGM.addOdometryFactor(cur_VO_pose)

            # renewal the prev information
            PGM.prev_node_idx = PGM.curr_node_idx

            # loop detection and optimize the graph
            if (PGM.curr_node_idx > 1 and PGM.curr_node_idx % args.try_gap_loop_detection == 0):
                # 1/ loop detection
                loop_idx, loop_dist, yaw_diff_deg = SCM.detectLoop()
                if (loop_idx is None):  # NOT FOUND
                    pass
                # else:
                #     print("Loop event detected: ", PGM.curr_node_idx, loop_idx, loop_dist)
                #     # 2-1/ add the loop factor
                #     loop_scan_down_pts = SCM.getPtcloud(loop_idx)
                #     loop_transform, _, _ = ICP.icp(curr_scan_down_pts, loop_scan_down_pts,
                #                                    init_pose=yawdeg2se3(yaw_diff_deg), max_iterations=20)
                #     PGM.addLoopFactor(loop_transform, loop_idx)
                #
                #     # 2-2/ graph optimization
                #     PGM.optimizePoseGraph()
                #
                #     # 2-2/ save optimized poses
                #     ResultSaver.saveOptimizedPoseGraphResult(PGM.curr_node_idx, PGM.graph_optimized)

            # save the ICP odometry pose result (no loop closure)
            ResultSaver.saveUnoptimizedPoseGraphResult(PGM.curr_se3, PGM.curr_node_idx)
            if (j % num_frames_to_skip_to_show == 0):
                ResultSaver.vizCurrentTrajectory(fig_idx=fig_idx)
                writer.grab_frame()

            if args.output_dir is not None:
                predictions_array[j] = final_poses
                abs_VO_poses[j] = abs_VO_pose[:3, :].reshape(-1, 12)[0]

            ATE, RE = compute_pose_error(sample['poses'], final_poses)
            errors[j] = ATE, RE
            optimized_ATE, optimized_RE = compute_LO_pose_error(sample['poses'], odom_transform, Transform_matrix_L2C)
            optimized_errors[j] = optimized_ATE, optimized_RE

    # Accuracy metrics of the VO output poses
    mean_errors = errors.mean(0)
    std_errors = errors.std(0)
    error_names = ['ATE', 'RE']
    print('')
    print("VO_Results")
    print("\t {:>10}, {:>10}".format(*error_names))
    print("mean \t {:10.4f}, {:10.4f}".format(*mean_errors))
    print("std \t {:10.4f}, {:10.4f}".format(*std_errors))

    # Accuracy metrics after LO refinement
    optimized_mean_errors = optimized_errors.mean(0)
    optimized_std_errors = optimized_errors.std(0)
    optimized_error_names = ['optimized_ATE', 'optimized_RE']
    print('')
    print("LO_optimized_Results")
    print("\t {:>10}, {:>10}".format(*optimized_error_names))
    print("mean \t {:10.4f}, {:10.4f}".format(*optimized_mean_errors))
    print("std \t {:10.4f}, {:10.4f}".format(*optimized_std_errors))

    # LO iteration counts
    mean_iterations = iteration_arr.mean()
    std_iterations = iteration_arr.std()
    _names = ['iteration']
    print('')
    print("LO iteration count")
    print("\t {:>10}".format(*_names))
    print("mean \t {:10.4f}".format(mean_iterations))
    print("std \t {:10.4f}".format(std_iterations))

    # LO iteration time
    mean_iter_time = LO_iter_times.mean()
    std_iter_time = LO_iter_times.std()
    _names = ['iter_time']
    print('')
    print("LO iteration time (unit: s)")
    print("\t {:>10}".format(*_names))
    print("mean \t {:10.4f}".format(mean_iter_time))
    print("std \t {:10.4f}".format(std_iter_time))

    if args.output_dir is not None:
        np.save(output_dir / 'predictions.npy', predictions_array)
        np.savetxt(output_dir / 'abs_VO_poses.txt', abs_VO_poses)
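# Minimal sketch of the two pose post-processing steps used in the loop above
# (scale recovery from the previous frame's LO/VO translation ratio, and the
# camera-to-LiDAR similarity transform), assuming 4x4 homogeneous pose matrices.
# The helper names are illustrative and not part of the repository.
import numpy as np

def rescale_translation(T_vo, T_lo_prev, T_vo_prev):
    """Rescale the VO translation with the LO/VO translation-norm ratio of the previous frame."""
    scale = np.sqrt(np.sum(T_lo_prev[:3, 3] ** 2) / np.sum(T_vo_prev[:3, 3] ** 2))
    T = T_vo.copy()
    T[:3, 3] *= scale
    return T, scale

def camera_to_lidar(T_cam, T_c2l):
    """Express a camera-frame relative pose in the LiDAR frame via a similarity transform."""
    return T_c2l @ T_cam @ np.linalg.inv(T_c2l)

# Usage corresponding to the loop above (variable names as in the code):
# cur_VO_pose, scale_factor = rescale_translation(cur_VO_pose, last_pose, last_VO_pose)
# cur_VO_pose = camera_to_lidar(cur_VO_pose, Transform_matrix_C2L)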
def main():
    args = parser.parse_args()

    attack = False
    if args.perturbation and args.tracker_file:
        attack = True
        perturbation = np.load(Path(args.perturbation))
        noise_mask = np.load(Path(args.tracker_file))

    from kitti_eval.pose_evaluation_utils import test_framework_KITTI as test_framework
    weights = torch.load(args.pretrained_posenet, map_location=device)
    seq_length = int(weights['state_dict']['conv1.0.weight'].size(1) / 3)
    pose_net = PoseExpNet(nb_ref_imgs=seq_length - 1, output_exp=False).to(device)
    pose_net.load_state_dict(weights['state_dict'], strict=False)

    dataset_dir = Path(args.dataset_dir)
    framework = test_framework(dataset_dir, args.sequences, seq_length)
    print('{} snippets to test'.format(len(framework)))
    errors = np.zeros((len(framework), 2), np.float32)

    if args.output_dir is not None:
        output_dir = Path(args.output_dir)
        output_dir.makedirs_p()
        predictions_array = np.zeros((len(framework), seq_length, 3, 4))
        ground_truth_array = np.zeros((len(framework), seq_length, 3, 4))

    for j, sample in enumerate(tqdm(framework)):
        imgs = sample['imgs']
        h, w, _ = imgs[0].shape
        if (not args.no_resize) and (h != args.img_height or w != args.img_width):
            imgs = [imresize(img, (args.img_height, args.img_width)).astype(np.float32) for img in imgs]
        imgs = [np.transpose(img, (2, 0, 1)) for img in imgs]

        ref_imgs = []
        for i, img in enumerate(imgs):
            img = torch.from_numpy(img).unsqueeze(0)
            img = ((img / 255 - 0.5) / 0.5).to(device)
            if i == len(imgs) // 2:
                tgt_img = img
            else:
                ref_imgs.append(img)

        if attack:
            # first_frame and last_frame (the attacked frame range) are assumed to be
            # defined elsewhere in this file
            # Add noise to target image
            if j + 2 >= first_frame and j + 2 < last_frame:
                curr_mask = noise_mask[j - first_frame + 2].astype(int)
                w = curr_mask[2] - curr_mask[0]
                h = curr_mask[3] - curr_mask[1]
                noise_box = resize2d(perturbation, (h, w))
                tgt_img[0][:, curr_mask[1]:curr_mask[3], curr_mask[0]:curr_mask[2]] += noise_box
                tgt_img[0] = tgt_img[0].clamp(-1, 1)

            # Add noise to reference images
            for k in range(5):
                ref_idx = k
                if k == 2:  # Skip target image
                    continue
                if k > 2:  # Since it is numbered: ref1, ref2, tgt, ref3, ref4
                    ref_idx = k - 1
                if j + k >= first_frame and j + k < last_frame:
                    curr_mask = noise_mask[j - first_frame + k].astype(int)
                    w = curr_mask[2] - curr_mask[0]
                    h = curr_mask[3] - curr_mask[1]
                    noise_box = resize2d(perturbation, (h, w))
                    ref_imgs[ref_idx][0][:, curr_mask[1]:curr_mask[3], curr_mask[0]:curr_mask[2]] += noise_box
                    ref_imgs[ref_idx] = ref_imgs[ref_idx].clamp(-1, 1)

        _, poses = pose_net(tgt_img, ref_imgs)
        poses = poses.cpu()[0]
        poses = torch.cat([poses[:len(imgs) // 2], torch.zeros(1, 6).float(), poses[len(imgs) // 2:]])

        inv_transform_matrices = pose_vec2mat(poses, rotation_mode=args.rotation_mode).numpy().astype(np.float64)
        rot_matrices = np.linalg.inv(inv_transform_matrices[:, :, :3])
        tr_vectors = -rot_matrices @ inv_transform_matrices[:, :, -1:]
        transform_matrices = np.concatenate([rot_matrices, tr_vectors], axis=-1)

        first_inv_transform = inv_transform_matrices[0]
        final_poses = first_inv_transform[:, :3] @ transform_matrices
        final_poses[:, :, -1:] += first_inv_transform[:, -1:]

        if args.output_dir is not None:
            ground_truth_array[j] = sample['poses']
            predictions_array[j] = final_poses

        ATE, RE = compute_pose_error(sample['poses'], final_poses)
        errors[j] = ATE, RE

    mean_errors = errors.mean(0)
    std_errors = errors.std(0)
    error_names = ['ATE', 'RE']
    print('')
    print("Results")
    print("\t {:>10}, {:>10}".format(*error_names))
    print("mean \t {:10.4f}, {:10.4f}".format(*mean_errors))
    print("std \t {:10.4f}, {:10.4f}".format(*std_errors))

    if args.output_dir is not None:
        np.save(output_dir / 'ground_truth.npy', ground_truth_array)
        np.save(output_dir / 'predictions_perturbed.npy', predictions_array)
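# The attack above resizes the perturbation patch to each tracked bounding box
# with resize2d() before adding it to the images. That helper's implementation
# is not shown in this file; one plausible version, assuming the perturbation is
# a (C, H, W) float tensor, is sketched below using bilinear interpolation.
# 'resize2d_sketch' is a hypothetical name, not the repository's function.
import torch
import torch.nn.functional as F

def resize2d_sketch(patch, size):
    """Resize a (C, H, W) tensor to size = (new_h, new_w) with bilinear interpolation."""
    # F.interpolate expects a 4D (N, C, H, W) input, so add and then remove a batch dim
    return F.interpolate(patch.unsqueeze(0), size=size, mode='bilinear',
                         align_corners=False).squeeze(0)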