from __future__ import absolute_import, division, print_function

import os

import numpy as np
import torch
from torch import nn
from torch.utils.data import DataLoader

import networks
from datasets import KITTIOdomDataset, KITTIRAWDataset
from layers import disp_to_depth, transformation_from_parameters
from utils import readlines

# Fork-specific helpers used by the variants below (SequenceRawKittiDataset,
# predict_poses_from_imu2, fuse_poses, eval_pose, eval_depth, ScaleRecovery,
# and maskrcnn_benchmark's cfg) are assumed to be provided by this repo;
# sketches of the remaining undefined helpers appear further down.


# Variant 1: pose-only odometry evaluation (monodepth2-style).
def evaluate(opt):
    """Evaluate odometry on the KITTI dataset
    """
    assert os.path.isdir(opt.load_weights_folder), \
        "Cannot find a folder at {}".format(opt.load_weights_folder)

    assert opt.eval_split == "odom_9" or opt.eval_split == "odom_10", \
        "eval_split should be either odom_9 or odom_10"

    sequence_id = int(opt.eval_split.split("_")[1])

    filenames = readlines(
        os.path.join(os.path.dirname(__file__), "splits", "odom",
                     "test_files_{:02d}.txt".format(sequence_id)))

    dataset = KITTIOdomDataset(opt.data_path, filenames, opt.height, opt.width,
                               [0, 1], 4, is_train=False)
    dataloader = DataLoader(dataset, opt.batch_size, shuffle=False,
                            num_workers=opt.num_workers, pin_memory=True, drop_last=False)

    pose_encoder_path = os.path.join(opt.load_weights_folder, "pose_encoder.pth")
    pose_decoder_path = os.path.join(opt.load_weights_folder, "pose.pth")

    pose_encoder = networks.ResnetEncoder(opt.num_layers, False, 2)
    pose_encoder.load_state_dict(torch.load(pose_encoder_path))

    pose_decoder = networks.PoseDecoder(pose_encoder.num_ch_enc, 1, 2)
    pose_decoder.load_state_dict(torch.load(pose_decoder_path))

    pose_encoder.cuda()
    pose_encoder.eval()
    pose_decoder.cuda()
    pose_decoder.eval()

    pred_poses = []

    print("-> Computing pose predictions")

    opt.frame_ids = [0, 1]  # pose network only takes two frames as input

    with torch.no_grad():
        for inputs in dataloader:
            for key, ipt in inputs.items():
                inputs[key] = ipt.cuda()

            all_color_aug = torch.cat(
                [inputs[("color_aug", i, 0)] for i in opt.frame_ids], 1)

            features = [pose_encoder(all_color_aug)]
            axisangle, translation = pose_decoder(features)

            pred_poses.append(
                transformation_from_parameters(
                    axisangle[:, 0], translation[:, 0]).cpu().numpy())

    pred_poses = np.concatenate(pred_poses)

    gt_poses_path = os.path.join(opt.data_path, "poses", "{:02d}.txt".format(sequence_id))
    gt_global_poses = np.loadtxt(gt_poses_path).reshape(-1, 3, 4)
    gt_global_poses = np.concatenate(
        (gt_global_poses, np.zeros((gt_global_poses.shape[0], 1, 4))), 1)
    gt_global_poses[:, 3, 3] = 1
    gt_xyzs = gt_global_poses[:, :3, 3]

    gt_local_poses = []
    for i in range(1, len(gt_global_poses)):
        gt_local_poses.append(
            np.linalg.inv(np.dot(np.linalg.inv(gt_global_poses[i - 1]),
                                 gt_global_poses[i])))

    ates = []
    num_frames = gt_xyzs.shape[0]
    track_length = 5
    for i in range(0, num_frames - 1):
        local_xyzs = np.array(dump_xyz(pred_poses[i:i + track_length - 1]))
        gt_local_xyzs = np.array(dump_xyz(gt_local_poses[i:i + track_length - 1]))

        ates.append(compute_ate(gt_local_xyzs, local_xyzs))

    print("\n   Trajectory error: {:0.3f}, std: {:0.3f}\n".format(np.mean(ates), np.std(ates)))

    save_path = os.path.join(opt.load_weights_folder, "poses.npy")
    np.save(save_path, pred_poses)
    print("-> Predictions saved to", save_path)
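
# The ATE loop above assumes two helpers that live elsewhere in the repo. The
# sketches below follow the monodepth2 reference implementation (originally
# adapted from SfMLearner): dump_xyz takes (4, 4) relative transforms and
# returns the chained camera centres, compute_ate takes (N, 3) xyz arrays.
def dump_xyz(source_to_target_transformations):
    """Chain relative transforms and return the camera centre after each one."""
    xyzs = []
    cam_to_world = np.eye(4)
    xyzs.append(cam_to_world[:3, 3])
    for source_to_target_transformation in source_to_target_transformations:
        cam_to_world = np.dot(cam_to_world, source_to_target_transformation)
        xyzs.append(cam_to_world[:3, 3])
    return xyzs


def compute_ate(gtruth_xyz, pred_xyz_o):
    """Absolute trajectory error (RMSE) after aligning the first frames and
    optimising a scalar scale factor."""
    # Align the first matched frames; no rotational alignment is needed as all
    # snippets have been converted to the same coordinate system
    offset = gtruth_xyz[0] - pred_xyz_o[0]
    pred_xyz = pred_xyz_o + offset[None, :]

    # Optimise the scaling factor
    scale = np.sum(gtruth_xyz * pred_xyz) / np.sum(pred_xyz ** 2)
    alignment_error = pred_xyz * scale - gtruth_xyz
    rmse = np.sqrt(np.sum(alignment_error ** 2)) / gtruth_xyz.shape[0]
    return rmse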
# Variant 2: joint depth + pose evaluation, with optional IMU fusion.
def evaluate(opt):
    """Evaluate odometry on the KITTI dataset
    """
    assert os.path.isdir(opt.load_weights_folder), \
        "Cannot find a folder at {}".format(opt.load_weights_folder)

    # Depth
    encoder_path = os.path.join(opt.load_weights_folder, "encoder.pth")
    decoder_path = os.path.join(opt.load_weights_folder, "depth.pth")

    encoder_dict = torch.load(encoder_path)

    encoder = networks.ResnetEncoder(opt.num_layers, False)
    depth_decoder = networks.DepthDecoder(encoder.num_ch_enc)

    model_dict = encoder.state_dict()
    encoder.load_state_dict({k: v for k, v in encoder_dict.items() if k in model_dict})
    depth_decoder.load_state_dict(torch.load(decoder_path))

    encoder.cuda()
    encoder.eval()
    depth_decoder.cuda()
    depth_decoder.eval()

    # Pose
    pose_encoder_path = os.path.join(opt.load_weights_folder, "pose_encoder.pth")
    pose_decoder_path = os.path.join(opt.load_weights_folder, "pose.pth")

    pose_encoder = networks.ResnetEncoder(opt.num_layers, False, 2)
    pose_encoder.load_state_dict(torch.load(pose_encoder_path))

    pose_decoder = networks.PoseDecoder(pose_encoder.num_ch_enc, 1, 2)
    pose_decoder.load_state_dict(torch.load(pose_decoder_path))

    pose_encoder.cuda()
    pose_encoder.eval()
    pose_decoder.cuda()
    pose_decoder.eval()

    if opt.use_imu:
        imu_lstm = nn.LSTM(6, opt.lstm_hidden_size, opt.lstm_num_layers)
        imu_lstm.cuda()
        imu_lstm.eval()
        lstm_hs = None

        hidden_to_imu = torch.nn.Sequential(
            torch.nn.Linear(opt.lstm_hidden_size, 6),
        )
        hidden_to_imu.cuda()
        hidden_to_imu.eval()

        if opt.pose_fuse:
            pose_fuse_mlp = torch.nn.Sequential(
                torch.nn.Linear(24, opt.pose_mlp_hidden_size),
                torch.nn.Sigmoid(),
                torch.nn.Linear(opt.pose_mlp_hidden_size, 6),
            )
            pose_fuse_mlp.cuda()
            pose_fuse_mlp.eval()

    img_ext = '.png' if opt.png else '.jpg'

    pred_disps = []

    # splits_dir is needed by both branches below
    splits_dir = os.path.join(os.path.dirname(__file__), "splits")

    kitti_odom = opt.eval_split.startswith("odom")

    if kitti_odom:
        ids = [int(opt.eval_split.split("_")[1])]
    else:
        videonames = readlines(
            os.path.join(splits_dir, opt.eval_split, "test_video_list.txt"))
        ids = videonames

    for videoname in ids:
        if kitti_odom:
            filenames = readlines(
                os.path.join(splits_dir, opt.eval_split,
                             "test_files_{:02d}.txt".format(videoname)))
        else:
            filenames = readlines(
                os.path.join(splits_dir, opt.eval_split, "test_files.txt"))

        if kitti_odom:
            dataset = KITTIOdomDataset(opt.data_path, filenames, opt.height, opt.width,
                                       [0, 1], 4, is_train=False, use_imu=False)
            dataloader = DataLoader(dataset, opt.batch_size, shuffle=False,
                                    num_workers=opt.num_workers, pin_memory=True,
                                    drop_last=False)
        else:
            if opt.use_imu:
                dataset = SequenceRawKittiDataset(
                    opt.data_path, [videoname], filenames, 1,
                    imu_data_path=opt.imu_data_path, img_ext=img_ext,
                    frame_idxs=[0, 1], height=encoder_dict['height'],
                    width=encoder_dict['width'], num_scales=4, is_train=False)
                dataloader = DataLoader(dataset, shuffle=False, num_workers=0)
            else:
                filenames = list(filter(lambda f: f.startswith(videoname), filenames))
                dataset = KITTIRAWDataset(opt.data_path, filenames, opt.height, opt.width,
                                          [0, 1], 4, is_train=False, use_imu=False)
                dataloader = DataLoader(dataset, opt.batch_size, shuffle=False,
                                        num_workers=opt.num_workers, pin_memory=True,
                                        drop_last=False)

        # pred_poses = [np.eye(4).reshape(1, 4, 4)]
        pred_poses = []
        imu_scale_factors = []

        print("EVALUATING ", opt.model_name)
        print("-> Computing pose predictions")

        opt.frame_ids = [0, 1]  # pose network only takes two frames as input

        with torch.no_grad():
            for inputs in dataloader:
                for key, ipt in inputs.items():
                    inputs[key] = ipt.cuda()
                    if opt.use_imu:
                        # the sequence dataset adds a leading batch dimension of 1
                        inputs[key] = inputs[key].squeeze(0)

                input_color = inputs[("color", 0, 0)]
                feature = encoder(input_color)
                output = depth_decoder(feature)

                pred_disp, _ = disp_to_depth(output[("disp", 0)],
                                             opt.min_depth, opt.max_depth)
                pred_disp = pred_disp.cpu()[:, 0].numpy()
                pred_disps.append(pred_disp)

                all_color_aug = torch.cat(
                    [inputs[("color_aug", i, 0)] for i in sorted(opt.frame_ids)], 1)

                features = [pose_encoder(all_color_aug)]
                axisangle, translation = pose_decoder(features)

                outputs = {}
                outputs[("cam_T_cam", 0, 1)] = transformation_from_parameters(
                    axisangle[:, 0], translation[:, 0], invert=False)
                T = outputs[("cam_T_cam", 0, 1)]

                if opt.use_imu:
                    outputs = predict_poses_from_imu2(opt, inputs, imu_lstm,
                                                      lstm_hs, hidden_to_imu)
                    T_better = outputs[("cam_T_cam_imu", 0, 1)]
                    if opt.pose_fuse:
                        fuse_poses(opt, outputs, pose_fuse_mlp)
                        T_better = outputs[("cam_T_cam_fuse", 0, 1)]

                    # least-squares scale between the network and IMU translations
                    R, t = rot_translation_from_transformation(T)
                    Rb, tb = rot_translation_from_transformation(T_better)
                    imu_scale_factor = torch.sum(tb * t) / torch.sum(t ** 2)
                    imu_scale_factors.append(imu_scale_factor.cpu().numpy())

                    T = T_better

                pred_poses.append(T.cpu().numpy())

        pred_poses = np.concatenate(pred_poses)

        if opt.eval_split.startswith("odom"):
            gt_poses_path = os.path.join(opt.data_path, "poses",
                                         "{:02d}.txt".format(videoname))
        else:
            gt_poses_path = os.path.join(opt.data_path, videoname, "oxts", "poses.txt")

        eval_pose(opt, pred_poses, gt_poses_path)

    scale_factors = {}
    if imu_scale_factors:  # note: holds the factors of the last sequence only
        scale_factors["IMU factor"] = imu_scale_factors

    pred_disps = np.concatenate(pred_disps)
    if not kitti_odom:
        eval_depth(opt, pred_disps, scale_factors)
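
# `rot_translation_from_transformation` is not defined in this file. A minimal
# sketch consistent with its use above, where T is a batch of 4x4 transforms
# and only the translations feed the least-squares scale factor; the real
# helper's signature may differ.
def rot_translation_from_transformation(T):
    """Split batched 4x4 transforms into rotations (..., 3, 3) and translations (..., 3)."""
    return T[..., :3, :3], T[..., :3, 3]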
# Variant 3: pose evaluation with a Mask R-CNN backbone as pose encoder.
def evaluate(opt):
    """Evaluate odometry on the KITTI dataset
    """
    assert os.path.isdir(opt.load_weights_folder), \
        "Cannot find a folder at {}".format(opt.load_weights_folder)

    sequence_id = int(opt.eval_split.split("_")[1])
    opt.batch_size = 1

    filenames = readlines(
        os.path.join(os.path.dirname(__file__), "splits", "odom",
                     "test_files_{:02d}.txt".format(sequence_id)))

    dataset = KITTIOdomDataset(opt.data_path, filenames, opt.height, opt.width,
                               [0, -1, 1], 4, 1, is_train=False, img_ext='.png')
    dataloader = DataLoader(dataset, opt.batch_size, shuffle=False,
                            num_workers=opt.num_workers, pin_memory=True, drop_last=False)

    # pose_encoder_path = os.path.join(opt.load_weights_folder, "pose_encoder.pth")
    pose_decoder_path = os.path.join(opt.load_weights_folder, "pose.pth")

    config_file = "./configs/e2e_mask_rcnn_R_50_FPN_1x.yaml"
    cfg.merge_from_file(config_file)
    cfg.freeze()
    maskrcnn_path = "./e2e_mask_rcnn_R_50_FPN_1x.pth"
    pose_encoder = networks.ResnetEncoder(cfg, maskrcnn_path)
    # pose_encoder = networks.ResnetEncoder(opt.num_layers, False, 2)
    # pose_encoder.load_state_dict(torch.load(pose_encoder_path))

    pose_decoder = networks.PoseDecoder(len(opt.frame_ids))
    pose_decoder.load_state_dict(torch.load(pose_decoder_path))

    pose_encoder.cuda()
    pose_encoder.eval()
    pose_decoder.cuda()
    pose_decoder.eval()

    pred_poses = []

    print("-> Computing pose predictions")

    # opt.frame_ids = [0, 1]  # pose network only takes two frames as input

    ii = 0
    with torch.no_grad():
        for inputs in dataloader:
            for key, ipt in inputs.items():
                if isinstance(ipt, torch.Tensor):
                    inputs[key] = ipt.cuda()

            # stack the frames along the batch dimension, run the encoder once,
            # then split the features back out per frame id
            all_color_aug = torch.cat(
                [inputs[("color_aug", i, 0)] for i in opt.frame_ids])

            all_features = pose_encoder(all_color_aug)
            all_features = [torch.split(f, opt.batch_size) for f in all_features]

            features = {}
            for i, k in enumerate(opt.frame_ids):
                features[k] = [f[i] for f in all_features]

            pose_inputs = [features[i] for i in opt.frame_ids if i != "s"]

            axisangle, translation = pose_decoder(pose_inputs)

            if ii == 0:
                # the first batch additionally contributes the inverted pose of
                # the leading frame pair
                pred_poses.append(
                    transformation_from_parameters(
                        axisangle[:, 0], translation[:, 0], True).cpu().numpy())
            pred_poses.append(
                transformation_from_parameters(
                    axisangle[:, 1], translation[:, 1]).cpu().numpy())

            if ii % opt.log_frequency == 0:
                print("{:04d}-th image processing".format(ii))
            ii += 1

    pred_poses = np.concatenate(pred_poses)

    gt_poses_path = os.path.join("/usr/stud/linp/storage/user/linp/results/kitti",
                                 "poses", "{:02d}.txt".format(sequence_id))
    gt_global_poses = np.loadtxt(gt_poses_path).reshape((-1, 3, 4))
    gt_global_poses = np.concatenate(
        (gt_global_poses, np.zeros((gt_global_poses.shape[0], 1, 4))), 1)
    gt_global_poses[:, 3, 3] = 1
    gt_xyzs = gt_global_poses[:, :3, 3]

    gt_local_poses = []
    for i in range(1, len(gt_global_poses)):
        gt_local_poses.append(
            np.linalg.inv(np.dot(np.linalg.inv(gt_global_poses[i - 1]),
                                 gt_global_poses[i])))

    ates = []
    num_frames = gt_xyzs.shape[0]
    track_length = 3
    for i in range(0, num_frames - 1):
        local_xyzs = np.array(dump_xyz(pred_poses[i:i + track_length - 1]))
        gt_local_xyzs = np.array(dump_xyz(gt_local_poses[i:i + track_length - 1]))

        ates.append(compute_ate(gt_local_xyzs, local_xyzs))

    # alternative alignment, shifted by one frame:
    # for i in range(0, num_frames - 2):
    #     local_xyzs = np.array(dump_xyz(pred_poses[i:i + track_length - 1]))
    #     gt_local_xyzs = np.array(dump_xyz(gt_local_poses[i + 1:i + track_length]))
    #     ates.append(compute_ate(gt_local_xyzs, local_xyzs))

    print("\n   Trajectory error: {:0.3f}, std: {:0.3f}\n".format(
        np.mean(ates), np.std(ates)))

    save_path = os.path.join(opt.load_weights_folder, "poses.npy")
    np.save(save_path, pred_poses)
    print("-> Predictions saved to", save_path)
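
# Usage sketch for the saved predictions: `load_trajectory` is an illustrative
# helper (not part of the original code) that chains the frame-to-frame
# transforms written to poses.npy back into global camera centres.
def load_trajectory(save_path):
    pred_poses = np.load(save_path)  # (N, 4, 4) relative transforms
    cam_to_world = np.eye(4)
    trajectory = [cam_to_world[:3, 3].copy()]
    for T in pred_poses:
        cam_to_world = cam_to_world.dot(T)
        trajectory.append(cam_to_world[:3, 3].copy())
    return np.stack(trajectory)  # (N + 1, 3) camera centres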
# Variant 4: pose evaluation with ground-plane scale recovery from depth.
def evaluate(opt):
    """Evaluate odometry on the KITTI dataset
    """
    MIN_DEPTH = 1e-3
    MAX_DEPTH = 80

    K = np.array([[0.58, 0, 0.5, 0],
                  [0, 1.92, 0.5, 0],
                  [0, 0, 1, 0],
                  [0, 0, 0, 1]], dtype=np.float32)

    assert os.path.isdir(opt.load_weights_folder), \
        "Cannot find a folder at {}".format(opt.load_weights_folder)

    assert opt.eval_split in ("odom_0", "odom_9", "odom_10"), \
        "eval_split should be odom_0, odom_9 or odom_10"

    sequence_id = int(opt.eval_split.split("_")[1])

    filenames = readlines(
        os.path.join(os.path.dirname(__file__), "splits", "odom",
                     "test_files_{:02d}.txt".format(sequence_id)))

    dataset = KITTIOdomDataset(opt.data_path, filenames, opt.height, opt.width,
                               [0, 1], 4, is_train=False)
    dataloader = DataLoader(dataset, opt.batch_size, shuffle=False,
                            num_workers=opt.num_workers, pin_memory=True, drop_last=False)

    pose_encoder_path = os.path.join(opt.load_weights_folder, "pose_encoder.pth")
    pose_decoder_path = os.path.join(opt.load_weights_folder, "pose.pth")
    depth_encoder_path = os.path.join(opt.load_weights_folder, "encoder.pth")
    depth_decoder_path = os.path.join(opt.load_weights_folder, "depth.pth")

    pose_encoder = networks.ResnetEncoder(opt.num_layers, False, 2)
    pose_encoder.load_state_dict(torch.load(pose_encoder_path))

    depth_encoder = networks.ResnetEncoder(opt.num_layers, False)
    depth_encoder_dict = torch.load(depth_encoder_path)
    model_dict = depth_encoder.state_dict()
    depth_encoder.load_state_dict(
        {k: v for k, v in depth_encoder_dict.items() if k in model_dict})

    pose_decoder = networks.PoseDecoder(pose_encoder.num_ch_enc, 1, 2)
    pose_decoder.load_state_dict(torch.load(pose_decoder_path))

    depth_decoder = networks.DepthDecoder(depth_encoder.num_ch_enc)
    depth_decoder.load_state_dict(torch.load(depth_decoder_path))

    pose_encoder.cuda()
    pose_encoder.eval()
    pose_decoder.cuda()
    pose_decoder.eval()
    depth_encoder.cuda()
    depth_encoder.eval()
    depth_decoder.cuda()
    depth_decoder.eval()

    pred_poses = []
    pred_disps = []

    print("-> Computing pose predictions")

    opt.frame_ids = [0, 1]  # pose network only takes two frames as input

    with torch.no_grad():
        for inputs in dataloader:
            input_color = inputs[("color", 0, 0)].cuda()
            depth_output = depth_decoder(depth_encoder(input_color))
            pred_disp, _ = disp_to_depth(depth_output[("disp", 0)],
                                         opt.min_depth, opt.max_depth)
            pred_disp = pred_disp.cpu()[:, 0].numpy()
            pred_disps.append(pred_disp)

            for key, ipt in inputs.items():
                inputs[key] = ipt.cuda()

            all_color_aug = torch.cat(
                [inputs[("color_aug", i, 0)] for i in opt.frame_ids], 1)

            features = [pose_encoder(all_color_aug)]
            axisangle, translation = pose_decoder(features)

            pred_poses.append(
                transformation_from_parameters(
                    axisangle[:, 0], translation[:, 0]).cpu().numpy())

    pred_poses = np.concatenate(pred_poses)
    pred_disps = np.concatenate(pred_disps)

    pred_poses_scaled = []
    ratios_d = []
    gt_norms_div = []
    gt_norms = []
    pred_norms = []
    poses_pred = []

    # build the scale-recovery module once; it is reused for every frame
    scale_recovery = ScaleRecovery(1, 192, 640, K).cuda()

    for i in range(pred_poses.shape[0]):
        pred_pose = pred_poses[i]
        pred_disp = pred_disps[i + 1]
        pred_depth = 1 / pred_disp

        pred_depth = torch.from_numpy(pred_depth).unsqueeze(0).cuda()
        ratio = scale_recovery(pred_depth).cpu().item()

        # scale the translation while keeping the rotation, so the scaled poses
        # remain 4x4 transforms that dump_xyz can chain
        pred_pose_scaled = pred_pose.copy()
        pred_pose_scaled[:3, 3] *= ratio
        poses_pred.append(pred_pose[:3, 3])
        pred_poses_scaled.append(pred_pose_scaled)
        ratios_d.append(ratio)

    gt_poses_path = os.path.join(opt.data_path, "poses", "{:02d}.txt".format(sequence_id))
    gt_global_poses = np.loadtxt(gt_poses_path).reshape(-1, 3, 4)
    gt_global_poses = np.concatenate(
        (gt_global_poses, np.zeros((gt_global_poses.shape[0], 1, 4))), 1)
    gt_global_poses[:, 3, 3] = 1
    gt_xyzs = gt_global_poses[:, :3, 3]

    gt_local_poses = []
    for i in range(1, len(gt_global_poses)):
        gt_local_poses.append(
            np.linalg.inv(np.dot(np.linalg.inv(gt_global_poses[i - 1]),
                                 gt_global_poses[i])))

    ates = []
    num_frames = gt_xyzs.shape[0]
    track_length = 5
    for i in range(0, num_frames - 1):
        local_xyzs = np.array(dump_xyz(pred_poses_scaled[i:i + track_length - 1]))
        gt_local_xyzs = np.array(dump_xyz(gt_local_poses[i:i + track_length - 1]))

        gt_norm_div = np.linalg.norm(gt_local_xyzs) / np.linalg.norm(local_xyzs)

        ates.append(compute_ate(gt_local_xyzs, local_xyzs))
        gt_norms_div.append(gt_norm_div)
        gt_norms.append(np.linalg.norm(gt_local_xyzs))
        pred_norms.append(np.linalg.norm(local_xyzs))

    print("\n   Trajectory error: {:0.3f}, std: {:0.3f}\n".format(
        np.mean(ates), np.std(ates)))

    save_path = os.path.join(os.path.dirname(__file__),
                             "poses_scaled{:02d}.npy".format(sequence_id))
    np.save(save_path, pred_poses_scaled)
    save_path = os.path.join(os.path.dirname(__file__),
                             "poses_gt{:02d}.npy".format(sequence_id))
    np.save(save_path, gt_xyzs)
    save_path = os.path.join(os.path.dirname(__file__),
                             "poses_pred{:02d}.npy".format(sequence_id))
    np.save(save_path, poses_pred)
    save_path = os.path.join(os.path.dirname(__file__),
                             "gt_norms{:02d}.npy".format(sequence_id))
    np.save(save_path, gt_norms)
    save_path = os.path.join(os.path.dirname(__file__),
                             "gt_norms_div{:02d}.npy".format(sequence_id))
    np.save(save_path, gt_norms_div)
    save_path = os.path.join(os.path.dirname(__file__),
                             "ratios_d{:02d}.npy".format(sequence_id))
    np.save(save_path, ratios_d)
    save_path = os.path.join(os.path.dirname(__file__),
                             "pred_norms{:02d}.npy".format(sequence_id))
    np.save(save_path, pred_norms)
    print("-> Predictions saved to", save_path)
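
# `ScaleRecovery` is provided elsewhere in the repo (a DNet-style module that
# densely fits the ground plane). The sketch below only illustrates the idea
# under simplifying assumptions; `CAM_HEIGHT` and `approx_scale_from_ground`
# are illustrative names, not part of the original code.
CAM_HEIGHT = 1.65  # approximate camera mounting height on the KITTI car, in metres


def approx_scale_from_ground(depth, K_normalized):
    """Rough scale factor for one (H, W) depth map: back-project the lower
    image region, take the median camera height above the (assumed) road
    surface, and compare it with the known mounting height."""
    h, w = depth.shape
    fy = K_normalized[1, 1] * h  # intrinsics are normalised, as in K above
    cy = K_normalized[1, 2] * h
    v = np.arange(h, dtype=np.float32)[:, None]
    # y coordinate in camera space; +y points towards the ground
    y = (v - cy) / fy * depth
    ground_y = y[int(0.8 * h):, :]  # assume the bottom rows see mostly road
    return CAM_HEIGHT / np.median(ground_y)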