def __init__(self, scene, data_path, train, transform=None,
             target_transform=None, mode=0, seed=7, real=False,
             skip_images=False, vo_lib='orbslam'):
    """
    :param scene: scene name ['chess', 'pumpkin', ...]
    :param data_path: root 7Scenes data directory.
        Usually '../data/deepslam_data/7Scenes'
    :param train: if True, return the training images. If False, return the
        testing images
    :param transform: transform to apply to the images
    :param target_transform: transform to apply to the poses
    :param mode: 0: just color image, 1: just depth image, 2: [c_img, d_img]
    :param seed: seed for the numpy RNG
    :param real: if True, load poses from SLAM / integration of VO
    :param skip_images: if True, skip loading images and return None instead
    :param vo_lib: library whose VO poses to load when real=True
        (e.g. 'orbslam', 'libviso2' or 'dso')
    """
    self.mode = mode
    self.transform = transform
    self.target_transform = target_transform
    self.skip_images = skip_images
    np.random.seed(seed)

    # directories
    base_dir = osp.join(osp.expanduser(data_path), scene)
    data_dir = osp.join('..', 'data', '7Scenes', scene)

    # decide which sequences to use
    if train:
        split_file = osp.join(base_dir, 'TrainSplit.txt')
    else:
        split_file = osp.join(base_dir, 'TestSplit.txt')
    with open(split_file, 'r') as f:
        seqs = [int(l.split('sequence')[-1]) for l in f
                if not l.startswith('#')]

    # read poses and collect image names
    self.c_imgs = []
    self.d_imgs = []
    self.gt_idx = np.empty((0,), dtype=int)  # np.int is removed in NumPy >= 1.24
    ps = {}
    vo_stats = {}
    gt_offset = int(0)
    for seq in seqs:
        seq_dir = osp.join(base_dir, 'seq-{:02d}'.format(seq))
        seq_data_dir = osp.join(data_dir, 'seq-{:02d}'.format(seq))
        p_filenames = [n for n in os.listdir(seq_dir) if n.find('pose') >= 0]
        if real:
            pose_file = osp.join(data_dir, '{:s}_poses'.format(vo_lib),
                                 'seq-{:02d}.txt'.format(seq))
            pss = np.loadtxt(pose_file)
            frame_idx = pss[:, 0].astype(int)
            if vo_lib == 'libviso2':
                frame_idx -= 1
            ps[seq] = pss[:, 1:13]
            vo_stats_filename = osp.join(seq_data_dir,
                                         '{:s}_vo_stats.pkl'.format(vo_lib))
            with open(vo_stats_filename, 'rb') as f:
                vo_stats[seq] = pickle.load(f)
            # # uncomment to check that PGO does not need aligned VO!
            # vo_stats[seq]['R'] = np.eye(3)
            # vo_stats[seq]['t'] = np.zeros(3)
        else:
            frame_idx = np.arange(len(p_filenames), dtype=int)
            pss = [np.loadtxt(osp.join(seq_dir, 'frame-{:06d}.pose.txt'.
                                       format(i))).flatten()[:12]
                   for i in frame_idx]
            ps[seq] = np.asarray(pss)
            vo_stats[seq] = {'R': np.eye(3), 't': np.zeros(3), 's': 1}
        self.gt_idx = np.hstack((self.gt_idx, gt_offset + frame_idx))
        gt_offset += len(p_filenames)
        c_imgs = [osp.join(seq_dir, 'frame-{:06d}.color.png'.format(i))
                  for i in frame_idx]
        d_imgs = [osp.join(seq_dir, 'frame-{:06d}.depth.png'.format(i))
                  for i in frame_idx]
        self.c_imgs.extend(c_imgs)
        self.d_imgs.extend(d_imgs)

    pose_stats_filename = osp.join(data_dir, 'pose_stats.txt')
    if train and not real:
        # optionally, use the ps dictionary to calculate the stats
        mean_t = np.zeros(3)
        std_t = np.ones(3)
        np.savetxt(pose_stats_filename, np.vstack((mean_t, std_t)),
                   fmt='%8.7f')
    else:
        mean_t, std_t = np.loadtxt(pose_stats_filename)

    # convert pose to translation + log quaternion
    self.poses = np.empty((0, 6))
    for seq in seqs:
        pss = process_poses(poses_in=ps[seq], mean_t=mean_t, std_t=std_t,
                            align_R=vo_stats[seq]['R'],
                            align_t=vo_stats[seq]['t'],
                            align_s=vo_stats[seq]['s'])
        self.poses = np.vstack((self.poses, pss))
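# The loop above defers to process_poses() for the 6-D [t, log q]
# representation. A minimal sketch of that conversion for a single flattened
# 3x4 pose row, assuming transforms3d for the rotation-matrix-to-quaternion
# step (pose_to_tq is a hypothetical helper, not the repo's actual
# process_poses implementation):
import numpy as np
import transforms3d.quaternions as txq

def pose_to_tq(pose_row, mean_t, std_t):
    """12-D flattened 3x4 pose -> normalized [t, log q] (6-D)."""
    P = pose_row.reshape(3, 4)
    t = (P[:, 3] - mean_t) / std_t   # normalized translation
    q = txq.mat2quat(P[:, :3])       # unit quaternion (w, x, y, z)
    q *= np.sign(q[0])               # constrain to the hemisphere w >= 0
    # log map of a unit quaternion: arccos(w) * v / ||v||
    n = np.linalg.norm(q[1:])
    log_q = np.zeros(3) if n < 1e-12 else \
        np.arccos(np.clip(q[0], -1, 1)) * q[1:] / n
    return np.hstack((t, log_q))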
    # (earlier branches of this if/elif over args.vo_lib are not shown)
    vo_stats_filename = osp.join(seq_dir,
                                 '{:s}_vo_stats.pkl'.format(args.vo_lib))
else:
    raise NotImplementedError

vo_stats = {'R': align_R, 't': align_t, 's': align_s}
with open(vo_stats_filename, 'wb') as f:
    pickle.dump(vo_stats, f)
print('{:s} saved.'.format(vo_stats_filename))

# apply alignment
pose_stats_filename = osp.join(data_dir, args.scene, 'pose_stats.txt')
mean_t, std_t = np.loadtxt(pose_stats_filename)
real_poses = process_poses(real_poses, mean_t=mean_t, std_t=std_t,
                           align_R=align_R, align_t=align_t,
                           align_s=align_s)
gt_poses = process_poses(gt_poses, mean_t=mean_t, std_t=std_t,
                         align_R=np.eye(3), align_t=np.zeros(3),
                         align_s=1)

# plot aligned VO (red) against GT (green)
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
for r, g in zip(real_poses[::args.subsample], gt_poses[::args.subsample]):
    ax.scatter(r[0], r[1], zs=r[2], c='r')
    ax.scatter(g[0], g[1], zs=g[2], c='g')
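# align_R, align_t and align_s are computed upstream of this excerpt. For
# reference, a closed-form similarity alignment between a VO trajectory and
# the GT trajectory can be obtained with Umeyama's method; the sketch below
# is only an assumption about how such stats could be produced
# (umeyama_alignment is a hypothetical helper; the script's actual
# alignment code is not shown here):
import numpy as np

def umeyama_alignment(x, y):
    """Similarity (R, t, s) such that y ~ s * R @ x + t, in the
    least-squares sense, for 3xN point sets x (VO) and y (GT)."""
    mu_x, mu_y = x.mean(axis=1), y.mean(axis=1)
    xc, yc = x - mu_x[:, None], y - mu_y[:, None]
    cov = yc @ xc.T / x.shape[1]   # cross-covariance
    U, D, Vt = np.linalg.svd(cov)
    S = np.eye(3)
    if np.linalg.det(U) * np.linalg.det(Vt) < 0:
        S[2, 2] = -1               # guard against reflections
    R = U @ S @ Vt
    s = np.trace(np.diag(D) @ S) / xc.var(axis=1).sum()
    t = mu_y - s * R @ mu_x
    return R, t, s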
def __init__(self, scene, data_path, train, transform=None,
             target_transform=None, real=False, skip_images=False, seed=7,
             undistort=False, vo_lib='stereo'):
    """
    :param scene: e.g. 'full' or 'loop'. collection of sequences.
    :param data_path: Root RobotCar data directory.
        Usually '../data/deepslam_data/RobotCar'
    :param train: flag for training / validation
    :param transform: Transform to be applied to images
    :param target_transform: Transform to be applied to poses
    :param real: if True, load poses from SLAM / integration of VO
    :param skip_images: return None images, only poses
    :param seed: random seed
    :param undistort: whether to undistort images (slow)
    :param vo_lib: Library to use for VO ('stereo' or 'gps')
        ('gps' is a misnomer in this code - it just loads the position
        information from GPS)
    """
    np.random.seed(seed)
    self.transform = transform
    self.target_transform = target_transform
    self.skip_images = skip_images
    self.undistort = undistort
    base_dir = osp.expanduser(osp.join(data_path, scene))
    data_dir = osp.join('..', 'data', 'RobotCar', scene)

    # decide which sequences to use
    if train:
        split_filename = osp.join(base_dir, 'train_split.txt')
    else:
        split_filename = osp.join(base_dir, 'test_split.txt')
    with open(split_filename, 'r') as f:
        seqs = [l.rstrip() for l in f if not l.startswith('#')]

    ps = {}
    ts = {}
    vo_stats = {}
    self.imgs = []
    for seq in seqs:
        seq_dir = osp.join(base_dir, seq)
        seq_data_dir = osp.join(data_dir, seq)

        # read the image timestamps
        ts_filename = osp.join(seq_dir, 'stereo.timestamps')
        with open(ts_filename, 'r') as f:
            ts[seq] = [int(l.rstrip().split(' ')[0]) for l in f]

        if real:  # poses from integration of VOs
            if vo_lib == 'stereo':
                vo_filename = osp.join(seq_dir, 'vo', 'vo.csv')
                p = np.asarray(interpolate_vo_poses(vo_filename, ts[seq],
                                                    ts[seq][0]))
            elif vo_lib == 'gps':
                vo_filename = osp.join(seq_dir, 'gps', 'gps_ins.csv')
                p = np.asarray(interpolate_ins_poses(vo_filename, ts[seq],
                                                     ts[seq][0]))
            else:
                raise NotImplementedError
            vo_stats_filename = osp.join(seq_data_dir,
                                         '{:s}_vo_stats.pkl'.format(vo_lib))
            with open(vo_stats_filename, 'rb') as f:  # binary mode for pickle
                vo_stats[seq] = pickle.load(f)
            ps[seq] = np.reshape(p[:, :3, :], (len(p), -1))
        else:  # GT poses
            pose_filename = osp.join(seq_dir, 'gps', 'ins.csv')
            p = np.asarray(interpolate_ins_poses(pose_filename, ts[seq],
                                                 ts[seq][0]))
            ps[seq] = np.reshape(p[:, :3, :], (len(p), -1))
            vo_stats[seq] = {'R': np.eye(3), 't': np.zeros(3), 's': 1}

        self.imgs.extend([osp.join(seq_dir, 'stereo', 'centre',
                                   '{:d}.png'.format(t)) for t in ts[seq]])

    # read / save pose normalization information
    poses = np.empty((0, 12))
    for p in ps.values():
        poses = np.vstack((poses, p))
    pose_stats_filename = osp.join(data_dir, 'pose_stats.txt')
    if train and not real:
        mean_t = np.mean(poses[:, [3, 7, 11]], axis=0)
        std_t = np.std(poses[:, [3, 7, 11]], axis=0)
        np.savetxt(pose_stats_filename, np.vstack((mean_t, std_t)),
                   fmt='%8.7f')
    else:
        mean_t, std_t = np.loadtxt(pose_stats_filename)

    # convert the pose to translation + log quaternion, align, normalize
    self.poses = np.empty((0, 6))
    for seq in seqs:
        pss = process_poses(poses_in=ps[seq], mean_t=mean_t, std_t=std_t,
                            align_R=vo_stats[seq]['R'],
                            align_t=vo_stats[seq]['t'],
                            align_s=vo_stats[seq]['s'])
        self.poses = np.vstack((self.poses, pss))
    self.gt_idx = np.arange(len(self.poses))

    # camera model and image loader
    camera_model = CameraModel(osp.join('..', 'data',
                                        'robotcar_camera_models'),
                               osp.join('stereo', 'centre'))
    self.im_loader = partial(load_image, model=camera_model)
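# Each interpolated pose above is kept as its top 3x4 block flattened
# row-major, so a 12-vector reads [r00 r01 r02 tx | r10 r11 r12 ty |
# r20 r21 r22 tz] and the translation sits at columns 3, 7 and 11 - which is
# why the pose stats index poses[:, [3, 7, 11]]. A quick self-contained check:
import numpy as np

T = np.eye(4)
T[:3, 3] = [1.0, 2.0, 3.0]      # a known translation
flat = T[:3, :].reshape(-1)     # same layout as the rows of ps[seq]
assert np.allclose(flat[[3, 7, 11]], [1.0, 2.0, 3.0])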
def __init__(self, scene, data_path, train, transform=None,
             target_transform=None, real=False, skip_images=False, seed=7,
             undistort=False, vo_lib='stereo', data_dir=None,
             unsupervise=False, config=None):
    """
    :param scene: e.g. 'full' or 'loop'. collection of sequences.
    :param data_path: Root RobotCar data directory.
        Usually '../data/deepslam_data/RobotCar'
    :param train: flag for training / validation
    :param transform: Transform to be applied to images
    :param target_transform: Transform to be applied to poses
    :param real: if True, load VO poses instead of ground-truth poses
    :param skip_images: return None images, only poses
    :param seed: random seed
    :param undistort: whether to undistort images (slow)
    :param vo_lib: Library to use for VO ('stereo' or 'gps')
        ('gps' is a misnomer in this code - it just loads the position
        information from GPS)
    :param data_dir: directory containing pose_stats.txt, used to normalize
        the poses
    :param unsupervise: if True, load the unsupervised training split
    :param config: configuration object providing the mask-sampling options
        (mask_sampling, uniform_sampling, mu_mask_name, sampling_threshold)
    """
    np.random.seed(seed)
    self.train = train
    self.transform = transform
    self.target_transform = target_transform
    self.skip_images = skip_images
    self.undistort = undistort
    base_dir = osp.expanduser(osp.join(data_path, scene))
    self.config = config
    # data_dir = osp.join('..', 'data', 'RobotCar', scene)

    # decide which sequences to use
    if train:
        if unsupervise:
            split_filename = osp.join(base_dir,
                                      'unsupervised_train_split.txt')
        else:
            split_filename = osp.join(base_dir, 'train_split.txt')
    else:
        split_filename = osp.join(base_dir, 'test_split.txt')
    with open(split_filename, 'r') as f:
        seqs = [l.rstrip() for l in f if not l.startswith('#')]
    print('Extracting data from {}'.format(seqs))

    ps = {}
    ts = {}
    vo_stats = {}
    self.imgs = []
    self.masks = []
    self.mask_sampling = self.config.mask_sampling
    self.uniform_sampling = self.config.uniform_sampling
    # if 'mask_image' in self.config:
    #     self.mask_image = self.config.mask_image
    # else:
    #     self.mask_image = False
    self.mask_image = False
    if self.mask_sampling:
        # per-pixel mean activation map, loaded HxWx1 and moved to 1xHxW
        muimg = read_grayscale_image(osp.join(base_dir,
                                              self.config.mu_mask_name))
        self.muimg = torch.tensor(muimg.transpose(2, 0, 1)).type(
            torch.FloatTensor)
        if self.uniform_sampling:
            self.muimg.fill_(self.muimg.mean())
        # Bernoulli-style spread of the mean map
        self.sigmaimg = self.muimg * (1 - self.muimg)
        if not self.train:
            # norm is scipy.stats.norm: per-pixel probability that a sample
            # around muimg falls below the sampling threshold
            self.prob_mask = torch.FloatTensor(
                norm.cdf((self.config.sampling_threshold - self.muimg) /
                         self.sigmaimg))

    for seq in seqs:
        seq_dir = osp.join(base_dir, seq)
        seq_data_dir = osp.join(data_dir, seq)

        # read the image timestamps
        ts_filename = osp.join(seq_dir, 'stereo.timestamps')
        with open(ts_filename, 'r') as f:
            ts[seq] = [int(l.rstrip().split(' ')[0]) for l in f]

        if real:  # poses from integration of VOs
            if vo_lib == 'stereo':
                vo_filename = osp.join(seq_dir, 'vo', 'vo.csv')
                p = np.asarray(interpolate_vo_poses(vo_filename, ts[seq],
                                                    ts[seq][0]))
            elif vo_lib == 'gps':
                vo_filename = osp.join(seq_dir, 'gps', 'gps_ins.csv')
                p = np.asarray(interpolate_ins_poses(vo_filename, ts[seq],
                                                     ts[seq][0]))
            else:
                raise NotImplementedError
            vo_stats_filename = osp.join(seq_data_dir,
                                         '{:s}_vo_stats.pkl'.format(vo_lib))
            with open(vo_stats_filename, 'rb') as f:  # binary mode for pickle
                vo_stats[seq] = pickle.load(f)
            ps[seq] = np.reshape(p[:, :3, :], (len(p), -1))
        else:  # GT poses
            pose_filename = osp.join(seq_dir, 'gps', 'ins.csv')
            p = np.asarray(interpolate_ins_poses(pose_filename, ts[seq],
                                                 ts[seq][0]))
            ps[seq] = np.reshape(p[:, :3, :], (len(p), -1))
            vo_stats[seq] = {'R': np.eye(3), 't': np.zeros(3), 's': 1}

        self.imgs.extend([osp.join(seq_dir, 'stereo', 'centre',
                                   '{:d}.png'.format(t)) for t in ts[seq]])
        if self.mask_image:
            self.masks.extend([osp.join(seq_dir, 'mask',
                                        '{:d}.png'.format(t))
                               for t in ts[seq]])

    # read / save pose normalization information
    poses = np.empty((0, 12))
    for p in ps.values():
        poses = np.vstack((poses, p))
    pose_stats_filename = osp.join(data_dir, 'pose_stats.txt')
    if train and not real:
        mean_t = np.mean(poses[:, [3, 7, 11]], axis=0)
        std_t = np.std(poses[:, [3, 7, 11]], axis=0)
        np.savetxt(pose_stats_filename, np.vstack((mean_t, std_t)),
                   fmt='%8.7f')
    else:
        mean_t, std_t = np.loadtxt(pose_stats_filename)

    # convert the pose to translation + log quaternion, align, normalize
    self.poses = np.empty((0, 6))
    for seq in seqs:
        pss = process_poses(poses_in=ps[seq], mean_t=mean_t, std_t=std_t,
                            align_R=vo_stats[seq]['R'],
                            align_t=vo_stats[seq]['t'],
                            align_s=vo_stats[seq]['s'])
        self.poses = np.vstack((self.poses, pss))
    self.gt_idx = np.arange(len(self.poses))

    # camera model and image loader
    camera_model = CameraModel(osp.join('..', 'data',
                                        'robotcar_camera_models'),
                               osp.join('stereo', 'centre'))
    self.im_loader = partial(load_image, model=camera_model)
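# At test time the loader precomputes prob_mask = Phi((threshold - mu) /
# sigma) per pixel: the probability that a value spread around the pixel's
# mean activation mu with sigma = mu * (1 - mu) falls below the sampling
# threshold. A sketch of turning that probability map into a binary pixel
# mask (the Bernoulli draw and the keep/drop polarity are assumptions; the
# dataset's __getitem__ may apply prob_mask differently):
import torch

def sample_mask(prob_mask):
    """Draw a per-pixel binary mask from probabilities in [0, 1]."""
    return torch.bernoulli(prob_mask.clamp(0.0, 1.0))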