def filter_seq3(seq_list: FrameSeqData, base_dir):
    """Load poses, intrinsics, images and depth maps for each frame triple.

    Every element of ``seq_list.frames`` is indexed as
    ``(previous, center, next)``.

    NOTE(review): in the visible code the loaded values are neither used nor
    returned; the remainder of the filtering logic may live elsewhere.

    :param seq_list: container whose ``frames`` entries are frame triples
    :param base_dir: dataset base directory that image and depth file names
        are joined with
    """
    for seq in seq_list.frames:
        pre_frame = seq[0]
        center_frame = seq[1]
        next_frame = seq[2]

        pre_Tcw = seq_list.get_Tcw(pre_frame)
        center_Tcw = seq_list.get_Tcw(center_frame)
        next_Tcw = seq_list.get_Tcw(next_frame)
        K_mat = seq_list.get_K_mat(center_frame)

        # Read images, scaled to [0, 1]
        pre_img_name = seq_list.get_image_name(pre_frame)
        center_img_name = seq_list.get_image_name(center_frame)
        # Fixed: the next image used to be looked up with ``pre_frame``.
        next_img_name = seq_list.get_image_name(next_frame)
        pre_img = cv2.imread(
            os.path.join(base_dir, pre_img_name)).astype(np.float32) / 255.0
        center_img = cv2.imread(
            os.path.join(base_dir, center_img_name)).astype(np.float32) / 255.0
        next_img = cv2.imread(
            os.path.join(base_dir, next_img_name)).astype(np.float32) / 255.0

        # Read depth maps. The names are joined with ``base_dir`` just like
        # the images; os.path.join leaves absolute names untouched.
        pre_depth_name = seq_list.get_depth_name(pre_frame)
        center_depth_name = seq_list.get_depth_name(center_frame)
        next_depth_name = seq_list.get_depth_name(next_frame)
        pre_depth = read_sun3d_depth(os.path.join(base_dir, pre_depth_name))
        center_depth = read_sun3d_depth(
            os.path.join(base_dir, center_depth_name))
        next_depth = read_sun3d_depth(os.path.join(base_dir, next_depth_name))
def __getitem__(self, idx):
    """Load one frame sequence and return it as stacked tensors.

    Images are resized to the configured output size and converted to RGB in
    [0, 1]; depth maps are resized with nearest-neighbour interpolation and
    clamped to at least 1e-5; intrinsics are rescaled to the output size.

    :param idx: index into ``self.seq_list``
    :return: dict with 'img' (frame_num, C, H, W), 'depth' (frame_num, 1, H, W),
        'Tcw' (frame_num, 3, 4) and 'K' (frame_num, 3, 3)
    :raises Exception: if an image file can not be loaded
    """
    frames = self.seq_list[idx].frames
    C, H, W = self.output_dim

    # Optionally reverse the temporal order of the sequence (augmentation)
    rand_flip_flag = np.random.randint(2) if self.random_flip else 0
    if rand_flip_flag != 0:
        frames = frames[::-1]

    # Read frames
    img_tensors = []
    depth_tensors = []
    K_tensors = []
    Tcw_tensors = []
    for frame in frames:
        K = K_from_frame(frame)
        Tcw = np.asarray(frame['extrinsic_Tcw'],
                         dtype=np.float32).reshape((3, 4))
        img_file_name = frame['file_name']
        depth_file_name = frame['depth_file_name']

        # Load image; cv2.imread returns None instead of raising on failure
        img = cv2.imread(os.path.join(self.base_dir, img_file_name))
        if img is None:
            raise Exception('Can not load image:%s' % img_file_name)
        ori_H, ori_W, _ = img.shape
        img = cv2.cvtColor(cv2.resize(img, dsize=(W, H)),
                           cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0

        # Load the depth map; nearest interpolation avoids blending depths
        depth = read_sun3d_depth(os.path.join(self.base_dir, depth_file_name))
        depth = cv2.resize(depth, dsize=(W, H),
                           interpolation=cv2.INTER_NEAREST)
        depth[depth < 1e-5] = 1e-5

        # Convert to torch.tensor
        img_tensor = torch.from_numpy(img.transpose((2, 0, 1)))  # (C, H, W)
        if self.transform_func:
            img_tensor = self.transform_func(img_tensor)
        depth_tensor = torch.from_numpy(depth).view(1, H, W)  # (1, H, W)
        img_tensors.append(img_tensor)
        depth_tensors.append(depth_tensor)

        # Rescale focal lengths and principal point to the resized image
        K[0, 0] *= W / ori_W
        K[0, 2] *= W / ori_W
        K[1, 1] *= H / ori_H
        K[1, 2] *= H / ori_H
        K_tensors.append(torch.from_numpy(K))  # (3, 3)
        Tcw_tensors.append(torch.from_numpy(Tcw))  # (3, 4)

    img_tensors = torch.stack(img_tensors, dim=0)  # (frame_num, C, H, W)
    depth_tensors = torch.stack(depth_tensors, dim=0)  # (frame_num, 1, H, W)
    K_tensors = torch.stack(K_tensors, dim=0)  # (frame_num, 3, 3)
    Tcw_tensors = torch.stack(Tcw_tensors, dim=0)  # (frame_num, 3, 4)

    return {'img': img_tensors, 'depth': depth_tensors,
            'Tcw': Tcw_tensors, 'K': K_tensors}
# Visual sanity check: for the last sequence, compare depth maps read from
# disk with the copies stored in the sequence's LMDB.
for seq_name in tqdm(seq_name_list[-1:], desc='generating lmdbs for sequences'):
    seq_file_path = os.path.join(dataset_dir, seq_name, 'seq.json')
    if not os.path.exists(seq_file_path):
        continue

    seq = FrameSeqData(seq_file_path)
    seq_lmdb = LMDBSeqModel(
        os.path.join(dataset_dir, seq_name, 'rgbd.lmdb'))

    # try/finally so the LMDB session is closed even when a read fails
    try:
        # Sample every 20th frame of the first 80, bounded by the sequence
        # length so short sequences do not raise IndexError.
        for frame_idx in range(0, min(80, len(seq.frames)), 20):
            frame = seq.frames[frame_idx]

            # Reference data read straight from disk
            img_path = os.path.join(dataset_dir, seq.get_image_name(frame))
            img2 = cv2.imread(img_path)
            depth_path = os.path.join(dataset_dir, seq.get_depth_name(frame))
            depth = read_sun3d_depth(depth_path)
            depth = cv2.resize(depth, (320, 240),
                               interpolation=cv2.INTER_NEAREST)

            # Same frame read back from the LMDB, keyed by file name
            img_key = seq.get_image_name(frame)
            depth_key = seq.get_depth_name(frame)
            img = seq_lmdb.read_img(img_key)
            depth2 = seq_lmdb.read_depth(depth_key)

            plt.imshow(depth, cmap='jet')
            plt.show()
            plt.imshow(depth2, cmap='jet')
            plt.show()
    finally:
        seq_lmdb.close_session()
def __getitem__(self, item):
    """Load one (anchor, positives, negatives) triple as tensors.

    ``self.sel_sample_num`` indices are drawn without replacement and the same
    indices pick both the positive and the negative frames. Frames are loaded
    in the order anchor, positives, negatives.

    :param item: index into ``self.triple_list``
    :return: dict with 'anchor_*' tensors for the single anchor frame and
        stacked 'pos_*' / 'neg_*' tensors, each for the fields
        img, depth, Tcw, K and ori_img
    :raises Exception: if an image file can not be loaded
    """
    C, H, W = self.output_dim
    triple = self.triple_list[item]
    anchor_frame = triple['anchor']
    pos_frames = triple['positive']
    neg_frames = triple['negative']

    # One shared draw of indices selects both positives and negatives
    picked = np.random.choice(len(pos_frames), self.sel_sample_num,
                              replace=False)
    pos_frames = [pos_frames[k] for k in picked]
    neg_frames = [neg_frames[k] for k in picked]

    def load(frame):
        """Read one frame from disk and convert it to tensors."""
        intrinsic = K_from_frame(frame)
        pose = np.asarray(frame['extrinsic_Tcw'],
                          dtype=np.float32).reshape((3, 4))
        img_file_name = frame['file_name']
        depth_file_name = frame['depth_file_name']

        # Image: resize, BGR -> RGB, scale to [0, 1]
        bgr = cv2.imread(os.path.join(self.base_dir, img_file_name))
        if bgr is None:
            raise Exception('Can not load image:%s' % img_file_name)
        ori_H, ori_W, _ = bgr.shape
        resized = cv2.resize(bgr, dsize=(W, H))
        rgb = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0

        # Depth: nearest-neighbour resize, clamp tiny values to 1e-5
        depth_map = read_sun3d_depth(
            os.path.join(self.base_dir, depth_file_name))
        depth_map = cv2.resize(depth_map, dsize=(W, H),
                               interpolation=cv2.INTER_NEAREST)
        depth_map[depth_map < 1e-5] = 1e-5

        raw_img = torch.from_numpy(rgb.transpose((2, 0, 1)))  # (C, H, W)
        net_img = raw_img.clone()
        if self.transform_func:
            net_img = self.transform_func(net_img)
        depth_t = torch.from_numpy(depth_map).view(1, H, W)  # (1, H, W)

        # Rescale the intrinsics to the resized image
        scale_x = W / ori_W
        scale_y = H / ori_H
        intrinsic[0, 0] *= scale_x
        intrinsic[0, 2] *= scale_x
        intrinsic[1, 1] *= scale_y
        intrinsic[1, 2] *= scale_y

        intrinsic_t = torch.from_numpy(intrinsic)  # (3, 3)
        pose_t = torch.from_numpy(pose)  # (3, 4)
        return net_img, depth_t, pose_t, intrinsic_t, raw_img

    anchor = load(anchor_frame)
    positives = [load(frame) for frame in pos_frames]
    negatives = [load(frame) for frame in neg_frames]

    def stacked(samples, slot):
        """Stack field number ``slot`` of every sample along a new dim 0."""
        return torch.stack([sample[slot] for sample in samples], dim=0)

    data_dict = {
        'anchor_img': anchor[0],
        'anchor_depth': anchor[1],
        'anchor_Tcw': anchor[2],
        'anchor_K': anchor[3],
        'anchor_ori_img': anchor[4],
    }

    data_dict['pos_img'] = stacked(positives, 0)  # (pos_num, C, H, W)
    data_dict['pos_depth'] = stacked(positives, 1)  # (pos_num, 1, H, W)
    data_dict['pos_Tcw'] = stacked(positives, 2)  # (pos_num, 3, 4)
    data_dict['pos_K'] = stacked(positives, 3)  # (pos_num, 3, 3)
    data_dict['pos_ori_img'] = stacked(positives, 4)  # (pos_num, C, H, W)

    data_dict['neg_img'] = stacked(negatives, 0)  # (neg_num, C, H, W)
    data_dict['neg_depth'] = stacked(negatives, 1)  # (neg_num, 1, H, W)
    data_dict['neg_Tcw'] = stacked(negatives, 2)  # (neg_num, 3, 4)
    data_dict['neg_K'] = stacked(negatives, 3)  # (neg_num, 3, 3)
    data_dict['neg_ori_img'] = stacked(negatives, 4)  # (neg_num, C, H, W)

    return data_dict
def rand_sel_subseq_sun3d(scene_frames,
                          max_subseq_num,
                          frames_per_subseq_num=10,
                          dataset_base_dir=None,
                          trans_thres=0.15,
                          rot_thres=15,
                          frames_range=(0, 0.7),
                          overlap_thres=0.6,
                          interval_skip_frames=1):
    """
    Randomly select sub-sequences of keyframes from a scene.

    Starting from a random frame, later frames are scanned until the relative
    translation, rotation or photometric overlap with respect to the last
    keyframe crosses a threshold; the frame visited just before that one
    becomes the next keyframe.

    :param scene_frames: scene frames to extract subsets from
    :param max_subseq_num: number of random start frames to try, i.e. the
        maximum number of sub sequences
    :param frames_per_subseq_num: number of keyframes in each sub sequence
    :param dataset_base_dir: dataset base directory, must not be None
    :param trans_thres: threshold compared against ``rel_distance``
    :param rot_thres: threshold compared against ``rel_rot_angle``
        (presumably degrees -- confirm against that helper)
    :param frames_range: range of start frames within the scene, as fractions
        of the scene length in (0, 1)
    :param overlap_thres: minimal photometric overlap with the last keyframe
    :param interval_skip_frames: number of frames skipped after a keyframe
        before the search resumes; values below 1 are replaced by 2
    :return: list of selected sub sequences
    :raises Exception: if the scene has too few frames
    """
    assert dataset_base_dir is not None
    n_frames = len(scene_frames)
    if interval_skip_frames < 1:
        interval_skip_frames = 2

    # Simple selection based on trans threshold
    if frames_per_subseq_num * interval_skip_frames > n_frames:
        raise Exception('Not enough frames to be selected')
    rand_start_frame = np.random.randint(
        int(frames_range[0] * len(scene_frames)),
        int(frames_range[1] * len(scene_frames)),
        size=max_subseq_num)

    sub_seq_list = []
    dim = scene_frames.get_frame_dim(scene_frames.frames[0])
    K = scene_frames.get_K_mat(scene_frames.frames[0])
    pre_cache_x2d = x_2d_coords(dim[0], dim[1])

    for start_frame_idx in rand_start_frame:
        # Push start keyframe into frames
        sub_frames = FrameSeqData()
        pre_frame = scene_frames.frames[start_frame_idx]
        sub_frames.frames.append(copy.deepcopy(pre_frame))

        # Iterate the remaining keyframes into subset.
        # The loop condition guarantees a non-empty search range below. The
        # previous `cur_frame_idx < n_frames` condition could spin forever on
        # an empty range, or index past the end of the scene when
        # interval_skip_frames > 1.
        cur_frame_idx = start_frame_idx
        while cur_frame_idx + interval_skip_frames < n_frames:
            pre_Tcw = sub_frames.get_Tcw(pre_frame)
            pre_depth_path = sub_frames.get_depth_name(pre_frame)
            pre_depth = read_sun3d_depth(
                os.path.join(dataset_base_dir, pre_depth_path))

            pre_search_frame = scene_frames.frames[
                cur_frame_idx + interval_skip_frames - 1]
            for search_idx in range(cur_frame_idx + interval_skip_frames,
                                    n_frames, 1):
                cur_frame = scene_frames.frames[search_idx]
                cur_Tcw = sub_frames.get_Tcw(cur_frame)

                rel_angle = rel_rot_angle(pre_Tcw, cur_Tcw)
                rel_dist = rel_distance(pre_Tcw, cur_Tcw)
                overlap = photometric_overlap(pre_depth, K,
                                              Ta=pre_Tcw, Tb=cur_Tcw,
                                              pre_cache_x2d=pre_cache_x2d)

                if rel_dist > trans_thres or overlap < overlap_thres \
                        or rel_angle > rot_thres:
                    # A threshold is exceeded: the previously visited frame
                    # becomes the new keyframe.
                    sub_frames.frames.append(copy.deepcopy(pre_search_frame))
                    pre_frame = pre_search_frame
                    cur_frame_idx = search_idx + 1
                    break
                pre_search_frame = cur_frame
            else:
                # Reached the end of the scene without finding a keyframe
                break

            if len(sub_frames) > frames_per_subseq_num - 1:
                break

        # If the subset is less than setting, ignore
        if len(sub_frames) >= frames_per_subseq_num:
            sub_seq_list.append(sub_frames)

    # Fixed: the count was passed as a second print argument, so '%d' was
    # printed literally.
    print('sel: %d' % len(sub_seq_list))
    return sub_seq_list
def load_frame_2_tensors(self, frame, out_frame_dim, fill_depth_holes=False):
    """Load a frame's image, depth, intrinsics and pose as tensors.

    The image and depth are scaled by one common ratio so the result covers
    the requested size, then cropped around the image centre through
    ``crop_by_intrinsic``.

    :param frame: frame dict with 'extrinsic_Tcw', 'file_name' and
        'depth_file_name' entries
    :param out_frame_dim: output dimension (C, H, W)
    :param fill_depth_holes: if True, run ``fill_depth_cross_bf`` on the depth
    :return: (pose_vector, img_tensor, depth_tensor, K_tensor, Tcw_tensor,
        ori_img_tensor)
    :raises Exception: if the image file can not be loaded
    """
    C, H, W = out_frame_dim

    K = K_from_frame(frame)
    Tcw = np.asarray(frame['extrinsic_Tcw'], dtype=np.float32).reshape(
        (3, 4))
    Rcw, tcw = Tcw[:3, :3], Tcw[:3, 3]
    img_file_name = frame['file_name']
    depth_file_name = frame['depth_file_name']

    # Load image and depth; cv2.imread returns None instead of raising
    img = cv2.imread(os.path.join(self.base_dir, img_file_name))
    if img is None:
        raise Exception('Can not load image:%s' % img_file_name)
    depth = read_sun3d_depth(os.path.join(self.base_dir, depth_file_name))
    ori_H, ori_W, _ = img.shape

    # Post-process image and depth (fill the holes with cross bilateral
    # filter). The larger ratio is used so both dimensions reach at least
    # the requested size, up to integer truncation.
    resize_ratio = max(H / ori_H, W / ori_W)
    resized_dim = (int(resize_ratio * ori_W), int(resize_ratio * ori_H))
    img = cv2.resize(img, dsize=resized_dim)
    depth = cv2.resize(depth, dsize=resized_dim,
                       interpolation=cv2.INTER_NEAREST)
    if fill_depth_holes:
        depth = fill_depth_cross_bf(img, depth)
    depth[depth < 1e-5] = 1e-5
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0

    # Camera intrinsic parameters: scale the focal lengths and place the
    # principal point at the centre of the resized image.
    K[0, 0] *= resize_ratio
    K[0, 2] = (resize_ratio * ori_W) / 2
    K[1, 1] *= resize_ratio
    K[1, 2] = (resize_ratio * ori_H) / 2

    # Target intrinsics: same focal lengths, principal point at the centre
    # of the requested output size.
    new_K = K.copy()
    new_K[0, 2] = W / 2
    new_K[1, 2] = H / 2

    # Crop and resize with new K
    img = crop_by_intrinsic(img, K, new_K)
    depth = crop_by_intrinsic(depth, K, new_K, interp_method='nearest')

    # Camera motion representation: (center, rotation_center2world)
    c = camera_center_from_Tcw(Rcw, tcw)
    Rwc = np.eye(4)
    Rwc[:3, :3] = Rcw.T
    q = quaternion_from_matrix(Rwc)
    log_q = log_quat(q)
    # Presumably 6 values (3 for the centre + 3 for the log quaternion) --
    # confirm against log_quat.
    pose_vector = np.concatenate((c, log_q)).astype(np.float32)

    # Convert to torch.tensor
    ori_img_tensor = torch.from_numpy(img.transpose((2, 0, 1)))  # (C, H, W)
    img_tensor = ori_img_tensor.clone()
    if self.transform_func:
        img_tensor = self.transform_func(img_tensor)
    depth_tensor = torch.from_numpy(depth).view(1, H, W)  # (1, H, W)
    pose_vector = torch.from_numpy(pose_vector)
    Tcw_tensor = torch.from_numpy(Tcw)  # (3, 4)
    K_tensor = torch.from_numpy(new_K)  # (3, 3)

    return pose_vector, img_tensor, depth_tensor, K_tensor, Tcw_tensor, ori_img_tensor
def sel_pairs_with_overlap_range_sun3d(scene_frames,
                                       scene_lmdb: LMDBSeqModel,
                                       max_subseq_num,
                                       frames_per_subseq_num=10,
                                       dataset_base_dir=None,
                                       trans_thres=0.15,
                                       rot_thres=15,
                                       frames_range=(0, 0.7),
                                       overlap_thres=0.5,
                                       scene_dist_thres=(0.0, 1.0),
                                       interval_skip_frames=1,
                                       train_anchor_num=100,
                                       test_anchor_num=100):
    """
    Randomly select keyframe sub-sequences plus train/test anchor frames.

    Keyframes are selected as in a threshold walk: starting from a random
    frame, later frames are scanned until the photometric overlap or the
    relative distance to the last keyframe crosses its threshold; the frame
    visited just before becomes the next keyframe. Overlap is evaluated on
    depth maps downsampled by 4. Frames that lie between the first and last
    keyframe but are not keyframes themselves are anchor candidates.

    :param scene_frames: scene frames to extract subsets from
    :param scene_lmdb: optional LMDB to read depth maps from; when None the
        depth maps are read from ``dataset_base_dir``
    :param max_subseq_num: ratio of the scene length; the number of random
        start frames is ``int(n_frames * max_subseq_num)``
    :param frames_per_subseq_num: number of keyframes in each sub sequence
    :param dataset_base_dir: dataset base directory, must not be None
    :param trans_thres: threshold compared against ``rel_distance``
    :param rot_thres: currently unused by the selection rule
    :param frames_range: range of start frames within the scene, as fractions
        of the scene length in (0, 1)
    :param overlap_thres: minimal photometric overlap with the last keyframe
    :param scene_dist_thres: currently unused by the selection rule
    :param interval_skip_frames: number of frames skipped after a keyframe
        before the search resumes; values below 1 are replaced by 2
    :param train_anchor_num: number of training anchors per sub sequence
    :param test_anchor_num: number of test anchors per sub sequence
    :return: list of dicts with keys 'sub_frames', 'train_anchor_frames' and
        'test_anchor_frames'
    """
    use_lmdb_cache = scene_lmdb is not None

    assert dataset_base_dir is not None
    n_frames = len(scene_frames)
    if interval_skip_frames < 1:
        interval_skip_frames = 2
    max_subseq_num = int(n_frames * max_subseq_num)

    rand_start_frame = np.random.randint(
        int(frames_range[0] * len(scene_frames)),
        int(frames_range[1] * len(scene_frames)),
        size=max_subseq_num)

    sub_seq_list = []

    # Overlap is computed at quarter resolution
    dim = list(scene_frames.get_frame_dim(scene_frames.frames[0]))
    dim[0] = int(dim[0] // 4)
    dim[1] = int(dim[1] // 4)
    # Divide into a new array instead of `K /= 4.0`, so the matrix returned
    # by get_K_mat is never modified in place.
    K = scene_frames.get_K_mat(scene_frames.frames[0]) / 4.0
    K[2, 2] = 1.0
    pre_cache_x2d = cam_opt.x_2d_coords(dim[0], dim[1])

    for start_frame_idx in rand_start_frame:
        # Push start keyframe into frames
        sub_frames = FrameSeqData()
        pre_frame = scene_frames.frames[start_frame_idx]
        sub_frames.frames.append(copy.deepcopy(pre_frame))
        sub_frames_idx = [start_frame_idx]

        # Iterate the remaining keyframes into subset
        cur_frame_idx = start_frame_idx
        while cur_frame_idx + interval_skip_frames < n_frames:
            pre_Tcw = sub_frames.get_Tcw(pre_frame)
            pre_depth_path = sub_frames.get_depth_name(pre_frame)
            if use_lmdb_cache:
                pre_depth = scene_lmdb.read_depth(pre_depth_path)
            else:
                pre_depth = read_sun3d_depth(
                    os.path.join(dataset_base_dir, pre_depth_path))
            pre_depth = cv2.resize(pre_depth, (dim[1], dim[0]),
                                   interpolation=cv2.INTER_NEAREST)

            pre_search_frame = scene_frames.frames[
                cur_frame_idx + interval_skip_frames - 1]
            for search_idx in range(cur_frame_idx + interval_skip_frames,
                                    n_frames, 1):
                cur_frame = scene_frames.frames[search_idx]
                cur_Tcw = sub_frames.get_Tcw(cur_frame)

                rel_dist = rel_distance(pre_Tcw, cur_Tcw)
                overlap = cam_opt.photometric_overlap(
                    pre_depth, K, Ta=pre_Tcw, Tb=cur_Tcw,
                    pre_cache_x2d=pre_cache_x2d)

                if overlap < overlap_thres or rel_dist > trans_thres:
                    # A threshold is exceeded: the previously visited frame
                    # (index search_idx - 1) becomes the new keyframe.
                    sub_frames.frames.append(copy.deepcopy(pre_search_frame))
                    pre_frame = pre_search_frame
                    cur_frame_idx = search_idx + 1
                    sub_frames_idx.append(search_idx - 1)
                    break
                pre_search_frame = cur_frame
            else:
                # Reached the end of the scene without finding a keyframe
                break

            if len(sub_frames) > frames_per_subseq_num - 1:
                break

        # If the subset is less than setting, ignore
        if len(sub_frames) >= frames_per_subseq_num:
            min_idx = min(sub_frames_idx)
            max_idx = max(sub_frames_idx)
            print(min_idx, max_idx, n_frames)

            # Anchor candidates: frames inside the covered index range that
            # are not keyframes. A set keeps the membership test O(1).
            keyframe_idces = set(sub_frames_idx)
            potential_anchor_idces = [
                i for i in range(min_idx, max_idx) if i not in keyframe_idces
            ]

            if len(potential_anchor_idces) >= train_anchor_num + test_anchor_num:
                anchor_idces = np.random.choice(
                    len(potential_anchor_idces),
                    size=train_anchor_num + test_anchor_num,
                    replace=False)
                # The first train_anchor_num draws are training anchors, the
                # rest are test anchors.
                train_anchor_frames = [
                    scene_frames.frames[potential_anchor_idces[i]]
                    for i in anchor_idces[:train_anchor_num]
                ]
                test_anchor_frames = [
                    scene_frames.frames[potential_anchor_idces[i]]
                    for i in anchor_idces[train_anchor_num:]
                ]
                sub_seq_list.append({
                    'sub_frames': sub_frames,
                    'train_anchor_frames': train_anchor_frames,
                    'test_anchor_frames': test_anchor_frames
                })
                print('selected', len(potential_anchor_idces), len(sub_frames))

    # Fixed: the count was passed as a second print argument, so '%d' was
    # printed literally.
    print('sel: %d' % len(sub_seq_list))
    return sub_seq_list
def sel_triple_sun3d(base_dir, scene_frames, max_triple_num,
                     num_sample_per_triple, trans_thres, overlap_thres):
    """
    Select triples (anchor, positive, negative) from a sun3d sequence

    Positives are frames whose camera center lies within ``trans_thres`` of
    the anchor's and whose photometric overlap with the anchor is strictly
    inside ``overlap_thres``. Negatives are drawn from all remaining frames
    except the anchor itself. Anchors with too few positives or negatives are
    skipped.

    :param base_dir: dataset base directory
    :param scene_frames: scene frames to extract triples
    :param max_triple_num: maximum number of triples; capped at the number of
        frames in the scene
    :param num_sample_per_triple: number of positive/negative samples per triple
    :param trans_thres: translation threshold for positive samples, based on
        the center of different frames
    :param overlap_thres: overlap threshold for positive samples, (low, high)
    :return: [{'anchor': frame_dict, 'positive': [frame_dict, ...],
        'negative': [frame_dict, ...]}, {...}, ...]
    """
    n_frames = len(scene_frames)
    if n_frames == 0:
        return []

    dim = scene_frames.get_frame_dim(scene_frames.frames[0])
    K = scene_frames.get_K_mat(scene_frames.frames[0])
    pre_cache_x2d = cam_opt.x_2d_coords(dim[0], dim[1])

    # Index all camera centers for the radius query
    camera_centers = np.empty((n_frames, 3), dtype=np.float32)
    for i, frame in enumerate(scene_frames.frames):
        Tcw = scene_frames.get_Tcw(frame)
        camera_centers[i, :] = cam_opt.camera_center_from_Tcw(
            Tcw[:3, :3], Tcw[:3, 3])
    kdtree = KDTree(camera_centers)

    triple_list = []
    # Sampling without replacement raises when more anchors are requested
    # than frames exist, so cap the request.
    anchor_idces = np.random.choice(n_frames, min(max_triple_num, n_frames),
                                    replace=False)
    for anchor_idx in anchor_idces:
        anchor_frame = scene_frames.frames[anchor_idx]
        anchor_Tcw = scene_frames.get_Tcw(anchor_frame)
        anchor_depth_path = scene_frames.get_depth_name(anchor_frame)
        anchor_depth = read_sun3d_depth(
            os.path.join(base_dir, anchor_depth_path))
        anchor_depth[anchor_depth < 1e-5] = 1e-5

        # Candidates: frames whose camera center is within trans_thres
        potential_pos_idces = kdtree.query_ball_point(
            camera_centers[anchor_idx], trans_thres)
        pos_idces = []
        for potential_pos_idx in potential_pos_idces:
            potential_pos_frame = scene_frames.frames[potential_pos_idx]
            potential_pos_Tcw = scene_frames.get_Tcw(potential_pos_frame)
            overlap = cam_opt.photometric_overlap(anchor_depth, K,
                                                  Ta=anchor_Tcw,
                                                  Tb=potential_pos_Tcw,
                                                  pre_cache_x2d=pre_cache_x2d)
            if overlap_thres[0] < overlap < overlap_thres[1]:
                pos_idces.append(potential_pos_idx)

        if len(pos_idces) < num_sample_per_triple:
            continue

        # Negatives: everything that is neither a positive nor the anchor
        # itself (the anchor used to be eligible as its own negative).
        excluded = set(pos_idces)
        excluded.add(int(anchor_idx))
        neg_idces = [i for i in range(n_frames) if i not in excluded]
        if len(neg_idces) < num_sample_per_triple:
            continue

        sel_pos_idces = np.random.choice(pos_idces, num_sample_per_triple,
                                         replace=False)
        sel_neg_idces = np.random.choice(neg_idces, num_sample_per_triple,
                                         replace=False)

        triple_list.append({
            'anchor': copy.deepcopy(anchor_frame),
            'positive': [
                copy.deepcopy(scene_frames.frames[idx])
                for idx in sorted(sel_pos_idces)
            ],
            'negative': [
                copy.deepcopy(scene_frames.frames[idx])
                for idx in sorted(sel_neg_idces)
            ],
        })

    return triple_list
def keyPressEvent(obj, event):
    """Keyboard callback: show the current frame's point cloud, then step.

    'Right' advances ``frame_idx`` by 20 frames and 'Left' goes back by 20;
    any other key is ignored. The current frame's depth is back-projected
    and passed to the visualizer together with the frame pose.

    Compared with the previous version, the two identical branches are
    merged, the unused next-frame loading is dropped (it raised IndexError on
    the last frame), and ``frame_idx`` is clamped to the valid frame range.

    :param obj: event source; ``obj.GetKeySym()`` gives the pressed key
    :param event: event name (unused)
    """
    global frame_idx

    step = {'Right': 20, 'Left': -20}.get(obj.GetKeySym())
    if step is None:
        return

    cur_frame = frames.frames[frame_idx]
    # NOTE(review): assumes 'extrinsic_Tcw' supports 2-D slicing (e.g. a
    # numpy array) -- confirm against the frame loader.
    cur_Tcw = cur_frame['extrinsic_Tcw']
    cur_name = cur_frame['file_name']
    cur_depth_name = cur_frame['depth_file_name']
    K = K_from_frame(cur_frame)

    # Read image and depth
    cur_img = cv2.imread(os.path.join(base_dir, cur_name)).astype(
        np.float32) / 255.0
    cur_depth = read_sun3d_depth(os.path.join(base_dir, cur_depth_name))
    h, w = cur_img.shape[:2]

    # Back-project the depth map, then apply the inverse camera pose
    X_3d = cam_opt.pi_inv(K, x_2d.reshape((h * w, 2)),
                          cur_depth.reshape((h * w, 1)))
    cur_Twc = cam_opt.camera_pose_inv(cur_Tcw[:3, :3], cur_Tcw[:3, 3])
    X_3d = cam_opt.transpose(cur_Twc[:3, :3], cur_Twc[:3, 3], X_3d)

    vis.set_point_cloud(X_3d, cur_img.reshape((h * w, 3)))
    vis.add_frame_pose(cur_Tcw[:3, :3], cur_Tcw[:3, 3])

    # Clamp so the next key press never indexes outside the sequence
    frame_idx = min(max(frame_idx + step, 0), len(frames.frames) - 1)
    return
# Compare the current frame with the frame 5 positions later in the
# sequence: relative pose, baseline length and relative rotation angle.
# NOTE(review): `cur_frame` and `frame_idx` are defined outside this
# fragment; 'extrinsic_Tcw' is assumed to support 2-D slicing -- confirm.
cur_Tcw = cur_frame['extrinsic_Tcw']
cur_name = cur_frame['file_name']
cur_depth_name = cur_frame['depth_file_name']
next_frame = frames.frames[frame_idx + 5]
next_Tcw = next_frame['extrinsic_Tcw']
next_name = next_frame['file_name']
K = K_from_frame(cur_frame)

# Read image (scaled to [0, 1]) and the current frame's depth map
cur_img = cv2.imread(os.path.join(base_dir, cur_name)).astype(
    np.float32) / 255.0
next_img = cv2.imread(os.path.join(base_dir, next_name)).astype(
    np.float32) / 255.0
cur_depth = read_sun3d_depth(os.path.join(base_dir, cur_depth_name))
h, w, c = cur_img.shape

# Relative pose computed from the two frames' rotations and translations
rel_T = cam_opt.relateive_pose(cur_Tcw[:3, :3], cur_Tcw[:3, 3], next_Tcw[:3, :3], next_Tcw[:3, 3])

# Translation: baseline is the norm of the camera center derived from rel_T
Cb = cam_opt.camera_center_from_Tcw(rel_T[:3, :3], rel_T[:3, 3])
baseline = np.linalg.norm(Cb)

# View angle: rel_T is passed through a quaternion round trip, then the
# rotation angle is extracted and converted from radians to degrees
q = trans.quaternion_from_matrix(rel_T)
R = trans.quaternion_matrix(q)
rel_rad, rel_axis, _ = trans.rotation_from_matrix(R)
rel_deg = np.rad2deg(rel_rad)