Esempio n. 1
0
def filter_seq3(seq_list: FrameSeqData, base_dir):
    """Load poses, intrinsics, images and depth maps for each frame triplet.

    Every entry of ``seq_list.frames`` is indexed as a
    ``(previous, center, next)`` triplet of frames.  For each triplet the
    camera poses, the center intrinsic matrix, the three colour images
    (scaled to ``[0, 1]`` float32) and the three depth maps are read.

    :param seq_list: sequence container providing ``frames``, ``get_Tcw``,
        ``get_K_mat``, ``get_image_name`` and ``get_depth_name``
    :param base_dir: directory that image and depth file names are joined to
    :return: None -- the visible body only loads the data
    """
    for seq in seq_list.frames:

        pre_frame = seq[0]
        center_frame = seq[1]
        next_frame = seq[2]

        pre_Tcw = seq_list.get_Tcw(pre_frame)
        center_Tcw = seq_list.get_Tcw(center_frame)
        next_Tcw = seq_list.get_Tcw(next_frame)

        K_mat = seq_list.get_K_mat(center_frame)

        # Read Image
        pre_img_name = seq_list.get_image_name(pre_frame)
        center_img_name = seq_list.get_image_name(center_frame)
        # The name must come from next_frame; using pre_frame here would
        # silently load the previous image twice.
        next_img_name = seq_list.get_image_name(next_frame)
        pre_img = cv2.imread(os.path.join(base_dir, pre_img_name)).astype(
            np.float32) / 255.0
        center_img = cv2.imread(os.path.join(
            base_dir, center_img_name)).astype(np.float32) / 255.0
        next_img = cv2.imread(os.path.join(base_dir, next_img_name)).astype(
            np.float32) / 255.0

        # Read depth (names are joined to base_dir, like the image names)
        pre_depth_name = seq_list.get_depth_name(pre_frame)
        center_depth_name = seq_list.get_depth_name(center_frame)
        next_depth_name = seq_list.get_depth_name(next_frame)
        pre_depth = read_sun3d_depth(os.path.join(base_dir, pre_depth_name))
        center_depth = read_sun3d_depth(
            os.path.join(base_dir, center_depth_name))
        next_depth = read_sun3d_depth(os.path.join(base_dir, next_depth_name))
    def __getitem__(self, idx):
        """Load all frames of sequence ``idx`` as stacked tensors.

        Each frame's image and depth map are resized to the ``(H, W)`` taken
        from ``self.output_dim`` and the intrinsic matrix is rescaled to match.
        When ``self.random_flip`` is set, the frame order is reversed with
        probability 1/2.

        :param idx: index into ``self.seq_list``
        :return: dict with keys ``'img'``, ``'depth'``, ``'Tcw'`` and ``'K'``,
            each stacked along a leading frame dimension
        :raises Exception: if an image file cannot be read
        """
        frames = self.seq_list[idx].frames

        C, H, W = self.output_dim

        # A non-zero flag means the sequence order is reversed.
        rand_flip_flag = np.random.randint(2) if self.random_flip else 0
        if rand_flip_flag != 0:
            frames = frames[::-1]

        # Read frames
        img_tensors = []
        depth_tensors = []
        K_tensors = []
        Tcw_tensors = []

        for frame in frames:
            K = K_from_frame(frame)
            Tcw = np.asarray(frame['extrinsic_Tcw'], dtype=np.float32).reshape((3, 4))
            img_file_name = frame['file_name']
            depth_file_name = frame['depth_file_name']

            # Load image; cv2.imread signals failure by returning None
            img = cv2.imread(os.path.join(self.base_dir, img_file_name))
            if img is None:
                raise Exception('Can not load image:%s' % img_file_name)
            ori_H, ori_W, _ = img.shape
            img = cv2.cvtColor(cv2.resize(img, dsize=(W, H)), cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0

            # Load the depth map; nearest-neighbour avoids blending depths
            depth = read_sun3d_depth(os.path.join(self.base_dir, depth_file_name))
            depth = cv2.resize(depth, dsize=(W, H), interpolation=cv2.INTER_NEAREST)
            # Clamp near-zero depth values to a small positive constant
            depth[depth < 1e-5] = 1e-5

            # convert to torch.tensor
            img_tensor = torch.from_numpy(img.transpose((2, 0, 1)))      # (C, H, W)
            if self.transform_func:
                img_tensor = self.transform_func(img_tensor)
            depth_tensor = torch.from_numpy(depth).view(1, H, W)         # (1, H, W)
            img_tensors.append(img_tensor)
            depth_tensors.append(depth_tensor)

            # Rescale focal lengths and principal point to the output size
            scale_x = W / ori_W
            scale_y = H / ori_H
            K[0, 0] *= scale_x
            K[0, 2] *= scale_x
            K[1, 1] *= scale_y
            K[1, 2] *= scale_y

            K_tensors.append(torch.from_numpy(K))                        # (3, 3)
            Tcw_tensors.append(torch.from_numpy(Tcw))                    # (3, 4)

        img_tensors = torch.stack(img_tensors, dim=0)        # (frame_num, C, H, W)
        depth_tensors = torch.stack(depth_tensors, dim=0)    # (frame_num, 1, H, W)
        K_tensors = torch.stack(K_tensors, dim=0)            # (frame_num, 3, 3)
        Tcw_tensors = torch.stack(Tcw_tensors, dim=0)        # (frame_num, 3, 4)

        return {'img': img_tensors, 'depth': depth_tensors, 'Tcw': Tcw_tensors, 'K': K_tensors}
Esempio n. 3
0
    # Visual check: for a few frames of each listed sequence, show the depth
    # map read from disk next to the depth map stored in the sequence LMDB.
    for seq_name in tqdm(seq_name_list[-1:],
                         desc='generating lmdbs for sequences'):
        seq_file_path = os.path.join(dataset_dir, seq_name, 'seq.json')
        if not os.path.exists(seq_file_path):
            continue
        seq = FrameSeqData(seq_file_path)

        seq_lmdb = LMDBSeqModel(
            os.path.join(dataset_dir, seq_name, 'rgbd.lmdb'))
        try:
            # Sample every 20th frame among the first 80, but never index
            # past the end of a short sequence.
            for frame_idx in range(0, min(80, len(seq.frames)), 20):
                frame = seq.frames[frame_idx]
                img_key = seq.get_image_name(frame)
                depth_key = seq.get_depth_name(frame)

                # NOTE(review): both images are read but never displayed.
                img2 = cv2.imread(os.path.join(dataset_dir, img_key))
                depth = read_sun3d_depth(os.path.join(dataset_dir, depth_key))
                depth = cv2.resize(depth, (320, 240),
                                   interpolation=cv2.INTER_NEAREST)

                img = seq_lmdb.read_img(img_key)
                depth2 = seq_lmdb.read_depth(depth_key)

                plt.imshow(depth, cmap='jet')
                plt.show()
                plt.imshow(depth2, cmap='jet')
                plt.show()
        finally:
            # Release the LMDB session even if reading or plotting fails.
            seq_lmdb.close_session()
Esempio n. 4
0
    def __getitem__(self, item):
        """Build one (anchor, positives, negatives) training sample.

        ``self.sel_sample_num`` indices are drawn without replacement from the
        positive list, and the same indices select the negatives.  Every frame
        is loaded, resized to the ``(H, W)`` of ``self.output_dim`` and
        converted to tensors.

        :param item: index into ``self.triple_list``
        :return: dict with ``anchor_*``, ``pos_*`` and ``neg_*`` entries for
            each of ``img``, ``depth``, ``Tcw``, ``K`` and ``ori_img``; the
            ``pos_*`` / ``neg_*`` entries are stacked along dimension 0
        :raises Exception: if an image file cannot be read
        """
        _, H, W = self.output_dim
        fields = ('img', 'depth', 'Tcw', 'K', 'ori_img')

        sample = self.triple_list[item]
        anchor_frame = sample['anchor']
        positives = sample['positive']
        negatives = sample['negative']
        picked = np.random.choice(len(positives), self.sel_sample_num, replace=False)
        positives = [positives[k] for k in picked]
        negatives = [negatives[k] for k in picked]

        def load(frame):
            # Read one frame from disk and return its tensors keyed by field.
            intrinsic = K_from_frame(frame)
            pose = np.asarray(frame['extrinsic_Tcw'], dtype=np.float32).reshape((3, 4))
            img_file_name = frame['file_name']
            depth_file_name = frame['depth_file_name']

            # Colour image: BGR on disk -> resized RGB float in [0, 1]
            bgr = cv2.imread(os.path.join(self.base_dir, img_file_name))
            if bgr is None:
                raise Exception('Can not load image:%s' % img_file_name)
            src_h, src_w, _ = bgr.shape
            rgb = cv2.cvtColor(cv2.resize(bgr, dsize=(W, H)), cv2.COLOR_BGR2RGB)
            rgb = rgb.astype(np.float32) / 255.0

            # Depth map: nearest-neighbour resize, near-zero values clamped
            depth_map = read_sun3d_depth(os.path.join(self.base_dir, depth_file_name))
            depth_map = cv2.resize(depth_map, dsize=(W, H), interpolation=cv2.INTER_NEAREST)
            depth_map[depth_map < 1e-5] = 1e-5

            raw_img = torch.from_numpy(rgb.transpose((2, 0, 1)))  # (C, H, W)
            net_img = raw_img.clone()
            if self.transform_func:
                net_img = self.transform_func(net_img)
            depth_t = torch.from_numpy(depth_map).view(1, H, W)   # (1, H, W)

            # Rescale intrinsics to the output resolution
            intrinsic[0, 0] *= W / src_w
            intrinsic[0, 2] *= W / src_w
            intrinsic[1, 1] *= H / src_h
            intrinsic[1, 2] *= H / src_h

            return {
                'img': net_img,
                'depth': depth_t,
                'Tcw': torch.from_numpy(pose),        # (3, 4)
                'K': torch.from_numpy(intrinsic),     # (3, 3)
                'ori_img': raw_img,
            }

        # Same load order as a single pass over anchor + positives + negatives
        anchor = load(anchor_frame)
        pos_items = [load(frame) for frame in positives]
        neg_items = [load(frame) for frame in negatives]

        data_dict = {'anchor_' + key: anchor[key] for key in fields}
        for prefix, items in (('pos_', pos_items), ('neg_', neg_items)):
            stacked = {key: torch.stack([entry[key] for entry in items], dim=0)
                       for key in fields}
            for key in fields:
                data_dict[prefix + key] = stacked[key]

        return data_dict
def rand_sel_subseq_sun3d(scene_frames,
                          max_subseq_num,
                          frames_per_subseq_num=10,
                          dataset_base_dir=None,
                          trans_thres=0.15,
                          rot_thres=15,
                          frames_range=(0, 0.7),
                          overlap_thres=0.6,
                          interval_skip_frames=1):
    """
    Randomly select sub-sequences of keyframes from a scene.

    Starting at a random frame, the scene is scanned forward.  When a frame
    exceeds the translation or rotation threshold, or falls below the overlap
    threshold, relative to the last keyframe, the frame just before it becomes
    the next keyframe.  Scanning stops once the sub-sequence holds
    ``frames_per_subseq_num`` frames or the scene is exhausted.

    :param scene_frames: scene frames to extract subsets from
    :param max_subseq_num: number of random start frames to try, i.e. the
        maximum number of returned sub-sequences
    :param frames_per_subseq_num: number of frames required in each sub-sequence
    :param dataset_base_dir: base directory the depth file names are joined to
    :param trans_thres: upper bound on ``rel_distance`` to the last keyframe
    :param rot_thres: upper bound on ``rel_rot_angle`` to the last keyframe
    :param frames_range: range of start frames as fractions of the scene
        length, within (0, 1)
    :param overlap_thres: lower bound on ``photometric_overlap`` with the
        last keyframe
    :param interval_skip_frames: number of frames skipped before the search for
        the next keyframe begins; values below 1 are replaced by 2
    :return: list of selected sub-sequences (``FrameSeqData`` objects)
    :raises Exception: if the scene has too few frames for one sub-sequence
    """
    assert dataset_base_dir is not None
    n_frames = len(scene_frames)
    if interval_skip_frames < 1:
        interval_skip_frames = 2

    # Simple selection based on trans threshold
    if frames_per_subseq_num * interval_skip_frames > n_frames:
        raise Exception('Not enough frames to be selected')
    rand_start_frame = np.random.randint(int(frames_range[0] * n_frames),
                                         int(frames_range[1] * n_frames),
                                         size=max_subseq_num)

    sub_seq_list = []
    dim = scene_frames.get_frame_dim(scene_frames.frames[0])
    K = scene_frames.get_K_mat(scene_frames.frames[0])
    pre_cache_x2d = x_2d_coords(dim[0], dim[1])

    for start_frame_idx in rand_start_frame:
        # Push start keyframe into frames
        sub_frames = FrameSeqData()
        pre_frame = scene_frames.frames[start_frame_idx]
        sub_frames.frames.append(copy.deepcopy(pre_frame))

        # Iterate the remaining keyframes into subset.  The loop condition
        # guarantees a non-empty search range, so the loop cannot spin without
        # progress and the index used for pre_search_frame stays in bounds.
        cur_frame_idx = start_frame_idx
        while cur_frame_idx + interval_skip_frames < n_frames:
            pre_Tcw = sub_frames.get_Tcw(pre_frame)
            pre_depth_path = sub_frames.get_depth_name(pre_frame)
            pre_depth = read_sun3d_depth(os.path.join(dataset_base_dir, pre_depth_path))

            found_keyframe = False
            pre_search_frame = scene_frames.frames[cur_frame_idx + interval_skip_frames - 1]
            for search_idx in range(cur_frame_idx + interval_skip_frames, n_frames):
                cur_frame = scene_frames.frames[search_idx]
                cur_Tcw = sub_frames.get_Tcw(cur_frame)

                rel_angle = rel_rot_angle(pre_Tcw, cur_Tcw)
                rel_dist = rel_distance(pre_Tcw, cur_Tcw)
                overlap = photometric_overlap(pre_depth, K, Ta=pre_Tcw, Tb=cur_Tcw, pre_cache_x2d=pre_cache_x2d)

                if rel_dist > trans_thres or overlap < overlap_thres or rel_angle > rot_thres:
                    # This frame violates a threshold, so the frame just
                    # before it becomes the next keyframe.
                    sub_frames.frames.append(copy.deepcopy(pre_search_frame))
                    pre_frame = pre_search_frame
                    cur_frame_idx = search_idx + 1
                    found_keyframe = True
                    break

                pre_search_frame = cur_frame

            # Reached the end of the scene without finding another keyframe
            if not found_keyframe:
                break

            if len(sub_frames) > frames_per_subseq_num - 1:
                break

        # If the subset is less than setting, ignore
        if len(sub_frames) >= frames_per_subseq_num:
            sub_seq_list.append(sub_frames)

    print('sel: %d' % len(sub_seq_list))
    return sub_seq_list
Esempio n. 6
0
    def load_frame_2_tensors(self,
                             frame,
                             out_frame_dim,
                             fill_depth_holes=False):
        """Load one frame and convert it to tensors at the output resolution.

        The image and depth map are resized by one common ratio, the larger of
        the height and width ratios, and then cropped to ``(H, W)`` through
        ``crop_by_intrinsic``.

        :param frame: frame dict with ``'extrinsic_Tcw'``, ``'file_name'`` and
            ``'depth_file_name'`` entries
        :param out_frame_dim: output dimension ``(C, H, W)``
        :param fill_depth_holes: if True, run ``fill_depth_cross_bf`` on the
            resized depth map
        :return: tuple ``(pose_vector, img_tensor, depth_tensor, K_tensor,
            Tcw_tensor, ori_img_tensor)``
        :raises Exception: if the image file cannot be read
        """
        C, H, W = out_frame_dim
        K = K_from_frame(frame)
        Tcw = np.asarray(frame['extrinsic_Tcw'], dtype=np.float32).reshape(
            (3, 4))
        Rcw, tcw = Tcw[:3, :3], Tcw[:3, 3]
        img_file_name = frame['file_name']
        depth_file_name = frame['depth_file_name']

        # Load image and depth; cv2.imread signals failure by returning None
        img = cv2.imread(os.path.join(self.base_dir, img_file_name))
        if img is None:
            raise Exception('Can not load image:%s' % img_file_name)
        depth = read_sun3d_depth(os.path.join(self.base_dir, depth_file_name))
        ori_H, ori_W, _ = img.shape

        # Post-process image and depth (fill the holes with cross bilateral filter)
        resize_ratio = max(H / ori_H, W / ori_W)
        resized_size = (int(resize_ratio * ori_W), int(resize_ratio * ori_H))
        img = cv2.resize(img, dsize=resized_size)
        depth = cv2.resize(depth,
                           dsize=resized_size,
                           interpolation=cv2.INTER_NEAREST)
        if fill_depth_holes:
            depth = fill_depth_cross_bf(img, depth)
        # Clamp near-zero depth values to a small positive constant
        depth[depth < 1e-5] = 1e-5
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0

        # camera intrinsic parameters: K describes the resized image with the
        # principal point at its centre; new_K moves the principal point to
        # the centre of the (H, W) output.
        K[0, 0] *= resize_ratio
        K[0, 2] = (resize_ratio * ori_W) / 2
        K[1, 1] *= resize_ratio
        K[1, 2] = (resize_ratio * ori_H) / 2
        new_K = K.copy()
        new_K[0, 2] = W / 2
        new_K[1, 2] = H / 2

        # crop and resize with new K
        img = crop_by_intrinsic(img, K, new_K)
        depth = crop_by_intrinsic(depth, K, new_K, interp_method='nearest')

        # camera motion representation: (center, rotation_center2world)
        c = camera_center_from_Tcw(Rcw, tcw)
        Rwc = np.eye(4)
        Rwc[:3, :3] = Rcw.T
        q = quaternion_from_matrix(Rwc)
        log_q = log_quat(q)
        pose_vector = np.concatenate((c, log_q)).astype(np.float32)

        # convert to torch.tensor
        ori_img_tensor = torch.from_numpy(img.transpose(
            (2, 0, 1)))  # (C, H, W)
        img_tensor = ori_img_tensor.clone()
        if self.transform_func:
            img_tensor = self.transform_func(img_tensor)
        depth_tensor = torch.from_numpy(depth).view(1, H, W)  # (1, H, W)

        pose_vector = torch.from_numpy(pose_vector)  # 1-D: c followed by log_q
        Tcw_tensor = torch.from_numpy(Tcw)  # (3, 4)
        K_tensor = torch.from_numpy(new_K)  # (3, 3)

        return pose_vector, img_tensor, depth_tensor, K_tensor, Tcw_tensor, ori_img_tensor
Esempio n. 7
0
def sel_pairs_with_overlap_range_sun3d(scene_frames,
                                       scene_lmdb: LMDBSeqModel,
                                       max_subseq_num,
                                       frames_per_subseq_num=10,
                                       dataset_base_dir=None,
                                       trans_thres=0.15,
                                       rot_thres=15,
                                       frames_range=(0, 0.7),
                                       overlap_thres=0.5,
                                       scene_dist_thres=(0.0, 1.0),
                                       interval_skip_frames=1,
                                       train_anchor_num=100,
                                       test_anchor_num=100):
    """
    Randomly select keyframe sub-sequences plus train/test anchor frames.

    Keyframes are chosen by scanning forward from a random start frame.  When
    a frame's overlap with the last keyframe drops below ``overlap_thres`` or
    its distance exceeds ``trans_thres``, the frame just before it becomes the
    next keyframe.  Overlap is evaluated on depth maps downscaled by 4.
    Scene frames lying between the first and last keyframe, and not keyframes
    themselves, are anchor candidates.  A sub-sequence is kept only when it
    offers at least ``train_anchor_num + test_anchor_num`` candidates.

    :param scene_frames: scene frames to extract subsets from
    :param scene_lmdb: optional LMDB cache to read depth maps from; when None,
        depth maps are read from ``dataset_base_dir``
    :param max_subseq_num: multiplied by the number of scene frames to obtain
        the number of random start frames
    :param frames_per_subseq_num: number of keyframes required per sub-sequence
    :param dataset_base_dir: base directory the depth file names are joined to
    :param trans_thres: upper bound on ``rel_distance`` to the last keyframe
    :param rot_thres: accepted for interface compatibility; currently unused
    :param frames_range: range of start frames as fractions of the scene
        length, within (0, 1)
    :param overlap_thres: lower bound on the overlap with the last keyframe
    :param scene_dist_thres: accepted for interface compatibility; currently
        unused
    :param interval_skip_frames: number of frames skipped before the search for
        the next keyframe begins; values below 1 are replaced by 2
    :param train_anchor_num: number of anchor frames for training
    :param test_anchor_num: number of anchor frames for testing
    :return: list of dicts with keys ``'sub_frames'``,
        ``'train_anchor_frames'`` and ``'test_anchor_frames'``
    """
    use_lmdb_cache = scene_lmdb is not None

    assert dataset_base_dir is not None
    n_frames = len(scene_frames)
    if interval_skip_frames < 1:
        interval_skip_frames = 2
    max_subseq_num = int(n_frames * max_subseq_num)

    rand_start_frame = np.random.randint(
        int(frames_range[0] * n_frames),
        int(frames_range[1] * n_frames),
        size=max_subseq_num)

    sub_seq_list = []

    # Overlap is computed at quarter resolution; the intrinsics are scaled to
    # match and the homogeneous entry is restored to 1.
    dim = list(scene_frames.get_frame_dim(scene_frames.frames[0]))
    dim[0] = int(dim[0] // 4)
    dim[1] = int(dim[1] // 4)
    K = scene_frames.get_K_mat(scene_frames.frames[0])
    # NOTE(review): in-place division mutates the array returned by
    # get_K_mat -- confirm that it is not shared with the scene.
    K /= 4.0
    K[2, 2] = 1.0
    pre_cache_x2d = cam_opt.x_2d_coords(dim[0], dim[1])

    total_anchor_num = train_anchor_num + test_anchor_num

    for start_frame_idx in rand_start_frame:
        # Push start keyframe into frames
        sub_frames = FrameSeqData()
        pre_frame = scene_frames.frames[start_frame_idx]
        sub_frames.frames.append(copy.deepcopy(pre_frame))
        sub_frames_idx = [start_frame_idx]

        # Iterate the remaining keyframes into subset
        cur_frame_idx = start_frame_idx
        while cur_frame_idx + interval_skip_frames < n_frames:
            pre_Tcw = sub_frames.get_Tcw(pre_frame)
            pre_depth_path = sub_frames.get_depth_name(pre_frame)
            if use_lmdb_cache:
                pre_depth = scene_lmdb.read_depth(pre_depth_path)
            else:
                pre_depth = read_sun3d_depth(
                    os.path.join(dataset_base_dir, pre_depth_path))
            pre_depth = cv2.resize(pre_depth, (dim[1], dim[0]),
                                   interpolation=cv2.INTER_NEAREST)

            found_keyframe = False
            pre_search_frame = scene_frames.frames[cur_frame_idx +
                                                   interval_skip_frames - 1]
            for search_idx in range(cur_frame_idx + interval_skip_frames,
                                    n_frames):
                cur_frame = scene_frames.frames[search_idx]
                cur_Tcw = sub_frames.get_Tcw(cur_frame)

                rel_dist = rel_distance(pre_Tcw, cur_Tcw)
                overlap = cam_opt.photometric_overlap(
                    pre_depth,
                    K,
                    Ta=pre_Tcw,
                    Tb=cur_Tcw,
                    pre_cache_x2d=pre_cache_x2d)

                if overlap < overlap_thres or rel_dist > trans_thres:
                    # This frame violates a threshold, so the frame just
                    # before it (index search_idx - 1) becomes the next
                    # keyframe.
                    sub_frames.frames.append(copy.deepcopy(pre_search_frame))
                    pre_frame = pre_search_frame
                    cur_frame_idx = search_idx + 1
                    sub_frames_idx.append(search_idx - 1)
                    found_keyframe = True
                    break

                pre_search_frame = cur_frame

            # Reached the end of the scene without finding another keyframe
            if not found_keyframe:
                break

            if len(sub_frames) > frames_per_subseq_num - 1:
                break

        # If the subset is less than setting, ignore
        if len(sub_frames) < frames_per_subseq_num:
            continue

        min_idx = min(sub_frames_idx)
        max_idx = max(sub_frames_idx)
        print(min_idx, max_idx, n_frames)

        # Anchor candidates: frames between the first and last keyframe that
        # are not keyframes themselves.
        keyframe_idx_set = set(sub_frames_idx)
        potential_anchor_idces = [
            i for i in range(min_idx, max_idx) if i not in keyframe_idx_set
        ]
        if len(potential_anchor_idces) < total_anchor_num:
            continue

        anchor_idces = np.random.choice(len(potential_anchor_idces),
                                        size=total_anchor_num,
                                        replace=False)
        train_anchor_frames = [
            scene_frames.frames[potential_anchor_idces[i]]
            for i in anchor_idces[:train_anchor_num]
        ]
        test_anchor_frames = [
            scene_frames.frames[potential_anchor_idces[i]]
            for i in anchor_idces[train_anchor_num:]
        ]

        sub_seq_list.append({
            'sub_frames': sub_frames,
            'train_anchor_frames': train_anchor_frames,
            'test_anchor_frames': test_anchor_frames
        })
        print('selected', len(potential_anchor_idces), len(sub_frames))

    print('sel: %d' % len(sub_seq_list))
    return sub_seq_list
Esempio n. 8
0
def sel_triple_sun3d(base_dir, scene_frames, max_triple_num,
                     num_sample_per_triple, trans_thres, overlap_thres):
    """
    Select triples (anchor, positive, negative) from a sun3d sequence
    :param base_dir: dataset base directory
    :param scene_frames: scene frames to extract triples
    :param max_triple_num: maximum number of triples; clamped to the number of frames in the sequence
    :param num_sample_per_triple: number of positive/negative samples per triple
    :param trans_thres: translation threshold for positive samples, based on the center of different frames
    :param overlap_thres: overlap threshold for positive samples, (low, high), both bounds exclusive
    :return: [{'anchor': frame_dict, 'positive': [frame_dict, ...], 'negative': [frame_dict, ...]}, {...}, ...]
             Anchors without enough positive or negative candidates are skipped, so the list may hold
             fewer than max_triple_num entries. An empty sequence yields an empty list.
    """
    num_frames = len(scene_frames)
    if num_frames == 0:
        return []

    # Intrinsics and the 2D coordinate cache are taken from the first frame and
    # reused for every overlap query below.
    dim = scene_frames.get_frame_dim(scene_frames.frames[0])
    K = scene_frames.get_K_mat(scene_frames.frames[0])
    pre_cache_x2d = cam_opt.x_2d_coords(dim[0], dim[1])

    # One camera center per frame, indexed by a KD-tree for radius queries.
    camera_centers = np.empty((num_frames, 3), dtype=np.float32)
    for i, frame in enumerate(scene_frames.frames):
        Tcw = scene_frames.get_Tcw(frame)
        camera_centers[i, :] = cam_opt.camera_center_from_Tcw(
            Tcw[:3, :3], Tcw[:3, 3])

    kdtree = KDTree(camera_centers)
    all_idces = set(range(num_frames))

    triple_list = []
    # Sampling without replacement raises if more anchors are requested than
    # frames exist, so clamp the request to the sequence length.
    anchor_idces = np.random.choice(num_frames,
                                    min(max_triple_num, num_frames),
                                    replace=False)
    for anchor_idx in anchor_idces:
        anchor_idx = int(anchor_idx)
        anchor_frame = scene_frames.frames[anchor_idx]
        anchor_Tcw = scene_frames.get_Tcw(anchor_frame)
        anchor_depth_path = scene_frames.get_depth_name(anchor_frame)
        anchor_depth = read_sun3d_depth(
            os.path.join(base_dir, anchor_depth_path))
        # Floor near-zero depths (presumably invalid pixels) to a small
        # positive value before they are used in the overlap computation.
        anchor_depth[anchor_depth < 1e-5] = 1e-5

        # Positive candidates: frames whose camera center lies within
        # trans_thres of the anchor and whose overlap falls in the open window.
        potential_pos_idces = kdtree.query_ball_point(
            camera_centers[anchor_idx], trans_thres)
        pos_idces = []
        for potential_pos_idx in potential_pos_idces:
            potential_pos_frame = scene_frames.frames[potential_pos_idx]
            potential_pos_Tcw = scene_frames.get_Tcw(potential_pos_frame)
            overlap = cam_opt.photometric_overlap(anchor_depth,
                                                  K,
                                                  Ta=anchor_Tcw,
                                                  Tb=potential_pos_Tcw,
                                                  pre_cache_x2d=pre_cache_x2d)
            if overlap_thres[0] < overlap < overlap_thres[1]:
                pos_idces.append(potential_pos_idx)

        if len(pos_idces) < num_sample_per_triple:
            continue

        # Negatives are every frame that is neither a positive nor the anchor
        # itself (the anchor must never be its own negative sample).
        neg_idces = sorted(all_idces - set(pos_idces) - {anchor_idx})
        if len(neg_idces) < num_sample_per_triple:
            continue

        sel_pos_idces = np.random.choice(pos_idces,
                                         num_sample_per_triple,
                                         replace=False)
        sel_neg_idces = np.random.choice(neg_idces,
                                         num_sample_per_triple,
                                         replace=False)

        # Deep-copy so callers can mutate the triples without touching
        # scene_frames.
        triple_list.append({
            'anchor':
            copy.deepcopy(anchor_frame),
            'positive': [
                copy.deepcopy(scene_frames.frames[idx])
                for idx in sorted(sel_pos_idces)
            ],
            'negative': [
                copy.deepcopy(scene_frames.frames[idx])
                for idx in sorted(sel_neg_idces)
            ],
        })

    return triple_list
Esempio n. 9
0
def _show_frame_at(idx):
    """Render frame `idx` of the global `frames` as a colored point cloud plus its pose.

    Reads the frame's image and depth map from `base_dir`, lifts the depth map
    to 3D points with `cam_opt.pi_inv`, moves them with the inverted camera
    pose (presumably into world coordinates -- confirm against `cam_opt`), and
    pushes the result to the global visualizer `vis`.

    :param idx: index into `frames.frames`; must be a valid, non-negative index
    """
    cur_frame = frames.frames[idx]
    cur_Tcw = cur_frame['extrinsic_Tcw']
    K = K_from_frame(cur_frame)

    # Read image (scaled to [0, 1]) and depth
    cur_img = cv2.imread(os.path.join(
        base_dir, cur_frame['file_name'])).astype(np.float32) / 255.0
    cur_depth = read_sun3d_depth(
        os.path.join(base_dir, cur_frame['depth_file_name']))
    h, w, _ = cur_img.shape

    # Back-project every pixel, then apply the inverse of Tcw.
    X_3d = cam_opt.pi_inv(K, x_2d.reshape((h * w, 2)),
                          cur_depth.reshape((h * w, 1)))
    cur_Twc = cam_opt.camera_pose_inv(cur_Tcw[:3, :3], cur_Tcw[:3, 3])
    X_3d = cam_opt.transpose(cur_Twc[:3, :3], cur_Twc[:3, 3], X_3d)

    vis.set_point_cloud(X_3d, cur_img.reshape((h * w, 3)))
    vis.add_frame_pose(cur_Tcw[:3, :3], cur_Tcw[:3, 3])


def keyPressEvent(obj, event):
    """Key-press callback: show the current frame, then step the frame cursor.

    'Right' displays the frame at the global `frame_idx` and advances the
    cursor by 20 frames; 'Left' displays it and moves the cursor back by 20.
    Any other key is ignored. The cursor is clamped to the valid index range
    so repeated presses neither wrap around through negative indexing nor run
    off the end of the sequence.

    :param obj: event source providing `GetKeySym()` (the pressed key's name)
    :param event: event name supplied by the caller; unused
    """
    global frame_idx
    steps = {'Right': 20, 'Left': -20}
    key = obj.GetKeySym()
    if key not in steps:
        return

    last_idx = len(frames.frames) - 1
    if last_idx < 0:
        # Nothing to display.
        return

    # Clamp before use in case the cursor was left out of range elsewhere.
    frame_idx = min(max(frame_idx, 0), last_idx)
    _show_frame_at(frame_idx)
    frame_idx = min(max(frame_idx + steps[key], 0), last_idx)
    return
Esempio n. 10
0
    cur_Tcw = cur_frame['extrinsic_Tcw']
    cur_name = cur_frame['file_name']
    cur_depth_name = cur_frame['depth_file_name']

    next_frame = frames.frames[frame_idx + 5]
    next_Tcw = next_frame['extrinsic_Tcw']
    next_name = next_frame['file_name']

    K = K_from_frame(cur_frame)

    # Read image
    cur_img = cv2.imread(os.path.join(base_dir, cur_name)).astype(
        np.float32) / 255.0
    next_img = cv2.imread(os.path.join(base_dir, next_name)).astype(
        np.float32) / 255.0
    cur_depth = read_sun3d_depth(os.path.join(base_dir, cur_depth_name))
    h, w, c = cur_img.shape

    rel_T = cam_opt.relateive_pose(cur_Tcw[:3, :3], cur_Tcw[:3, 3],
                                   next_Tcw[:3, :3], next_Tcw[:3, 3])

    # Translation
    Cb = cam_opt.camera_center_from_Tcw(rel_T[:3, :3], rel_T[:3, 3])
    baseline = np.linalg.norm(Cb)

    # View angle
    q = trans.quaternion_from_matrix(rel_T)
    R = trans.quaternion_matrix(q)
    rel_rad, rel_axis, _ = trans.rotation_from_matrix(R)
    rel_deg = np.rad2deg(rel_rad)