Example #1
    def load_frame_2_tensors(self, frame, out_frame_dim):
        C, H, W = out_frame_dim
        K = K_from_frame(frame)
        Tcw = np.asarray(frame['extrinsic_Tcw'], dtype=np.float32).reshape(
            (3, 4))
        Rcw, tcw = Tcw[:3, :3], Tcw[:3, 3]
        img_file_name = frame['file_name']
        depth_file_name = frame['depth_file_name']

        # Load image
        img = cv2.imread(os.path.join(self.base_dir, img_file_name))
        ori_H, ori_W, _ = img.shape
        img = cv2.cvtColor(cv2.resize(img, dsize=(W, H)),
                           cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0

        # Load the depth map:
        # depth = read_sun3d_depth(os.path.join(self.base_dir, depth_file_name))
        # depth = cv2.resize(depth, dsize=(W, H), interpolation=cv2.INTER_NEAREST)
        # depth[depth < 1e-5] = 1e-5

        # camera intrinsic parameters:
        # K[0, 0] *= W / ori_W
        # K[0, 2] *= W / ori_W
        # K[1, 1] *= H / ori_H
        # K[1, 2] *= H / ori_H
        # K_tensor = torch.from_numpy(K)  # (3, 3)

        # camera motion representation: (center, rotation_center2world)
        c = camera_center_from_Tcw(Rcw, tcw)
        Rwc = np.eye(4)
        Rwc[:3, :3] = Rcw.T
        q = quaternion_from_matrix(Rwc)
        log_q = log_quat(q)
        pose_vector = np.concatenate((c, log_q)).astype(np.float32)

        # convert to torch.tensor
        img_tensor = torch.from_numpy(img.transpose((2, 0, 1)))  # (C, H, W)
        if self.transform_func:
            img_tensor = self.transform_func(img_tensor)
        # depth_tensor = torch.from_numpy(depth).view(1, H, W)  # (1, H, W)

        pose_vector = torch.from_numpy(pose_vector)  # (6, ) camera center + log quaternion

        return pose_vector, img_tensor
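The "camera motion representation" block above turns the world-to-camera extrinsics Tcw into a camera center plus a log-quaternion. As a reference for what the two helpers are assumed to compute (the repository's camera_center_from_Tcw and log_quat may differ in quaternion ordering or scaling), here is a minimal numpy sketch:

import numpy as np

def camera_center_from_Tcw_sketch(Rcw, tcw):
    # x_cam = Rcw @ x_world + tcw, so the camera center C solves Rcw @ C + tcw = 0
    return -Rcw.T @ tcw

def log_quat_sketch(q, eps=1e-8):
    # logarithm map of a unit quaternion (w, x, y, z) -> 3-vector (axis * half-angle)
    w, v = q[0], np.asarray(q[1:], dtype=np.float64)
    norm_v = np.linalg.norm(v)
    if norm_v < eps:
        return np.zeros(3)
    return (v / norm_v) * np.arctan2(norm_v, w)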
Example #2
def export_to_tum_format(frames: FrameSeqData,
                         output_path,
                         comment=None,
                         write_img_info=False):
    """
    Export the frame collection into tum format
    :param frames: frame collection, instance of FrameSeqData
    :param output_path: file with frames, in tum format
    :param comment: comment string put into the header line after '#'
    """
    with open(output_path, 'w') as out_f:
        if comment is not None:
            out_f.write('# ' + comment + '\n')

        for frame in frames.frames:
            Tcw = frame['extrinsic_Tcw']
            if frame['timestamp'] is None:
                continue
            timestamp = str(frame['timestamp'])
            img_name = frame['file_name']
            depth_name = frame['depth_file_name']
            Twc = cam_opt.camera_pose_inv(Tcw[:3, :3], Tcw[:3, 3])

            t = Twc[:3, 3]
            q = trans.quaternion_from_matrix(Twc)

            if write_img_info:
                out_f.write(timestamp + ' ')
                out_f.write(img_name + ' ')
                out_f.write(timestamp + ' ')
                out_f.write(depth_name + ' ')

            out_f.write(timestamp + ' ')
            for t_idx in range(0, 3):
                out_f.write(str(t[t_idx]) + ' ')
            for q_idx in range(1, 4):
                out_f.write(str(q[q_idx]) + ' ')
            out_f.write(str(q[0]))  # qw last (TUM convention)
            out_f.write('\n')
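Each trajectory line written above follows the TUM convention "timestamp tx ty tz qx qy qz qw", which is why the scalar component q[0] goes last. A brief usage sketch (the frames object and the output path are placeholders):

# frames: a populated FrameSeqData instance (placeholder here)
export_to_tum_format(frames,
                     output_path='/tmp/seq_tum.txt',
                     comment='exported trajectory',
                     write_img_info=False)
# each pose line: "<timestamp> tx ty tz qx qy qz qw"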
Example #3
def rel_rot_angle(T1, T2):
    R1 = T1[:3, :3]
    R2 = T2[:3, :3]
    q1 = trans.quaternion_from_matrix(R1)
    q2 = trans.quaternion_from_matrix(R2)
    return rel_rot_quaternion_deg(q1, q2)
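rel_rot_quaternion_deg is not shown here; below is a minimal sketch of what it is assumed to compute, namely the geodesic angle (in degrees) between the two rotations via the unit-quaternion dot product:

import numpy as np

def rel_rot_quaternion_deg_sketch(q1, q2):
    # angle between the rotations represented by unit quaternions q1 and q2
    q1 = q1 / np.linalg.norm(q1)
    q2 = q2 / np.linalg.norm(q2)
    d = np.clip(abs(np.dot(q1, q2)), -1.0, 1.0)
    return np.rad2deg(2.0 * np.arccos(d))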
Example #4
    cur_img = cv2.imread(os.path.join(base_dir, cur_name)).astype(
        np.float32) / 255.0
    next_img = cv2.imread(os.path.join(base_dir, next_name)).astype(
        np.float32) / 255.0
    cur_depth = load_depth_from_png(os.path.join(base_dir, cur_depth_name))
    h, w, c = cur_img.shape

    rel_T = cam_opt.relateive_pose(cur_Tcw[:3, :3], cur_Tcw[:3, 3],
                                   next_Tcw[:3, :3], next_Tcw[:3, 3])

    # Translation
    Cb = cam_opt.camera_center_from_Tcw(rel_T[:3, :3], rel_T[:3, 3])
    baseline = np.linalg.norm(Cb)

    # View angle
    q = trans.quaternion_from_matrix(rel_T)
    R = trans.quaternion_matrix(q)
    rel_rad, rel_axis, _ = trans.rotation_from_matrix(R)
    rel_deg = np.rad2deg(rel_rad)

    next2cur, _ = cam_opt.wrapping(cur_img, next_img, cur_depth, K,
                                   rel_T[:3, :3], rel_T[:3, 3])
    show_multiple_img([{
        'img': cur_img,
        'title': 'a'
    }, {
        'img': next2cur,
        'title': 'wrap_b2a'
    }, {
        'img': next_img,
        'title': 'b'
    }])
Example #5
    def load_frame_2_tensors(self, frame, out_frame_dim):
        C, H, W = out_frame_dim
        K = self.depth_k.copy()

        Tcw = np.asarray(frame['extrinsic_Tcw'], dtype=np.float32).reshape(
            (3, 4))
        Rcw, tcw = Tcw[:3, :3], Tcw[:3, 3]
        img_file_name = frame['file_name']
        depth_file_name = frame['depth_file_name']

        # Load image and depth
        img = cv2.imread(os.path.join(self.base_dir, img_file_name))
        ori_H, ori_W, _ = img.shape
        img = crop_by_intrinsic(img, self.img_k, self.depth_k)
        img = cv2.resize(img, (ori_W, ori_H))
        depth = read_7scenese_depth(
            os.path.join(self.base_dir, depth_file_name))

        # Post-process image and depth (fill the holes with cross bilateral filter)
        resize_ratio = max(H / ori_H, W / ori_W)
        img = cv2.resize(img,
                         dsize=(int(resize_ratio * ori_W),
                                int(resize_ratio * ori_H)))
        depth = cv2.resize(depth,
                           dsize=(int(resize_ratio * ori_W),
                                  int(resize_ratio * ori_H)),
                           interpolation=cv2.INTER_NEAREST)
        if self.fill_depth_holes:
            depth = fill_depth_cross_bf(img, depth)
        depth[depth < 1e-5] = 1e-5
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0

        # camera intrinsic parameters:
        K[0, 0] *= resize_ratio
        K[0, 2] = (resize_ratio * ori_W) / 2
        K[1, 1] *= resize_ratio
        K[1, 2] = (resize_ratio * ori_H) / 2
        new_K = K.copy()
        new_K[0, 2] = W / 2
        new_K[1, 2] = H / 2

        # crop and resize with new K
        img = crop_by_intrinsic(img, K, new_K)
        depth = crop_by_intrinsic(depth, K, new_K, interp_method='nearest')

        # camera motion representation: (center, rotation_center2world)
        c = camera_center_from_Tcw(Rcw, tcw)
        Rwc = np.eye(4)
        Rwc[:3, :3] = Rcw.T
        q = quaternion_from_matrix(Rwc)
        log_q = log_quat(q)
        pose_vector = np.concatenate((c, log_q)).astype(np.float32)

        # convert to torch.tensor
        ori_img_tensor = torch.from_numpy(img.transpose(
            (2, 0, 1)))  # (C, H, W)
        img_tensor = ori_img_tensor.clone()
        if self.transform_func:
            img_tensor = self.transform_func(img_tensor)
        depth_tensor = torch.from_numpy(depth).view(1, H, W)  # (1, H, W)

        pose_vector = torch.from_numpy(pose_vector)  # (6, ) camera center + log quaternion
        Tcw_tensor = torch.from_numpy(Tcw)  # (3, 4)
        K_tensor = torch.from_numpy(new_K)  # (3, 3)

        return pose_vector, img_tensor, depth_tensor, K_tensor, Tcw_tensor, ori_img_tensor
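A worked example (with made-up dimensions, purely illustrative) of the intrinsics bookkeeping performed above when resizing and re-centering:

# e.g. a 640x480 source frame and a 256x256 target:
ori_W, ori_H, W, H = 640, 480, 256, 256
resize_ratio = max(H / ori_H, W / ori_W)                            # max(0.533, 0.4) = 0.533...
resized = (int(resize_ratio * ori_W), int(resize_ratio * ori_H))    # (341, 256)
# fx, fy are scaled by resize_ratio; the principal point is first placed at the
# centre of the resized frame (K) and then, via crop_by_intrinsic with new_K,
# at the centre of the final W x H crop (W / 2, H / 2).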
Example #6
def lstm_preprocess(seq_dict,
                    num_pyramid=3,
                    M=2000,
                    add_noise_func=add_drift_noise,
                    rot_noise_deg=10.0,
                    displacement_dist_std=0.1):
    """
    preprocess SUN3DSeqDataset for LSTMNet, generate selected gradient pixel indices
    :param seq_dict: dict returned from dataloader
    :param num_pyramid: number of feature map pyramids used in ba_tracknet and lstm_net
    :param M: the maximum number of pixels we want to select in one feature map
    :return: all variables used in LSTMNet, transposed to frame-major order (F first)
                    I: frame images of the sequence, (F, N, C, H, W)
                    d: depth maps of the sequence, (F, N, 1, H, W)
                    sel_indices: selected indices of pixels of each frame, (F, N, num_pyramid, M)
                    K: intrinsic matrix at level 0, dim: (F, N, 3, 3)
                    T: noised pose, (F, N, 4, 4)
                    T_gt: ground truth pose, (F, N, 4, 4)
                    tq: noised pose as translation + quaternion, (F, N, 7)
                    tq_gt: ground truth translation + quaternion, (F, N, 7)
    """
    I = seq_dict['img']
    d = seq_dict['depth']
    K = seq_dict['K']
    Tcw = seq_dict['Tcw']
    Tcw_np = Tcw.numpy()
    # RGB -> grayscale with ITU-R BT.601 weights
    gray_img_tensor = I[:, :, 0:1, :, :] * 0.299 + \
                      I[:, :, 1:2, :, :] * 0.587 + \
                      I[:, :, 2:3, :, :] * 0.114

    N, L, C, H, W = I.shape

    # add noise to camera pose
    T_gt = np.eye(4, dtype=np.float32).reshape(
        (1, 4, 4)).repeat(N * L, axis=0).reshape((N, L, 4, 4))
    T_gt[:, :, :3, :] = Tcw_np
    T_np = np.eye(4, dtype=np.float32).reshape(
        (1, 4, 4)).repeat(N * L, axis=0).reshape((N, L, 4, 4))
    for i in range(N):
        noise_T, rand_std_radius = add_noise_func(
            Tcw_np[i],
            rot_noise_deg=rot_noise_deg,
            displacement_dist_std=displacement_dist_std)
        T_np[i, :, :3, :] = noise_T
    T_gt = torch.from_numpy(T_gt)
    T = torch.from_numpy(T_np)

    # convert rotation to quaternion
    q = np.empty((N, L, 4), dtype=np.float32)
    t = np.empty((N, L, 3), dtype=np.float32)
    q_gt = np.empty((N, L, 4), dtype=np.float32)
    t_gt = np.empty((N, L, 3), dtype=np.float32)
    for i in range(N):
        for j in range(L):
            q_gt[i, j, :] = quaternion_from_matrix(Tcw_np[i, j, :3, :3].copy())
            t_gt[i, j, :] = Tcw_np[i, j, :3, 3]
            q[i, j, :] = quaternion_from_matrix(T_np[i, j, :3, :3].copy())
            t[i, j, :] = T_np[i, j, :3, 3]
    q_gt = torch.from_numpy(q_gt)
    t_gt = torch.from_numpy(t_gt)
    tq_gt = torch.cat([t_gt, q_gt], dim=2)
    q = torch.from_numpy(q)
    t = torch.from_numpy(t)
    tq = torch.cat([t, q], dim=2)

    # test
    # Compute Accuracy, noise level
    # init_q_accu = 0.0
    # init_t_accu = 0.0
    # for i in range(1, tq.shape[0]):
    #     cur_gt_abs_tq = q_module.invert_pose_quaternion(tq_gt[i, :, :])
    #     cur_init_abs_tq = q_module.invert_pose_quaternion(tq[i, :, :])
    #     init_q_accu = torch.mean(relative_angle(cur_init_abs_tq[:, 3:], cur_gt_abs_tq[:, 3:]))
    #     init_t_accu = torch.sqrt(F.mse_loss(cur_init_abs_tq[:, :3], cur_gt_abs_tq[:, :3]))
    #     print(init_q_accu)
    #     print(init_t_accu)

    # init_q_accu /= (tq.shape[0] - 1)
    # init_t_accu /= (tq.shape[0] - 1)

    # print(Tcw_np[0, 1])
    # print(T_gt[0, 1])
    # rec_R = batched_quaternion2rot(q.view(N * F, 4))
    # print(rec_R[0], T[0, 0, :3, :3])
    # print(t[0, 0], T[0, 0, :3, 3])
    # rec_R_gt = batched_quaternion2rot(q_gt.view(N * F, 4))
    # print(rec_R_gt[0], T_gt[0, 0, :3, :3])
    # print(t_gt[0, 0], T_gt[0, 0, :3, 3])

    # select pixels at gradient edge
    sel_indices = torch.empty(N, L, num_pyramid, M, dtype=torch.long)
    # for i in range(1, F):
    #     rel_T = batched_relative_pose(Tcw[:, i, :3, :3], Tcw[:, i, :3, 3], Tcw[:, i - 1, :3, :3], Tcw[:, i - 1, :3, 3])
    #     sel_indices[:, i, :, :] = batched_select_gradient_pixels(gray_img_tensor[:, i, :, :, :], d[:, i, :, :, :],
    #                                                              I[:, i - 1, :, :, :], K[:, i, :, :], rel_T[:, :, :3], rel_T[:, :, 3],
    #                                                              grad_thres=15.0 / 255.0,
    #                                                              num_pyramid=num_pyramid, num_gradient_pixels=M, visualize=False)
    # rel_T = batched_relative_pose(Tcw[:, 0, :3, :3], Tcw[:, 0, :3, 3], Tcw[:, 1, :3, :3], Tcw[:, 1, :3, 3])
    # sel_indices[:, 0, :, :] = batched_select_gradient_pixels(gray_img_tensor[:, 0, :, :, :], d[:, 0, :, :, :],
    #                                                          I[:, 1, :, :, :], K[:, 0, :, :], rel_T[:, :, :3], rel_T[:, :, 3],
    #                                                          grad_thres=15.0 / 255.0,
    #                                                          num_pyramid=num_pyramid, num_gradient_pixels=M, visualize=False)
    return I.transpose(0, 1).contiguous(), d.transpose(0, 1).contiguous(), sel_indices.transpose(0, 1).contiguous(),\
           K.transpose(0, 1).contiguous(), T.transpose(0, 1).contiguous(), T_gt.transpose(0, 1).contiguous(),\
           tq.transpose(0, 1).contiguous(), tq_gt.transpose(0, 1).contiguous()
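A hedged usage sketch: shapes follow the docstring, the dict would normally come from a SUN3D sequence dataloader, and the module's add_drift_noise is assumed to accept the (F, 3, 4) camera poses used here.

import torch

seq_dict = {
    'img':   torch.rand(2, 5, 3, 192, 256),            # (N, F, C, H, W)
    'depth': torch.rand(2, 5, 1, 192, 256),            # (N, F, 1, H, W)
    'K':     torch.eye(3).repeat(2, 5, 1, 1),           # (N, F, 3, 3)
    'Tcw':   torch.eye(4)[:3, :].repeat(2, 5, 1, 1),    # (N, F, 3, 4)
}
I, d, sel_indices, K, T, T_gt, tq, tq_gt = lstm_preprocess(seq_dict)
# after the transposes in the return statement everything is frame-major,
# e.g. I: (F, N, C, H, W), T / T_gt: (F, N, 4, 4), tq / tq_gt: (F, N, 7)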
Example #7
def ba_tracknet_preprocess(frames_dict, num_pyramid=3, M=2000):
    """
    preprocess ImagePairDataset for ba_tracknet, generate selected gradient pixel indices
    :param frames_dict: dict returned from dataloader
    :param num_pyramid: number of feature map pyramids used in ba_tracknet
    :param M: the maximum number of pixels we want to select in one feature map
    :return: all variables used in ba_tracknet
                    I_a: Image of frame A, dim: (N, C, H, W)
                    d_a: Depth of frame A, dim: (N, 1, H, W)
                    sel_a_indices: selected point index on num_pyramid-level, dim: (N, num_pyramid, M)
                    K: intrinsic matrix at level 0: dim: (N, 3, 3)
                    I_b: Image of frame B, dim: (N, C, H, W)
                    q_gt : Groundtruth of quaternion, dim: (N, 4)
                    t_gt: Groundtruth of translation, dim: (N, 3)
                    se3_gt: Groundtruth of se3, dim: (N, 6)
                    Tcw: Groundtruth camera pose, dim: (N, 3, 4)
    """
    N, C, H, W = frames_dict['frame0']['img'].shape
    I_a = frames_dict['frame0']['img']
    # I_a = F.interpolate(I_a, scale_factor=0.5)
    # RGB -> grayscale with ITU-R BT.601 weights
    gray_img_tensor = I_a[:, 0:1, :, :] * 0.299 + \
                      I_a[:, 1:2, :, :] * 0.587 + \
                      I_a[:, 2:3, :, :] * 0.114
    d_a = frames_dict['frame0']['depth']
    # d_a = F.interpolate(d_a, scale_factor=0.5)
    K = frames_dict['frame0']['K']
    I_b = frames_dict['frame1']['img']
    # I_b = F.interpolate(I_b, scale_factor=0.5)
    Tcw = frames_dict['frame1']['Tcw']
    sel_a_indices = batched_select_gradient_pixels(gray_img_tensor,
                                                   d_a,
                                                   I_b,
                                                   K,
                                                   Tcw[:, :, :3],
                                                   Tcw[:, :, 3],
                                                   grad_thres=15.0 / 255.0,
                                                   num_pyramid=num_pyramid,
                                                   num_gradient_pixels=M,
                                                   visualize=False)
    Tcw_np = Tcw.numpy()
    # print(Tcw[0])
    q_gt = torch.empty((N, 4), dtype=torch.float, device=torch.device('cpu'))
    t_gt = torch.empty((N, 3), dtype=torch.float, device=torch.device('cpu'))
    se3_gt = torch.empty((N, 6), dtype=torch.float, device=torch.device('cpu'))
    for i in range(N):
        R_mat = np.eye(4)
        R_mat[:3, :3] = Tcw_np[i, :3, :3]
        q = quaternion_from_matrix(R_mat)
        q_gt[i, :] = torch.Tensor(q)
        t_gt[i, :] = torch.Tensor(Tcw_np[i, :3, 3])

        T_mat = R_mat
        T_mat[:3, 3] = Tcw_np[i, :3, 3]
        T = SE3(T_mat)  # assumes an SE3 Lie-group wrapper with a .log() method is available
        t = T.log().ravel()
        se3_gt[i, :3] = torch.Tensor(t[3:])
        se3_gt[i, 3:] = torch.Tensor(t[:3])
    # print(quaternion_matrix(q_gt[0].numpy()), t_gt[0].numpy())
    # R, t = se3_exp(se3_gt)
    # print(R[0].numpy(), t[0].numpy())

    return I_a, d_a, sel_a_indices, K, I_b, q_gt, t_gt, se3_gt, Tcw
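The commented-out consistency check at the end of the function hints at rebuilding rotations from the quaternions. A minimal numpy sketch of that inverse mapping, using the same (w, x, y, z) ordering produced by quaternion_from_matrix:

import numpy as np

def quat_to_rot_sketch(q):
    # unit quaternion (w, x, y, z) -> 3x3 rotation matrix
    w, x, y, z = q / np.linalg.norm(q)
    return np.array([
        [1 - 2 * (y * y + z * z),     2 * (x * y - z * w),     2 * (x * z + y * w)],
        [    2 * (x * y + z * w), 1 - 2 * (x * x + z * z),     2 * (y * z - x * w)],
        [    2 * (x * z - y * w),     2 * (y * z + x * w), 1 - 2 * (x * x + y * y)],
    ])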
Example #8
    def load_frame_2_tensors(self, frame, out_frame_dim):
        C, H, W = out_frame_dim

        tag = frame['tag']
        is_neg = tag is not None and 'n' in tag

        K = K_from_frame(frame)
        Tcw = np.asarray(frame['extrinsic_Tcw'][:3, :],
                         dtype=np.float32).reshape((3, 4))
        Rcw, tcw = Tcw[:3, :3], Tcw[:3, 3]
        img_file_name = frame['file_name']
        depth_file_name = frame['depth_file_name']

        # Load image and depth
        img = cv2.imread(os.path.join(self.base_dir, img_file_name))
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0
        if self.adjust_gamma:

            def enhance_equal_hist(img, rgb=False):
                if rgb:
                    # equalize each channel independently
                    for channel in range(img.shape[2]):
                        img[:, :, channel] = exposure.equalize_hist(img[:, :, channel])
                else:
                    img = exposure.equalize_hist(img)
                return img.astype(np.float32)

            img = enhance_equal_hist(img, rgb=True)
        depth = load_depth_from_tiff((os.path.join(self.base_dir,
                                                   depth_file_name)))
        ori_H, ori_W, _ = img.shape

        # Post-process image and depth (resize to the output resolution)
        img = cv2.resize(img, dsize=(int(W), int(H)))
        depth = cv2.resize(depth,
                           dsize=(int(W), int(H)),
                           interpolation=cv2.INTER_NEAREST)
        depth[depth < 1e-5] = 1e-5

        # camera intrinsic parameters:
        K[0, 0] *= (W / ori_W)
        K[0, 2] *= (W / ori_W)
        K[1, 1] *= (H / ori_H)
        K[1, 2] *= (H / ori_H)

        # camera motion representation: (center, rotation_center2world)
        c = camera_center_from_Tcw(Rcw, tcw)
        Rwc = np.eye(4)
        Rwc[:3, :3] = Rcw.T
        q = quaternion_from_matrix(Rwc)
        log_q = log_quat(q)
        pose_vector = np.concatenate((c, log_q)).astype(np.float32)

        # convert to torch.tensor
        ori_img_tensor = torch.from_numpy(img.transpose(
            (2, 0, 1)))  # (C, H, W)
        img_tensor = ori_img_tensor.clone()
        if self.transform_func:
            img_tensor = self.transform_func(img_tensor)
        depth_tensor = torch.from_numpy(depth).view(1, H, W)  # (1, H, W)
        pose_vector = torch.from_numpy(pose_vector)  # (6, ) camera center + log quaternion
        Tcw_tensor = torch.from_numpy(Tcw)  # (3, 4)
        K_tensor = torch.from_numpy(K)  # (3, 3)
        neg_tensor = torch.from_numpy(
            np.asarray([1 if is_neg else 0], dtype=np.int32))

        return pose_vector, img_tensor, depth_tensor, K_tensor, Tcw_tensor, ori_img_tensor, neg_tensor
    def load_frame_2_tensors(self, frame, out_frame_dim):
        C, H, W = out_frame_dim

        tag = frame['tag']
        is_neg = tag is not None and 'n' in tag

        K = K_from_frame(frame)
        Tcw = np.asarray(frame['extrinsic_Tcw'][:3, :],
                         dtype=np.float32).reshape((3, 4))
        Rcw, tcw = Tcw[:3, :3], Tcw[:3, 3]
        img_file_name = frame['file_name']
        depth_file_name = frame['depth_file_name']

        # Load image and depth
        img = cv2.imread(os.path.join(self.cambridge_base_dir, img_file_name))
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0
        depth = load_depth_from_tiff((os.path.join(self.cambridge_base_dir,
                                                   depth_file_name)))
        ori_H, ori_W, _ = img.shape

        # Post-process image and depth (resize, optional gamma jitter and depth clamping)
        img = cv2.resize(img, dsize=(int(W), int(H)))
        if self.random_gamma:
            gamma = np.random.uniform(low=self.random_gamma_thres[0],
                                      high=self.random_gamma_thres[1])
            img = adjust_gamma(img, gamma)

        depth = cv2.resize(depth,
                           dsize=(int(W), int(H)),
                           interpolation=cv2.INTER_NEAREST)
        if self.remove_depth_outlier > 0:
            depth = clamp_data_with_ratio(depth,
                                          ratio=self.remove_depth_outlier,
                                          fill_value=1e-5)
        depth[depth < 1e-5] = 1e-5

        # camera intrinsic parameters:
        K[0, 0] *= (W / ori_W)
        K[0, 2] *= (W / ori_W)
        K[1, 1] *= (H / ori_H)
        K[1, 2] *= (H / ori_H)

        # camera motion representation: (center, rotation_center2world)
        c = camera_center_from_Tcw(Rcw, tcw)
        Rwc = np.eye(4)
        Rwc[:3, :3] = Rcw.T
        q = quaternion_from_matrix(Rwc)
        log_q = log_quat(q)
        pose_vector = np.concatenate((c, log_q)).astype(np.float32)

        # convert to torch.tensor
        ori_img_tensor = torch.from_numpy(img.transpose(
            (2, 0, 1)))  # (C, H, W)
        img_tensor = ori_img_tensor.clone()
        if self.transform_func:
            img_tensor = self.transform_func(img_tensor)
        depth_tensor = torch.from_numpy(depth).view(1, H, W)  # (1, H, W)
        pose_vector = torch.from_numpy(pose_vector)  # (6, ) camera center + log quaternion
        Tcw_tensor = torch.from_numpy(Tcw)  # (3, 4)
        K_tensor = torch.from_numpy(K)  # (3, 3)
        neg_tensor = torch.from_numpy(
            np.asarray([1 if is_neg else 0], dtype=np.int32))

        return pose_vector, img_tensor, depth_tensor, K_tensor, Tcw_tensor, ori_img_tensor, neg_tensor
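A hedged sketch of the gamma adjustment used above, assuming adjust_gamma operates on float RGB images already scaled to [0, 1]; the repository's helper may be implemented differently (e.g. through a uint8 lookup table):

import numpy as np

def adjust_gamma_sketch(img, gamma):
    # simple power-law gamma correction for float images in [0, 1]
    return np.clip(img, 0.0, 1.0) ** gamma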