Example 3
def ut_relocalization():
    """unit test for relocalization"""
    obj = SequenceManager(
        "../../dataset/basketball/basketball_anno.mat",
        "../../dataset/basketball/images",
        "../../dataset/basketball/basketball_ground_truth.mat",
        "../../dataset/basketball/objects_basketball.mat")
    img1 = 0
    img2 = 390

    gray1 = obj.get_image_gray(img1)
    gray2 = obj.get_image_gray(img2)

    pose1 = obj.get_ptz(img1)
    pose2 = obj.get_ptz(img2)
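    # pose = (pan, tilt, focal_length), matching the KeyFrame constructor below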

    mask1 = obj.get_bounding_box_mask(img1)
    mask2 = obj.get_bounding_box_mask(img2)

    camera = obj.get_camera(0)

    keyframe1 = KeyFrame(gray1, img1, camera.camera_center,
                         camera.base_rotation, camera.principal_point[0],
                         camera.principal_point[1], pose1[0], pose1[1],
                         pose1[2])
    keyframe2 = KeyFrame(gray2, img2, camera.camera_center,
                         camera.base_rotation, camera.principal_point[0],
                         camera.principal_point[1], pose2[0], pose2[1],
                         pose2[2])

    kp1, des1 = detect_compute_sift(gray1, 100)
    after_removed_index1 = keypoints_masking(kp1, mask1)
    kp1 = list(np.array(kp1)[after_removed_index1])
    des1 = des1[after_removed_index1]

    kp2, des2 = detect_compute_sift(gray2, 100)
    after_removed_index2 = keypoints_masking(kp2, mask2)
    kp2 = list(np.array(kp2)[after_removed_index2])
    des2 = des2[after_removed_index2]

    keyframe1.feature_pts = kp1
    keyframe1.feature_des = des1

    keyframe2.feature_pts = kp2
    keyframe2.feature_des = des2

    kp1_inlier, index1, kp2_inlier, index2 = match_sift_features(
        kp1, des1, kp2, des2)

    cv.imshow(
        "test",
        draw_matches(obj.get_image(img1), obj.get_image(img2), kp1_inlier,
                     kp2_inlier))
    cv.waitKey(0)

    test_map = Map()
    """first frame"""
    for i in range(len(kp1)):
        theta, phi = TransFunction.from_image_to_ray(obj.u, obj.v, pose1[2],
                                                     pose1[0], pose1[1],
                                                     kp1[i].pt[0],
                                                     kp1[i].pt[1])

        test_map.global_ray.append(np.array([theta, phi]))

    keyframe1.landmark_index = np.arange(len(kp1))
    """second frame"""
    keyframe2.landmark_index = np.ndarray([len(kp2)], dtype=np.int32)
    for i in range(len(kp2_inlier)):
        keyframe2.landmark_index[index2[i]] = index1[i]
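    # matched keypoints in frame 2 reuse the landmark indices created for
    # frame 1; the remaining (outlier) keypoints get new rays below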

    kp2_outlier_index = list(set(range(len(des2))) - set(index2))

    for i in range(len(kp2_outlier_index)):
        theta, phi = TransFunction.from_image_to_ray(
            obj.u, obj.v, pose2[2], pose2[0], pose2[1],
            kp2[kp2_outlier_index[i]].pt[0], kp2[kp2_outlier_index[i]].pt[1])
        test_map.global_ray.append(np.array([theta, phi]))

        keyframe2.landmark_index[kp2_outlier_index[i]] = len(
            test_map.global_ray) - 1

    test_map.keyframe_list.append(keyframe1)
    test_map.keyframe_list.append(keyframe2)

    pose_test = obj.get_ptz(142)

    optimized = relocalization_camera(map=test_map,
                                      img=obj.get_image_gray(142),
                                      pose=np.array([20, -16, 3000]))

    print(pose_test)
    print(optimized.x)
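A minimal driver sketch showing how the PtzSlam methods below typically compose, assuming the SequenceManager accessors used in Example 3; seq.length and the bad_tracking_percentage value are illustrative assumptions, not from the source:

def run_ptz_slam_sketch():
    """Hypothetical main loop chaining init_system / tracking / relocalize / add_keyframe."""
    seq = SequenceManager(
        "../../dataset/basketball/basketball_anno.mat",
        "../../dataset/basketball/images",
        "../../dataset/basketball/basketball_ground_truth.mat",
        "../../dataset/basketball/objects_basketball.mat")
    slam = PtzSlam()

    # bootstrap tracking and the keyframe map from the first annotated frame
    first_camera = seq.get_camera(0)
    slam.init_system(seq.get_image_gray(0), first_camera,
                     seq.get_bounding_box_mask(0))
    slam.add_keyframe(seq.get_image_gray(0), first_camera, 0)

    for i in range(1, seq.length):  # seq.length: assumed frame count
        img = seq.get_image_gray(i)
        slam.tracking(img, bad_tracking_percentage=80,
                      bounding_box=seq.get_bounding_box_mask(i))
        if slam.tracking_lost:
            # re-initialize tracking from the relocalized camera
            camera = slam.relocalize(img, slam.current_camera)
            slam.init_system(img, camera, seq.get_bounding_box_mask(i))
        elif slam.new_keyframe:
            slam.add_keyframe(img, slam.current_camera, i)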
Example 4
class PtzSlam:
    def __init__(self):
        """
        :param annotation_path: path for annotation file
        :param bounding_box_path: path for player bounding box mat file
        :param image_path: path for image folder
        """

        # global rays and covariance matrix
        self.rays = np.ndarray([0, 2])
        self.state_cov = np.zeros([3, 3])

        # information from the previous frame: image matrix, keypoints and their global ray indices.
        self.previous_img = None
        self.previous_keypoints = None
        self.previous_keypoints_index = None

        # descriptor for rays
        self.des = np.ndarray([0, 128])

        # camera object for current frame
        self.current_camera = None

        # map
        self.keyframe_map = Map('sift')

        self.rf_map = RandomForestMap()

        # a camera list for whole sequence.
        self.cameras = []

        # speed of camera, for pan, tilt and focal length
        self.velocity = np.zeros(3)

        # state: whether current frame is new keyframe.
        self.new_keyframe = False

        # state: whether current frame is lost.
        self.tracking_lost = False

        # counter of consecutive badly tracked frames. If it exceeds a threshold, tracking is declared lost.
        self.bad_tracking_cnt = 0

        # hyper-parameter: number of keypoints (soccer: 300, basketball: 500)
        self.keypoint_num = 500

        # observation noise variance (previously set to 2)
        self.observe_var = 0.1

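        # state-noise variances for the pan/tilt angles and for the focal length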
        self.angle_var = 0.001
        self.f_var = 1

    def compute_h_jacobian(self, pan, tilt, focal_length, rays):
        """
        This function computes the jacobian matrix H for h(x).
        h(x) is the function from predicted state(camera pose and ray landmarks) to predicted observations.
        H helps to compute Kalman gain for the EKF.

        :param pan: pan angle of predicted camera pose
        :param tilt: tilt angle of predicted camera pose
        :param focal_length: focal length of predicted camera pose
        :param rays: predicted ray landmarks, [RayNumber * 2]
        :return: Jacobian matrix H, [2 * RayNumber, 3 + 2 * RayNumber]
        """
        ray_num = len(rays)

        delta_angle = 0.001
        delta_f = 0.1
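        # Each partial derivative below is a central difference:
        # dh/dx ~ (h(x + delta) - h(x - delta)) / (2 * delta).
        # H is block-sparse: row pair (2i, 2i+1) holds the 2x3 pose block in
        # columns 0..2 and the 2x2 landmark block in columns 3+2i..4+2i.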

        jacobi_h = np.zeros([2 * ray_num, 3 + 2 * ray_num])

        camera = copy.deepcopy(self.cameras[0])
        """use approximate method to compute partial derivative."""
        for i in range(ray_num):
            camera.set_ptz([pan - delta_angle, tilt, focal_length])
            x_delta_pan1, y_delta_pan1 = camera.project_ray(rays[i])

            camera.set_ptz([pan + delta_angle, tilt, focal_length])
            x_delta_pan2, y_delta_pan2 = camera.project_ray(rays[i])

            camera.set_ptz([pan, tilt - delta_angle, focal_length])
            x_delta_tilt1, y_delta_tilt1 = camera.project_ray(rays[i])

            camera.set_ptz([pan, tilt + delta_angle, focal_length])
            x_delta_tilt2, y_delta_tilt2 = camera.project_ray(rays[i])

            camera.set_ptz([pan, tilt, focal_length - delta_f])
            x_delta_f1, y_delta_f1 = camera.project_ray(rays[i])

            camera.set_ptz([pan, tilt, focal_length + delta_f])
            x_delta_f2, y_delta_f2 = camera.project_ray(rays[i])

            camera.set_ptz([pan, tilt, focal_length])
            x_delta_theta1, y_delta_theta1 = camera.project_ray(
                [rays[i, 0] - delta_angle, rays[i, 1]])
            x_delta_theta2, y_delta_theta2 = camera.project_ray(
                [rays[i, 0] + delta_angle, rays[i, 1]])
            x_delta_phi1, y_delta_phi1 = camera.project_ray(
                [rays[i, 0], rays[i, 1] - delta_angle])
            x_delta_phi2, y_delta_phi2 = camera.project_ray(
                [rays[i, 0], rays[i, 1] + delta_angle])

            jacobi_h[2 * i, 0] = (x_delta_pan2 - x_delta_pan1) / (2 * delta_angle)
            jacobi_h[2 * i, 1] = (x_delta_tilt2 - x_delta_tilt1) / (2 * delta_angle)
            jacobi_h[2 * i, 2] = (x_delta_f2 - x_delta_f1) / (2 * delta_f)

            jacobi_h[2 * i + 1, 0] = (y_delta_pan2 - y_delta_pan1) / (2 * delta_angle)
            jacobi_h[2 * i + 1, 1] = (y_delta_tilt2 - y_delta_tilt1) / (2 * delta_angle)
            jacobi_h[2 * i + 1, 2] = (y_delta_f2 - y_delta_f1) / (2 * delta_f)

            # the partial derivative of a 2D point with respect to a different
            # landmark is always zero, so only the (i, i) landmark block is set
            jacobi_h[2 * i, 3 + 2 * i] = (x_delta_theta2 - x_delta_theta1) / (2 * delta_angle)
            jacobi_h[2 * i, 4 + 2 * i] = (x_delta_phi2 - x_delta_phi1) / (2 * delta_angle)
            jacobi_h[2 * i + 1, 3 + 2 * i] = (y_delta_theta2 - y_delta_theta1) / (2 * delta_angle)
            jacobi_h[2 * i + 1, 4 + 2 * i] = (y_delta_phi2 - y_delta_phi1) / (2 * delta_angle)

        return jacobi_h

    def init_system(self, img, camera, bounding_box=None):
        """
        This function initializes the tracking component.
        It is called (1) at the first frame and (2) after relocalization.
        :param img: image to initialize the system.
        :param camera: first camera pose to initialize the system.
        :param bounding_box: first bounding box matrix (optional).
        """

        # step 1: detect keypoints from image
        # first_img_kp = detect_sift(img, self.keypoint_num)
        first_img_kp, first_des = detect_compute_sift_array(
            img, self.keypoint_num)
        # first_img_kp = detect_orb(img, 300)
        # first_img_kp = add_gauss(first_img_kp, 50, 1280, 720)

        # remove keypoints on players if bounding box mask is provided
        if bounding_box is not None:
            masked_index = keypoints_masking(first_img_kp, bounding_box)
            first_img_kp = first_img_kp[masked_index]
            first_des = first_des[masked_index]

        # step 2: back-project keypoint locations to rays by a known camera pose
        # use key points in first frame to get init rays
        init_rays = camera.back_project_to_rays(first_img_kp)

        # initialize rays
        self.rays = np.ndarray([0, 2])
        self.rays = np.row_stack([self.rays, init_rays])

        self.des = first_des

        # step 3: initialize covariance matrix of the state
        # angle_var (0.001) and f_var (1) are manually selected parameters
        self.state_cov = self.angle_var * np.eye(3 + 2 * len(self.rays))
        self.state_cov[2][2] = self.f_var  # covariance for focal length
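        # state vector layout: [pan, tilt, focal_length, theta_1, phi_1, ..., theta_N, phi_N]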

        # the previous frame information
        self.previous_img = img
        self.previous_keypoints = first_img_kp
        self.previous_keypoints_index = np.arange(len(self.rays))

        # append the first camera to camera list
        self.cameras.append(camera)

    def ekf_update(self, observed_keypoints, observed_keypoint_index, height,
                   width):
        """
        This function update global rays and covariance matrix.
        @This function is important. Please add Math and add note for variables
        @ for example: y_k, dimension, y_k is xxxx in the equation xxx
        :param observed_keypoints: matched keypoint in that frame
        :param observed_keypoint_index: matched keypoint index in global ray
        :param height: image height
        :param width: image width
        """

        # step 1: get 2d points and indexes in all landmarks with predicted camera pose
        predicted_camera = self.current_camera
        predict_keypoints, predict_keypoint_index = predicted_camera.project_rays(
            self.rays, height, width)

        # step 2: an intersection of observed keypoints and predicted keypoints
        # compute y_k: residual
        overlap1, overlap2 = get_overlap_index(observed_keypoint_index,
                                               predict_keypoint_index)
        y_k = observed_keypoints[overlap1] - predict_keypoints[overlap2]
        y_k = y_k.flatten()  # to one dimension
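        # y_k stacks the per-keypoint residuals as [u_1, v_1, u_2, v_2, ...]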

        # indices of the inlier rays (frame-to-frame matching) carried from the previous frame
        matched_ray_index = observed_keypoint_index[overlap1]

        # step 3: extract camera-pose and ray indices in the covariance matrix.
        # pose_ray_index lists the rows (and cols) of P to be updated (a sub-block of P!)
        # e.g. [0,1,2 (pose), 3,4 (ray 1), 7,8 (ray 3)] selects the first and third rays.
        num_ray = len(matched_ray_index)
        pose_index = np.array([0, 1, 2])
        ray_index = np.zeros(num_ray * 2)
        for j in range(num_ray):
            ray_index[2 * j] = 2 * matched_ray_index[j] + 3
            ray_index[2 * j + 1] = 2 * matched_ray_index[j] + 4
        pose_ray_index = np.concatenate((pose_index, ray_index), axis=0)
        pose_ray_index = pose_ray_index.astype(np.int32)
        predicted_cov = self.state_cov[pose_ray_index][:, pose_ray_index]
        assert predicted_cov.shape[0] == pose_ray_index.shape[
            0] and predicted_cov.shape[1] == pose_ray_index.shape[0]

        # compute jacobi
        updated_ray = self.rays[matched_ray_index.astype(int)]
        jacobi = self.compute_h_jacobian(
            pan=predicted_camera.pan,
            tilt=predicted_camera.tilt,
            focal_length=predicted_camera.focal_length,
            rays=updated_ray)
        # get Kalman gain
        # R_k: measurement noise covariance; two coordinates (u, v) per keypoint
        r_k = self.observe_var * np.eye(2 * num_ray)
        s_k = np.dot(np.dot(jacobi, predicted_cov), jacobi.T) + r_k

        k_k = np.dot(np.dot(predicted_cov, jacobi.T), np.linalg.inv(s_k))

        # K_k y_k: the correction added to the predicted state to get the final state
        k_mul_y = np.dot(k_k, y_k)

        # update camera pose
        cur_camera = predicted_camera
        cur_camera.pan += k_mul_y[0]
        cur_camera.tilt += k_mul_y[1]
        cur_camera.focal_length += k_mul_y[2]

        self.current_camera = cur_camera  # redundant code as it is a reference

        # update speed model
        self.velocity = k_mul_y[0:3]

        # update global rays: overwrite updated ray to ray_global
        for j in range(num_ray):
            ray_idx = int(matched_ray_index[j])
            self.rays[ray_idx][0:2] += k_mul_y[2 * j + 3:2 * j + 5]

        # update global p: overwrite updated p to the p_global
        update_p = np.dot(
            np.eye(3 + 2 * num_ray) - np.dot(k_k, jacobi), predicted_cov)
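        # write back only the pose block and, for each pair of matched rays,
        # the theta-theta and phi-phi covariance entries; unmatched rays keep
        # their previous covariance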
        self.state_cov[0:3, 0:3] = update_p[0:3, 0:3]
        for j in range(num_ray):
            row1 = 3 + 2 * int(matched_ray_index[j])
            row2 = row1 + 1
            for k in range(num_ray):
                col1 = 3 + 2 * int(matched_ray_index[k])
                col2 = col1 + 1
                self.state_cov[row1, col1] = update_p[3 + 2 * j, 3 + 2 * k]
                self.state_cov[row2, col2] = update_p[3 + 2 * j + 1,
                                                      3 + 2 * k + 1]

    def remove_rays(self, index):
        """
        remove_rays
        delete ransac outliers from global ray
        The ray is initialized by keypoint detection in the first frame.
        In the next frame, some of the keypoints are corrected matched as inliers,
        others are outliers. The outlier is associated with a ray, that ray will be removed
        Note the ray is different from the ray in the Map().

        :param index: index in rays to be removed
        """

        # delete ray_global
        delete_index = np.array(index)
        self.rays = np.delete(self.rays, delete_index, axis=0)
        self.des = np.delete(self.des, delete_index, axis=0)

        # delete the corresponding rows/cols of p_global (two per ray: theta, phi);
        # the indices must be integers for np.delete
        p_delete_index = np.ndarray([0], dtype=np.int32)
        for j in range(len(delete_index)):
            p_delete_index = np.append(
                p_delete_index,
                np.array([2 * delete_index[j] + 3, 2 * delete_index[j] + 4]))

        self.state_cov = np.delete(self.state_cov, p_delete_index, axis=0)
        self.state_cov = np.delete(self.state_cov, p_delete_index, axis=1)

    def add_rays(self, img, bounding_box):
        """
        Detect new keypoints in the current frame and add associated rays.
        In each frame, a number of keypoints are detected. These keypoints will
        be associated with new rays (given the camera pose). These new rays are
        added to the global ray list to keep up the number of visible rays in
        the image; otherwise the number of rays would drop over time.
        :param img: current image
        :param bounding_box: mask of the same size as img: 0 on players, 1 elsewhere.
        :return: keypoints and corresponding global indexes
        """

        # get height width of image
        height, width = img.shape[0:2]

        # project global_ray to image. Get existing keypoints
        keypoints, keypoints_index = self.current_camera.project_rays(
            self.rays, height, width)

        # new_keypoints = detect_sift(img, self.keypoint_num)
        new_keypoints, new_des = detect_compute_sift_array(
            img, self.keypoint_num)
        # new_keypoints = detect_orb(img, 300)
        # new_keypoints = add_gauss(new_keypoints, 50, 1280, 720)

        # remove keypoints in player bounding boxes
        if bounding_box is not None:
            bounding_box_mask_index = keypoints_masking(
                new_keypoints, bounding_box)
            new_keypoints = new_keypoints[bounding_box_mask_index]
            new_des = new_des[bounding_box_mask_index]

        # remove keypoints near existing keypoints
        mask = np.ones(img.shape[0:2], np.uint8)

        for j in range(len(keypoints)):
            x, y = keypoints[j]
            up_bound = int(max(0, y - 50))
            low_bound = int(min(height, y + 50))
            left_bound = int(max(0, x - 50))
            right_bound = int(min(width, x + 50))
            mask[up_bound:low_bound, left_bound:right_bound] = 0
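        # the mask zeroes a ~50-pixel window around each existing keypoint, so
        # new rays are only created in image regions not already covered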

        existing_keypoints_mask_index = keypoints_masking(new_keypoints, mask)
        new_keypoints = new_keypoints[existing_keypoints_mask_index]
        new_des = new_des[existing_keypoints_mask_index]

        # check whether any new keypoints survive the masking
        if new_keypoints is not None and len(new_keypoints) > 0:
            new_rays = self.current_camera.back_project_to_rays(new_keypoints)

            # add new ray to ray_global, and add new rows and cols to p_global
            for j in range(len(new_rays)):
                self.rays = np.row_stack([self.rays, new_rays[j]])
                self.des = np.row_stack([self.des, new_des[j]])
                self.state_cov = np.row_stack(
                    [self.state_cov,
                     np.zeros([2, self.state_cov.shape[1]])])
                self.state_cov = np.column_stack(
                    [self.state_cov,
                     np.zeros([self.state_cov.shape[0], 2])])
                self.state_cov[self.state_cov.shape[0] - 2,
                               self.state_cov.shape[1] - 2] = self.angle_var
                self.state_cov[self.state_cov.shape[0] - 1,
                               self.state_cov.shape[1] - 1] = self.angle_var
                keypoints_index = np.append(keypoints_index,
                                            len(self.rays) - 1)

            keypoints = np.concatenate([keypoints, new_keypoints], axis=0)

        return keypoints, keypoints_index

    def tracking(self, next_img, bad_tracking_percentage, bounding_box=None):
        """
        This is function for tracking using sparse optical flow matching.
        :param next_img: image for next tracking frame
        :param bounding_box: bounding box matrix (optional)
        """

        inlier_keypoints, inlier_index, outlier_index = matching_and_ransac(
            self.previous_img, next_img, self.previous_keypoints,
            self.previous_keypoints_index)

        # inlier_keypoints = add_gauss(inlier_keypoints, 50, 1280, 720)

        # compute inlier percentage as the measurement for tracking quality
        tracking_percentage = len(inlier_index) / len(
            self.previous_keypoints) * 100
        if tracking_percentage < bad_tracking_percentage:
            self.bad_tracking_cnt += 1

        if self.bad_tracking_cnt > 3:
            # if len(self.rf_map.keyframe_list) > 8:
            self.tracking_lost = True
            self.bad_tracking_cnt = 0
        """
        ===============================
        1. predict step
        ===============================
        """

        # update camera pose with constant speed model
        self.current_camera = copy.deepcopy(self.cameras[-1])
        self.current_camera.set_ptz(self.current_camera.get_ptz() +
                                    self.velocity)
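        # (EKF predict step: x_k|k-1 = x_{k-1} + [v_pan, v_tilt, v_f])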

        if not self.tracking_lost:
            self.cameras.append(self.current_camera)

        # predict covariance: add process noise Q_k to the camera-pose block of P
        q_k = 5 * np.diag([self.angle_var, self.angle_var, self.f_var])
        self.state_cov[0:3, 0:3] = self.state_cov[0:3, 0:3] + q_k
        """
        ===============================
        2. update step
        ===============================
        """

        height, width = next_img.shape[0:2]
        self.ekf_update(inlier_keypoints, inlier_index, height, width)
        """
        ===============================
        3. delete outlier_index
        ===============================
        """

        self.remove_rays(outlier_index)
        """
        ===============================
        4.  add new features & update previous frame
        ===============================
        """

        self.previous_img = next_img
        self.previous_keypoints, self.previous_keypoints_index = self.add_rays(
            next_img, bounding_box)

        print("tracking", tracking_percentage)

        # if tracking_percentage > bad_tracking_percentage:
        # basketball set to (10, 25), soccer maybe (10, 15)
        if self.keyframe_map.good_new_keyframe(self.current_camera.get_ptz(),
                                               10, 15):
            self.new_keyframe = True

        # if self.rf_map.good_keyframe(self.current_camera.get_ptz(), 10, 15):
        #     self.new_keyframe = True

    def relocalize(self, img, camera, enable_rf=False, bounding_box=None):
        """
        :param img: image to relocalize
        :param camera: lost camera to relocaize
        :return: camera after relocalize
        """

        if enable_rf:
            c = camera.camera_center
            r = camera.base_rotation
            u = camera.principal_point[0]
            v = camera.principal_point[1]
            pan = camera.pan
            tilt = camera.tilt
            focal_length = camera.focal_length
            relocalize_frame = KeyFrame(img, -1, c, r, u, v, pan, tilt,
                                        focal_length)

            kp, des = detect_compute_sift_array(img, 500)

            if bounding_box is not None:
                masked_index = keypoints_masking(kp, bounding_box)
                kp = kp[masked_index]
                des = des[masked_index]

            relocalize_frame.feature_pts = kp
            relocalize_frame.feature_des = des

            ptz = self.rf_map.relocalize(relocalize_frame,
                                         [pan, tilt, focal_length])
            camera.set_ptz(ptz)

        else:
            if len(self.keyframe_map.keyframe_list) > 1:
                lost_pose = camera.pan, camera.tilt, camera.focal_length
                relocalize_pose = relocalization_camera(
                    self.keyframe_map, img, lost_pose)
                camera.set_ptz(relocalize_pose)
            else:
                print("Warning: Not enough keyframes for relocalization.")

        self.tracking_lost = False

        return camera

    def add_keyframe(self, img, camera, frame_index, enable_rf=False):
        """
        add new key frame.
        @todo now have not changed the KeyFrame's parameter to camera object.
        @todo Many places need to be changed if this change.
        :param img: image
        :param camera: camera object for key frame
        :param frame_index: frame index in sequence
        """
        c = camera.camera_center
        r = camera.base_rotation
        u = camera.principal_point[0]
        v = camera.principal_point[1]
        pan = camera.pan
        tilt = camera.tilt
        focal_length = camera.focal_length

        new_keyframe = KeyFrame(img, frame_index, c, r, u, v, pan, tilt,
                                focal_length)

        if enable_rf:
            # new_keyframe.feature_pts, new_keyframe.feature_des = detect_compute_sift_array(img, 1500)
            new_keyframe.feature_pts = self.previous_keypoints
            new_keyframe.feature_des = self.des[
                self.previous_keypoints_index.astype(int)]

            self.rf_map.add_keyframe(new_keyframe)
            self.new_keyframe = False

        else:
            if frame_index == 0:
                self.keyframe_map.add_first_keyframe(new_keyframe,
                                                     verbose=True)
            else:
                self.keyframe_map.add_keyframe_with_ba(new_keyframe,
                                                       "./bundle_result/",
                                                       verbose=True)
                self.new_keyframe = False