Example #1
0
File: superpoint.py  Project: borglab/gtsfm
    def detect_and_describe(self,
                            image: Image) -> Tuple[Keypoints, np.ndarray]:
        """Jointly generate keypoint detections and their associated descriptors from a single image."""
        # TODO(ayushbaid): fix inference issue #110
        device = torch.device("cuda" if self._use_cuda else "cpu")
        model = SuperPoint(self._config).to(device)
        model.eval()

        # Build a float tensor in [0, 1] with two leading singleton dims
        # (batch, channel) from the grayscale image, then run inference.
        gray = image_utils.rgb_to_gray_cv(image).value_array.astype(np.float32)
        image_tensor = torch.from_numpy(
            np.expand_dims(gray / 255.0, (0, 1))).to(device)
        with torch.no_grad():
            model_results = model({"image": image_tensor})
        torch.cuda.empty_cache()

        # Move results for the single image in the batch back to host memory.
        coordinates = model_results["keypoints"][0].detach().cpu().numpy()
        scores = model_results["scores"][0].detach().cpu().numpy()
        descriptors = model_results["descriptors"][0].detach().cpu().numpy().T
        keypoints = Keypoints(coordinates, scales=None, responses=scores)

        # Drop detections outside the mask (if any), then keep the strongest
        # max_keypoints detections; descriptors follow the same selection.
        if image.mask is not None:
            keypoints, valid_idxs = keypoints.filter_by_mask(image.mask)
            descriptors = descriptors[valid_idxs]
        keypoints, selection_idxs = keypoints.get_top_k(self.max_keypoints)
        descriptors = descriptors[selection_idxs]

        return keypoints, descriptors
Example #2
0
File: orb.py  Project: borglab/gtsfm
    def detect_and_describe(self,
                            image: Image) -> Tuple[Keypoints, np.ndarray]:
        """Perform feature detection as well as their description.

        Refer to detect() in DetectorBase and describe() in DescriptorBase for details about the output format.

        Args:
            image: the input image.

        Returns:
            Detected keypoints, with length N <= max_keypoints.
            Corr. descriptors, of shape (N, D) where D is the dimension of each descriptor.
        """
        # ORB operates on single-channel images.
        gray_image = image_utils.rgb_to_gray_cv(image)

        # Detection and description happen in a single OpenCV call,
        # restricted to the (optional) image mask.
        cv_keypoints, descriptors = cv.ORB_create().detectAndCompute(
            gray_image.value_array, image.mask)

        # Convert OpenCV keypoints into GTSFM's representation.
        keypoints = feature_utils.cast_to_gtsfm_keypoints(cv_keypoints)

        # Keep only the strongest max_keypoints detections; descriptor rows
        # must follow the same selection so they stay aligned.
        keypoints, selection_idxs = keypoints.get_top_k(self.max_keypoints)
        return keypoints, descriptors[selection_idxs]
Example #3
0
File: dog.py  Project: borglab/gtsfm
    def detect(self, image: Image) -> Keypoints:
        """Detect the features in an image.

        Args:
            image: input image.

        Returns:
            detected keypoints, with maximum length of max_keypoints.
        """
        sift = cv.SIFT_create()

        # SIFT operates on single-channel images.
        gray = image_utils.rgb_to_gray_cv(image)
        cv_keypoints = sift.detect(gray.value_array, None)
        gtsfm_keypoints = feature_utils.cast_to_gtsfm_keypoints(cv_keypoints)

        # Truncate to at most max_keypoints detections.
        top_keypoints, _ = gtsfm_keypoints.get_top_k(self.max_keypoints)
        return top_keypoints
Example #4
0
    def detect_and_describe(self,
                            image: Image) -> Tuple[Keypoints, np.ndarray]:
        """Perform feature detection as well as their description.

        Refer to detect() in DetectorBase and describe() in DescriptorBase for details about the output format.

        Args:
            image: the input image.

        Returns:
            Detected keypoints, with length N <= max_keypoints.
            Corr. descriptors, of shape (N, D) where D is the dimension of each descriptor.
        """
        # SIFT operates on single-channel images.
        gray_image = image_utils.rgb_to_gray_cv(image)

        # Detection and description in a single OpenCV call.
        sift = cv.SIFT_create()
        cv_keypoints, descriptors = sift.detectAndCompute(
            gray_image.value_array, None)

        # Convert OpenCV keypoints into GTSFM's representation.
        keypoints = feature_utils.cast_to_gtsfm_keypoints(cv_keypoints)

        # Rank detections by response (descending) and keep the strongest
        # max_keypoints. The ordering is computed explicitly here because the
        # same index order must also be applied to the descriptor rows.
        order = np.argsort(-keypoints.responses)[:self.max_keypoints]

        keypoints = Keypoints(
            coordinates=keypoints.coordinates[order],
            scales=keypoints.scales[order],
            responses=keypoints.responses[order],
        )
        return keypoints, descriptors[order]
Example #5
0
    def describe(self, image: Image, keypoints: Keypoints) -> np.ndarray:
        """Assign descriptors to detected features in an image.

        Arguments:
            image: the input image.
            keypoints: the keypoints to describe, of length N.

        Returns:
            Descriptors for the input features, of shape (N, D) where D is the dimension of each descriptor.
        """
        # Nothing to describe.
        if len(keypoints) == 0:
            return np.array([])

        # SIFT operates on single-channel images.
        gray = image_utils.rgb_to_gray_cv(image)
        sift = cv.SIFT_create()

        # TODO(ayush): what to do about new set of keypoints
        _, descriptors = sift.compute(
            gray.value_array, keypoints.cast_to_opencv_keypoints())
        return descriptors
Example #6
0
def get_nonzero_intensity_mask(
    img: Image,
    eps: int = 5,
    kernel_size: Tuple[int, int] = (15, 15)) -> np.ndarray:
    """Generate mask of where image intensity values are non-zero.

    After thresholding the image, we use an erosion kernel to add a buffer between the foreground and background.

    Args:
        img: input Image to be masked (values in range [0, 255]).
        eps: minimum allowable intensity value, i.e., values below this value will be masked out.
        kernel_size: size of erosion kernel.

    Returns:
        Mask (as an integer array) of Image where with a value of 1 where the intensity value is above `eps` and 0
        otherwise.
    """
    gray = image_utils.rgb_to_gray_cv(img)

    # Binarize: intensities above eps become 255, everything else 0.
    _, binary = cv.threshold(gray.value_array, eps, 255, cv.THRESH_BINARY)

    # Erode to pull the foreground boundary inward, then scale 255 -> 1.
    erosion_kernel = np.ones(kernel_size, np.uint8)
    return cv.erode(binary, erosion_kernel) // 255