def detect_and_describe(self, image: Image) -> Tuple[Keypoints, np.ndarray]:
    """Run SuperPoint on a single image to jointly produce keypoints and their descriptors.

    Args:
        image: the input image.

    Returns:
        Detected keypoints, with length N <= max_keypoints.
        Corr. descriptors, of shape (N, D) where D is the dimension of each descriptor.
    """
    # TODO(ayushbaid): fix inference issue #110
    device = torch.device("cuda" if self._use_cuda else "cpu")
    model = SuperPoint(self._config).to(device)
    model.eval()

    # Prepare the input: grayscale, float in [0, 1], with batch and channel dims prepended.
    gray = image_utils.rgb_to_gray_cv(image).value_array
    normalized = gray.astype(np.float32) / 255.0
    input_tensor = torch.from_numpy(normalized[None, None, :, :]).to(device)

    # Run inference without tracking gradients.
    with torch.no_grad():
        outputs = model({"image": input_tensor})
    torch.cuda.empty_cache()

    # Move results back to host memory and wrap them in GTSFM types.
    coords = outputs["keypoints"][0].detach().cpu().numpy()
    responses = outputs["scores"][0].detach().cpu().numpy()
    keypoints = Keypoints(coords, scales=None, responses=responses)
    descriptors = outputs["descriptors"][0].detach().cpu().numpy().T

    # Drop keypoints outside the mask, keeping descriptors aligned.
    if image.mask is not None:
        keypoints, valid_idxs = keypoints.filter_by_mask(image.mask)
        descriptors = descriptors[valid_idxs]

    # Keep only the strongest max_keypoints detections.
    keypoints, selection_idxs = keypoints.get_top_k(self.max_keypoints)
    descriptors = descriptors[selection_idxs]

    return keypoints, descriptors
def detect_and_describe(self, image: Image) -> Tuple[Keypoints, np.ndarray]:
    """Perform feature detection as well as their description.

    Refer to detect() in DetectorBase and describe() in DescriptorBase for details about the output format.

    Args:
        image: the input image.

    Returns:
        Detected keypoints, with length N <= max_keypoints.
        Corr. descriptors, of shape (N, D) where D is the dimension of each descriptor.
    """
    # Convert to grayscale.
    gray_image = image_utils.rgb_to_gray_cv(image)

    # Create OpenCV object.
    opencv_obj = cv.ORB_create()

    # Run the OpenCV code.
    cv_keypoints, descriptors = opencv_obj.detectAndCompute(
        gray_image.value_array, image.mask)

    # Convert to GTSFM's keypoints.
    keypoints = feature_utils.cast_to_gtsfm_keypoints(cv_keypoints)

    # detectAndCompute returns None for descriptors when no features are found;
    # return an empty descriptor array instead of crashing on None indexing below.
    if descriptors is None:
        return keypoints, np.array([])

    # Filter features.
    keypoints, selection_idxs = keypoints.get_top_k(self.max_keypoints)
    descriptors = descriptors[selection_idxs]

    return keypoints, descriptors
def detect(self, image: Image) -> Keypoints:
    """Detect the features in an image.

    Args:
        image: input image.

    Returns:
        detected keypoints, with maximum length of max_keypoints.
    """
    # Detect on the grayscale version of the image.
    gray = image_utils.rgb_to_gray_cv(image)
    detector = cv.SIFT_create()
    cv_keypoints = detector.detect(gray.value_array, None)

    # Convert OpenCV keypoints to GTSFM's representation and cap their number.
    keypoints = feature_utils.cast_to_gtsfm_keypoints(cv_keypoints)
    keypoints, _ = keypoints.get_top_k(self.max_keypoints)

    return keypoints
def detect_and_describe(self, image: Image) -> Tuple[Keypoints, np.ndarray]:
    """Perform feature detection as well as their description.

    Refer to detect() in DetectorBase and describe() in DescriptorBase for details about the output format.

    Args:
        image: the input image.

    Returns:
        Detected keypoints, with length N <= max_keypoints.
        Corr. descriptors, of shape (N, D) where D is the dimension of each descriptor.
    """
    # Convert to grayscale.
    gray_image = image_utils.rgb_to_gray_cv(image)

    # Create OpenCV object.
    opencv_obj = cv.SIFT_create()

    # Run the OpenCV code.
    cv_keypoints, descriptors = opencv_obj.detectAndCompute(
        gray_image.value_array, None)

    # Convert to GTSFM's keypoints.
    keypoints = feature_utils.cast_to_gtsfm_keypoints(cv_keypoints)

    # detectAndCompute returns None for descriptors when no features are found;
    # return an empty descriptor array instead of crashing on None indexing below.
    if descriptors is None:
        return keypoints, np.array([])

    # Sort the features and descriptors by the score
    # (need to sort here as we need the sorting order for descriptors).
    sort_idx = np.argsort(-keypoints.responses)[:self.max_keypoints]
    keypoints = Keypoints(
        coordinates=keypoints.coordinates[sort_idx],
        scales=keypoints.scales[sort_idx],
        responses=keypoints.responses[sort_idx],
    )
    descriptors = descriptors[sort_idx]

    return keypoints, descriptors
def describe(self, image: Image, keypoints: Keypoints) -> np.ndarray:
    """Assign descriptors to detected features in an image.

    Arguments:
        image: the input image.
        keypoints: the keypoints to describe, of length N.

    Returns:
        Descriptors for the input features, of shape (N, D) where D is the dimension of each descriptor.
    """
    # Nothing to describe: return an empty array.
    if not len(keypoints):
        return np.array([])

    gray = image_utils.rgb_to_gray_cv(image)
    sift = cv.SIFT_create()

    # TODO(ayush): what to do about new set of keypoints
    _, descriptors = sift.compute(
        gray.value_array, keypoints.cast_to_opencv_keypoints())

    return descriptors
def get_nonzero_intensity_mask(
        img: Image, eps: int = 5, kernel_size: Tuple[int, int] = (15, 15)) -> np.ndarray:
    """Generate mask of where image intensity values are non-zero.

    After thresholding the image, we use an erosion kernel to add a buffer between the foreground and background.

    Args:
        img: input Image to be masked (values in range [0, 255]).
        eps: minimum allowable intensity value, i.e., values below this value will be masked out.
        kernel_size: size of erosion kernel.

    Returns:
        Mask (as an integer array) of Image where with a value of 1 where the intensity value is above `eps`
        and 0 otherwise.
    """
    # Binarize: pixels brighter than eps become 255, everything else 0.
    gray = image_utils.rgb_to_gray_cv(img).value_array
    _, binarized = cv.threshold(gray, eps, 255, cv.THRESH_BINARY)

    # Erode to shrink the foreground, creating a buffer, then scale 255 -> 1.
    erosion_kernel = np.ones(kernel_size, np.uint8)
    return cv.erode(binarized, erosion_kernel) // 255