def forward(self, laf: torch.Tensor, img: torch.Tensor) -> torch.Tensor:  # type: ignore
    """Rotate each upright local affine frame by the angle predicted from its patch.

    Patches are extracted from ``img`` at the given ``laf``, fed to
    ``self.angle_detector``, and the predicted angles (converted with
    ``rad2deg``) are turned into 2x2 rotation matrices. The 2x2 part of
    ``make_upright(laf)`` is multiplied by that rotation; the last column of
    the input ``laf`` is carried over unchanged.

    Args:
        laf: (torch.Tensor), shape [BxNx2x3]
        img: (torch.Tensor), shape [Bx1xHxW]

    Returns:
        laf_out: (torch.Tensor), shape [BxNx2x3]

    Raises:
        TypeError: if ``img`` is not a torch.Tensor.
        ValueError: if ``img`` is not 4-dimensional, or if the batch sizes of
            ``laf`` and ``img`` differ.
    """
    raise_error_if_laf_is_not_valid(laf)
    # The type check must come before any use of ``img.shape``; otherwise a
    # non-tensor input fails with AttributeError instead of the intended TypeError.
    if not torch.is_tensor(img):
        raise TypeError("img type is not a torch.Tensor. Got {}".format(
            type(img)))
    if len(img.shape) != 4:
        raise ValueError(
            "Invalid img shape, we expect BxCxHxW. Got: {}".format(img.shape))
    if laf.size(0) != img.size(0):
        # Values are reported in the same order as the message names them (laf, img).
        raise ValueError(
            "Batch size of laf and img should be the same. Got {}, {}".format(
                laf.size(0), img.size(0)))
    B, N = laf.shape[:2]
    patches: torch.Tensor = extract_patches_from_pyramid(
        img, laf, self.patch_size).view(-1, 1, self.patch_size, self.patch_size)
    angles_radians: torch.Tensor = self.angle_detector(patches).view(B, N)
    rotmat: torch.Tensor = angle_to_rotation_matrix(
        rad2deg(angles_radians)).view(B * N, 2, 2)
    laf_out: torch.Tensor = torch.cat([
        # Rotated 2x2 affine part of the upright frame ...
        torch.bmm(make_upright(laf).view(B * N, 2, 3)[:, :2, :2], rotmat),
        # ... followed by the untouched last column of the input frame.
        laf.view(B * N, 2, 3)[:, :2, 2:]
    ], dim=2).view(B, N, 2, 3)
    return laf_out
def forward(self, laf: torch.Tensor, img: torch.Tensor) -> torch.Tensor:  # type: ignore
    """Replace the shape of each local affine frame with one estimated from its patch.

    Patches are extracted at ``make_upright(laf)``, passed to
    ``self.affine_shape_detector``, and the result is concatenated with the
    last column of ``laf`` into 5-element ellipses. These are converted back
    with ``ellipse_to_laf`` and rescaled so that ``get_laf_scale`` of the
    output matches that of the input.

    Args:
        laf: (torch.Tensor) shape [BxNx2x3]
        img: (torch.Tensor) shape [Bx1xHxW]

    Returns:
        laf_out: (torch.Tensor) shape [BxNx2x3]

    Raises:
        TypeError: if ``img`` is not a torch.Tensor.
        ValueError: if ``img`` is not 4-dimensional, or if the batch sizes of
            ``laf`` and ``img`` differ.
    """
    raise_error_if_laf_is_not_valid(laf)
    # The type check must come before any use of ``img.shape``; otherwise a
    # non-tensor input fails with AttributeError instead of the intended TypeError.
    if not torch.is_tensor(img):
        raise TypeError("img type is not a torch.Tensor. Got {}".format(
            type(img)))
    if len(img.shape) != 4:
        raise ValueError(
            "Invalid img shape, we expect BxCxHxW. Got: {}".format(img.shape))
    if laf.size(0) != img.size(0):
        # Values are reported in the same order as the message names them (laf, img).
        raise ValueError(
            "Batch size of laf and img should be the same. Got {}, {}".format(
                laf.size(0), img.size(0)))
    B, N = laf.shape[:2]
    PS: int = self.patch_size
    patches: torch.Tensor = extract_patches_from_pyramid(
        img, make_upright(laf), PS, True).view(-1, 1, PS, PS)
    ellipse_shape: torch.Tensor = self.affine_shape_detector(patches)
    # Last LAF column (2 values) + detector output must give 5 values per
    # frame, as required by the final ``view(B, N, 5)``.
    ellipses = torch.cat(
        [laf.view(-1, 2, 3)[..., 2].unsqueeze(1), ellipse_shape],
        dim=2).view(B, N, 5)
    scale_orig = get_laf_scale(laf)
    laf_out = ellipse_to_laf(ellipses)
    ellipse_scale = get_laf_scale(laf_out)
    # Bring the new frames back to the scale of the input frames.
    laf_out = scale_laf(laf_out, scale_orig / ellipse_scale)
    return laf_out
def forward(self, laf: torch.Tensor, img: torch.Tensor) -> torch.Tensor:
    """Add the patch-predicted angle to the current orientation of each frame.

    Patches are extracted from ``img`` at ``laf`` and passed to
    ``self.angle_detector``. The predicted angles (converted with ``rad2deg``)
    are added to ``get_laf_orientation(laf)`` and written back with
    ``set_laf_orientation``.

    Args:
        laf: shape [BxNx2x3]
        img: shape [Bx1xHxW]

    Returns:
        laf_out, shape [BxNx2x3]

    Raises:
        TypeError: if ``img`` is not a torch.Tensor.
        ValueError: if ``img`` is not 4-dimensional, or if the batch sizes of
            ``laf`` and ``img`` differ.
    """
    raise_error_if_laf_is_not_valid(laf)
    # The type check must come before any use of ``img.shape``; otherwise a
    # non-tensor input fails with AttributeError instead of the intended TypeError.
    if not isinstance(img, torch.Tensor):
        raise TypeError("img type is not a torch.Tensor. Got {}".format(
            type(img)))
    if len(img.shape) != 4:
        raise ValueError(
            "Invalid img shape, we expect BxCxHxW. Got: {}".format(img.shape))
    if laf.size(0) != img.size(0):
        # Values are reported in the same order as the message names them (laf, img).
        raise ValueError(
            "Batch size of laf and img should be the same. Got {}, {}".format(
                laf.size(0), img.size(0)))
    B, N = laf.shape[:2]
    patches: torch.Tensor = extract_patches_from_pyramid(
        img, laf, self.patch_size).view(-1, 1, self.patch_size, self.patch_size)
    angles_radians: torch.Tensor = self.angle_detector(patches).view(B, N)
    # The prediction is applied on top of the existing orientation rather
    # than replacing it.
    prev_angle = get_laf_orientation(laf).view_as(angles_radians)
    laf_out: torch.Tensor = set_laf_orientation(
        laf, rad2deg(angles_radians) + prev_angle)
    return laf_out
def extract_features(img_fname, detector, affine, descriptor, device, visualize=False):
    """Detect keypoints in an image file and describe them with a patch descriptor.

    Args:
        img_fname: path handed to ``cv2.imread``.
        detector: object whose ``detect(img, None)`` returns a sliceable
            collection of keypoints; only the first 8000 are kept.
        affine: callable ``affine(lafs, gray_image)`` returning the frames
            the patches are extracted from.
        descriptor: callable applied to the [B*N, CH, H, W] patch batch.
        device: torch device for the image tensor and the frames.
        visualize: when true, show the image and the resulting frames.

    Returns:
        Tuple ``(kpts, descs, img)``: the kept keypoints, a numpy array with
        one descriptor row per keypoint, and the RGB image.
    """
    # NOTE(review): cv2.imread yields None for an unreadable file, in which
    # case cvtColor raises — confirm callers always pass valid paths.
    bgr_image = cv2.imread(img_fname)
    img = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)
    if visualize:
        plt.imshow(img)

    detected = detector.detect(img, None)
    kpts = detected[:8000]

    # Inference only: no_grad() saves time and memory.
    with torch.no_grad():
        rgb_tensor = (K.image_to_tensor(img, False).float() / 255.).to(device)
        gray_tensor = K.rgb_to_grayscale(rgb_tensor)

        # kornia works with local affine frames, so convert the OpenCV keypoints.
        initial_lafs = laf_from_opencv_SIFT_kpts(kpts, device=device)
        refined_lafs = affine(initial_lafs, gray_tensor)
        if visualize:
            visualize_LAF(rgb_tensor, refined_lafs, 0)

        patches = KF.extract_patches_from_pyramid(gray_tensor, refined_lafs, 32)
        batch, num, channels, height, width = patches.size()

        # The descriptor wants [B, CH, H, W]; patches come as [B, N, CH, H, W].
        flat_patches = patches.view(batch * num, channels, height, width)
        descs_tensor = descriptor(flat_patches).view(batch * num, -1)
        descs = descs_tensor.detach().cpu().numpy()

    return kpts, descs, img
def test_same(self, device, dtype):
    """``get_laf_descriptors`` must match describing manually extracted patches.

    The comparison is made for both the RGB image and its grayscale version
    against descriptors computed from patches of the grayscale image.
    """
    B, C, H, W = 1, 3, 64, 64
    PS = 16
    tensor_opts = {"device": device, "dtype": dtype}

    rgb = torch.rand(B, C, H, W, **tensor_opts)
    gray = kornia.color.rgb_to_grayscale(rgb)

    # Two frames: different centers, scales and orientations (0 and 30).
    center_values = [[H / 3.0, W / 3.0], [2.0 * H / 3.0, W / 2.0]]
    scale_values = [(H + W) / 4.0, (H + W) / 8.0]
    centers = torch.tensor(center_values, **tensor_opts).view(1, 2, 2)
    scales = torch.tensor(scale_values, **tensor_opts).view(1, 2, 1, 1)
    ori = torch.tensor([0.0, 30.0], **tensor_opts).view(1, 2, 1)
    lafs = kornia.feature.laf_from_center_scale_ori(centers, scales, ori)

    sift = SIFTDescriptor(PS).to(device, dtype)
    descs_from_rgb = get_laf_descriptors(rgb, lafs, sift, PS, True)
    descs_from_gray = get_laf_descriptors(gray, lafs, sift, PS, True)

    # Reference path: extract patches by hand and flatten
    # [B, N, CH, H, W] -> [B*N, CH, H, W] for the descriptor.
    patches = extract_patches_from_pyramid(gray, lafs, PS)
    n_batch, n_lafs, n_ch, p_h, p_w = patches.size()
    flat_patches = patches.view(n_batch * n_lafs, n_ch, p_h, p_w)
    expected = sift(flat_patches).view(n_batch, n_lafs, -1)

    assert_close(descs_from_rgb, expected)
    assert_close(descs_from_gray, expected)
def get_local_descriptors(img, cv2_sift_kpts, kornia_descriptor, aff):
    """Describe OpenCV keypoints with a kornia patch descriptor on the GPU.

    Args:
        img: image given to ``K.image_to_tensor`` and then converted with
            ``K.color.rgb_to_grayscale`` (presumably an RGB array — confirm
            against the caller).
        cv2_sift_kpts: keypoints converted to local affine frames with
            ``laf_from_opencv_SIFT_kpts``.
        kornia_descriptor: callable applied to the [B*N, CH, H, W] patch batch.
        aff: callable ``aff(lafs, timg)`` returning the frames from which the
            patches are extracted.

    Returns:
        Numpy array with one descriptor row per keypoint.

    Note:
        The image and the frames are moved with ``.cuda()``, so a CUDA device
        is required.
    """
    # We will not train anything, so let's save time and memory by no_grad()
    with torch.no_grad():
        timg = K.color.rgb_to_grayscale(K.image_to_tensor(img, False)) / 255.
        timg = timg.cuda()
        lafs = laf_from_opencv_SIFT_kpts(cv2_sift_kpts).cuda()
        # The original also computed the orientation of ``lafs`` here but
        # never used the result; that dead computation has been removed.
        # Refine the frames with the supplied estimator before patch extraction.
        lafs_new = aff(lafs, timg)
        patches = KF.extract_patches_from_pyramid(timg, lafs_new, 32)
        B, N, CH, H, W = patches.size()
        # Descriptor accepts standard tensor [B, CH, H, W], while patches are
        # [B, N, CH, H, W] shape, so we need to reshape a bit.
        descs = kornia_descriptor(patches.view(B * N, CH, H, W)).view(B * N, -1)
    return descs.detach().cpu().numpy()
def test_same(self, device, dtype):
    """``LocalFeature`` must agree with running its detector and descriptor by hand.

    Frames and responses are compared with a direct detector call, and the
    descriptors with those computed from manually extracted patches.
    """
    B, C, H, W = 1, 1, 64, 64
    PS = 16
    img = torch.rand(B, C, H, W, device=device, dtype=dtype)

    det = ScaleSpaceDetector(10)
    desc = SIFTDescriptor(PS)
    pipeline = LocalFeature(det, LAFDescriptor(desc, PS))
    pipeline = pipeline.to(device, dtype)

    lafs, responses, descs = pipeline(img)

    # Detector on its own must reproduce the frames and responses.
    expected_lafs, expected_responses = det(img)
    assert_close(lafs, expected_lafs)
    assert_close(responses, expected_responses)

    # Descriptor on manually extracted patches, flattened from
    # [B, N, CH, H, W] to [B*N, CH, H, W], must reproduce the descriptors.
    patches = extract_patches_from_pyramid(img, expected_lafs, PS)
    n_batch, n_lafs, n_ch, p_h, p_w = patches.size()
    flat_patches = patches.view(n_batch * n_lafs, n_ch, p_h, p_w)
    expected_descs = desc(flat_patches).view(n_batch, n_lafs, -1)
    assert_close(descs, expected_descs)
def extract_features(self, im):
    """Detect keypoints in ``im`` and describe them with ``self.desc``.

    Args:
        im: image passed to ``self.det.detect`` and ``K.image_to_tensor``.

    Returns:
        Tuple ``(kpts, descs)``: a numpy array holding the ``pt`` x/y pair of
        every keypoint, and a numpy array with one descriptor row per keypoint.
    """
    detected = self.det.detect(im, None)

    # Inference only: no_grad() saves time and memory.
    with torch.no_grad():
        image_tensor = (K.image_to_tensor(im, False).float() / 255.).to(self.device)
        # Only 3-channel input is converted; anything else is used as is.
        has_three_channels = image_tensor.shape[1] == 3
        gray_tensor = K.rgb_to_grayscale(image_tensor) if has_three_channels else image_tensor

        # kornia works with local affine frames, so convert the OpenCV keypoints.
        initial_lafs = laf_from_opencv_SIFT_kpts(detected, device=self.device)
        refined_lafs = self.aff(initial_lafs, gray_tensor)

        patches = KF.extract_patches_from_pyramid(gray_tensor, refined_lafs, 32)
        batch, num, channels, height, width = patches.size()

        # The descriptor wants [B, CH, H, W]; patches come as [B, N, CH, H, W].
        flat_patches = patches.view(batch * num, channels, height, width)
        descs_tensor = self.desc(flat_patches).view(batch * num, -1)
        descs = descs_tensor.detach().cpu().numpy()

    coordinates = [[keypoint.pt[0], keypoint.pt[1]] for keypoint in detected]
    kpts = np.array(coordinates)
    return kpts, descs
def forward(self, laf: torch.Tensor, img: torch.Tensor) -> torch.Tensor:
    """Replace the 2x2 part of each frame with a matrix predicted from its patch.

    Patches are extracted at ``make_upright(laf)``, normalized with
    ``self._normalize_input`` and passed to ``self.features``, whose output is
    viewed as three values ``(x0, x1, x2)`` per frame. These form the matrix
    ``[[1 + x0, 0], [x1, 1 + x2]]``, to which the last column of the input
    ``laf`` is appended. The result goes through ``make_upright`` and is
    rescaled by the ratio of the input scale to the new scale.

    Args:
        laf: (torch.Tensor) shape [BxNx2x3]
        img: (torch.Tensor) shape [Bx1xHxW]

    Returns:
        torch.Tensor: laf_out shape [BxNx2x3]

    Raises:
        TypeError: if ``img`` is not a torch.Tensor.
        ValueError: if ``img`` is not 4-dimensional, or if the batch sizes of
            ``laf`` and ``img`` differ.
    """
    raise_error_if_laf_is_not_valid(laf)
    # The type check must come before any use of ``img.shape``; otherwise a
    # non-tensor input fails with AttributeError instead of the intended TypeError.
    if not torch.is_tensor(img):
        raise TypeError("img type is not a torch.Tensor. Got {}".format(
            type(img)))
    if len(img.shape) != 4:
        raise ValueError(
            "Invalid img shape, we expect BxCxHxW. Got: {}".format(img.shape))
    if laf.size(0) != img.size(0):
        # Values are reported in the same order as the message names them (laf, img).
        raise ValueError(
            "Batch size of laf and img should be the same. Got {}, {}".format(
                laf.size(0), img.size(0)))
    B, N = laf.shape[:2]
    PS: int = self.patch_size
    patches: torch.Tensor = extract_patches_from_pyramid(
        img, make_upright(laf), PS, True).view(-1, 1, PS, PS)
    xy = self.features(self._normalize_input(patches)).view(-1, 3)
    # First row: [1 + x0, 0]. The zero is built from xy so that it keeps the
    # same dtype and device.
    a1 = torch.cat(
        [1.0 + xy[:, 0].reshape(-1, 1, 1), 0 * xy[:, 0].reshape(-1, 1, 1)],
        dim=2)
    # Second row: [x1, 1 + x2].
    a2 = torch.cat(
        [xy[:, 1].reshape(-1, 1, 1), 1.0 + xy[:, 2].reshape(-1, 1, 1)],
        dim=2)
    new_laf_no_center = torch.cat([a1, a2], dim=1).reshape(B, N, 2, 2)
    # Re-attach the last column of the input frames.
    new_laf = torch.cat([new_laf_no_center, laf[:, :, :, 2:3]], dim=3)
    scale_orig = get_laf_scale(laf)
    ellipse_scale = get_laf_scale(new_laf)
    # Bring the new frames back to the scale of the input frames.
    laf_out = scale_laf(make_upright(new_laf), scale_orig / ellipse_scale)
    return laf_out