def draw_corspd_region_torch(img0, img1, H):
    """
    Outline the area of img0 that corresponds to img1 and show both images
    side by side.

    img0: [3, H_0, W_0]
    img1: [3, H_1, W_1]
    H:    homography, torch.Tensor or array; its inverse is used to project
          the border of img1 into img0

    Returns the annotated img0 and img1 concatenated along axis 1 (the two
    images therefore need the same height).
    """
    def as_uint8(tensor_img):
        # unnormalize -> numpy -> 0..255 uint8; expected layout is [H, W, 3]
        return (tonumpy(unnormalize(tensor_img)) * 255.).astype(np.uint8)

    left = as_uint8(img0)
    right = as_uint8(img1)

    homography = H.detach().cpu().numpy() if isinstance(H, torch.Tensor) else H

    # corners of the right image, walked around its border
    height, width = right.shape[:2]
    corners_right = np.array(
        [[0, 0], [0, height], [width, height], [width, 0]],
        dtype=np.float32,
    )

    # map the corners into the left image through the inverse homography
    corners_left = cv2.perspectiveTransform(
        corners_right.reshape(1, -1, 2), np.linalg.inv(homography)
    )[0]

    # polylines needs integer pixel coordinates; draw on copies
    colour = (255, 0, 0)
    left = cv2.polylines(
        left.copy(), [corners_left.astype(np.int32).reshape(-1, 2)],
        True, colour, thickness=5,
    )
    right = cv2.polylines(
        right.copy(), [corners_right.astype(np.int32).reshape(-1, 2)],
        True, colour, thickness=5,
    )

    return np.concatenate([left, right], axis=1)
def get_tensorboard_data(self, num_kps=20):
    """
    This processes the data needed for visualization. It expects the
    following in self.logger
        - image0: (C, H, W), Tensor, normalized
        - image1: (C, H, W), Tensor, normalized
        - scale: (H, W), Tensor, scale ratio of right image to left image
        - scale_pred: (num_cls, H', W'), Tensor, per-class scores
        - msk: region that has corresponding pixels
    And it returns a dictionary
        - img0: image 1 after unnormalize
        - img1: image 2 after unnormalize
        - scale_pred: argmax class of scale_pred passed through cls2RGB
        - msk: the logged mask, unchanged
        - scale: ground-truth scale bucketed into classes and passed
          through cls2RGB

    num_kps is not used by this variant.
    """
    # original images
    image0 = self.logger['image0']
    image1 = self.logger['image1']

    # predicted scale class per pixel
    scale_pred = self.logger['scale_pred']
    num_cls = scale_pred.shape[0]
    scale_pred = torch.argmax(scale_pred, dim=0).long()
    scale_pred = cls2RGB(scale_pred, num_cls)

    # Bucket the ground-truth ratio into classes: <0.75 -> 0,
    # [0.75, 1.5] -> 1, >1.5 -> 2. The assignments below write in place, so
    # work on a copy to leave the logged tensor intact for other consumers.
    scale = self.logger['scale'].clone()
    scale[scale > 1.5] = 2
    scale[scale < 0.75] = 0
    scale[(scale >= 0.75) & (scale <= 1.5)] = 1
    scale = scale.long()
    scale = cls2RGB(scale, num_cls)

    # region that has corresponding pixels
    msk = self.logger['msk']

    # process the images
    image0 = unnormalize(image0)
    image1 = unnormalize(image1)

    return {
        'img0': image0,
        'img1': image1,
        'scale_pred': scale_pred,
        'msk': msk,
        'scale': scale,
    }
def forward(self, images):
    """
    Compute a dense DAISY descriptor (one per pixel) for every image in the
    batch.

    images: [N, 3, H, W], normalized tensor

    Returns the per-pixel descriptors after totensor_batch and F.normalize
    (default dim=1).
    # NOTE(review): presumably [N, D, H, W] with unit norm along D - confirm
    # against totensor_batch.
    """
    # to a uint8 array batch; unpacking asserts the [N, H, W, C] layout
    images = touint8(tonumpy_batch(unnormalize(images)))
    N, H, W, _ = images.shape

    # One keypoint per pixel. The grid depends only on H and W, so build it
    # once for the whole batch instead of once per image.
    # shape (2, H, W)
    grid = np.mgrid[:H, :W]
    # reverse to (x, y) order, reshape to (H*W, 2)
    grid = grid[::-1].transpose(1, 2, 0).reshape(-1, 2)
    # opencv keypoints
    keypoints = [cv2.KeyPoint(x, y, 0) for (x, y) in grid]

    descs = []
    for image in images:
        _, desc = self.daisy.compute(image, keypoints)
        descs.append(desc.reshape(H, W, -1))

    # recombine into batches
    descs = totensor_batch(np.stack(descs, axis=0))
    return F.normalize(descs)
def forward(self, x):
    """
    Forward pass that computes the dense descriptor tensor.

    Input
      x: normalized image tensor shaped N x 3 x H x W; channels 0, 1, 2 of
         dim 1 are combined into a single grayscale channel
         (# assumes RGB channel order - TODO confirm).
    Output
      desc: descriptor tensor, L2-normalized along dim 1.
            # NOTE(review): presumably N x 256 x H/8 x W/8, depending on the
            # conv/pool layers defined outside this method - confirm.
    """
    x = unnormalize(x)
    gray = (0.299 * x[:, 0] + 0.587 * x[:, 1] + 0.114 * x[:, 2]).unsqueeze(1)
    # Follow the device the network weights live on rather than forcing
    # CUDA, so the model also runs on CPU or on a non-default GPU.
    x = gray.to(self.conv1a.weight.device)

    # Shared Encoder.
    x = self.relu(self.conv1a(x))
    x = self.relu(self.conv1b(x))
    x = self.pool(x)
    x = self.relu(self.conv2a(x))
    x = self.relu(self.conv2b(x))
    x = self.pool(x)
    x = self.relu(self.conv3a(x))
    x = self.relu(self.conv3b(x))
    x = self.pool(x)
    x = self.relu(self.conv4a(x))
    x = self.relu(self.conv4b(x))

    # Descriptor Head.
    cDa = self.relu(self.convDa(x))
    desc = self.convDb(cDa)
    dn = torch.norm(desc, p=2, dim=1)  # Compute the norm.
    desc = desc.div(torch.unsqueeze(dn, 1))  # Divide by norm to normalize.

    return desc
def draw_img_desc_torch(img, desc):
    """
    Show an image next to the RGB rendering of its descriptor map.

    img:  [3, H, W]
    desc: [C, H, W]

    Returns both pictures joined along axis 1, the descriptor picture being
    resized to the image's size first.
    """
    # both converted to 0..255 uint8; expected layout is [H, W, 3]
    picture = (tonumpy(unnormalize(img)) * 255.).astype(np.uint8)
    desc_picture = (desc2RGB(tonumpy(desc)) * 255.).astype(np.uint8)

    # cv2.resize takes the target size as (width, height)
    rows, cols = picture.shape[:2]
    desc_picture = cv2.resize(
        desc_picture.copy(), (cols, rows), interpolation=cv2.INTER_LINEAR
    )

    return np.hstack([picture, desc_picture])
def draw_paired_img_desc_torch(img0, desc0, kps0, img1, desc1, kps1, H):
    """
    Draw an image pair and its descriptor pair with the corresponding
    region outlined and a few correspondences marked.

    img0: [3, H_0, W_0]
    desc0: [C, H_0', W_0']
    kps0: [N, 2]
    img1: [3, H_1, W_1]
    desc1: [C, H_1', W_1']
    kps1: [N, 2]
    H: [3, 3], Tensor; its inverse projects the border of img1 into img0

    Returns the image correspondence picture stacked on top of the
    descriptor correspondence picture (concatenated along axis 0).

    # NOTE(review): the region outline and keypoints are drawn on the
    # descriptor maps in image pixel coordinates, which assumes the maps
    # have the same resolution as the images - confirm against callers.
    """
    img0 = (tonumpy(unnormalize(img0)) * 255.).astype(np.uint8)  # [H, W, 3]
    desc0 = (desc2RGB(tonumpy(desc0)) * 255.).astype(np.uint8)  # [H, W, 3]
    kps0 = kps0.detach().cpu().numpy()
    img1 = (tonumpy(unnormalize(img1)) * 255.).astype(np.uint8)
    desc1 = (desc2RGB(tonumpy(desc1)) * 255.).astype(np.uint8)
    kps1 = kps1.detach().cpu().numpy()

    # compute the corresponding region
    H = H.detach().cpu().numpy()
    h1, w1 = img1.shape[:2]
    pts1 = np.array(
        [[0, 0], [0, h1], [w1, h1], [w1, 0]]
    ).astype(np.float32)
    pts0 = cv2.perspectiveTransform(
        np.reshape(pts1, [1, -1, 2]), np.linalg.inv(H)
    )[0]

    # draw the corresponding region on the images and descriptor maps
    # (on copies, since polylines draws in place)
    pts0 = pts0.astype(np.int32)
    img0 = cv2.polylines(img0.copy(), [pts0.reshape([-1, 2])], True, (255, 0, 0), thickness=5)
    desc0 = cv2.polylines(desc0.copy(), [pts0.reshape([-1, 2])], True, (255, 0, 0), thickness=5)
    pts1 = pts1.astype(np.int32)
    img1 = cv2.polylines(img1.copy(), [pts1.reshape([-1, 2])], True, (255, 0, 0), thickness=5)
    desc1 = cv2.polylines(desc1.copy(), [pts1.reshape([-1, 2])], True, (255, 0, 0), thickness=5)

    # draw correspondences
    img = draw_corr(img0, kps0, img1, kps1, num=10)
    # desc0 is not warped into image 1's frame, so its keypoints are kps0,
    # exactly as for the image pair above.
    desc = draw_corr(desc0, kps0, desc1, kps1, num=10)

    return np.concatenate([img, desc], axis=0)
def get_tensorboard_data(self, num_kps=20):
    """
    This processes the data needed for visualization. It expects the
    following in self.logger
        - image0: (C, H, W), Tensor, normalized
        - image1: (C, H, W), Tensor, normalized
        - desc0: (C, H, W)
        - desc1: (C, H, W)
        - kps0: (N, 2), each being (x, y)
        - kps1: (N, 2)
        - kps2: (N, 2), negative ones
    And it returns a dictionary
        - desc0: descriptor 1, RGB, (3, H, W)
        - desc1: descriptor 2, RGB, (3, H, W)
        - img0: image 1, (3, H, W)
        - img1: image 2, (3, H, W)
        - keypoints: the two images marked with the sampled keypoints
        - neg_keypoints: image 2 marked with negative keypoints
        - corr: ground truth correspondences
        - corr_false: false correspondences

    num_kps: number of keypoints to sample without replacement; at most N
        are drawn when fewer than num_kps keypoints are available.
    """
    # original images
    image0 = self.logger['image0']
    image1 = self.logger['image1']

    # descriptors
    desc0 = self.logger['desc0']
    desc1 = self.logger['desc1']

    # keypoints
    kps0 = self.logger['kps0']
    kps1 = self.logger['kps1']
    kps2 = self.logger['kps2']

    # process the images
    image0 = unnormalize(image0)
    image1 = unnormalize(image1)

    # process the descriptor
    desc0, desc1 = [desc2RGB(tonumpy(x)) for x in [desc0, desc1]]
    desc0, desc1 = [totensor(d) for d in [desc0, desc1]]

    # choose keypoints; sampling without replacement cannot draw more than
    # N items, so clamp the request instead of raising ValueError
    N = kps0.shape[0]
    indices = np.random.choice(N, size=min(num_kps, N), replace=False)

    # draw keypoints
    kps = draw_kps_torch(image0, kps0[indices], image1, kps1[indices])

    # draw negative keypoints
    neg_kps = draw_kps_torch(image0, kps0[indices], image1, kps2[indices])

    # draw ground truth correspondences
    corr_gt = draw_corr_torch(image0, kps0[indices], image1, kps1[indices])

    # draw false correspondences
    corr_false = draw_corr_torch(image0, kps0[indices], image1, kps2[indices])

    return {
        'img0': image0,
        'img1': image1,
        'desc0': desc0,
        'desc1': desc1,
        'keypoints': kps,
        'neg_keypoints': neg_kps,
        'corr': corr_gt,
        'corr_false': corr_false,
    }
def get_tensorboard_data(self, num_kps=20):
    """
    This processes the data needed for visualization. It expects the
    following in self.logger
        - image0: (C, H, W), Tensor, normalized
        - image1: (C, H, W), Tensor, normalized
        - d03: (C, H', W')
        - d13: (C, H', W')
        - kps0: (N, 2), each being (x, y)
        - kps1: (N, 2)
        - kps2: (N, 2), negative ones
        - H: homography matrix
        - scale_pred: (1, H', W'), Tensor
        - scale: (1, H, W), Tensor
        - msk: (1, H, W), Tensor (indexed with [0] below)
    And it returns a dictionary
        - img0: image 1, (3, H, W)
        - img1: image 2, (3, H, W)
        - desc: paired descriptor picture from draw_paired_desc_torch
        - keypoints: the two images marked with the sampled keypoints
        - neg_keypoints: image 2 marked with negative keypoints
        - corr: ground truth correspondences
        - corr_false: false correspondences
        - scale_pred: masked predicted scale drawn by draw_scale_torch
        - msk: the mask resized to the spatial size of scale_pred
        - scale: masked ground-truth scale drawn by draw_scale_torch

    num_kps: number of keypoints to sample without replacement; at most N
        are drawn when fewer than num_kps keypoints are available.
    """
    # original images
    image0 = self.logger['image0']
    image1 = self.logger['image1']

    # descriptors
    d03 = self.logger['d03']
    d13 = self.logger['d13']

    # keypoints
    kps0 = self.logger['kps0']
    kps1 = self.logger['kps1']
    kps2 = self.logger['kps2']

    # homography matrix
    H = self.logger['H']

    # scale ratio of right image to left image
    scale_pred = self.logger['scale_pred']
    scale = self.logger['scale']

    # region that has corresponding pixels
    msk = self.logger['msk']

    # mask the ground truth at full resolution, then bring the mask down to
    # the prediction's resolution before masking the prediction
    scale = draw_scale_torch(scale[0] * msk[0])
    msk = F.interpolate(msk.unsqueeze(0), scale_pred.shape[1:], mode='bilinear')[0]
    scale_pred = draw_scale_torch(scale_pred[0] * msk[0])
    msk = msk[0]

    # process the images
    image0 = unnormalize(image0)
    image1 = unnormalize(image1)

    # process the descriptor
    desc = draw_paired_desc_torch(d03, kps0, image0, d13, kps1, image1, H)

    # choose keypoints; sampling without replacement cannot draw more than
    # N items, so clamp the request instead of raising ValueError
    N = kps0.shape[0]
    indices = np.random.choice(N, size=min(num_kps, N), replace=False)

    # draw keypoints
    kps = draw_kps_torch(image0, kps0[indices], image1, kps1[indices])

    # draw negative keypoints
    neg_kps = draw_kps_torch(image0, kps0[indices], image1, kps2[indices])

    # draw ground truth correspondences
    corr_gt = draw_corr_torch(image0, kps0[indices], image1, kps1[indices])

    # draw false correspondences
    corr_false = draw_corr_torch(image0, kps0[indices], image1, kps2[indices])

    return {
        'img0': image0,
        'img1': image1,
        'desc': desc,
        'keypoints': kps,
        'neg_keypoints': neg_kps,
        'corr': corr_gt,
        'corr_false': corr_false,
        'scale_pred': scale_pred,
        'msk': msk,
        'scale': scale,
    }
def get_tensorboard_data(self, num_kps=20):
    """
    This processes the data needed for visualization. It expects the
    following in self.logger
        - image0: (C, H, W), Tensor, normalized
        - image1: (C, H, W), Tensor, normalized
        - d03: (C, H, W)
        - d13: (C, H, W)
        - kps0: (N, 2), each being (x, y)
        - kps1: (N, 2)
        - kps2: (N, 2), negative ones
        - H: homography matrix
    And it returns a dictionary
        - img0: image 1, (3, H, W)
        - img1: image 2, (3, H, W)
        - desc: paired descriptor picture from draw_paired_desc_torch
        - keypoints: the two images marked with the sampled keypoints
        - neg_keypoints: image 2 marked with negative keypoints
        - corr: ground truth correspondences
        - corr_false: false correspondences

    num_kps: number of keypoints to sample without replacement; at most N
        are drawn when fewer than num_kps keypoints are available.
    """
    # original images
    image0 = self.logger['image0']
    image1 = self.logger['image1']

    # descriptors
    d03 = self.logger['d03']
    d13 = self.logger['d13']

    # keypoints
    kps0 = self.logger['kps0']
    kps1 = self.logger['kps1']
    kps2 = self.logger['kps2']

    # homography matrix
    H = self.logger['H']

    # process the images
    image0 = unnormalize(image0)
    image1 = unnormalize(image1)

    # process the descriptor
    desc = draw_paired_desc_torch(d03, kps0, image0, d13, kps1, image1, H)

    # choose keypoints; sampling without replacement cannot draw more than
    # N items, so clamp the request instead of raising ValueError
    N = kps0.shape[0]
    indices = np.random.choice(N, size=min(num_kps, N), replace=False)

    # draw keypoints
    kps = draw_kps_torch(image0, kps0[indices], image1, kps1[indices])

    # draw negative keypoints
    neg_kps = draw_kps_torch(image0, kps0[indices], image1, kps2[indices])

    # draw ground truth correspondences
    corr_gt = draw_corr_torch(image0, kps0[indices], image1, kps1[indices])

    # draw false correspondences
    corr_false = draw_corr_torch(image0, kps0[indices], image1, kps2[indices])

    return {
        'img0': image0,
        'img1': image1,
        'desc': desc,
        'keypoints': kps,
        'neg_keypoints': neg_kps,
        'corr': corr_gt,
        'corr_false': corr_false,
    }