def resample_record(self, idx, p):
    """Load record ``idx`` and crop it to ``(self.h, self.w)`` with a box derived from ``p``.

    The image is read through ``self.dataloader``, squashed to a square whose
    side is its shorter edge, passed through ``AUG_IMAGE_T`` and
    ``FINALIZE_T``, and moved to ``p.device``. A rectangle is then built from
    the first four entries of ``p`` and cropped/resized with kornia.

    Args:
        idx: Positional row index into ``self.dataloader._all_records``.
        p: Tensor-like with at least four elements; ``p[0]``/``p[1]`` select
            the half-width/half-height and ``p[2]``/``p[3]`` the centre of the
            crop. Each is used as an interpolation factor between a minimum
            and a maximum (presumably in [0, 1] -- confirm against caller).

    Returns:
        The cropped image with the leading batch dimension squeezed away.
    """
    record = self.dataloader._all_records.iloc[idx]
    img = self.dataloader.read_image(record["Image"], record["flipped"])

    # Force a square image (side = shorter edge) before augmentation.
    side = min(img.size)
    img = img.resize((side, side))
    img = AUG_IMAGE_T(image=np.array(img))["image"]
    img = FINALIZE_T(image=img)["image"].to(p.device)

    # After FINALIZE_T the image is indexed as (channels, height, width).
    ih, iw = img.shape[1:]
    half_iw, half_ih = iw // 2, ih // 2
    half_w, half_h = self.w // 2, self.h // 2

    def lerp(t, lo, hi):
        # Map an interpolation factor t onto the interval [lo, hi].
        return t * (hi - lo) + lo

    # Half extents range from half the target size up to half the image size;
    # the centre is kept at least half a target size away from each border.
    w = lerp(p[0], half_w, half_iw)
    h = lerp(p[1], half_h, half_ih)
    cx = lerp(p[2], half_w, iw - half_w)
    cy = lerp(p[3], half_h, ih - half_h)

    x1 = (cx - w).clamp(0, iw)
    x2 = (cx + w).clamp(0, iw)
    y1 = (cy - h).clamp(0, ih)
    y2 = (cy + h).clamp(0, ih)

    # kornia.crop_and_resize expects corners as (x, y) pairs in clockwise
    # order: top-left, top-right, bottom-right, bottom-left. The previous
    # code stacked (y, x) pairs in Z-order, which does not describe a
    # rectangle under that convention.
    points_src = torch.stack(
        [
            torch.stack([x1, y1]),
            torch.stack([x2, y1]),
            torch.stack([x2, y2]),
            torch.stack([x1, y2]),
        ]
    ).unsqueeze(0)

    img_warp = kornia.crop_and_resize(
        img.unsqueeze(0), points_src, (self.h, self.w)
    )
    return img_warp.squeeze(0)
def test_crop_batch_broadcast(self, device, dtype):
    """A single 1x4x2 box is applied to both images of a 2x1x4x4 batch."""
    image = [
        [1., 2., 3., 4.],
        [5., 6., 7., 8.],
        [9., 10., 11., 12.],
        [13., 14., 15., 16.],
    ]
    # The second batch item is the transpose of the first.
    image_t = [list(column) for column in zip(*image)]
    batch = torch.tensor([[image], [image_t]], device=device, dtype=dtype)

    # One box only; it has to be broadcast over the batch.
    single_box = torch.tensor(
        [[[1., 1.], [2., 1.], [2., 2.], [1., 2.]]],
        device=device, dtype=dtype)  # 1x4x2

    # The central 2x2 window of each image.
    want = torch.tensor(
        [[[[6., 7.], [10., 11.]]], [[[6., 10.], [7., 11.]]]],
        device=device, dtype=dtype)

    got = kornia.crop_and_resize(batch, single_box, (2, 2), align_corners=True)
    assert_allclose(got, want, rtol=1e-4, atol=1e-4)
def test_crop_batch(self, device):
    """Each image of a two-image batch is cropped with its own box."""
    first = [
        [1., 2., 3., 4.],
        [5., 6., 7., 8.],
        [9., 10., 11., 12.],
        [13., 14., 15., 16.],
    ]
    # The second image is the transpose of the first.
    second = [list(column) for column in zip(*first)]
    batch = torch.tensor([[first], [second]]).to(device)

    out_size = (2, 2)  # (height, width)

    # One box per image, four corners each.
    per_image_boxes = torch.tensor([
        [[1., 1.], [2., 1.], [2., 2.], [1., 2.]],
        [[1., 2.], [3., 2.], [3., 3.], [1., 3.]],
    ]).to(device)  # 2x4x2

    want = torch.tensor([
        [[[6., 7.], [10., 11.]]],
        [[[7., 15.], [8., 16.]]],
    ]).to(device)

    got = kornia.crop_and_resize(
        batch, per_image_boxes, out_size, align_corners=True)
    assert_allclose(got, want)
def test_crop(self, device):
    """Crop the central 2x2 window and resize it to 2x3 with align_corners=False."""
    inp = torch.tensor([[
        [1., 2., 3., 4.],
        [5., 6., 7., 8.],
        [9., 10., 11., 12.],
        [13., 14., 15., 16.],
    ]]).to(device)

    height, width = 2, 3

    # These are the align_corners=False results (rounded to four decimals).
    expected = torch.tensor(
        [[[6.7222, 7.1667, 7.6111],
          [9.3889, 9.8333, 10.2778]]]).to(device)

    boxes = torch.tensor([[
        [1., 1.],
        [2., 1.],
        [2., 2.],
        [1., 2.],
    ]]).to(device)  # 1x4x2

    # Pin align_corners explicitly so the expectation does not depend on the
    # library default; the tolerances match the four-decimal precision of
    # the expected literals.
    patches = kornia.crop_and_resize(
        inp, boxes, (height, width), align_corners=False)
    assert_allclose(patches, expected, rtol=1e-4, atol=1e-4)
def test_crop_batch(self, device, dtype):
    """Two images, two boxes: every image is cropped with its own box."""
    tensor_kwargs = {"device": device, "dtype": dtype}

    base = [
        [1.0, 2.0, 3.0, 4.0],
        [5.0, 6.0, 7.0, 8.0],
        [9.0, 10.0, 11.0, 12.0],
        [13.0, 14.0, 15.0, 16.0],
    ]
    # The second image is the transpose of the first.
    base_t = [list(column) for column in zip(*base)]
    images = torch.tensor([[base], [base_t]], **tensor_kwargs)

    box_a = [[1.0, 1.0], [2.0, 1.0], [2.0, 2.0], [1.0, 2.0]]
    box_b = [[1.0, 2.0], [3.0, 2.0], [3.0, 3.0], [1.0, 3.0]]
    boxes = torch.tensor([box_a, box_b], **tensor_kwargs)  # 2x4x2

    want = torch.tensor(
        [
            [[[6.0, 7.0], [10.0, 11.0]]],
            [[[7.0, 15.0], [8.0, 16.0]]],
        ],
        **tensor_kwargs,
    )

    # No align_corners argument: the library default is used.
    got = kornia.crop_and_resize(images, boxes, (2, 2))
    assert_close(got, want, rtol=1e-4, atol=1e-4)
def test_crop(self):
    """Crop the central 2x2 window of a 4x4 ramp and resize it to 2x3."""
    # Rows 1..4, 5..8, 9..12, 13..16 wrapped in one leading dimension.
    source = torch.tensor(
        [[[float(4 * row + col) for col in range(1, 5)] for row in range(4)]])

    out_h, out_w = 2, 3

    corners = torch.tensor(
        [[[1., 1.], [2., 1.], [2., 2.], [1., 2.]]])  # 1x4x2

    want = torch.tensor([[
        [6., 6.5, 7.],
        [10., 10.5, 11.],
    ]])

    got = kornia.crop_and_resize(source, corners, (out_h, out_w))
    assert_allclose(got, want)
def test_align_corners_false(self, device, dtype):
    """Resizing a 2x2 crop to 2x3 with align_corners=False gives the expected samples."""
    opts = dict(device=device, dtype=dtype)

    ramp = [
        [1.0, 2.0, 3.0, 4.0],
        [5.0, 6.0, 7.0, 8.0],
        [9.0, 10.0, 11.0, 12.0],
        [13.0, 14.0, 15.0, 16.0],
    ]
    image = torch.tensor([[ramp]], **opts)  # 1x1x4x4

    box = torch.tensor(
        [[[1.0, 1.0], [2.0, 1.0], [2.0, 2.0], [1.0, 2.0]]], **opts)  # 1x4x2

    out_size = (2, 3)  # (height, width)

    want = torch.tensor(
        [[[[6.7222, 7.1667, 7.6111], [9.3889, 9.8333, 10.2778]]]], **opts)

    got = kornia.crop_and_resize(image, box, out_size, align_corners=False)
    assert_allclose(got, want, rtol=1e-4, atol=1e-4)
def test_align_corners_true(self, device, dtype):
    """The default call yields the values expected for align_corners=True."""
    opts = dict(device=device, dtype=dtype)

    ramp = [
        [1.0, 2.0, 3.0, 4.0],
        [5.0, 6.0, 7.0, 8.0],
        [9.0, 10.0, 11.0, 12.0],
        [13.0, 14.0, 15.0, 16.0],
    ]
    image = torch.tensor([[ramp]], **opts)  # 1x1x4x4

    box = torch.tensor(
        [[[1.0, 1.0], [2.0, 1.0], [2.0, 2.0], [1.0, 2.0]]], **opts)  # 1x4x2

    out_size = (2, 3)  # (height, width)

    want = torch.tensor(
        [[[[6.0000, 6.5000, 7.0000], [10.0000, 10.5000, 11.0000]]]], **opts)

    # align_corners is omitted on purpose: the default is expected to behave
    # like align_corners=True.
    got = kornia.crop_and_resize(image, box, out_size)
    assert_allclose(got, want, rtol=1e-4, atol=1e-4)
def test_jit(self, device):
    """The TorchScript-compiled crop returns the same result as the eager call."""

    @torch.jit.script
    def op_script(input: torch.Tensor, boxes: torch.Tensor,
                  size: Tuple[int, int]) -> torch.Tensor:
        return kornia.crop_and_resize(input, boxes, size)

    ramp = [
        [1., 2., 3., 4.],
        [5., 6., 7., 8.],
        [9., 10., 11., 12.],
        [13., 14., 15., 16.],
    ]
    image = torch.tensor([ramp]).to(device)

    corners = torch.tensor(
        [[[1., 1.], [2., 1.], [2., 2.], [1., 2.]]]).to(device)  # 1x4x2

    out_size = (4, 2)  # (crop height, crop width)

    scripted = op_script(image, corners, out_size)
    eager = kornia.crop_and_resize(image, corners, out_size)
    assert_allclose(scripted, eager)
def op_script(
    input: torch.Tensor,
    boxes: torch.Tensor,
    size: Tuple[int, int],
) -> torch.Tensor:
    """Forward the arguments unchanged to ``kornia.crop_and_resize``."""
    patches = kornia.crop_and_resize(input, boxes, size)
    return patches
def get_new_image(self, src_img, dst_img):
    """Crop ``src_img`` with the current box and resize it to ``dst_img``'s size.

    The box is recomputed as ``self.mem @ self.xyxy``, reshaped to 1x4x2 and
    stored on ``self.boxes`` before cropping. The output size is taken from
    the last two dimensions of ``dst_img``.
    """
    corners = torch.matmul(self.mem, self.xyxy)
    self.boxes = corners.reshape(1, 4, 2)
    target_size = dst_img.shape[-2:]
    return kornia.crop_and_resize(src_img, self.boxes, target_size)
def forward(self, p, p_len, r, r_len, batch_size, dropout):
    """Score a (p, r) batch from a three-channel word-similarity grid.

    Both sequences are embedded with three separate embedding tables. For
    each table a pairwise cosine grid between the words of ``r`` and the
    words of ``p`` is built; the three grids become the channels of an
    image, which is cropped/resized with kornia and fed to
    ``self.resnet152``, ``self.final_layer`` and a sigmoid.

    Args:
        p, r: Index tensors accepted by ``self.wordEmbd*`` (presumably
            ``(batch, seq)`` token ids -- confirm against caller).
        p_len, r_len: Per-sample lengths, one entry per batch element; their
            maxima are used to truncate the embedded sequences.
        batch_size: Number of samples in the batch, used for reshaping.
        dropout: Dropout probability applied to every embedded sequence.

    Returns:
        ``torch.sigmoid(self.final_layer(self.resnet152(grid)))``.
    """
    # A freshly constructed nn.Dropout is in training mode, so dropout is
    # applied whenever ``dropout`` > 0, independent of ``self.training``.
    drop = torch.nn.Dropout(dropout)

    max_p_len = torch.max(p_len)
    max_r_len = torch.max(r_len)
    embd_dim = self.hyps['EMBD_DIM']

    def similarity_channel(embed):
        # Build one grid channel: cosine between every r word and every p word.
        # Embed, cut off excess words, then apply dropout (p first, then r).
        p_emb = embed(p)[:, 0:max_p_len, :]
        r_emb = embed(r)[:, 0:max_r_len, :]
        p_emb = drop(p_emb)
        r_emb = drop(r_emb)

        # Tile both sequences so that entry [b, j, i] pairs r word j with
        # p word i.
        r_emb = r_emb.repeat(1, max_p_len, 1)
        r_emb = torch.reshape(
            r_emb, (batch_size, max_p_len, max_r_len, embd_dim))
        r_emb = torch.transpose(r_emb, 1, 2)
        p_emb = p_emb.repeat(1, max_r_len, 1)
        p_emb = torch.reshape(
            p_emb, (batch_size, max_r_len, max_p_len, embd_dim))

        # Append a trailing channel axis; presumably this gives
        # [batch, max_r_len, max_p_len, 1] (depends on _cosine_dist).
        return torch.unsqueeze(self._cosine_dist(r_emb, p_emb, ax=3), 3)

    # Evaluation order (table 1, 2, 3) matches the original so the dropout
    # random stream is consumed identically.
    channels = [
        similarity_channel(embed)
        for embed in (self.wordEmbd, self.wordEmbd2, self.wordEmbd3)
    ]
    grid = torch.cat(channels, 3)

    # PyTorch wants channels first. Swapping axes 1 and 3 gives
    # [batch_size, num_channels, max_p_len, max_r_len].
    grid = torch.transpose(grid, 1, 3)

    # One box per sample with corners (0, 0), (0, r_len), (p_len, r_len),
    # (p_len, 0).
    # NOTE(review): kornia documents corners as (x, y) in the order top-left,
    # top-right, bottom-right, bottom-left, and the output size as (h, w).
    # With height = p and width = r these corners look like (y, x) pairs, and
    # the size is passed as [IMG_WIDTH, IMG_HEIGHT] -- confirm this is
    # intended before changing it, since trained weights depend on it.
    zeros = torch.zeros(batch_size).long().to(self.device)
    boxes = torch.stack(
        [
            torch.stack([zeros, zeros], 1),
            torch.stack([zeros, r_len], 1),
            torch.stack([p_len, r_len], 1),
            torch.stack([p_len, zeros], 1),
        ],
        1,
    )

    grid_proc = kornia.crop_and_resize(
        grid, boxes, [self.hyps['IMG_WIDTH'], self.hyps['IMG_HEIGHT']])

    # Pass through resnet-152, then squash to a probability.
    y_1000 = self.resnet152(grid_proc)
    y_pred = torch.sigmoid(self.final_layer(y_1000))
    return y_pred