def test_tensor_to_ndimage():
    """
    Checks that ``utils.tensor_to_ndimage`` recovers the source images
    from ImageNet-normalized tensors, both as ``uint8`` and as ``float32``,
    for a whole batch and for a single image.
    """
    source = np.random.randint(0, 255, (2, 20, 10, 3), np.uint8)

    # Build the normalized batch the same way a typical pipeline would.
    normalized = [
        normalize(to_tensor(picture), _IMAGENET_MEAN, _IMAGENET_STD)
        for picture in source
    ]
    batch = torch.stack(normalized, dim=0)

    as_bytes = utils.tensor_to_ndimage(batch, dtype=np.uint8)
    as_floats = utils.tensor_to_ndimage(batch, dtype=np.float32)

    # Float comparisons share one tolerance setting.
    tolerance = {"atol": 1e-3, "rtol": 1e-3}

    assert np.allclose(as_bytes, source)
    assert np.allclose(as_floats, source / 255, **tolerance)

    # A single (un-batched) image goes through the default conversion.
    single = utils.tensor_to_ndimage(batch[0])
    assert np.allclose(single, source[0] / 255, **tolerance)
def predict_table_polygon(
        self, batch: Union[torch.Tensor, np.ndarray]) -> np.ndarray:
    """
    Predicts table vertices for each frame in batch and returns it

    Args:
        batch: batch of frames

    Returns:
        polygons: batch of table polygons, one row per frame. Each
            polygon has 4 vertices stored flat as
            [a1, b1, a2, b2, a3, b3, a4, b4] in ``float32``.
            Even-indexed entries are divided by ``image.shape[0]`` and
            odd-indexed entries by ``image.shape[1]``, so the values are
            relative to the frame size. When polygon detection fails for
            a frame, the unit square [0, 0, 0, 1, 1, 1, 1, 0] is used.

    NOTE(review): an earlier version of this docstring described the
    even entries as x (relative to image width) and the odd ones as y
    (relative to image height) -- confirm the axis convention against
    the ``tp`` helpers.
    """
    hulls = []
    for frame in batch:
        image = utils.tensor_to_ndimage(frame)
        # Convert the float image to uint8 with round-half-up and clipping.
        image = (image * 255 + 0.5).astype(int).clip(0, 255).astype('uint8')
        try:
            # The detector receives a copy so ``image`` stays untouched.
            hull = tp.find_table_polygon(deepcopy(image))
            hull = tp.remove_big_angles_from_hull(hull)
            hull = tp.take_longest_sides_from_hull(
                hull, 4).reshape(-1).astype('float32')
            hull[0::2] = hull[0::2] / image.shape[0]
            hull[1::2] = hull[1::2] / image.shape[1]
            hulls.append(hull)
        except Exception:
            # Deliberate best-effort: fall back to the whole-frame polygon.
            # The fallback is float32 like the detected hulls; an integer
            # fallback would promote the stacked result to float64 whenever
            # a single frame failed.
            hulls.append(
                np.array([0, 0, 0, 1, 1, 1, 1, 0], dtype='float32'))
    return np.array(hulls)
def apply(self, img: torch.Tensor, **params) -> np.ndarray:
    """Convert the tensor image with ``utils.tensor_to_ndimage``.

    A 2-dimensional input first gets a leading dimension via
    ``unsqueeze(0)``. The ``denormalize`` and ``move_channels_dim``
    settings of this transform are forwarded to the converter;
    ``params`` is accepted but not used here.
    """
    is_two_dimensional = len(img.shape) == 2
    tensor = img.unsqueeze(0) if is_two_dimensional else img

    conversion_options = {
        "denormalize": self.denormalize,
        "move_channels_dim": self.move_channels_dim,
    }
    return utils.tensor_to_ndimage(tensor, **conversion_options)
def predict_table_polygon(
        self, batch: Union[torch.Tensor, np.ndarray]) -> np.ndarray:
    """
    Predicts table vertices for each frame in batch, passing the mask
    from ``self.predict_table_mask`` to the polygon search

    Args:
        batch: batch of frames

    Returns:
        polygons: batch of table polygons, one row per frame. Each
            polygon has 4 vertices stored flat as 8 ``float32`` values.
            Even-indexed entries are divided by ``image.shape[0]`` and
            odd-indexed entries by ``image.shape[1]``. When polygon
            detection fails for a frame, the unit square
            [0, 0, 0, 1, 1, 1, 1, 0] is used.
    """
    hulls = []
    masks = self.predict_table_mask(batch)
    for frame, mask in zip(batch, masks):
        image = utils.tensor_to_ndimage(frame)
        # Convert the float image to uint8 with round-half-up and clipping.
        image = (image * 255 + 0.5).astype(int).clip(0, 255).astype('uint8')
        try:
            hull = tp.find_table_polygon(deepcopy(image), mask=mask)
            # TODO: refactor (code duplication with super class default implementation)
            hull = tp.remove_big_angles_from_hull(hull)
            hull = tp.take_longest_sides_from_hull(
                hull, 4).reshape(-1).astype('float32')
            hull[0::2] = hull[0::2] / image.shape[0]
            hull[1::2] = hull[1::2] / image.shape[1]
            hulls.append(hull)
        except Exception:
            # Deliberate best-effort: fall back to the whole-frame polygon.
            # The fallback is float32 like the detected hulls; an integer
            # fallback would promote the stacked result to float64 whenever
            # a single frame failed.
            hulls.append(
                np.array([0, 0, 0, 1, 1, 1, 1, 0], dtype='float32'))
    return np.array(hulls)
def on_batch_end(self, state: RunnerState):
    """Save every image of the batch with its predicted mask overlaid.

    For each sample the per-channel probabilities are thresholded into a
    label mask (channel ``i`` becomes label ``i + 1``; a later channel
    overwrites an earlier one where both pass the threshold), the mask is
    colorized with ``label2rgb`` and blended into the image using
    ``self.mask_strength``. The result is written to
    ``{self.out_prefix}/{loader_name}/{suffix}.jpg``, where ``suffix`` is
    the sample name from ``state.input[self.name_key]`` when available
    and a zero-padded running counter otherwise.

    Args:
        state: runner state providing ``loader_name``, ``input`` and
            ``output`` for the current batch
    """
    lm = state.loader_name
    names = state.input.get(self.name_key, [])

    features = state.input[self.input_key].detach().cpu()
    images = tensor_to_ndimage(features)

    logits = state.output[self.output_key]
    if len(logits.shape) < 4:
        # Out-of-place on purpose: the in-place ``unsqueeze_`` would
        # reshape the tensor stored in ``state.output`` and leak the
        # change to whatever reads the runner state after this callback.
        # presumably logits are (batch, H, W) here -- TODO confirm
        logits = logits.unsqueeze(1)

    if self.mask_type == "soft":
        probabilities = torch.sigmoid(logits)
    else:
        probabilities = F.softmax(logits, dim=1)
    probabilities = probabilities.detach().cpu().numpy()

    masks = []
    for probability in probabilities:
        mask = np.zeros_like(probability[0], dtype=np.int32)
        for i, ch in enumerate(probability):
            # Label 0 stays background; channel i maps to label i + 1.
            mask[ch >= self.threshold] = i + 1
        masks.append(mask)

    for i, (image, mask) in enumerate(zip(images, masks)):
        try:
            suffix = names[i]
        except IndexError:
            # No name for this sample: use the running counter instead.
            suffix = f"{self.counter:06d}"
            self.counter += 1

        mask = label2rgb(mask, bg_label=0)

        image = image * (1 - self.mask_strength) \
            + mask * self.mask_strength
        image = (image * 255).clip(0, 255).round().astype(np.uint8)

        filename = f"{self.out_prefix}/{lm}/{suffix}.jpg"
        imageio.imwrite(filename, image)
def get_statistics(loader: DataLoader,
                   table_recognizer: TableRecognizer,
                   verbose=False) -> RecognitionStatistics:
    """
    Collects IoU / Dice statistics of predicted masks and table polygons

    Predictions are pulled from ``table_recognizer.next_data()`` until it
    returns ``None`` masks; ground truth is read in lockstep from
    ``loader``. Only the time spent in successful ``next_data()`` calls
    is accumulated.

    Args:
        loader: data loader yielding dicts with 'image' and 'mask'
        table_recognizer: source of predicted masks and table polygons
        verbose: if True, shows a 2x2 matplotlib figure for every sample

    Returns:
        RecognitionStatistics with every sum divided by the number of
        processed samples (including the accumulated time)
    """
    batches = iter(loader)

    total_mask_iou = 0
    total_mask_dice = 0
    total_table_iou = 0
    total_table_dice = 0
    total_time = 0
    n_samples = 0

    while True:
        started = time()
        masks, tables = table_recognizer.next_data()
        if masks is None:
            break
        total_time += time() - started

        batch = next(batches)
        img_size = batch['image'].shape[2:]
        scale_0 = img_size[0]
        scale_1 = img_size[1]

        samples = zip(masks, batch['mask'], tables, batch['image'])
        for mask, truth, table, image in samples:
            mask = mask.astype(bool)
            truth = truth.numpy().astype(bool)

            # Rasterize the predicted polygon into a boolean mask.
            canonical_table = tp.get_canonical_4_polygon(table)
            vertices = [
                (int(x * scale_0), int(y * scale_1))
                for x, y in canonical_table
            ]
            table_mask = tp.find_convex_hull_mask(
                img_size, vertices).astype(bool)

            truth_area = np.sum(truth)

            mask_overlap = np.sum(mask & truth)
            total_mask_iou += mask_overlap / np.sum(mask | truth)
            total_mask_dice += \
                2 * mask_overlap / (np.sum(mask) + truth_area)

            table_overlap = np.sum(table_mask & truth)
            total_table_iou += table_overlap / np.sum(table_mask | truth)
            total_table_dice += \
                2 * table_overlap / (np.sum(table_mask) + truth_area)

            n_samples += 1

            if not verbose:
                continue

            plt.figure(figsize=(14, 10))

            plt.subplot(2, 2, 1)
            plt.imshow(mask)
            plt.title('Predicted mask')

            plt.subplot(2, 2, 2)
            plt.imshow(truth.reshape(truth.shape[-2:]))
            plt.title('True mask')

            plt.subplot(2, 2, 3)
            picture = utils.tensor_to_ndimage(image)
            picture = (picture * 255 + 0.5).astype(int).clip(
                0, 255).astype('uint8')
            plt.title('Image')
            plt.imshow(picture)

            plt.subplot(2, 2, 4)
            plt.title('Predicted table mask')
            plt.imshow(table_mask)

            plt.show()

    return RecognitionStatistics(
        mean_mask_iou=total_mask_iou / n_samples,
        mean_mask_dice=total_mask_dice / n_samples,
        mean_table_iou=total_table_iou / n_samples,
        mean_table_dice=total_table_dice / n_samples,
        mean_time=total_time / n_samples,
    )
lambda x: x["logits"].cpu().numpy(), runner.predict_loader( loader=infer_loader, resume=f"{logdir}/checkpoints/best.pth" ), ) ) ) print(type(predictions)) print(predictions.shape) threshold = 0.5 max_count = 5 for i, (features, logits) in enumerate(zip(test_dataset, predictions)): image = utils.tensor_to_ndimage(features["image"]) mask_ = torch.from_numpy(logits[0]).sigmoid() mask = utils.detach(mask_ > threshold).astype("float") show_examples(name="", image=image, mask=mask) if i >= max_count: break batch = next(iter(loaders["valid"])) # saves to `logdir` and returns a `ScriptModule` class runner.trace(model=model, batch=batch, logdir=logdir, fp16=is_fp16_used) !ls {logdir}/trace/
list( map( lambda x: x["logits"].cpu().numpy(), runner.predict_loader( loader=infer_loader, resume=f"{logdir_root}/logs/checkpoints/best.pth"), ))) print(type(predictions)) print(predictions.shape) threshold = 0.5 max_count = 5 for i, (features, logits) in enumerate(zip(train_dataset, predictions)): image = utils.tensor_to_ndimage(features["image"], denormalize=False) # filename_mask = os.path.splitext(features["filename"])[0] filename_mask = os.path.splitext(features["filename_mask"])[0] gt = imread(mask_path / f"{filename_mask}.png") gt_res = gt.copy() gt_res.resize((224, 224)) gt_im = Image.fromarray(gt_res * 255) mask_ = torch.from_numpy(logits[0]).sigmoid() mask = utils.detach(mask_ > threshold).astype("uint8") # # Replace mask with real image # # filename_mask = os.path.splitext(features["filename_img"])[0] # # mask = imread(test_image_path / f"{filename_mask}.jpg")