Example #1
import numpy as np
import torch
from torchvision.transforms.functional import normalize, to_tensor

# `utils`, `_IMAGENET_MEAN` and `_IMAGENET_STD` come from the surrounding
# project in the original test module.


def test_tensor_to_ndimage():
    """
    Tests tensor_to_ndimage functionality
    """
    orig_images = np.random.randint(0, 255, (2, 20, 10, 3), np.uint8)

    torch_images = torch.stack(
        [
            normalize(to_tensor(im), _IMAGENET_MEAN, _IMAGENET_STD)
            for im in orig_images
        ],
        dim=0,
    )

    byte_images = utils.tensor_to_ndimage(torch_images, dtype=np.uint8)
    float_images = utils.tensor_to_ndimage(torch_images, dtype=np.float32)

    assert np.allclose(byte_images, orig_images)
    assert np.allclose(float_images, orig_images / 255, atol=1e-3, rtol=1e-3)

    assert np.allclose(
        utils.tensor_to_ndimage(torch_images[0]),
        orig_images[0] / 255,
        atol=1e-3,
        rtol=1e-3,
    )
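
The round trip in the test suggests the typical usage pattern: normalize an image into a CHW tensor for a model, then call tensor_to_ndimage to get an HWC array back for visualization. A minimal sketch of that round trip for a single image; the `from catalyst import utils` import and the ImageNet mean/std values are assumptions here, since the test above pulls them from its own package:

import numpy as np
import torch
from torchvision.transforms.functional import normalize, to_tensor
from catalyst import utils  # assumed source of `utils` above

# A single synthetic HWC uint8 image, as in the test.
image = np.random.randint(0, 255, (20, 10, 3), np.uint8)

# Standard ImageNet statistics (assumed to match _IMAGENET_MEAN/_IMAGENET_STD).
mean, std = (0.485, 0.456, 0.406), (0.229, 0.224, 0.225)

tensor = normalize(to_tensor(image), mean, std)              # CHW float tensor
restored = utils.tensor_to_ndimage(tensor, dtype=np.uint8)   # back to HWC uint8

assert restored.shape == image.shape
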
Example #2
    def predict_table_polygon(
            self, batch: Union[torch.Tensor, np.ndarray]) -> np.ndarray:
        """
        Predicts table vertices for each frame in batch and returns it

        Args:
            batch: batch of frames
        Returns:
            polygons: batch of table polygons. Each polygon is the convex polygon with 4 vertices and
                all vertices have 2 coordinates (x, y).
                Polygon has the following form: [x1, y1, x2, y2, x3, y3, x4, y4] where x coordinates are responsible
                for polygon place relative to image width, y -- to image height
        """
        hulls = []
        for frame in batch:
            image = utils.tensor_to_ndimage(frame)
            image = (image * 255 + 0.5).astype(int).clip(0,
                                                         255).astype('uint8')
            try:
                hull = tp.find_table_polygon(deepcopy(image))
                hull = tp.remove_big_angles_from_hull(hull)
                hull = tp.take_longest_sides_from_hull(
                    hull, 4).reshape(-1).astype('float32')
                hull[0::2] = hull[0::2] / image.shape[0]
                hull[1::2] = hull[1::2] / image.shape[1]
                hulls.append(hull)
            except Exception:
                hulls.append(np.array([0, 0, 0, 1, 1, 1, 1, 0]))
        return np.array(hulls)
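
The flat [x1, y1, ..., x4, y4] layout and the per-axis normalization above imply a simple inverse when a polygon needs to be drawn back onto a frame. A small hypothetical helper (not part of the original project) that mirrors the snippet's convention of scaling even indices by image.shape[0] and odd indices by image.shape[1]:

import numpy as np


def denormalize_polygon(hull: np.ndarray, image_shape) -> np.ndarray:
    """Map a flat normalized polygon back to pixel coordinates."""
    points = hull.astype('float32').copy()
    points[0::2] *= image_shape[0]  # inverse of the division above
    points[1::2] *= image_shape[1]
    return points.reshape(-1, 2).round().astype(int)


# The fallback polygon appended on failure spans the whole frame.
fallback = np.array([0, 0, 0, 1, 1, 1, 1, 0], dtype='float32')
print(denormalize_polygon(fallback, (480, 640)))
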
Example #3
    def apply(self, img: torch.Tensor, **params) -> np.ndarray:
        """Apply the transform to the image"""
        if len(img.shape) == 2:
            img = img.unsqueeze(0)

        return utils.tensor_to_ndimage(
            img,
            denormalize=self.denormalize,
            move_channels_dim=self.move_channels_dim)
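
The shape check above adds a channel axis so that a bare 2D mask reaches tensor_to_ndimage in CHW form. A minimal sketch of the same conversion outside the transform class, assuming catalyst's `utils` module as in the other examples:

import torch
from catalyst import utils  # assumed import, as above

mask = torch.rand(32, 32)   # 2D tensor, no channel dimension
chw = mask.unsqueeze(0)     # (1, 32, 32), mirroring what `apply` does

ndimage = utils.tensor_to_ndimage(
    chw,
    denormalize=False,       # the mask was never ImageNet-normalized
    move_channels_dim=True,  # channels-last output
)
print(ndimage.shape)
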
Example #4
    def predict_table_polygon(
            self, batch: Union[torch.Tensor, np.ndarray]) -> np.ndarray:
        hulls = []
        masks = self.predict_table_mask(batch)
        for frame, mask in zip(batch, masks):
            image = utils.tensor_to_ndimage(frame)
            image = (image * 255 + 0.5).astype(int).clip(0, 255).astype('uint8')
            try:
                hull = tp.find_table_polygon(deepcopy(image), mask=mask)
                # TODO: refactor (code duplication with the superclass
                # default implementation)
                hull = tp.remove_big_angles_from_hull(hull)
                hull = tp.take_longest_sides_from_hull(
                    hull, 4).reshape(-1).astype('float32')
                hull[0::2] = hull[0::2] / image.shape[0]
                hull[1::2] = hull[1::2] / image.shape[1]
                hulls.append(hull)
            except Exception:
                hulls.append(np.array([0, 0, 0, 1, 1, 1, 1, 0]))
        return np.array(hulls)
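
The TODO above flags the duplicated hull post-processing shared with the base-class implementation. One hypothetical way to remove it is to move the per-frame work into a shared helper that accepts an optional mask; the helper name and structure below are illustrative only, and `tp` is the project's table-processing module used by both snippets:

import numpy as np
from copy import deepcopy
from typing import Optional


def extract_table_polygon(image: np.ndarray,
                          mask: Optional[np.ndarray] = None) -> np.ndarray:
    """Hypothetical shared helper: find the table hull on one frame and
    return it as a flat, normalized 8-element vector (with a fallback)."""
    try:
        if mask is None:
            hull = tp.find_table_polygon(deepcopy(image))
        else:
            hull = tp.find_table_polygon(deepcopy(image), mask=mask)
        hull = tp.remove_big_angles_from_hull(hull)
        hull = tp.take_longest_sides_from_hull(hull, 4)
        flat = hull.reshape(-1).astype('float32')
        flat[0::2] /= image.shape[0]
        flat[1::2] /= image.shape[1]
        return flat
    except Exception:
        # Same fallback as above: a polygon spanning the whole frame.
        return np.array([0, 0, 0, 1, 1, 1, 1, 0], dtype='float32')
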
Example #5
    def on_batch_end(self, state: RunnerState):
        lm = state.loader_name
        names = state.input.get(self.name_key, [])

        features = state.input[self.input_key].detach().cpu()
        images = tensor_to_ndimage(features)

        logits = state.output[self.output_key]
        if len(logits.shape) < 4:
            logits = logits.unsqueeze(1)

        if self.mask_type == "soft":
            probabilities = torch.sigmoid(logits)
        else:
            probabilities = F.softmax(logits, dim=1)
        probabilities = probabilities.detach().cpu().numpy()

        masks = []
        for probability in probabilities:
            mask = np.zeros_like(probability[0], dtype=np.int32)
            for i, ch in enumerate(probability):
                mask[ch >= self.threshold] = i + 1
            masks.append(mask)

        for i, (image, mask) in enumerate(zip(images, masks)):
            try:
                suffix = names[i]
            except IndexError:
                suffix = f"{self.counter:06d}"
            self.counter += 1

            mask = label2rgb(mask, bg_label=0)

            image = image * (1 - self.mask_strength) \
                + mask * self.mask_strength
            image = (image * 255).clip(0, 255).round().astype(np.uint8)

            filename = f"{self.out_prefix}/{lm}/{suffix}.jpg"
            imageio.imwrite(filename, image)
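
The per-channel thresholding above collapses channel probabilities into a single integer label mask, with later channels overwriting earlier ones at pixels that clear the threshold more than once. A small self-contained check of that step:

import numpy as np

threshold = 0.5
# Two-channel "probabilities" for a 2x2 image.
probability = np.array([
    [[0.9, 0.2],
     [0.1, 0.8]],   # channel 0 -> label 1
    [[0.7, 0.1],
     [0.3, 0.9]],   # channel 1 -> label 2
])

mask = np.zeros_like(probability[0], dtype=np.int32)
for i, ch in enumerate(probability):
    mask[ch >= threshold] = i + 1

print(mask)
# [[2 0]
#  [0 2]]  -- pixel (0, 0) cleared both thresholds, so channel 1 wins
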
Example #6
def get_statistics(loader: DataLoader,
                   table_recognizer: TableRecognizer,
                   verbose=False) -> RecognitionStatistics:

    ptr = iter(loader)

    sum_mask_iou = 0
    sum_mask_dice = 0
    sum_table_iou = 0
    sum_table_dice = 0
    cnt = 0
    sum_time = 0

    while True:
        st = time()
        masks, tables = table_recognizer.next_data()
        if masks is None:
            break
        sum_time += time() - st
        batch = next(ptr)
        img_size = batch['image'].shape[2:]
        for mask, truth, table, image in zip(masks, batch['mask'], tables,
                                             batch['image']):
            mask = mask.astype(bool)
            truth = truth.numpy().astype(bool)

            canonical_table = tp.get_canonical_4_polygon(table)
            table_mask = tp.find_convex_hull_mask(
                img_size, [(int(x * img_size[0]), int(y * img_size[1]))
                           for x, y in canonical_table]).astype(bool)

            mask_iou = np.sum(mask & truth) / np.sum(mask | truth)
            mask_dice = 2 * np.sum(mask & truth) / (np.sum(mask) +
                                                    np.sum(truth))
            table_iou = np.sum(table_mask & truth) / np.sum(table_mask | truth)
            table_dice = 2 * np.sum(table_mask & truth) / (np.sum(table_mask) +
                                                           np.sum(truth))

            sum_mask_iou += mask_iou
            sum_mask_dice += mask_dice
            sum_table_iou += table_iou
            sum_table_dice += table_dice
            cnt += 1

            if verbose:
                plt.figure(figsize=(14, 10))

                plt.subplot(2, 2, 1)
                plt.imshow(mask)
                plt.title('Predicted mask')

                plt.subplot(2, 2, 2)
                plt.imshow(truth.reshape(truth.shape[-2:]))
                plt.title('True mask')

                plt.subplot(2, 2, 3)
                image = utils.tensor_to_ndimage(image)
                image = (image * 255 + 0.5).astype(int).clip(
                    0, 255).astype('uint8')
                plt.title('Image')
                plt.imshow(image)

                plt.subplot(2, 2, 4)
                plt.title('Predicted table mask')
                plt.imshow(table_mask)

                plt.show()

    return RecognitionStatistics(mean_mask_iou=sum_mask_iou / cnt,
                                 mean_mask_dice=sum_mask_dice / cnt,
                                 mean_table_iou=sum_table_iou / cnt,
                                 mean_table_dice=sum_table_dice / cnt,
                                 mean_time=sum_time / cnt)
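
The IoU and Dice terms above are computed inline; as standalone functions over boolean masks they reduce to the following (a minimal sketch using the same formulas):

import numpy as np


def mask_iou(pred: np.ndarray, truth: np.ndarray) -> float:
    """Intersection over union of two boolean masks."""
    return np.sum(pred & truth) / np.sum(pred | truth)


def mask_dice(pred: np.ndarray, truth: np.ndarray) -> float:
    """Dice coefficient, 2 * |A & B| / (|A| + |B|), of two boolean masks."""
    return 2 * np.sum(pred & truth) / (np.sum(pred) + np.sum(truth))


a = np.array([[True, True], [False, False]])
b = np.array([[True, False], [True, False]])
print(mask_iou(a, b))   # 1 / 3
print(mask_dice(a, b))  # 2 * 1 / (2 + 2) = 0.5
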
Example #7
predictions = np.vstack(
    list(
        map(
            lambda x: x["logits"].cpu().numpy(),
            runner.predict_loader(
                loader=infer_loader, resume=f"{logdir}/checkpoints/best.pth"
            ),
        )
    )
)

print(type(predictions))
print(predictions.shape)

threshold = 0.5
max_count = 5

for i, (features, logits) in enumerate(zip(test_dataset, predictions)):
    image = utils.tensor_to_ndimage(features["image"])

    mask_ = torch.from_numpy(logits[0]).sigmoid()
    mask = utils.detach(mask_ > threshold).astype("float")

    show_examples(name="", image=image, mask=mask)

    if i >= max_count:
        break

batch = next(iter(loaders["valid"]))
# saves to `logdir` and returns a `ScriptModule` class
runner.trace(model=model, batch=batch, logdir=logdir, fp16=is_fp16_used)

!ls {logdir}/trace/
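
runner.trace above writes a traced ScriptModule under the log directory; one way to use it afterwards is to load it back with torch.jit.load and run inference without the original Python model class. A hedged sketch; the trace filename and input shape below are assumptions, so check the `ls {logdir}/trace/` listing above for the actual file:

import torch

traced_path = f"{logdir}/trace/traced.pth"  # assumed filename

scripted = torch.jit.load(traced_path, map_location="cpu")
scripted.eval()

with torch.no_grad():
    dummy = torch.randn(1, 3, 256, 256)  # assumed input shape
    logits = scripted(dummy)
print(logits.shape)
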
Example #8
predictions = np.vstack(
    list(
        map(
            lambda x: x["logits"].cpu().numpy(),
            runner.predict_loader(
                loader=infer_loader,
                resume=f"{logdir_root}/logs/checkpoints/best.pth"),
        )))

print(type(predictions))
print(predictions.shape)

threshold = 0.5
max_count = 5

for i, (features, logits) in enumerate(zip(train_dataset, predictions)):
    image = utils.tensor_to_ndimage(features["image"], denormalize=False)

    # filename_mask = os.path.splitext(features["filename"])[0]
    filename_mask = os.path.splitext(features["filename_mask"])[0]
    gt = imread(mask_path / f"{filename_mask}.png")
    gt_res = gt.copy()
    gt_res.resize((224, 224))
    gt_im = Image.fromarray(gt_res * 255)

    mask_ = torch.from_numpy(logits[0]).sigmoid()
    mask = utils.detach(mask_ > threshold).astype("uint8")

    #     # Replace mask with real image
    #     # filename_mask = os.path.splitext(features["filename_img"])[0]
    #     # mask = imread(test_image_path / f"{filename_mask}.jpg")