Esempio n. 1
0
    def prepare_data(self, *args, **kwargs) -> None:
        """
        Produce the resized IAM line crops that setup() uses to build the synthetic paragraphs dataset.

        Equivalent to IAMLines.prepare_data followed by a resize of every line crop.
        Returns immediately when PROCESSED_DATA_DIRNAME already exists.
        """
        if PROCESSED_DATA_DIRNAME.exists():
            return

        print(
            "IAMSyntheticParagraphs.prepare_data: preparing IAM lines for synthetic IAM paragraph creation..."
        )
        print("Cropping IAM line regions and loading labels...")
        iam = IAM()
        iam.prepare_data()

        splits = ("trainval", "test")

        # Phase 1: crop every split before anything is resized or written.
        crops_by_split = {}
        labels_by_split = {}
        for split in splits:
            crops_by_split[split], labels_by_split[split] = line_crops_and_labels(
                iam, split)

        # Phase 2: rescale each crop by the shared scale factor.
        for split in splits:
            crops_by_split[split] = [
                resize_image(crop, IMAGE_SCALE_FACTOR)
                for crop in crops_by_split[split]
            ]

        # Phase 3: write images and labels for each split.
        print(f"Saving images and labels at {PROCESSED_DATA_DIRNAME}...")
        for split in splits:
            save_images_and_labels(crops_by_split[split],
                                   labels_by_split[split], split,
                                   PROCESSED_DATA_DIRNAME)
Esempio n. 2
0
    def predict(self, image_filename: Path) -> str:
        """Read the image file at `image_filename` in grayscale and return the predicted text."""
        source_image = util.read_image_pil(image_filename, grayscale=True)
        # The resize happens here; ideally it would be part of the transform.
        scaled_image = resize_image(source_image, IMAGE_SCALE_FACTOR)

        # The model is called on a batch, so add a leading axis and take the first result.
        batch = self.transform(scaled_image).unsqueeze(axis=0)
        first_prediction = self.lit_model(batch)[0]

        return convert_y_label_to_string(
            y=first_prediction,
            mapping=self.mapping,
            ignore_tokens=self.ignore_tokens,
        )
    def predict(self, image: Union[str, Path, Image.Image]) -> str:
        """Return the predicted text for `image`, given as a PIL image or as a path to an image file."""
        if isinstance(image, Image.Image):
            # A PIL image is used as given; no grayscale conversion is applied here.
            source_image = image
        else:
            # Anything else is treated as a file path and read in grayscale.
            source_image = util.read_image_pil(image, grayscale=True)

        scaled_image = resize_image(source_image, IMAGE_SCALE_FACTOR)

        # The model is called on a batch, so add a leading axis and take the first result.
        batch = self.transform(scaled_image).unsqueeze(axis=0)
        first_prediction = self.scripted_model(batch)[0]

        return convert_y_label_to_string(
            y=first_prediction,
            mapping=self.mapping,
            ignore_tokens=self.ignore_tokens,
        )