Example #1
def test_filename():  # pylint: disable=R0201
    predictor = ParagraphTextRecognizer()
    num_text_lines_by_name = {"a01-000u-cropped": 7}
    for filename in SUPPORT_DIRNAME.glob("*.jpg"):
        full_image = util.read_image(str(filename), grayscale=True)
        predicted_text, line_region_crops = predictor.predict(full_image)
        print(filename, ':', predicted_text)
        assert len(line_region_crops) == num_text_lines_by_name[filename.stem]
    def _decide_on_crop_dims(self) -> Tuple[int, int]:
        """Decide on the dimensions to crop out of the form image.

        Since image width is larger than a comfortable crop around the longest paragraph,
        we will make the crop a square form factor.
        And since the found dimensions 610x610 are pretty close to 512x512,
        we might as well resize crops and make it exactly that, which lets us
        do all kinds of power-of-2 pooling and upsampling should we choose to.

        Returns:
            Tuple[int, int]: A tuple of crop dimensions.

        Raises:
            RuntimeError: When max crop height is larger than max crop width.

        """

        sample_form_filename = self.iam_dataset.form_filenames[0]
        sample_image = util.read_image(sample_form_filename, grayscale=True)
        max_crop_width = sample_image.shape[1]
        max_crop_height = _get_max_paragraph_crop_height(
            self.iam_dataset.line_regions_by_id)
        if max_crop_height > max_crop_width:
            raise RuntimeError(
                f"Max crop height is larger than max crop width: {max_crop_height} > {max_crop_width}"
            )

        crop_dims = (max_crop_width, max_crop_width)
        logger.info(
            f"Max crop width and height were found to be {max_crop_width}x{max_crop_height}."
        )
        logger.info(f"Setting them to {max_crop_width}x{max_crop_width}")
        return crop_dims
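
_get_max_paragraph_crop_height is referenced above but not shown in these excerpts. Judging from how line regions and PARAGRAPH_BUFFER are handled in _crop_paragraph_image further down, a plausible sketch (an assumption, not necessarily the original implementation) is:

def _get_max_paragraph_crop_height(line_regions_by_id):
    """Return the tallest buffered paragraph crop across all forms."""
    heights = []
    for regions in line_regions_by_id.values():
        # Mirror the min_y1/max_y2 computation used in _crop_paragraph_image.
        min_y1 = min(region["y1"] for region in regions) - PARAGRAPH_BUFFER
        max_y2 = max(region["y2"] for region in regions) + PARAGRAPH_BUFFER
        heights.append(max_y2 - min_y1)
    return max(heights)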
Example #3
 def predict(
         self, image_or_filename: Union[np.ndarray,
                                        str]) -> Tuple[str, float]:
     if isinstance(image_or_filename, str):
         image = util.read_image(image_or_filename, grayscale=True)
     else:
         image = image_or_filename
     return self.model.predict_on_image(image)
 def test_filename(self):
     predictor = ParagraphTextRecognizer()
     num_text_lines_by_name = {'a01-000u-cropped': 7}
     for filename in (SUPPORT_DIRNAME).glob('*.jpg'):
         full_image = util.read_image(str(filename), grayscale=True)
         predicted_text, line_region_crops = predictor.predict(full_image)
         print(f"ouptut text {predicted_text}")
         assert len(line_region_crops) == num_text_lines_by_name[
             filename.stem]
Example #5
 def predict(
         self, image_or_filename: Union[np.ndarray,
                                        str]) -> Tuple[str, float]:
     """Predict on a single images contianing a handwritten character."""
     if isinstance(image_or_filename, str):
         image = read_image(image_or_filename, grayscale=True)
     else:
         image = image_or_filename
     return self.model.predict_on_image(image)
def _load_iam_paragraphs():
    print('Loading IAM paragraph crops and ground truth from image files...')
    images = []
    gt_images = []
    ids = []
    for filename in CROPS_DIRNAME.glob('*.jpg'):
        id_ = filename.stem
        image = util.read_image(filename, grayscale=True)
        image = 1. - image / 255

        gt_filename = GT_DIRNAME / f'{id_}.png'
        gt_image = util.read_image(gt_filename, grayscale=True)

        images.append(image)
        gt_images.append(gt_image)
        ids.append(id_)
    images = np.array(images).astype(np.float32)
    gt_images = util.to_categorical(np.array(gt_images), 3).astype(np.uint8)
    return images, gt_images, np.array(ids)
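
The ground-truth maps loaded above hold three classes: 0 for background and 1 or 2 for alternating text lines (see the "index % 2 + 1" assignment in _crop_paragraph_image below), so util.to_categorical(..., 3) presumably one-hot encodes them into three channels. A minimal NumPy illustration of that encoding (illustrative only; util.to_categorical itself is not shown here):

import numpy as np

# Toy 2x3 ground-truth map: 0 = background, 1/2 = alternating line labels.
gt = np.array([[0, 1, 1],
               [0, 2, 2]], dtype=np.uint8)

# One-hot encode into 3 channels, analogous to util.to_categorical(gt, 3).
one_hot = np.eye(3, dtype=np.uint8)[gt]
print(one_hot.shape)  # (2, 3, 3)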
def _crop_paragraph_image(filename: str, line_regions: Dict,
                          crop_dims: Tuple[int,
                                           int], final_dims: Tuple) -> None:
    image = util.read_image(filename, grayscale=True)

    min_y1 = min(region["y1"] for region in line_regions) - PARAGRAPH_BUFFER
    max_y2 = max(region["y2"] for region in line_regions) + PARAGRAPH_BUFFER
    height = max_y2 - min_y1
    crop_height = crop_dims[0]
    buffer = (crop_height - height) // 2

    # Generate image crop.
    image_crop = 255 * np.ones(crop_dims, dtype=np.uint8)
    try:
        image_crop[buffer:buffer + height] = image[min_y1:max_y2]
    except Exception as e:  # pylint: disable=broad-except
        logger.error(f"Rescued {filename}: {e}")
        return

    # Generate ground truth.
    gt_image = np.zeros_like(image_crop, dtype=np.uint8)
    for index, region in enumerate(line_regions):
        gt_image[(region["y1"] - min_y1 + buffer):(region["y2"] - min_y1 +
                                                   buffer),
                 region["x1"]:region["x2"], ] = (index % 2 + 1)

    # Generate image for debugging.
    import matplotlib.pyplot as plt

    cmap = plt.get_cmap("Set1")
    image_crop_for_debug = np.dstack([image_crop, image_crop, image_crop])
    for index, region in enumerate(line_regions):
        color = [255 * _ for _ in cmap(index)[:-1]]
        cv2.rectangle(
            image_crop_for_debug,
            (region["x1"], region["y1"] - min_y1 + buffer),
            (region["x2"], region["y2"] - min_y1 + buffer),
            color,
            3,
        )
    image_crop_for_debug = cv2.resize(image_crop_for_debug,
                                      final_dims,
                                      interpolation=cv2.INTER_AREA)
    util.write_image(image_crop_for_debug,
                     DEBUG_CROPS_DIRNAME / f"{filename.stem}.jpg")

    image_crop = cv2.resize(image_crop,
                            final_dims,
                            interpolation=cv2.INTER_AREA)
    util.write_image(image_crop, CROPS_DIRNAME / f"{filename.stem}.jpg")

    gt_image = cv2.resize(gt_image,
                          final_dims,
                          interpolation=cv2.INTER_NEAREST)
    util.write_image(gt_image, GT_DIRNAME / f"{filename.stem}.png")
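
The cropping and loading functions above rely on several module-level names (PARAGRAPH_BUFFER, CROPS_DIRNAME, DEBUG_CROPS_DIRNAME, GT_DIRNAME, logger) that these excerpts never define. A plausible minimal setup follows; the paths and buffer value are assumptions, not the project's actual values:

import logging
from pathlib import Path

logger = logging.getLogger(__name__)

# Assumed values, for illustration only.
PARAGRAPH_BUFFER = 50  # extra pixels kept above and below the paragraph
DATA_DIRNAME = Path("data") / "iam_paragraphs"
CROPS_DIRNAME = DATA_DIRNAME / "crops"
DEBUG_CROPS_DIRNAME = DATA_DIRNAME / "debug_crops"
GT_DIRNAME = DATA_DIRNAME / "gt"

for dirname in (CROPS_DIRNAME, DEBUG_CROPS_DIRNAME, GT_DIRNAME):
    dirname.mkdir(parents=True, exist_ok=True)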
def _load_iam_paragraphs() -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    logger.info(
        "Loading IAM paragraph crops and ground truth from image files...")
    images = []
    gt_images = []
    ids = []
    for filename in CROPS_DIRNAME.glob("*.jpg"):
        id_ = filename.stem
        image = util.read_image(filename, grayscale=True)
        image = 1.0 - image / 255

        gt_filename = GT_DIRNAME / f"{id_}.png"
        gt_image = util.read_image(gt_filename, grayscale=True)

        images.append(image)
        gt_images.append(gt_image)
        ids.append(id_)
    images = np.array(images).astype(np.float32)
    gt_images = np.array(gt_images).astype(np.uint8)
    ids = np.array(ids)
    return images, gt_images, ids
Example #9
 def test_filename(self):
     predictor = LinePredictor()
     for filename in SUPPORT_DIRNAME.glob('*.png'):
         image = util.read_image(str(filename), grayscale=True)
         print('Saved image shape:', image.shape)
         image = image[:, :-np.random.randint(1, 150)]  # pylint: disable=invalid-unary-operand-type
         print('Randomly cropped image shape:', image.shape)
         pred, conf = predictor.predict(image)
         true = str(filename.stem)
         edit_distance = editdistance.eval(pred, true) / len(pred)
         print(f'Pred: "{pred}" | Confidence: {conf} | True: {true} | Edit distance: {edit_distance}')
         self.assertLess(edit_distance, 0.2)
Example #10
def _crop_paragraph_image(filename, line_regions, crop_dims, final_dims):  # pylint: disable=too-many-locals
    image = util.read_image(filename, grayscale=True)

    min_y1 = min(r["y1"] for r in line_regions) - PARAGRAPH_BUFFER
    max_y2 = max(r["y2"] for r in line_regions) + PARAGRAPH_BUFFER
    height = max_y2 - min_y1
    crop_height = crop_dims[0]
    buffer = (crop_height - height) // 2

    # Generate image crop
    image_crop = 255 * np.ones(crop_dims, dtype=np.uint8)
    try:
        image_crop[buffer:buffer + height] = image[min_y1:max_y2]
    except Exception as e:  # pylint: disable=broad-except
        print(f"Rescued {filename}: {e}")
        return

    # Generate ground truth
    gt_image = np.zeros_like(image_crop, dtype=np.uint8)
    for ind, region in enumerate(line_regions):
        gt_image[(region["y1"] - min_y1 + buffer):(region["y2"] - min_y1 +
                                                   buffer),
                 region["x1"]:region["x2"]] = (ind % 2 + 1)

    # Generate image for debugging
    import matplotlib.pyplot as plt  # pylint: disable=import-outside-toplevel

    cmap = plt.get_cmap("Set1")
    image_crop_for_debug = np.dstack([image_crop, image_crop, image_crop])
    for ind, region in enumerate(line_regions):
        color = [255 * _ for _ in cmap(ind)[:-1]]
        cv2.rectangle(
            image_crop_for_debug,
            (region["x1"], region["y1"] - min_y1 + buffer),
            (region["x2"], region["y2"] - min_y1 + buffer),
            color,
            3,
        )
    image_crop_for_debug = cv2.resize(image_crop_for_debug,
                                      final_dims,
                                      interpolation=cv2.INTER_AREA)
    util.write_image(image_crop_for_debug,
                     DEBUG_CROPS_DIRNAME / f"{filename.stem}.jpg")

    image_crop = cv2.resize(
        image_crop, final_dims,
        interpolation=cv2.INTER_AREA)  # Quality interpolation for input
    util.write_image(image_crop, CROPS_DIRNAME / f"{filename.stem}.jpg")

    gt_image = cv2.resize(
        gt_image, final_dims,
        interpolation=cv2.INTER_NEAREST)  # No interpolation for labels
    util.write_image(gt_image, GT_DIRNAME / f"{filename.stem}.png")
Example #11
def _load_image():
    if request.method == 'POST':
        data = request.get_json()
        if data is None:
            return 'no json received'
        return util.read_b64_image(data['image'], grayscale=True)
    elif request.method == 'GET':
        image_url = request.args.get('image_url')
        if image_url is None:
            return 'no image_url defined in query string'
        return util.read_image(image_url, grayscale=True)
    else:
        raise ValueError('Unsupported HTTP method')
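
_load_image reads the Flask request object directly, so it is meant to be called from inside a view function. A hedged sketch of how such a handler might be wired up; the route path, app setup, and predictor wiring are assumptions rather than the project's actual server code:

from flask import Flask, jsonify

app = Flask(__name__)
predictor = LinePredictor()  # any predictor exposing predict(image) -> (text, conf)


@app.route("/v1/predict", methods=["GET", "POST"])
def predict():
    image = _load_image()
    if isinstance(image, str):  # _load_image returned an error message
        return jsonify({"error": image}), 400
    pred, conf = predictor.predict(image)
    return jsonify({"pred": str(pred), "conf": float(conf)})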
Example #12
def _load_image():
    if request.method == "POST":
        data = request.get_json()
        if data is None:
            return "no json received"
        return util.read_b64_image(data["image"], grayscale=True)
    if request.method == "GET":
        image_url = request.args.get("image_url")
        if image_url is None:
            return "no image_url defined in query string"
        print("INFO url {}".format(image_url))
        return util.read_image(image_url, grayscale=True)
    raise ValueError("Unsupported HTTP method")
    def predict(self, image_or_filename: Union[str, np.ndarray]) -> Tuple:
        """Takes an image and returns all text within it."""
        image = (read_image(image_or_filename) if isinstance(
            image_or_filename, str) else image_or_filename)

        line_region_crops = self._get_line_region_crops(image)
        processed_line_region_crops = [
            self._process_image_for_line_predictor(image=crop)
            for crop in line_region_crops
        ]
        line_region_strings = [
            self.line_predictor_model.predict_on_image(crop)[0]
            for crop in processed_line_region_crops
        ]

        return " ".join(line_region_strings), line_region_crops
def _crop_paragraph_image(filename, line_regions, crop_dims, final_dims):
    image = util.read_image(filename, grayscale=True)

    min_y1 = min(r['y1'] for r in line_regions) - PARAGRAPH_BUFFER
    max_y2 = max(r['y2'] for r in line_regions) + PARAGRAPH_BUFFER
    height = max_y2 - min_y1
    crop_height = crop_dims[0]
    buffer = (crop_height - height) // 2

    # Generate image crop
    image_crop = 255 * np.ones(crop_dims, dtype=np.uint8)
    try:
        image_crop[buffer:buffer + height] = image[min_y1:max_y2]
    except Exception as e:
        print(f'Rescued {filename}: {e}')
        return

    # Generate ground truth
    gt_image = np.zeros_like(image_crop, dtype=np.uint8)
    for ind, region in enumerate(line_regions):
        gt_image[
            (region['y1'] - min_y1 + buffer):(region['y2'] - min_y1 + buffer),
            region['x1']:region['x2']
        ] = ind % 2 + 1

    # Generate image for debugging
    import matplotlib.pyplot as plt
    cmap = plt.get_cmap('Set1')
    image_crop_for_debug = np.dstack([image_crop, image_crop, image_crop])
    for ind, region in enumerate(line_regions):
        color = [255 * _ for _ in cmap(ind)[:-1]]
        cv2.rectangle(
            image_crop_for_debug,
            (region['x1'], region['y1'] - min_y1 + buffer),
            (region['x2'], region['y2'] - min_y1 + buffer),
            color,
            3
        )
    image_crop_for_debug = cv2.resize(image_crop_for_debug, final_dims, interpolation=cv2.INTER_AREA)
    util.write_image(image_crop_for_debug, DEBUG_CROPS_DIRNAME / f'{filename.stem}.jpg')

    image_crop = cv2.resize(image_crop, final_dims, interpolation=cv2.INTER_AREA)  # Quality interpolation for input
    util.write_image(image_crop, CROPS_DIRNAME / f'{filename.stem}.jpg')

    gt_image = cv2.resize(gt_image, final_dims, interpolation=cv2.INTER_NEAREST)  # No interpolation for labels
    util.write_image(gt_image, GT_DIRNAME / f'{filename.stem}.png')
Example #15
    def predict(self, image_or_filename: Union[np.ndarray, str]):
        """
        Take an image and return all the text in it.
        """
        if isinstance(image_or_filename, str):
            image = util.read_image(image_or_filename, grayscale=True)
        else:
            image = image_or_filename

        line_region_crops = self._get_line_region_crops(image=image)
        print([a.shape for a in line_region_crops])
        prepared_line_region_crops = [
            self._prepare_image_for_line_predictor_model(image=crop) for crop in line_region_crops
        ]

        line_region_strings = [
            self.line_predictor_model.predict_on_image(crop)[0] for crop in prepared_line_region_crops
        ]
        return " ".join(line_region_strings), line_region_crops
    def _decide_on_crop_dims(self):
        """
        Decide on the dimensions to crop out of the form image.
        Since image width is larger than a comfortable crop around the longest paragraph,
        we will make the crop a square form factor.

        And since the found dimensions 610x610 are pretty close to 512x512,
        we might as well resize crops and make it exactly that, which lets us
        do all kinds of power-of-2 pooling and upsampling should we choose to.
        """
        sample_form_filename = self.iam_dataset.form_filenames[0]
        sample_image = util.read_image(sample_form_filename, grayscale=True)
        max_crop_width = sample_image.shape[1]
        max_crop_height = _get_max_paragraph_crop_height(self.iam_dataset.line_regions_by_id)
        assert max_crop_height <= max_crop_width
        crop_dims = (max_crop_width, max_crop_width)
        print(f'Max crop width and height were found to be {max_crop_width}x{max_crop_height}.')
        print(f'Setting them to {max_crop_width}x{max_crop_width}')
        return crop_dims
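
For context on how these pieces fit together: the square crop_dims chosen by _decide_on_crop_dims would be passed, along with a power-of-2 final size such as 512x512, into _crop_paragraph_image for every form. The glue below is illustrative only; the dataset attributes follow the snippets above, but the orchestration in the original dataset class may differ:

def _crop_all_paragraphs(iam_dataset, crop_dims, final_dims=(512, 512)):
    # Crop each form around its paragraph and resize to final_dims.
    for filename in iam_dataset.form_filenames:
        line_regions = iam_dataset.line_regions_by_id[filename.stem]
        _crop_paragraph_image(filename, line_regions, crop_dims, final_dims)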
 def test_filename(self) -> None:
     """Test model on support image."""
     line_predictor_args = {
         "dataset": "EmnistLineDataset",
         "network_fn": "CNNTransformer",
     }
     line_detector_args = {
         "dataset": "EmnistLineDataset",
         "network_fn": "UNet"
     }
     model = ParagraphTextRecognizor(
         line_predictor_args=line_predictor_args,
         line_detector_args=line_detector_args,
     )
     num_text_lines_by_name = {"a01-000u-cropped": 7}
     for filename in (SUPPORT_DIRNAME).glob("*.jpg"):
         full_image = util.read_image(str(filename), grayscale=True)
         predicted_text, line_region_crops = model.predict(full_image)
         print(predicted_text)
         self.assertEqual(len(line_region_crops),
                          num_text_lines_by_name[filename.stem])