def test_filename():  # pylint: disable=R0201
    """Smoke-run the paragraph recognizer on every support JPEG.

    Each ``*.jpg`` under ``SUPPORT_DIRNAME`` is loaded as grayscale, passed to
    ``ParagraphTextRecognizer.predict`` and the recognized text is printed
    next to the file path. Nothing is asserted here.
    """
    recognizer = ParagraphTextRecognizer()
    # Expected line counts per image stem; built but not checked in this variant.
    num_text_lines_by_name = {"a01-000u-cropped": 7}
    for image_path in (SUPPORT_DIRNAME).glob("*.jpg"):
        grayscale_page = util.read_image(str(image_path), grayscale=True)
        # predict() is unpacked into exactly two values: text and line crops.
        recognized_text, _line_crops = recognizer.predict(grayscale_page)
        print(image_path, ':', recognized_text)
def _decide_on_crop_dims(self) -> Tuple[int, int]:
    """Decide on the dimensions to crop out of the form image.

    Since image width is larger than a comfortable crop around the longest
    paragraph, we will make the crop a square form factor. And since the
    found dimensions 610x610 are pretty close to 512x512, we might as well
    resize crops and make it exactly that, which lets us do all kinds of
    power-of-2 pooling and upsampling should we choose to.

    The width is taken from the first form image only
    (``self.iam_dataset.form_filenames[0]``); the height comes from
    ``_get_max_paragraph_crop_height`` over all line regions.

    Returns:
        Tuple[int, int]: The square crop dimensions
            ``(max_crop_width, max_crop_width)``.

    Raises:
        RuntimeError: When max crop height is larger than max crop width.
    """
    sample_form_filename = self.iam_dataset.form_filenames[0]
    sample_image = util.read_image(sample_form_filename, grayscale=True)
    max_crop_width = sample_image.shape[1]
    max_crop_height = _get_max_paragraph_crop_height(
        self.iam_dataset.line_regions_by_id)
    if not max_crop_height <= max_crop_width:
        # The message reports the strict inequality that actually failed.
        raise RuntimeError(
            f"Max crop height is larger than max crop width: {max_crop_height} > {max_crop_width}"
        )
    crop_dims = (max_crop_width, max_crop_width)
    logger.info(
        f"Max crop width and height were found to be {max_crop_width}x{max_crop_height}."
    )
    logger.info(f"Setting them to {max_crop_width}x{max_crop_width}")
    return crop_dims
def predict(
        self, image_or_filename: Union[np.ndarray, str]) -> Tuple[str, float]:
    """Run the wrapped model on a single image.

    Args:
        image_or_filename: Either an already-loaded image, or a string that is
            handed to ``util.read_image(..., grayscale=True)`` to load one.

    Returns:
        The result of ``self.model.predict_on_image`` for the image
        (annotated as a ``(str, float)`` pair).
    """
    if not isinstance(image_or_filename, str):
        # Anything that is not a string is treated as the image itself.
        return self.model.predict_on_image(image_or_filename)
    loaded_image = util.read_image(image_or_filename, grayscale=True)
    return self.model.predict_on_image(loaded_image)
def test_filename(self):
    """Check the number of detected line crops for each support JPEG.

    Every ``*.jpg`` under ``SUPPORT_DIRNAME`` is read as grayscale and run
    through ``ParagraphTextRecognizer.predict``; the number of returned line
    region crops must equal the expected count registered for the file stem.
    """
    num_text_lines_by_name = {'a01-000u-cropped': 7}
    recognizer = ParagraphTextRecognizer()
    for filename in (SUPPORT_DIRNAME).glob('*.jpg'):
        page = util.read_image(str(filename), grayscale=True)
        predicted_text, crops = recognizer.predict(page)
        print(f"ouptut text {predicted_text}")
        # A stem missing from the table raises KeyError, as before.
        expected_count = num_text_lines_by_name[filename.stem]
        assert len(crops) == expected_count
def predict(
        self, image_or_filename: Union[np.ndarray, str]) -> Tuple[str, float]:
    """Predict on a single image containing a handwritten character.

    A string argument is loaded with ``read_image(..., grayscale=True)``;
    any other argument is used as the image directly. The return value is
    whatever ``self.model.predict_on_image`` produces for that image.
    """
    image = (read_image(image_or_filename, grayscale=True)
             if isinstance(image_or_filename, str) else image_or_filename)
    return self.model.predict_on_image(image)
def _load_iam_paragraphs():
    """Load paragraph crops and their ground-truth images from disk.

    For every ``*.jpg`` in ``CROPS_DIRNAME`` the crop is read as grayscale and
    rescaled to ``1 - pixel / 255``; the ``.png`` with the same stem is read
    from ``GT_DIRNAME`` as its ground truth.

    Returns:
        A 3-tuple ``(images, gt_images, ids)``: the crops as a float32 array,
        the ground truth passed through ``util.to_categorical(..., 3)`` and
        cast to uint8, and an array of the file stems.
    """
    print('Loading IAM paragraph crops and ground truth from image files...')
    crop_list, label_list, stem_list = [], [], []
    for crop_path in CROPS_DIRNAME.glob('*.jpg'):
        id_ = crop_path.stem
        raw_crop = util.read_image(crop_path, grayscale=True)
        inverted_crop = 1. - raw_crop / 255
        labels = util.read_image(GT_DIRNAME / f'{id_}.png', grayscale=True)
        crop_list.append(inverted_crop)
        label_list.append(labels)
        stem_list.append(id_)
    crops = np.array(crop_list).astype(np.float32)
    categorical_labels = util.to_categorical(np.array(label_list), 3)
    return crops, categorical_labels.astype(np.uint8), np.array(stem_list)
def _crop_paragraph_image(filename: str, line_regions: Dict,
                          crop_dims: Tuple[int, int],
                          final_dims: Tuple) -> None:
    """Write the paragraph crop, its ground truth and a debug image for a form.

    The form is read as grayscale and the horizontal band covering all line
    regions (extended by ``PARAGRAPH_BUFFER`` above and below) is pasted,
    vertically centred, onto a 255-filled canvas of shape ``crop_dims``.
    A label image marks each line region with 1 or 2, alternating by index.
    Three images are written, each resized to ``final_dims``: a debug image
    with coloured rectangles, the crop, and the label image.

    Args:
        filename: Form image location. NOTE(review): annotated ``str`` but
            ``.stem`` is read from it, so a path object appears to be
            expected; confirm against the caller.
        line_regions: Iterated several times; each item is indexed with
            ``"x1"``, ``"x2"``, ``"y1"`` and ``"y2"``.
        crop_dims: Shape of the intermediate canvas; element 0 is its height.
        final_dims: Size passed to ``cv2.resize`` for all three outputs.

    Returns:
        None. If the band cannot be pasted into the canvas the error is
        logged and nothing is written.
    """
    form_image = util.read_image(filename, grayscale=True)

    band_top = min(region["y1"] for region in line_regions) - PARAGRAPH_BUFFER
    band_bottom = max(region["y2"] for region in line_regions) + PARAGRAPH_BUFFER
    band_height = band_bottom - band_top
    pad = (crop_dims[0] - band_height) // 2
    # Adding this to a form-space y gives the matching canvas-space y.
    y_shift = pad - band_top

    # Generate image crop.
    canvas = 255 * np.ones(crop_dims, dtype=np.uint8)
    try:
        canvas[pad:pad + band_height] = form_image[band_top:band_bottom]
    except Exception as e:  # pylint: disable=broad-except
        logger.error(f"Rescued {filename}: {e}")
        return

    # Generate ground truth.
    label_image = np.zeros_like(canvas, dtype=np.uint8)
    for position, region in enumerate(line_regions):
        row_start = region["y1"] + y_shift
        row_stop = region["y2"] + y_shift
        label_image[row_start:row_stop, region["x1"]:region["x2"]] = (
            position % 2 + 1)

    # Generate image for debugging.
    import matplotlib.pyplot as plt

    palette = plt.get_cmap("Set1")
    debug_image = np.dstack([canvas, canvas, canvas])
    for position, region in enumerate(line_regions):
        # Drop the last colormap component and scale the rest by 255.
        outline = [255 * component for component in palette(position)[:-1]]
        top_left = (region["x1"], region["y1"] + y_shift)
        bottom_right = (region["x2"], region["y2"] + y_shift)
        cv2.rectangle(debug_image, top_left, bottom_right, outline, 3)

    debug_image = cv2.resize(debug_image, final_dims,
                             interpolation=cv2.INTER_AREA)
    util.write_image(debug_image,
                     DEBUG_CROPS_DIRNAME / f"{filename.stem}.jpg")

    canvas = cv2.resize(canvas, final_dims, interpolation=cv2.INTER_AREA)
    util.write_image(canvas, CROPS_DIRNAME / f"{filename.stem}.jpg")

    label_image = cv2.resize(label_image, final_dims,
                             interpolation=cv2.INTER_NEAREST)
    util.write_image(label_image, GT_DIRNAME / f"{filename.stem}.png")
def _load_iam_paragraphs() -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Load paragraph crops and their ground-truth images from disk.

    For every ``*.jpg`` in ``CROPS_DIRNAME`` the crop is read as grayscale and
    rescaled to ``1.0 - pixel / 255``; the ``.png`` with the same stem is read
    from ``GT_DIRNAME`` as its ground truth.

    Returns:
        Tuple[np.ndarray, np.ndarray, np.ndarray]: ``(images, gt_images,
        ids)`` - the crops as float32, the ground-truth images as uint8, and
        the file stems.
    """
    logger.info(
        "Loading IAM paragraph crops and ground truth from image files...")
    images = []
    gt_images = []
    ids = []
    for filename in CROPS_DIRNAME.glob("*.jpg"):
        id_ = filename.stem
        image = util.read_image(filename, grayscale=True)
        image = 1.0 - image / 255

        gt_filename = GT_DIRNAME / f"{id_}.png"
        gt_image = util.read_image(gt_filename, grayscale=True)

        images.append(image)
        gt_images.append(gt_image)
        ids.append(id_)
    # The lists are rebound to arrays, exactly as the original did.
    images = np.array(images).astype(np.float32)
    gt_images = np.array(gt_images).astype(np.uint8)
    ids = np.array(ids)
    return images, gt_images, ids
def test_filename(self):
    """Check line predictions on randomly right-cropped support images.

    Each ``*.png`` under ``SUPPORT_DIRNAME`` is read as grayscale, a random
    number of columns (1 to 149) is cut from its right edge, and the
    prediction is compared with the file stem, which is used as the expected
    text. The edit distance, normalised by the prediction length, must be
    below 0.2.
    """
    predictor = LinePredictor()

    for filename in SUPPORT_DIRNAME.glob('*.png'):
        image = util.read_image(str(filename), grayscale=True)
        print('Saved image shape:', image.shape)
        image = image[:, :-np.random.randint(1, 150)]  # pylint: disable=invalid-unary-operand-type
        print('Randomly cropped image shape:', image.shape)
        pred, conf = predictor.predict(image)
        true = str(filename.stem)
        # An empty prediction used to raise ZeroDivisionError; dividing by at
        # least 1 lets it fail through assertLess with a readable message.
        edit_distance = editdistance.eval(pred, true) / max(len(pred), 1)
        print(f'Pred: "{pred}" | Confidence: {conf} | True: {true} | Edit distance: {edit_distance}')
        self.assertLess(edit_distance, 0.2)
def _crop_paragraph_image(filename, line_regions, crop_dims, final_dims):  # pylint: disable=too-many-locals
    """Write the paragraph crop, its ground truth and a debug image for a form.

    The band of the grayscale form spanning all line regions (plus
    ``PARAGRAPH_BUFFER`` on each side) is centred vertically on a 255-filled
    ``crop_dims`` canvas. A label image marks each region with 1 or 2,
    alternating by index, and a debug image outlines each region in colour.
    All three are resized to ``final_dims`` and written under
    ``DEBUG_CROPS_DIRNAME``, ``CROPS_DIRNAME`` and ``GT_DIRNAME``, named
    after ``filename.stem``.

    If the band does not fit the canvas, the error is printed and the
    function returns without writing anything.
    """
    form = util.read_image(filename, grayscale=True)

    first_row = min(r["y1"] for r in line_regions) - PARAGRAPH_BUFFER
    last_row = max(r["y2"] for r in line_regions) + PARAGRAPH_BUFFER
    span = last_row - first_row
    margin = (crop_dims[0] - span) // 2

    def _canvas_rows(region):
        """Return the region's (top, bottom) rows in canvas coordinates."""
        return (region["y1"] - first_row + margin,
                region["y2"] - first_row + margin)

    # Generate image crop
    image_crop = 255 * np.ones(crop_dims, dtype=np.uint8)
    try:
        image_crop[margin:margin + span] = form[first_row:last_row]
    except Exception as e:  # pylint: disable=broad-except
        print(f"Rescued {filename}: {e}")
        return

    # Generate ground truth
    gt_image = np.zeros_like(image_crop, dtype=np.uint8)
    for ind, region in enumerate(line_regions):
        top, bottom = _canvas_rows(region)
        gt_image[top:bottom, region["x1"]:region["x2"]] = (ind % 2 + 1)

    # Generate image for debugging
    import matplotlib.pyplot as plt  # pylint: disable=import-outside-toplevel

    cmap = plt.get_cmap("Set1")
    image_crop_for_debug = np.dstack([image_crop, image_crop, image_crop])
    for ind, region in enumerate(line_regions):
        top, bottom = _canvas_rows(region)
        color = [255 * value for value in cmap(ind)[:-1]]
        cv2.rectangle(image_crop_for_debug, (region["x1"], top),
                      (region["x2"], bottom), color, 3)
    image_crop_for_debug = cv2.resize(image_crop_for_debug,
                                      final_dims,
                                      interpolation=cv2.INTER_AREA)
    util.write_image(image_crop_for_debug,
                     DEBUG_CROPS_DIRNAME / f"{filename.stem}.jpg")

    # Quality interpolation for input
    image_crop = cv2.resize(image_crop, final_dims,
                            interpolation=cv2.INTER_AREA)
    util.write_image(image_crop, CROPS_DIRNAME / f"{filename.stem}.jpg")

    # No interpolation for labels
    gt_image = cv2.resize(gt_image, final_dims,
                          interpolation=cv2.INTER_NEAREST)
    util.write_image(gt_image, GT_DIRNAME / f"{filename.stem}.png")
def _load_image():
    """Load a grayscale image from the current request.

    POST: the JSON body's ``'image'`` entry is decoded with
    ``util.read_b64_image``. GET: the ``image_url`` query parameter is loaded
    with ``util.read_image``. When the JSON body or the query parameter is
    missing, a short explanatory string is returned instead of an image.

    Raises:
        ValueError: For any HTTP method other than POST or GET.
    """
    method = request.method
    if method == 'POST':
        payload = request.get_json()
        if payload is not None:
            return util.read_b64_image(payload['image'], grayscale=True)
        return 'no json received'
    if method == 'GET':
        image_url = request.args.get('image_url')
        if image_url is not None:
            return util.read_image(image_url, grayscale=True)
        return 'no image_url defined in query string'
    raise ValueError('Unsupported HTTP method')
def _load_image():
    """Load a grayscale image from the current request.

    POST requests supply the image base64-encoded under the ``"image"`` key
    of the JSON body; GET requests supply an ``image_url`` query parameter,
    which is printed and then loaded with ``util.read_image``. A missing body
    or parameter yields a short explanatory string rather than an image.

    Raises:
        ValueError: For any HTTP method other than POST or GET.
    """
    if request.method == "POST":
        body = request.get_json()
        if body is None:
            result = "no json received"
        else:
            result = util.read_b64_image(body["image"], grayscale=True)
    elif request.method == "GET":
        image_url = request.args.get("image_url")
        if image_url is None:
            result = "no image_url defined in query string"
        else:
            # NOTE(review): image_url comes straight from the query string and
            # is handed to util.read_image unvalidated - confirm that helper
            # restricts which locations it will fetch.
            print("INFO url {}".format(image_url))
            result = util.read_image(image_url, grayscale=True)
    else:
        raise ValueError("Unsupported HTTP method")
    return result
def predict(self, image_or_filename: Union[str, np.ndarray]) -> Tuple:
    """Takes an image and returns all text within it.

    A string argument is first loaded with ``read_image``. The image is split
    into line region crops, each crop is processed for the line predictor,
    and the first element of each ``predict_on_image`` result is collected.

    Returns:
        A pair ``(text, line_region_crops)``: the per-line strings joined by
        single spaces, and the unprocessed line crops.
    """
    if isinstance(image_or_filename, str):
        image = read_image(image_or_filename)
    else:
        image = image_or_filename

    line_region_crops = self._get_line_region_crops(image)

    # All crops are processed before any prediction runs, as in the original.
    model_inputs = []
    for crop in line_region_crops:
        model_inputs.append(self._process_image_for_line_predictor(image=crop))

    line_texts = []
    for model_input in model_inputs:
        prediction = self.line_predictor_model.predict_on_image(model_input)
        line_texts.append(prediction[0])

    return " ".join(line_texts), line_region_crops
def _crop_paragraph_image(filename, line_regions, crop_dims, final_dims):
    """Write the paragraph crop, its ground truth and a debug image for a form.

    Steps:
      1. Read the form as grayscale and find the row band that covers every
         line region, widened by ``PARAGRAPH_BUFFER`` at the top and bottom.
      2. Paste that band, vertically centred, onto a 255-filled canvas of
         shape ``crop_dims`` (on failure: print the error and return).
      3. Build a label image where region ``i`` is filled with ``i % 2 + 1``.
      4. Build a 3-channel debug image with a coloured rectangle per region.
      5. Resize all three to ``final_dims`` and write them, named after
         ``filename.stem``.
    """
    source = util.read_image(filename, grayscale=True)

    y_min = min(r['y1'] for r in line_regions) - PARAGRAPH_BUFFER
    y_max = max(r['y2'] for r in line_regions) + PARAGRAPH_BUFFER
    n_rows = y_max - y_min
    offset = (crop_dims[0] - n_rows) // 2
    # Form-space y plus this shift gives the canvas-space y.
    shift = offset - y_min

    # Generate image crop
    image_crop = np.full(crop_dims, 255, dtype=np.uint8)
    try:
        image_crop[offset:offset + n_rows] = source[y_min:y_max]
    except Exception as e:
        print(f'Rescued {filename}: {e}')
        return

    # Generate ground truth
    gt_image = np.zeros_like(image_crop, dtype=np.uint8)
    for ind, region in enumerate(line_regions):
        rows = slice(region['y1'] + shift, region['y2'] + shift)
        cols = slice(region['x1'], region['x2'])
        gt_image[rows, cols] = ind % 2 + 1

    # Generate image for debugging
    import matplotlib.pyplot as plt

    cmap = plt.get_cmap('Set1')
    image_crop_for_debug = np.dstack([image_crop, image_crop, image_crop])
    for ind, region in enumerate(line_regions):
        color = [255 * level for level in cmap(ind)[:-1]]
        corner_a = (region['x1'], region['y1'] + shift)
        corner_b = (region['x2'], region['y2'] + shift)
        cv2.rectangle(image_crop_for_debug, corner_a, corner_b, color, 3)
    image_crop_for_debug = cv2.resize(
        image_crop_for_debug, final_dims, interpolation=cv2.INTER_AREA)
    util.write_image(image_crop_for_debug,
                     DEBUG_CROPS_DIRNAME / f'{filename.stem}.jpg')

    # Quality interpolation for input
    image_crop = cv2.resize(image_crop, final_dims,
                            interpolation=cv2.INTER_AREA)
    util.write_image(image_crop, CROPS_DIRNAME / f'{filename.stem}.jpg')

    # No interpolation for labels
    gt_image = cv2.resize(gt_image, final_dims,
                          interpolation=cv2.INTER_NEAREST)
    util.write_image(gt_image, GT_DIRNAME / f'{filename.stem}.png')
def predict(self, image_or_filename: Union[np.ndarray, str]):
    """Take an image and return all the text in it.

    A string argument is loaded with ``util.read_image(..., grayscale=True)``.
    The shapes of the detected line crops are printed, each crop is prepared
    for the line predictor model, and the first element of every
    ``predict_on_image`` result is joined with single spaces.

    Returns:
        A pair ``(text, line_region_crops)`` where the crops are the
        unprepared ones returned by ``_get_line_region_crops``.
    """
    image = image_or_filename
    if isinstance(image_or_filename, str):
        image = util.read_image(image_or_filename, grayscale=True)

    line_region_crops = self._get_line_region_crops(image=image)
    print([a.shape for a in line_region_crops])

    # Prepare every crop first, then predict, matching the original order.
    model_ready_crops = list(
        map(lambda crop: self._prepare_image_for_line_predictor_model(image=crop),
            line_region_crops))
    predict_line = self.line_predictor_model.predict_on_image
    joined_text = " ".join(predict_line(crop)[0] for crop in model_ready_crops)
    return joined_text, line_region_crops
def _decide_on_crop_dims(self):
    """
    Decide on the dimensions to crop out of the form image.

    Since image width is larger than a comfortable crop around the longest
    paragraph, we will make the crop a square form factor. And since the
    found dimensions 610x610 are pretty close to 512x512, we might as well
    resize crops and make it exactly that, which lets us do all kinds of
    power-of-2 pooling and upsampling should we choose to.

    The width is taken from the first form image only; the height comes from
    ``_get_max_paragraph_crop_height`` over all line regions.

    Returns:
        The square crop dimensions ``(max_crop_width, max_crop_width)``.

    Raises:
        RuntimeError: When the max crop height is larger than the max crop
            width. This was previously an ``assert``, which is skipped when
            Python runs with ``-O``.
    """
    sample_form_filename = self.iam_dataset.form_filenames[0]
    sample_image = util.read_image(sample_form_filename, grayscale=True)
    max_crop_width = sample_image.shape[1]
    max_crop_height = _get_max_paragraph_crop_height(self.iam_dataset.line_regions_by_id)
    # Explicit check so the validation also runs in optimized mode.
    if not max_crop_height <= max_crop_width:
        raise RuntimeError(
            f'Max crop height is larger than max crop width: {max_crop_height} > {max_crop_width}'
        )
    crop_dims = (max_crop_width, max_crop_width)
    print(f'Max crop width and height were found to be {max_crop_width}x{max_crop_height}.')
    print(f'Setting them to {max_crop_width}x{max_crop_width}')
    return crop_dims
def test_filename(self) -> None:
    """Test model on support image.

    Builds a ``ParagraphTextRecognizor`` from the line predictor and line
    detector configurations below, runs it on every ``*.jpg`` under
    ``SUPPORT_DIRNAME`` and checks that the number of returned line region
    crops equals the expected count registered for the file stem.
    """
    line_predictor_args = {
        "dataset": "EmnistLineDataset",
        "network_fn": "CNNTransformer",
    }
    line_detector_args = {
        "dataset": "EmnistLineDataset",
        "network_fn": "UNet"
    }
    model = ParagraphTextRecognizor(
        line_predictor_args=line_predictor_args,
        line_detector_args=line_detector_args,
    )
    num_text_lines_by_name = {"a01-000u-cropped": 7}
    for filename in (SUPPORT_DIRNAME).glob("*.jpg"):
        full_image = util.read_image(str(filename), grayscale=True)
        predicted_text, line_region_crops = model.predict(full_image)
        print(predicted_text)
        # assertTrue(a, b) treats b as the failure message and only checks
        # that a is truthy; assertEqual actually compares the two counts.
        self.assertEqual(len(line_region_crops),
                         num_text_lines_by_name[filename.stem])