def __init__(self, config, config_path=''):
    """Create the line OCR engine selected by the configuration.

    Args:
        config: Mapping that provides ``'OCR_JSON'`` and, optionally,
            ``'METHOD'``.
        config_path: Passed to ``compose_path`` together with the
            ``'OCR_JSON'`` value (presumably the location that value is
            relative to -- confirm against ``compose_path``).
    """
    engine_json = compose_path(config['OCR_JSON'], config_path)
    wants_pytorch = 'METHOD' in config and config['METHOD'] == 'pytorch_ocr'

    if not wants_pytorch:
        # Any other (or missing) METHOD falls back to the default engine.
        self.ocr_engine = line_ocr_engine.EngineLineOCR(engine_json, gpu_id=0)
        return

    # The PyTorch engine module is imported only when it is requested.
    from pero_ocr.ocr_engine.pytorch_ocr_engine import PytorchEngineLineOCR
    self.ocr_engine = PytorchEngineLineOCR(engine_json, gpu_id=0)
def main():
    """Run the line OCR engine on the input images and pickle the logits.

    Reads the command-line arguments, creates the OCR engine from
    ``args.ocr_json``, processes the images read from ``args.input`` and
    writes ``{'names': ..., 'logits': ...}`` to ``args.output`` with pickle.
    """
    cli_args = parse_arguments()
    engine_config = cli_args.ocr_json

    # suppress tensorflow warnings on loading models
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    engine = ocr.EngineLineOCR(engine_config, gpu_id=0)

    line_images, line_names = read_images(cli_args.input)
    # Only the second value returned by process_lines is stored.
    _unused, line_logits = engine.process_lines(line_images)

    payload = dict(names=line_names, logits=line_logits)
    with open(cli_args.output, 'wb') as out_file:
        pickle.dump(payload, out_file)
def __init__(self, config):
    """Create the line OCR engine described by ``config['OCR_JSON']``.

    Args:
        config: Mapping that provides the ``'OCR_JSON'`` entry handed to
            ``line_ocr_engine.EngineLineOCR``.
    """
    engine_json = config['OCR_JSON']
    self.ocr_engine = line_ocr_engine.EngineLineOCR(
        engine_json,
        gpu_id=0,
    )
def _crop_line(crop_engine, image, line):
    """Crop ``line`` out of ``image``; returns ``(crop, mapping, offset)``."""
    return crop_engine.crop(
        image, line.baseline, line.heights, return_mapping=True)


def _mapped_column(line_mapping, offset, point):
    """Translate a clicked point into a column index of the line crop.

    The same handling is applied to both selected points because the caller
    treats them as an unordered pair (it takes their minimum and maximum).
    """
    n_cols = line_mapping.shape[1]
    idx = point[1] - offset[1]
    if idx > n_cols - 10:
        # dirty fix noisy values at the end of coord map
        return np.amax(line_mapping[:, :, 1].astype(np.uint16))
    # Keep the lookup inside the map; a negative index would silently wrap.
    idx = np.clip(idx, 0, n_cols - 2)
    mid_row = line_mapping.shape[0] // 2
    return np.round(line_mapping[mid_row, idx, 1]).astype(np.uint16)


def main():
    """Run the interactive page editor for repairing and inpainting lines.

    Loads the page image and layout named on the command line, shows the
    page in an OpenCV window and reacts to keys:

    * ``q`` -- exit the editor loop.
    * ``r`` -- edit the chosen line's transcription in ``TextInputRepair``.
      On ``'repair'`` the line crop is passed through
      ``repair_engine.repair_line`` and blended back into the page; on
      ``'revert'`` the crop is taken from the originally loaded image.
    * ``e`` -- with exactly two points selected, run OCR on the crop parts
      left and right of the selection, zero the selected span and, on
      ``'inpaint'``, pass the crop through ``repair_engine.inpaint_line``
      and blend it back into the page.

    ``r`` and ``e`` are ignored while no line is chosen.

    Raises:
        OSError: If the input image cannot be read.
    """
    args = parseargs()

    page_img = cv2.imread(args.input_img)
    if page_img is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise OSError('Could not read image: {}'.format(args.input_img))
    page_layout = layout.PageLayout(file=args.input_page)

    # Untouched copy of the page, used by the 'revert' action.
    page_img_orig = page_img.copy()

    print('\nLoading engines...')
    ocr_engine = ocr.EngineLineOCR(args.ocr_json, gpu_id=0)
    repair_engine = repair.EngineRepairCNN(args.repair_json)
    crop_engine = cropper.EngineLineCropper(
        line_height=repair_engine.height, poly=2, scale=1)

    cv2.namedWindow("Page Editor", cv2.WINDOW_NORMAL)
    cv2.resizeWindow('Page Editor', 1024, 1024)
    layout_clicker = LayoutClicker(page_layout)
    cv2.setMouseCallback("Page Editor", layout_clicker.callback)

    while True:
        # Redraw from the current page every frame so overlays never
        # accumulate on the edited image itself.
        page_img_rendered = page_img.copy()
        if layout_clicker.chosen_line:
            page_img_rendered = layout.draw_lines(
                page_img_rendered,
                [layout_clicker.chosen_line.polygon],
                color=(0, 255, 0),
                close=True)
        if layout_clicker.points:
            page_img_rendered = layout.draw_lines(
                page_img_rendered,
                [layout_clicker.points],
                color=(0, 0, 255))

        cv2.imshow('Page Editor', page_img_rendered)
        key = cv2.waitKey(1)

        if key == ord('q'):
            break

        # Both remaining actions work on the chosen line. Bind it once so
        # the whole action uses the line that was chosen at key press.
        line = layout_clicker.chosen_line
        if not line:
            continue

        if key == ord('r'):
            text_input = TextInputRepair(line.transcription)
            action, new_transcription = text_input.run()
            line.transcription = new_transcription

            if action == 'repair':
                line_crop, line_mapping, offset = _crop_line(
                    crop_engine, page_img, line)
                line_crop = repair_engine.repair_line(
                    line_crop, line.transcription)
                page_img = crop_engine.blend_in(
                    page_img, line_crop, line_mapping, offset)
            elif action == 'revert':
                # Blend the line as it looked in the originally loaded image.
                line_crop, line_mapping, offset = _crop_line(
                    crop_engine, page_img_orig, line)
                page_img = crop_engine.blend_in(
                    page_img, line_crop, line_mapping, offset)

        elif key == ord('e') and len(layout_clicker.points) == 2:
            line_crop, line_mapping, offset = _crop_line(
                crop_engine, page_img, line)

            y1 = _mapped_column(
                line_mapping, offset, layout_clicker.points[0])
            y2 = _mapped_column(
                line_mapping, offset, layout_clicker.points[1])
            print('{}/{}'.format(y2, line_crop.shape[1]))

            # The points may have been clicked in either order.
            span_start = np.minimum(y1, y2)
            span_end = np.maximum(y1, y2)

            # OCR the parts of the crop on each side of the selected span.
            transcriptions, _, _ = ocr_engine.process_lines([
                line_crop[:, :span_start, :],
                line_crop[:, span_end:, :]
            ])
            line_crop[:, span_start:span_end, :] = 0

            text_input = TextInputInpaint(transcriptions[0], transcriptions[1])
            action, new_transcription = text_input.run()
            if action == 'inpaint':
                line.transcription = new_transcription
                line_crop = repair_engine.inpaint_line(
                    line_crop, line.transcription)
                page_img = crop_engine.blend_in(
                    page_img, line_crop, line_mapping, offset)
                layout_clicker.points = []