def main():
    """Step through the images in ``args.image_dir`` and annotate each page.

    Reads ``image_dir``, ``page_dir`` and ``output_dir`` from the parsed
    command-line arguments. For every image the matching ``<stem>.xml`` layout
    is loaded from ``page_dir`` (or created empty when no input page folder is
    given), handed to ``PageEditor.annotate`` together with the image, and then
    written to ``output_dir`` if given, otherwise back to ``page_dir``.

    The loop ends as soon as ``editor.cursor`` leaves the valid index range of
    the image list.

    Raises:
        AssertionError: if neither page folder is specified, or a specified
            input folder does not exist.
    """
    args = parse_arguments()

    # Pages that already have a PAGE XML in the output folder are skipped.
    # NOTE(review): the flag is never cleared, so loading an existing output
    # layout below cannot be reached while it is True - confirm intent.
    SKIPPING = True

    assert args.page_dir is not None or args.output_dir is not None, "Specify input page folder and/or output page folder"
    assert os.path.exists(args.image_dir), "Can't find input image folder"
    if args.page_dir is not None:
        assert os.path.exists(args.page_dir), "Can't find input page folder"
    if args.output_dir is not None:
        if not os.path.exists(args.output_dir):
            os.makedirs(args.output_dir)

    filename_list = os.listdir(args.image_dir)
    editor = PageEditor(downsample=2, line_thickness=2, show_hint=True, cursor=0)

    while editor.cursor > -1 and editor.cursor < len(filename_list):
        print('cursor: ', editor.cursor)
        image_filename = filename_list[editor.cursor]
        page_id = os.path.splitext(image_filename)[0]
        page_filename = page_id + '.xml'

        # page_dir is optional (see the first assertion), so only build and
        # test the input path when it was actually given; joining None raises
        # TypeError.
        input_xml = None
        if args.page_dir is not None:
            input_xml = os.path.join(args.page_dir, page_filename)
            if not os.path.exists(input_xml):
                editor.cursor += 1
                print('Cannot find PAGE XML in expected location {}'.format(input_xml))
                continue

        cur_image = cv2.imread(os.path.join(args.image_dir, image_filename))
        if cur_image is None:
            # cv2.imread returns None for files it cannot decode; the code
            # below needs a real image (it reads .shape for new layouts).
            editor.cursor += 1
            print('Cannot read image {}'.format(
                os.path.join(args.image_dir, image_filename)))
            continue

        output_xml = None
        if args.output_dir is not None:
            output_xml = os.path.join(args.output_dir, page_filename)

        if output_xml is not None and os.path.exists(output_xml):
            if SKIPPING:
                editor.cursor += 1
                continue
            cur_layout = layout.PageLayout(file=output_xml)
        elif input_xml is not None:
            # Existence of input_xml was already verified above.
            cur_layout = layout.PageLayout(file=input_xml)
        else:
            # NOTE(review): page_size is passed as (shape[1], shape[0]);
            # confirm this matches the order PageLayout expects.
            cur_layout = layout.PageLayout(
                id=page_id,
                page_size=(cur_image.shape[1], cur_image.shape[0]))

        # The return value of annotate() is not used; cur_layout is what gets
        # saved below.
        editor.annotate(cur_image, cur_layout)

        if output_xml is not None:
            cur_layout.to_pagexml(output_xml)
        else:
            cur_layout.to_pagexml(os.path.join(args.page_dir, page_filename))
def main():
    """Enhance every supported image in ``args.input_images``.

    For each ``.jpg`` / ``.png`` / ``.tif`` file the matching
    ``<stem>.xml`` layout is looked up in ``args.input_page``. When it is
    missing and a parser config was given, the layout is produced by the
    automatic page parser and written to that location. The enhanced page is
    saved as ``<stem>_enhanced.jpg`` in ``args.output_path``.

    Raises:
        Exception: if a page XML is missing and no parser config was given.
    """
    args = parseargs()

    if not os.path.isdir(args.output_path):
        os.makedirs(args.output_path)

    print('Loading engines...')
    enhancer = repair_engine.EngineRepairCNN(args.repair_json)

    parser = None
    if args.parse_config is not None:
        config = configparser.ConfigParser()
        config.read(args.parse_config)
        # Paths in the config are interpreted relative to the config file
        # itself, so turn any relative ones into absolute paths.
        config_dir = os.path.dirname(args.parse_config)
        for section, key in (('LINE_PARSER', 'MODEL_PATH'), ('OCR', 'OCR_JSON')):
            configured_path = config[section][key]
            if not os.path.isabs(configured_path):
                config[section][key] = os.path.realpath(
                    os.path.join(config_dir, configured_path))
        parser = page_parser.PageParser(config)

    supported_extensions = ['.jpg', '.png', '.tif']
    for filename in os.listdir(args.input_images):
        page_id, extension = os.path.splitext(filename)
        if extension.lower() not in supported_extensions:
            continue

        page_img = cv2.imread(os.path.join(args.input_images, filename))
        page_xml_file = os.path.join(args.input_page, page_id + '.xml')

        if os.path.exists(page_xml_file):
            page_layout = layout.PageLayout(file=page_xml_file)
        elif parser is not None:
            print(
                'Page xml file for page {} not found, running automatic parser...'
                .format(page_id))
            height, width = page_img.shape[0], page_img.shape[1]
            page_layout = layout.PageLayout(id=page_id, page_size=(height, width))
            page_layout = parser.process_page(page_img, page_layout)
            page_layout.to_pagexml(page_xml_file)
        else:
            raise Exception(
                'Page xml file for page {} not found and automatic page parser config not specified.'
                .format(page_id))

        page_img = enhancer.enhance_page(page_img, page_layout)
        output_file = os.path.join(args.output_path,
                                   '{}_enhanced.jpg'.format(page_id))
        cv2.imwrite(output_file, page_img)
def main():
    """Visual check of ``EngineLineCropper`` on the bundled example page.

    Crops the first line of the third region of the example layout, blends
    the crop back into the page and shows three panels: the cropped line, a
    small window of the blended page, and the same window of the coordinate
    mapping (padded with a copy of its first channel so it can be displayed).
    """
    from pero_ocr.document_ocr import layout
    import matplotlib.pyplot as plt

    page_img = cv2.imread(
        '../../../example/82f4ac84-6f1e-43ba-b1d5-e2b28d69508d.jpg')
    page_layout = layout.PageLayout(
        file='../../../example/82f4ac84-6f1e-43ba-b1d5-e2b28d69508d.xml')

    cropper = EngineLineCropper(line_height=48, poly=2, scale=1)
    line = page_layout.regions[2].lines[0]
    cropped_line, mapping = cropper.crop(
        page_img, line.baseline, line.heights, return_mapping=True)
    back_mapped = cropper.blend_in(page_img, cropped_line, mapping)

    plt.subplot(131)
    plt.imshow(cropped_line)

    plt.subplot(132)
    plt.imshow(back_mapped[410:420, 1200:1240])

    plt.subplot(133)
    # np.int was only an alias of the builtin int and no longer exists in
    # NumPy >= 1.24, so use the builtin directly.
    mapping_preview = np.concatenate((mapping, mapping[:, :, :1]), axis=2)
    plt.imshow(mapping_preview.astype(int)[410:420, 1200:1240])

    plt.show()
def main():
    """Annotate the images in ``args.image_dir`` one by one with ``PageEditor``.

    For each image the layout is taken from ``<stem>.xml`` in
    ``args.output_dir`` if present, otherwise from ``args.page_dir`` if
    present, otherwise a new empty layout is created. After annotation the
    layout is written to ``args.output_dir`` when given, else to
    ``args.page_dir``. The loop runs while ``editor.cursor`` is a valid index
    into the image list.

    Raises:
        AssertionError: if neither page folder is specified, or a specified
            input folder does not exist.
    """
    args = parse_arguments()

    assert args.page_dir is not None or args.output_dir is not None, "Specify input page folder and/or output page folder"
    assert os.path.exists(args.image_dir), "Can't find input image folder"
    if args.page_dir is not None:
        assert os.path.exists(args.page_dir), "Can't find input page folder"
    if args.output_dir is not None and not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)

    image_names = list(os.listdir(args.image_dir))
    editor = PageEditor(downsample=2, line_thickness=2, show_hint=True, cursor=0)

    while -1 < editor.cursor < len(image_names):
        image_name = image_names[editor.cursor]
        page_id = os.path.splitext(image_name)[0]
        page_filename = page_id + '.xml'
        cur_image = cv2.imread(os.path.join(args.image_dir, image_name))

        # Look for an existing layout; the output folder takes priority over
        # the input page folder.
        existing_xml = None
        for folder in (args.output_dir, args.page_dir):
            if folder is None:
                continue
            candidate = os.path.join(folder, page_filename)
            if os.path.exists(candidate):
                existing_xml = candidate
                break

        if existing_xml is not None:
            cur_layout = layout.PageLayout(file=existing_xml)
        else:
            # NOTE(review): page_size is passed as (shape[1], shape[0]);
            # confirm this matches the order PageLayout expects.
            cur_layout = layout.PageLayout(
                id=page_id,
                page_size=(cur_image.shape[1], cur_image.shape[0]))

        # The value returned by annotate() is not used; cur_layout is saved.
        editor.annotate(cur_image, cur_layout)

        if args.output_dir is not None:
            target_dir = args.output_dir
        else:
            target_dir = args.page_dir
        cur_layout.to_pagexml(os.path.join(target_dir, page_filename))
f'Error: Failed to detect lines in adapted resolution. Downsample was {self.downsample}', file=sys.stderr) height = np.median([h[0] + h[1] for h in heights_list]) print( f"OPTIMAL DOWNAMPLING {img.shape[0]//self.downsample}:{img.shape[1]//self.downsample}", self.downsample, height, height / self.downsample) self.downsample = temp_downsample return baselines_list, heights_list, textlines_list if __name__ == '__main__': from pero_ocr.document_ocr import layout test_layout = layout.PageLayout(id='test') test_p = layout.RegionLayout('r', np.zeros((4, 2))) engine_instance = EngineLineDetectorCNN( '/mnt/matylda1/hradis/PERO/layout_engines/baselines_prod/parsenet_multi_alldata_fix_exported' ) image = cv2.imread( '/mnt/matylda1/ikodym/junk/refactor_test/8e41ecc2-57ed-412a-aa4f-d945efa7c624.jpg' ) baselines, heights, textlines = engine_instance.detect_lines(image) for baseline, height, textline in zip(baselines, heights, textlines): new_textline = layout.TextLine(baseline=baseline, heights=height, polygon=textline) test_p.lines.append(new_textline)
def main():
    """Run the interactive line repair / inpainting editor on one page.

    Loads the image ``args.input_img`` and the layout ``args.input_page``,
    creates the OCR, repair and line-cropping engines and opens an OpenCV
    window driven by a ``LayoutClicker`` mouse callback. Keys:

    * ``q`` - quit.
    * ``r`` - edit the transcription of the chosen line, then either repair
      the line with it or revert the line to the original image.
    * ``e`` - with two points selected, blank the span between them in the
      line crop, OCR the parts before and after it, and inpaint the line
      with the transcription entered by the user.

    ``r`` and ``e`` are ignored until a line has been chosen.
    """
    args = parseargs()

    page_img = cv2.imread(args.input_img)
    page_layout = layout.PageLayout(file=args.input_page)
    # Untouched copy of the input, used by the 'revert' action.
    page_img_orig = page_img.copy()

    print('\nLoading engines...')
    ocr_engine = ocr.EngineLineOCR(args.ocr_json, gpu_id=0)
    repair_engine = repair.EngineRepairCNN(args.repair_json)
    crop_engine = cropper.EngineLineCropper(line_height=repair_engine.height,
                                            poly=2,
                                            scale=1)

    cv2.namedWindow("Page Editor", cv2.WINDOW_NORMAL)
    cv2.resizeWindow('Page Editor', 1024, 1024)
    layout_clicker = LayoutClicker(page_layout)
    cv2.setMouseCallback("Page Editor", layout_clicker.callback)

    while True:
        # Redraw from the current page every frame so overlays never
        # accumulate in page_img itself.
        page_img_rendered = page_img.copy()
        if layout_clicker.chosen_line:
            page_img_rendered = layout.draw_lines(
                page_img_rendered, [layout_clicker.chosen_line.polygon],
                color=(0, 255, 0),
                close=True)
        if layout_clicker.points:
            page_img_rendered = layout.draw_lines(page_img_rendered,
                                                  [layout_clicker.points],
                                                  color=(0, 0, 255))

        cv2.imshow('Page Editor', page_img_rendered)
        key = cv2.waitKey(1)

        if key == ord('q'):
            break

        # Both editing actions dereference the chosen line, so they are only
        # available once a line has been selected.
        elif key == ord('r') and layout_clicker.chosen_line:
            line = layout_clicker.chosen_line
            text_input = TextInputRepair(line.transcription)
            action, new_transcription = text_input.run()
            line.transcription = new_transcription

            if action == 'repair':
                line_crop, line_mapping, offset = crop_engine.crop(
                    page_img, line.baseline, line.heights,
                    return_mapping=True)
                line_crop = repair_engine.repair_line(line_crop,
                                                      line.transcription)
                page_img = crop_engine.blend_in(page_img, line_crop,
                                                line_mapping, offset)
            elif action == 'revert':
                # Crop from the pristine image and paste it over the
                # current one.
                line_crop, line_mapping, offset = crop_engine.crop(
                    page_img_orig, line.baseline, line.heights,
                    return_mapping=True)
                page_img = crop_engine.blend_in(page_img, line_crop,
                                                line_mapping, offset)

        elif (key == ord('e') and len(layout_clicker.points) == 2
              and layout_clicker.chosen_line):
            line = layout_clicker.chosen_line
            line_crop, line_mapping, offset = crop_engine.crop(
                page_img, line.baseline, line.heights, return_mapping=True)

            # Translate the two selected points into columns of the line
            # crop through the middle row of the coordinate mapping.
            # NOTE(review): only the second point is clipped to the mapping
            # width - confirm the first cannot fall outside it.
            middle_row = line_mapping.shape[0] // 2
            y1 = np.round(
                line_mapping[middle_row,
                             layout_clicker.points[0][1] - offset[1],
                             1]).astype(np.uint16)
            y2 = np.round(
                line_mapping[middle_row,
                             np.clip(layout_clicker.points[1][1] - offset[1],
                                     0, line_mapping.shape[1] - 2),
                             1]).astype(np.uint16)
            if layout_clicker.points[1][1] - offset[1] > line_mapping.shape[1] - 10:
                # dirty fix: noisy values at the end of the coordinate map
                y2 = np.amax(line_mapping[:, :, 1].astype(np.uint16))
            print('{}/{}'.format(y2, line_crop.shape[1]))

            span_start = np.minimum(y1, y2)
            span_end = np.maximum(y1, y2)
            # OCR the untouched parts left and right of the selected span.
            transcriptions, _, _ = ocr_engine.process_lines([
                line_crop[:, :span_start, :],
                line_crop[:, span_end:, :]
            ])
            # Blank the selected span before inpainting.
            line_crop[:, span_start:span_end, :] = 0

            text_input = TextInputInpaint(transcriptions[0], transcriptions[1])
            action, new_transcription = text_input.run()
            if action == 'inpaint':
                line.transcription = new_transcription
                line_crop = repair_engine.inpaint_line(line_crop,
                                                       line.transcription)
                page_img = crop_engine.blend_in(page_img, line_crop,
                                                line_mapping, offset)
                line_crop, line_mapping, offset = crop_engine.crop(
                    page_img, line.baseline, line.heights,
                    return_mapping=True)
            # Drop the selection once the edit dialog has been handled.
            layout_clicker.points = []
def main():
    """Run the interactive page enhancement demo on one page.

    Loads the image ``args.input_img``. The layout is read from
    ``args.input_page`` when that file exists; otherwise, if
    ``args.parse_config`` is given, it is produced by the automatic page
    parser and saved to ``./output_pages/<image stem>.xml``. An OpenCV window
    driven by a ``LayoutClicker`` mouse callback is then opened. Keys:

    * ``q`` - quit.
    * ``r`` - edit the transcription of the chosen line, then either enhance
      the line or revert it to the original image.
    * ``e`` - with two points selected, blank the span between them in the
      line crop, OCR the parts before and after it, and inpaint the line
      with the transcription entered by the user. This needs the OCR engine
      of the page parser, so it is only available when a parser config was
      given.

    ``r`` and ``e`` are ignored until a line has been chosen.

    Raises:
        Exception: if the page XML is missing and no parser config was given.
    """
    args = parseargs()

    page_img = cv2.imread(args.input_img)
    # Untouched copy of the input, used by the parser and the 'revert' action.
    page_img_orig = page_img.copy()

    print('\nLoading engines...')
    if args.parse_config is not None:
        config = configparser.ConfigParser()
        config.read(args.parse_config)
        # Paths in the config are interpreted relative to the config file
        # itself, so turn any relative ones into absolute paths.
        config_dir = os.path.dirname(args.parse_config)
        for section, key in [['LINE_PARSER', 'MODEL_PATH'],
                             ['OCR', 'OCR_JSON']]:
            if not os.path.isabs(config[section][key]):
                config[section][key] = os.path.realpath(
                    os.path.join(config_dir, config[section][key]))
        parser = page_parser.PageParser(config)
    else:
        parser = None
    enhancer = repair_engine.EngineRepairCNN(args.repair_json)

    print('Loading page layout...')
    if os.path.exists(args.input_page):
        page_layout = layout.PageLayout(file=args.input_page)
    elif parser is not None:
        print('Page xml file not found, running automatic parser...')
        page_layout = layout.PageLayout(id='id_placeholder',
                                        page_size=(page_img_orig.shape[0],
                                                   page_img_orig.shape[1]))
        page_layout = parser.process_page(page_img_orig, page_layout)
        os.makedirs('./output_pages', exist_ok=True)
        file_name = os.path.splitext(os.path.split(args.input_img)[1])[0]
        page_layout.to_pagexml(
            os.path.join('./output_pages', '{}.xml'.format(file_name)))
    else:
        raise Exception(
            'Page xml file not found and automatic page parser config not specified.'
        )

    print('\n\n Welcome to the page enhancement interactive demo.')
    print(
        'After choosing a line by double-clicking, you can enhance it by pressing r key. After that, you can change individual parts of the text by selecting the area and pressing e key.'
    )

    cv2.namedWindow("Page Editor", cv2.WINDOW_NORMAL)
    cv2.resizeWindow('Page Editor', 1024, 1024)
    layout_clicker = LayoutClicker(page_layout)
    cv2.setMouseCallback("Page Editor", layout_clicker.callback)

    while True:
        # Redraw from the current page every frame so overlays never
        # accumulate in page_img itself.
        page_img_rendered = page_img.copy()
        if layout_clicker.chosen_line:
            page_img_rendered = layout.draw_lines(
                page_img_rendered, [layout_clicker.chosen_line.polygon],
                color=(0, 255, 0),
                close=True)
        if layout_clicker.points:
            page_img_rendered = layout.draw_lines(page_img_rendered,
                                                  [layout_clicker.points],
                                                  color=(0, 0, 255))

        cv2.imshow('Page Editor', page_img_rendered)
        key = cv2.waitKey(1)

        if key == ord('q'):
            break

        # Both editing actions dereference the chosen line, so they are only
        # available once a line has been selected.
        elif key == ord('r') and layout_clicker.chosen_line:
            line = layout_clicker.chosen_line
            text_input = TextInputRepair(line.transcription)
            action, new_transcription = text_input.run()
            line.transcription = new_transcription

            if action == 'repair':
                page_img = enhancer.enhance_line_in_page(page_img, line)
            elif action == 'revert':
                # Crop from the pristine image and paste it over the
                # current one.
                line_crop, line_mapping, offset = enhancer.cropper.crop(
                    page_img_orig, line.baseline, line.heights,
                    return_mapping=True)
                page_img = enhancer.cropper.blend_in(page_img, line_crop,
                                                     line_mapping, offset)

        elif (key == ord('e') and len(layout_clicker.points) == 2
              and layout_clicker.chosen_line):
            if parser is None:
                # The OCR engine used below lives on the page parser, which
                # only exists when a parser config was supplied.
                print('Inpainting requires the OCR engine; start the demo '
                      'with a page parser config to enable it.')
                continue

            line = layout_clicker.chosen_line
            line_crop, line_mapping, offset = enhancer.cropper.crop(
                page_img, line.baseline, line.heights, return_mapping=True)

            # Translate the two selected points into columns of the line
            # crop through the middle row of the coordinate mapping.
            # NOTE(review): only the second point is clipped to the mapping
            # width - confirm the first cannot fall outside it.
            middle_row = line_mapping.shape[0] // 2
            y1 = np.round(
                line_mapping[middle_row,
                             layout_clicker.points[0][0] - offset[1],
                             0]).astype(np.uint16)
            y2 = np.round(
                line_mapping[middle_row,
                             np.clip(layout_clicker.points[1][0] - offset[1],
                                     0, line_mapping.shape[1] - 2),
                             0]).astype(np.uint16)

            span_start = np.minimum(y1, y2)
            span_end = np.maximum(y1, y2)
            # OCR the untouched parts left and right of the selected span.
            transcriptions, _ = parser.ocr.ocr_engine.process_lines([
                line_crop[:, :span_start, :],
                line_crop[:, span_end:, :]
            ])
            # Blank the selected span before inpainting.
            line_crop[:, span_start:span_end, :] = 0

            text_input = TextInputInpaint(transcriptions[0], transcriptions[1])
            action, new_transcription = text_input.run()
            if action == 'inpaint':
                line.transcription = new_transcription
                line_crop = enhancer.inpaint_line(line_crop,
                                                  line.transcription)
                page_img = enhancer.cropper.blend_in(page_img, line_crop,
                                                     line_mapping, offset)
                line_crop, line_mapping, offset = enhancer.cropper.crop(
                    page_img, line.baseline, line.heights,
                    return_mapping=True)
            # Drop the selection once the edit dialog has been handled.
            layout_clicker.points = []