Example #1
def __init__(self, config, config_path=''):
    # Path to the OCR engine description JSON.
    json_file = compose_path(config['OCR_JSON'], config_path)
    if 'METHOD' in config and config['METHOD'] == 'pytorch_ocr':
        from pero_ocr.ocr_engine.pytorch_ocr_engine import PytorchEngineLineOCR
        self.ocr_engine = PytorchEngineLineOCR(json_file, gpu_id=0)
    else:
        self.ocr_engine = line_ocr_engine.EngineLineOCR(json_file, gpu_id=0)
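The helpers compose_path and line_ocr_engine are imported elsewhere in the source module and are not part of the excerpt. A minimal sketch of compose_path, assuming it only resolves a relative OCR_JSON path against the directory of the configuration file:

import os

def compose_path(file_path, reference_path):
    # Assumed behaviour: leave absolute paths untouched, otherwise join the
    # relative path with the directory the configuration was loaded from.
    if reference_path and not os.path.isabs(file_path):
        file_path = os.path.join(reference_path, file_path)
    return file_path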
Example #2
def main():
    args = parse_arguments()

    ocr_json = args.ocr_json

    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'  # suppress tensorflow warnings on loading models
    ocr_engine = ocr.EngineLineOCR(ocr_json, gpu_id=0)

    lines, names = read_images(args.input)
    _, logits = ocr_engine.process_lines(lines)

    complete_data = {'names': names, 'logits': logits}

    with open(args.output, 'wb') as f:
        pickle.dump(complete_data, f)
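parse_arguments() and read_images() are defined in the surrounding script and are not shown here. The pickled output can be read back with the standard library alone; a minimal sketch (the file name is illustrative):

import pickle

with open('ocr_logits.pickle', 'rb') as f:
    data = pickle.load(f)

# One logits entry per input line image, in the same order as the names.
for name, line_logits in zip(data['names'], data['logits']):
    print(name, getattr(line_logits, 'shape', None))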
Example #3
def __init__(self, config):
    json_file = config['OCR_JSON']
    self.ocr_engine = line_ocr_engine.EngineLineOCR(json_file, gpu_id=0)
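Here config only needs to behave like a mapping with an OCR_JSON key pointing at the engine description file. A hedged sketch of a configuration that would satisfy both this example and Example #1 (the file path and the use of a plain dict are illustrative; the real project may pass a config-parser section instead):

config = {
    'METHOD': 'pytorch_ocr',          # optional; selects the PyTorch engine in Example #1
    'OCR_JSON': './ocr_engine.json',  # path to the OCR engine definition (illustrative)
}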
Example #4
def main():

    args = parseargs()

    page_img = cv2.imread(args.input_img)
    page_layout = layout.PageLayout(file=args.input_page)

    page_img_orig = page_img.copy()
    page_img_rendered = page_img.copy()

    print('\nLoading engines...')
    ocr_engine = ocr.EngineLineOCR(args.ocr_json, gpu_id=0)
    repair_engine = repair.EngineRepairCNN(args.repair_json)
    # The cropper works at the repair engine's line height so repaired or
    # inpainted crops can be blended back into the page image.
    crop_engine = cropper.EngineLineCropper(line_height=repair_engine.height,
                                            poly=2,
                                            scale=1)

    cv2.namedWindow("Page Editor", cv2.WINDOW_NORMAL)
    cv2.resizeWindow('Page Editor', 1024, 1024)
    layout_clicker = LayoutClicker(page_layout)
    cv2.setMouseCallback("Page Editor", layout_clicker.callback)

    while True:
        page_img_rendered = page_img.copy()
        # Highlight the currently selected line and any clicked points.
        if layout_clicker.chosen_line:
            page_img_rendered = layout.draw_lines(
                page_img_rendered, [layout_clicker.chosen_line.polygon],
                color=(0, 255, 0),
                close=True)
        if layout_clicker.points:
            page_img_rendered = layout.draw_lines(page_img_rendered,
                                                  [layout_clicker.points],
                                                  color=(0, 0, 255))

        cv2.imshow('Page Editor', page_img_rendered)
        key = cv2.waitKey(1)

        if key == ord('q'):
            break

        # 'r': edit the chosen line's transcription, then either run the repair
        # engine on the line crop ("repair") or restore the original pixels
        # from page_img_orig ("revert").
        elif key == ord('r'):
            text_input = TextInputRepair(
                layout_clicker.chosen_line.transcription)
            action, new_transcription = text_input.run()
            layout_clicker.chosen_line.transcription = new_transcription

            if action == 'repair':
                line_crop, line_mapping, offset = crop_engine.crop(
                    page_img,
                    layout_clicker.chosen_line.baseline,
                    layout_clicker.chosen_line.heights,
                    return_mapping=True)
                line_crop = repair_engine.repair_line(
                    line_crop, layout_clicker.chosen_line.transcription)
                page_img = crop_engine.blend_in(page_img, line_crop,
                                                line_mapping, offset)
                page_img_rendered = page_img.copy()

            elif action == 'revert':
                line_crop, line_mapping, offset = crop_engine.crop(
                    page_img_orig,
                    layout_clicker.chosen_line.baseline,
                    layout_clicker.chosen_line.heights,
                    return_mapping=True)
                page_img = crop_engine.blend_in(page_img, line_crop,
                                                line_mapping, offset)
                page_img_rendered = page_img.copy()

        # 'e': with two points clicked, OCR the text left and right of the
        # marked span, erase the span, and optionally inpaint it with a
        # user-confirmed transcription.
        elif key == ord('e') and len(layout_clicker.points) == 2:
            line_crop, line_mapping, offset = crop_engine.crop(
                page_img,
                layout_clicker.chosen_line.baseline,
                layout_clicker.chosen_line.heights,
                return_mapping=True)

            y1 = np.round(line_mapping[line_mapping.shape[0] // 2,
                                       layout_clicker.points[0][1] - offset[1],
                                       1]).astype(np.uint16)
            y2 = np.round(
                line_mapping[line_mapping.shape[0] // 2,
                             np.clip(layout_clicker.points[1][1] -
                                     offset[1], 0, line_mapping.shape[1] - 2),
                             1]).astype(np.uint16)
            # Dirty fix for noisy values at the end of the coordinate map.
            if layout_clicker.points[1][1] - offset[1] > line_mapping.shape[1] - 10:
                y2 = np.amax(line_mapping[:, :, 1].astype(np.uint16))
            print('{}/{}'.format(y2, line_crop.shape[1]))
            transcriptions, _, _ = ocr_engine.process_lines([
                line_crop[:, :np.minimum(y1, y2), :],
                line_crop[:, np.maximum(y1, y2):, :]
            ])
            line_crop[:, np.minimum(y1, y2):np.maximum(y1, y2), :] = 0
            text_input = TextInputInpaint(transcriptions[0], transcriptions[1])
            action, new_transcription = text_input.run()
            if action == 'inpaint':
                layout_clicker.chosen_line.transcription = new_transcription

                line_crop = repair_engine.inpaint_line(
                    line_crop, layout_clicker.chosen_line.transcription)
                page_img = crop_engine.blend_in(page_img, line_crop,
                                                line_mapping, offset)
                line_crop, line_mapping, offset = crop_engine.crop(
                    page_img,
                    layout_clicker.chosen_line.baseline,
                    layout_clicker.chosen_line.heights,
                    return_mapping=True)
                layout_clicker.points = []
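parseargs(), LayoutClicker, TextInputRepair and TextInputInpaint are defined elsewhere in the source script. A minimal sketch of the argument parser this example relies on, assuming plain argparse; the flag spellings are inferred from the attribute names used above:

import argparse

def parseargs():
    parser = argparse.ArgumentParser()
    parser.add_argument('--input-img', required=True, help='Path to the page image.')
    parser.add_argument('--input-page', required=True, help='Path to the PAGE XML layout.')
    parser.add_argument('--ocr-json', required=True, help='OCR engine definition file.')
    parser.add_argument('--repair-json', required=True, help='Repair engine definition file.')
    return parser.parse_args()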