Beispiel #1
0
def main():
    """Interactively annotate the layout of every image in the image folder.

    Command-line arguments come from ``parse_arguments()``; this function
    reads ``image_dir``, ``page_dir`` and ``output_dir`` from them. At least
    one of ``page_dir`` / ``output_dir`` must be given.

    For each image the matching ``<image basename>.xml`` is looked up:

    * If an input page folder is given and it lacks the XML, the image is
      skipped.
    * If the output folder already holds the XML, the page is skipped while
      ``SKIPPING`` is true; otherwise that XML is loaded.
    * Otherwise the XML from the input page folder is loaded, or, when no
      input page folder is given, a new empty layout is created.

    After ``editor.annotate`` returns, the layout is written to the output
    folder if one is given, else back to the input page folder.
    """
    args = parse_arguments()

    # Skip to the first page that does not yet have edited annotations in the
    # output folder.
    SKIPPING = True

    assert args.page_dir is not None or args.output_dir is not None, "Specify input page folder and/or output page folder"
    assert os.path.exists(args.image_dir), "Can't find input image folder"
    if args.page_dir is not None:
        assert os.path.exists(args.page_dir), "Can't find input page folder"
    if args.output_dir is not None:
        if not os.path.exists(args.output_dir):
            os.makedirs(args.output_dir)

    filename_list = os.listdir(args.image_dir)
    editor = PageEditor(downsample=2,
                        line_thickness=2,
                        show_hint=True,
                        cursor=0)

    # Annotated pages go to the output folder when given; the assertion above
    # guarantees page_dir is set whenever output_dir is not.
    save_dir = args.output_dir if args.output_dir is not None else args.page_dir

    # The loop ends once the cursor leaves the valid index range.
    # NOTE(review): the cursor is only advanced here for skipped pages;
    # presumably editor.annotate moves it otherwise - confirm.
    while -1 < editor.cursor < len(filename_list):
        print('cursor: ', editor.cursor)
        image_filename = filename_list[editor.cursor]
        page_id = os.path.splitext(image_filename)[0]
        page_filename = page_id + '.xml'

        # page_dir is optional, so only build and check a path inside it when
        # it was actually supplied (joining with None raises TypeError).
        input_page_path = None
        if args.page_dir is not None:
            input_page_path = os.path.join(args.page_dir, page_filename)
            if not os.path.exists(input_page_path):
                editor.cursor += 1
                print('Cannot find PAGE XML in expected location {}'.format(
                    input_page_path))
                continue

        output_page_path = None
        if args.output_dir is not None:
            candidate = os.path.join(args.output_dir, page_filename)
            if os.path.exists(candidate):
                output_page_path = candidate

        if output_page_path is not None and SKIPPING:
            editor.cursor += 1
            continue

        # Decode the image only for pages that will actually be shown, so
        # skipped pages cost no image I/O.
        image_path = os.path.join(args.image_dir, image_filename)
        cur_image = cv2.imread(image_path)
        if cur_image is None:
            # cv2.imread returns None for files it cannot decode.
            editor.cursor += 1
            print('Cannot read image {}'.format(image_path))
            continue

        if output_page_path is not None:
            cur_layout = layout.PageLayout(file=output_page_path)
        elif input_page_path is not None:
            cur_layout = layout.PageLayout(file=input_page_path)
        else:
            cur_layout = layout.PageLayout(
                id=page_id,
                page_size=(cur_image.shape[1], cur_image.shape[0]))

        # NOTE(review): the return value of annotate() was never used; the
        # layout object passed in is the one that gets saved - confirm that
        # annotate() edits it in place.
        editor.annotate(cur_image, cur_layout)
        cur_layout.to_pagexml(os.path.join(save_dir, page_filename))
Beispiel #2
0
def main():
    """Enhance every supported image in ``args.input_images``.

    For each ``.jpg`` / ``.png`` / ``.tif`` file, the page layout is loaded
    from ``args.input_page/<page id>.xml``. If that file is missing and a
    parser config was supplied, the layout is produced by the automatic page
    parser and written to that path. The enhanced page is saved as
    ``<page id>_enhanced.jpg`` in ``args.output_path``.

    Raises:
        Exception: if a page XML is missing and no parser config was given.
    """
    args = parseargs()
    if not os.path.isdir(args.output_path):
        os.makedirs(args.output_path)

    print('Loading engines...')
    enhancer = repair_engine.EngineRepairCNN(args.repair_json)

    parser = None
    if args.parse_config is not None:
        config = configparser.ConfigParser()
        config.read(args.parse_config)
        # Relative paths in the config are resolved against the config
        # file's own directory.
        config_dir = os.path.dirname(args.parse_config)
        path_options = (('LINE_PARSER', 'MODEL_PATH'), ('OCR', 'OCR_JSON'))
        for section, key in path_options:
            configured = config[section][key]
            if os.path.isabs(configured):
                continue
            config[section][key] = os.path.realpath(
                os.path.join(config_dir, configured))
        parser = page_parser.PageParser(config)

    supported_extensions = ('.jpg', '.png', '.tif')
    for filename in os.listdir(args.input_images):
        page_id, extension = os.path.splitext(filename)
        if extension.lower() not in supported_extensions:
            continue

        page_img = cv2.imread(os.path.join(args.input_images, filename))
        page_xml_file = os.path.join(args.input_page, page_id + '.xml')

        if os.path.exists(page_xml_file):
            page_layout = layout.PageLayout(file=page_xml_file)
        elif parser is not None:
            message = 'Page xml file for page {} not found, running automatic parser...'
            print(message.format(page_id))
            height, width = page_img.shape[0], page_img.shape[1]
            page_layout = layout.PageLayout(id=page_id,
                                            page_size=(height, width))
            page_layout = parser.process_page(page_img, page_layout)
            page_layout.to_pagexml(page_xml_file)
        else:
            raise Exception(
                'Page xml file for page {} not found and automatic page parser config not specified.'
                .format(page_id))

        page_img = enhancer.enhance_page(page_img, page_layout)

        output_name = '{}_enhanced.jpg'.format(page_id)
        cv2.imwrite(os.path.join(args.output_path, output_name), page_img)
Beispiel #3
0
def main():
    """Visual smoke test for ``EngineLineCropper`` on a bundled example page.

    Crops the first line of the third region of the example layout, blends
    the crop back into the page, and shows three matplotlib panels: the
    cropped line, a small window of the blended page, and the same window of
    the coordinate mapping.
    """
    from pero_ocr.document_ocr import layout
    import matplotlib.pyplot as plt

    page_img = cv2.imread(
        '../../../example/82f4ac84-6f1e-43ba-b1d5-e2b28d69508d.jpg')
    page_layout = layout.PageLayout(
        file='../../../example/82f4ac84-6f1e-43ba-b1d5-e2b28d69508d.xml')

    cropper = EngineLineCropper(line_height=48, poly=2, scale=1)
    line = page_layout.regions[2].lines[0]
    cropped_line, mapping = cropper.crop(page_img,
                                         line.baseline,
                                         line.heights,
                                         return_mapping=True)
    back_mapped = cropper.blend_in(page_img, cropped_line, mapping)

    plt.subplot(131)
    plt.imshow(cropped_line)
    plt.subplot(132)
    plt.imshow(back_mapped[410:420, 1200:1240])
    plt.subplot(133)
    # The first mapping channel is appended again before display
    # (presumably to get a 3-channel array imshow accepts - confirm).
    # The builtin ``int`` replaces the ``np.int`` alias, which was
    # deprecated in NumPy 1.20 and removed in 1.24.
    plt.imshow(
        np.concatenate((mapping, mapping[:, :, :1]),
                       axis=2).astype(int)[410:420, 1200:1240])
    plt.show()
Beispiel #4
0
def main():
    """Open every image in ``args.image_dir`` in the page editor.

    The layout for an image is loaded from ``<image basename>.xml`` in the
    output folder if it exists there, otherwise from the input page folder,
    otherwise a new layout sized from the image is created. After
    ``editor.annotate`` returns, the layout is written to the output folder
    when one is given, else to the input page folder.
    """
    args = parse_arguments()

    assert args.page_dir is not None or args.output_dir is not None, "Specify input page folder and/or output page folder"
    assert os.path.exists(args.image_dir), "Can't find input image folder"
    if args.page_dir is not None:
        assert os.path.exists(args.page_dir), "Can't find input page folder"
    if args.output_dir is not None and not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)

    filename_list = list(os.listdir(args.image_dir))
    editor = PageEditor(downsample=2,
                        line_thickness=2,
                        show_hint=True,
                        cursor=0)

    # Folders searched for an existing PAGE XML, in priority order.
    lookup_dirs = (args.output_dir, args.page_dir)

    while -1 < editor.cursor < len(filename_list):
        image_name = filename_list[editor.cursor]
        page_id = os.path.splitext(image_name)[0]
        page_filename = page_id + '.xml'
        cur_image = cv2.imread(os.path.join(args.image_dir, image_name))

        existing_xml = None
        for folder in lookup_dirs:
            if folder is None:
                continue
            candidate = os.path.join(folder, page_filename)
            if os.path.exists(candidate):
                existing_xml = candidate
                break

        if existing_xml is None:
            width, height = cur_image.shape[1], cur_image.shape[0]
            cur_layout = layout.PageLayout(id=page_id,
                                           page_size=(width, height))
        else:
            cur_layout = layout.PageLayout(file=existing_xml)

        # The layout object handed to the editor is the one that is saved.
        editor.annotate(cur_image, cur_layout)

        if args.output_dir is not None:
            target_dir = args.output_dir
        else:
            target_dir = args.page_dir
        cur_layout.to_pagexml(os.path.join(target_dir, page_filename))
                    f'Error: Failed to detect lines in adapted resolution. Downsample was {self.downsample}',
                    file=sys.stderr)
            height = np.median([h[0] + h[1] for h in heights_list])
            print(
                f"OPTIMAL DOWNAMPLING {img.shape[0]//self.downsample}:{img.shape[1]//self.downsample}",
                self.downsample, height, height / self.downsample)

        self.downsample = temp_downsample

        return baselines_list, heights_list, textlines_list


# Manual smoke test: runs the line detector on one image and collects the
# detected lines into a test region. Model and image paths are hard-coded.
if __name__ == '__main__':
    from pero_ocr.document_ocr import layout

    # Empty page layout and a placeholder region built from a zero-filled
    # (4, 2) array.
    test_layout = layout.PageLayout(id='test')
    test_p = layout.RegionLayout('r', np.zeros((4, 2)))

    engine_instance = EngineLineDetectorCNN(
        '/mnt/matylda1/hradis/PERO/layout_engines/baselines_prod/parsenet_multi_alldata_fix_exported'
    )
    image = cv2.imread(
        '/mnt/matylda1/ikodym/junk/refactor_test/8e41ecc2-57ed-412a-aa4f-d945efa7c624.jpg'
    )
    # detect_lines returns three parallel lists: baselines, heights, textlines.
    baselines, heights, textlines = engine_instance.detect_lines(image)

    # Wrap each detection in a TextLine and attach it to the test region.
    for baseline, height, textline in zip(baselines, heights, textlines):
        new_textline = layout.TextLine(baseline=baseline,
                                       heights=height,
                                       polygon=textline)
        test_p.lines.append(new_textline)
Beispiel #6
0
def main():
    """Run the interactive line repair / inpainting demo on one page.

    Loads the image ``args.input_img`` and the layout ``args.input_page``,
    builds the OCR, repair and cropping engines, and opens a "Page Editor"
    window driven by a ``LayoutClicker`` mouse callback.

    Keys:
        q: quit.
        r: edit the chosen line's transcription in a ``TextInputRepair``
           dialog; on 'repair' the line is cropped, repaired and blended
           back, on 'revert' the line is restored from the original image.
        e: with two selection points set, run OCR on the parts of the line
           crop before and after the selection, blank the selection, and on
           'inpaint' inpaint the line with the new transcription.

    The 'r' and 'e' actions are ignored until a line has been chosen, since
    both read the chosen line's attributes.
    """

    args = parseargs()

    page_img = cv2.imread(args.input_img)
    page_layout = layout.PageLayout(file=args.input_page)

    # Untouched copy of the page; the 'revert' action crops from it.
    page_img_orig = page_img.copy()

    print('\nLoading engines...')
    ocr_engine = ocr.EngineLineOCR(args.ocr_json, gpu_id=0)
    repair_engine = repair.EngineRepairCNN(args.repair_json)
    crop_engine = cropper.EngineLineCropper(line_height=repair_engine.height,
                                            poly=2,
                                            scale=1)

    cv2.namedWindow("Page Editor", cv2.WINDOW_NORMAL)
    cv2.resizeWindow('Page Editor', 1024, 1024)
    layout_clicker = LayoutClicker(page_layout)
    cv2.setMouseCallback("Page Editor", layout_clicker.callback)

    while True:
        # The overlay is redrawn from the current page on every iteration,
        # so the key handlers below only need to update page_img.
        page_img_rendered = page_img.copy()
        if layout_clicker.chosen_line:
            page_img_rendered = layout.draw_lines(
                page_img_rendered, [layout_clicker.chosen_line.polygon],
                color=(0, 255, 0),
                close=True)
        if layout_clicker.points:
            page_img_rendered = layout.draw_lines(page_img_rendered,
                                                  [layout_clicker.points],
                                                  color=(0, 0, 255))

        cv2.imshow('Page Editor', page_img_rendered)
        key = cv2.waitKey(1)

        if key == ord('q'):
            break

        elif key == ord('r') and layout_clicker.chosen_line:
            text_input = TextInputRepair(
                layout_clicker.chosen_line.transcription)
            action, new_transcription = text_input.run()
            layout_clicker.chosen_line.transcription = new_transcription

            if action == 'repair':
                line_crop, line_mapping, offset = crop_engine.crop(
                    page_img,
                    layout_clicker.chosen_line.baseline,
                    layout_clicker.chosen_line.heights,
                    return_mapping=True)
                line_crop = repair_engine.repair_line(
                    line_crop, layout_clicker.chosen_line.transcription)
                page_img = crop_engine.blend_in(page_img, line_crop,
                                                line_mapping, offset)

            elif action == 'revert':
                # Crop the line from the original page and blend it over the
                # current page.
                line_crop, line_mapping, offset = crop_engine.crop(
                    page_img_orig,
                    layout_clicker.chosen_line.baseline,
                    layout_clicker.chosen_line.heights,
                    return_mapping=True)
                page_img = crop_engine.blend_in(page_img, line_crop,
                                                line_mapping, offset)

        elif (key == ord('e') and len(layout_clicker.points) == 2
              and layout_clicker.chosen_line):
            line_crop, line_mapping, offset = crop_engine.crop(
                page_img,
                layout_clicker.chosen_line.baseline,
                layout_clicker.chosen_line.heights,
                return_mapping=True)

            # Look up both selection points in the middle row of the
            # coordinate map. Both indices are clipped to the map so that a
            # point outside the line cannot wrap around or raise IndexError
            # (originally only the second one was clipped).
            mid_row = line_mapping.shape[0] // 2
            last_index = line_mapping.shape[1] - 2
            first_index = np.clip(layout_clicker.points[0][1] - offset[1], 0,
                                  last_index)
            second_index = np.clip(layout_clicker.points[1][1] - offset[1], 0,
                                   last_index)
            y1 = np.round(line_mapping[mid_row, first_index,
                                       1]).astype(np.uint16)
            y2 = np.round(line_mapping[mid_row, second_index,
                                       1]).astype(np.uint16)
            if layout_clicker.points[1][1] - offset[1] > line_mapping.shape[
                    1] - 10:  # dirty fix noisy values at the end of coord map
                y2 = np.amax(line_mapping[:, :, 1].astype(np.uint16))
            print('{}/{}'.format(y2, line_crop.shape[1]))

            # OCR the parts of the crop on either side of the selection,
            # then blank the selected span.
            span_start = np.minimum(y1, y2)
            span_end = np.maximum(y1, y2)
            transcriptions, _, _ = ocr_engine.process_lines([
                line_crop[:, :span_start, :],
                line_crop[:, span_end:, :]
            ])
            line_crop[:, span_start:span_end, :] = 0
            text_input = TextInputInpaint(transcriptions[0], transcriptions[1])
            action, new_transcription = text_input.run()
            if action == 'inpaint':
                layout_clicker.chosen_line.transcription = new_transcription

                line_crop = repair_engine.inpaint_line(
                    line_crop, layout_clicker.chosen_line.transcription)
                page_img = crop_engine.blend_in(page_img, line_crop,
                                                line_mapping, offset)
                # NOTE(review): the result of this re-crop is never used;
                # kept in case crop() has side effects - confirm and remove.
                line_crop, line_mapping, offset = crop_engine.crop(
                    page_img,
                    layout_clicker.chosen_line.baseline,
                    layout_clicker.chosen_line.heights,
                    return_mapping=True)
                layout_clicker.points = []
Beispiel #7
0
def main():
    """Run the interactive page enhancement demo on one page.

    Loads ``args.input_img``, optionally builds a ``PageParser`` from
    ``args.parse_config``, and builds the repair engine from
    ``args.repair_json``. The page layout is read from ``args.input_page``
    if that file exists. Otherwise, when a parser is available, it is
    produced automatically and written to ``./output_pages/<image name>.xml``.

    Keys in the "Page Editor" window:
        q: quit.
        r: edit the chosen line's transcription; on 'repair' the line is
           enhanced in the page, on 'revert' it is restored from the
           original image.
        e: with two selection points set, run OCR on the parts of the line
           crop before and after the selection, blank the selection, and on
           'inpaint' inpaint the line with the new transcription.

    The 'r' and 'e' actions are ignored until a line has been chosen, and
    'e' additionally needs the page parser, whose OCR engine it uses.

    Raises:
        Exception: if the page XML is missing and no parser config was given.
    """

    args = parseargs()

    page_img = cv2.imread(args.input_img)

    # Untouched copy of the page; the automatic parser and the 'revert'
    # action work from it.
    page_img_orig = page_img.copy()

    print('\nLoading engines...')
    if args.parse_config is not None:
        config = configparser.ConfigParser()
        config.read(args.parse_config)
        # convert relative paths to absolute
        for section, key in [['LINE_PARSER', 'MODEL_PATH'],
                             ['OCR', 'OCR_JSON']]:
            if not os.path.isabs(config[section][key]):
                config[section][key] = os.path.realpath(
                    os.path.join(os.path.dirname(args.parse_config),
                                 config[section][key]))
        parser = page_parser.PageParser(config)
    else:
        parser = None

    enhancer = repair_engine.EngineRepairCNN(args.repair_json)

    print('Loading page layout...')
    if os.path.exists(args.input_page):
        page_layout = layout.PageLayout(file=args.input_page)
    elif parser is not None:
        print('Page xml file not found, running automatic parser...')
        page_layout = layout.PageLayout(id='id_placeholder',
                                        page_size=(page_img_orig.shape[0],
                                                   page_img_orig.shape[1]))
        page_layout = parser.process_page(page_img_orig, page_layout)
        os.makedirs('./output_pages', exist_ok=True)
        file_name = os.path.splitext(os.path.split(args.input_img)[1])[0]
        page_layout.to_pagexml(
            os.path.join('./output_pages', '{}.xml'.format(file_name)))
    else:
        raise Exception(
            'Page xml file not found and automatic page parser config not specified.'
        )

    print('\n\n Welcome to the page enhancement interactive demo.')
    print(
        'After choosing a line by double-clicking, you can enhance it by pressing r key. After that, you can change individual parts of the text by selecting the area and pressing e key.'
    )

    cv2.namedWindow("Page Editor", cv2.WINDOW_NORMAL)
    cv2.resizeWindow('Page Editor', 1024, 1024)
    layout_clicker = LayoutClicker(page_layout)
    cv2.setMouseCallback("Page Editor", layout_clicker.callback)

    while True:
        # The overlay is redrawn from the current page on every iteration,
        # so the key handlers below only need to update page_img.
        page_img_rendered = page_img.copy()
        if layout_clicker.chosen_line:
            page_img_rendered = layout.draw_lines(
                page_img_rendered, [layout_clicker.chosen_line.polygon],
                color=(0, 255, 0),
                close=True)
        if layout_clicker.points:
            page_img_rendered = layout.draw_lines(page_img_rendered,
                                                  [layout_clicker.points],
                                                  color=(0, 0, 255))

        cv2.imshow('Page Editor', page_img_rendered)
        key = cv2.waitKey(1)

        if key == ord('q'):
            break

        elif key == ord('r') and layout_clicker.chosen_line:
            text_input = TextInputRepair(
                layout_clicker.chosen_line.transcription)
            action, new_transcription = text_input.run()
            layout_clicker.chosen_line.transcription = new_transcription

            if action == 'repair':
                page_img = enhancer.enhance_line_in_page(
                    page_img, layout_clicker.chosen_line)

            elif action == 'revert':
                # Crop the line from the original page and blend it over the
                # current page.
                line_crop, line_mapping, offset = enhancer.cropper.crop(
                    page_img_orig,
                    layout_clicker.chosen_line.baseline,
                    layout_clicker.chosen_line.heights,
                    return_mapping=True)
                page_img = enhancer.cropper.blend_in(page_img, line_crop,
                                                     line_mapping, offset)

        elif (key == ord('e') and len(layout_clicker.points) == 2
              and layout_clicker.chosen_line):
            if parser is None:
                # The OCR engine used below lives on the page parser, which
                # only exists when a parse config was supplied.
                print(
                    'Inpainting needs the OCR engine; specify the page parser config to enable it.'
                )
                continue

            line_crop, line_mapping, offset = enhancer.cropper.crop(
                page_img,
                layout_clicker.chosen_line.baseline,
                layout_clicker.chosen_line.heights,
                return_mapping=True)

            # Look up both selection points in the middle row of the
            # coordinate map. Both indices are clipped to the map so that a
            # point outside the line cannot wrap around or raise IndexError
            # (originally only the second one was clipped).
            mid_row = line_mapping.shape[0] // 2
            last_index = line_mapping.shape[1] - 2
            first_index = np.clip(layout_clicker.points[0][0] - offset[1], 0,
                                  last_index)
            second_index = np.clip(layout_clicker.points[1][0] - offset[1], 0,
                                   last_index)
            y1 = np.round(line_mapping[mid_row, first_index,
                                       0]).astype(np.uint16)
            y2 = np.round(line_mapping[mid_row, second_index,
                                       0]).astype(np.uint16)

            # OCR the parts of the crop on either side of the selection,
            # then blank the selected span.
            span_start = np.minimum(y1, y2)
            span_end = np.maximum(y1, y2)
            transcriptions, _ = parser.ocr.ocr_engine.process_lines([
                line_crop[:, :span_start, :],
                line_crop[:, span_end:, :]
            ])
            line_crop[:, span_start:span_end, :] = 0
            text_input = TextInputInpaint(transcriptions[0], transcriptions[1])
            action, new_transcription = text_input.run()
            if action == 'inpaint':
                layout_clicker.chosen_line.transcription = new_transcription

                line_crop = enhancer.inpaint_line(
                    line_crop, layout_clicker.chosen_line.transcription)
                page_img = enhancer.cropper.blend_in(page_img, line_crop,
                                                     line_mapping, offset)
                # NOTE(review): the result of this re-crop is never used;
                # kept in case crop() has side effects - confirm and remove.
                line_crop, line_mapping, offset = enhancer.cropper.crop(
                    page_img,
                    layout_clicker.chosen_line.baseline,
                    layout_clicker.chosen_line.heights,
                    return_mapping=True)
                layout_clicker.points = []