Python CalamariRecognizeの例

プログラミング言語: Python

名前空間/パッケージ名: ocrd_calamari

クラス/型: CalamariRecognize

hotexamples.comのコード掲載数: 5

Python CalamariRecognize - 5件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたPythonのocrd_calamari.CalamariRecognizeの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

よく使われるメソッド

表示 / 非表示

CalamariRecognize (5)

よく使われるメソッド

CalamariRecognize (5)

コード例 #1

ファイルを表示

ファイル: test_recognize.py プロジェクト: mikegerber/ocrd_calamari

def test_word_segmentation(workspace):
    """Check that word-level recognition yields words consistent with their line.

    Runs ``CalamariRecognize`` on the ``OCR-D-GT-SEG-LINE`` file group with
    ``textequiv_level`` set to ``"word"``, saves the METS, and then inspects
    the first PAGE file written to ``OCR-D-OCR-CALAMARI``:

    * a ``TextLine`` whose text contains "December" must exist,
    * that line must hold at least two ``Word`` elements whose texts, joined
      by single spaces, equal the line text,
    * no ``Glyph`` elements may be present anywhere in the document.

    Args:
        workspace: Workspace handed to the processor; its ``directory`` and
            ``save_mets()`` are used here (presumably a pytest fixture
            defined elsewhere -- confirm).
    """
    CalamariRecognize(
        workspace,
        input_file_grp="OCR-D-GT-SEG-LINE",
        output_file_grp="OCR-D-OCR-CALAMARI",
        parameter={
            "checkpoint": CHECKPOINT,
            "textequiv_level": "word",   # Note that we're going down to word level here
        },
    ).process()
    workspace.save_mets()

    page1 = os.path.join(workspace.directory, "OCR-D-OCR-CALAMARI/OCR-D-OCR-CALAMARI_0001.xml")
    assert os.path.exists(page1)
    tree = etree.parse(page1)

    # The result should contain a TextLine that contains the text "December".
    # Assert on the match list instead of indexing first: a missing line then
    # fails with a readable assertion rather than a bare IndexError, and we
    # avoid truth-testing an element (which reflects its child count, not
    # whether a match was found).
    matching_lines = tree.xpath(
        ".//pc:TextLine[pc:TextEquiv/pc:Unicode[contains(text(),'December')]]",
        namespaces=NSMAP,
    )
    assert matching_lines, "no TextLine containing 'December' in " + page1
    line = matching_lines[0]

    # The textline should a. contain multiple words and b. these should concatenate fine to produce the same line text
    words = line.xpath(".//pc:Word", namespaces=NSMAP)
    assert len(words) >= 2
    words_text = " ".join(word.xpath("pc:TextEquiv/pc:Unicode", namespaces=NSMAP)[0].text for word in words)
    line_text = line.xpath("pc:TextEquiv/pc:Unicode", namespaces=NSMAP)[0].text
    assert words_text == line_text

    # For extra measure, check that we're not seeing any glyphs, as we asked for textequiv_level == "word"
    glyphs = tree.xpath("//pc:Glyph", namespaces=NSMAP)
    assert not glyphs

コード例 #2

ファイルを表示

def test_recognize(workspace):
    """Run recognition from a checkpoint directory and check the first page.

    Processes the ``OCR-D-GT-SEG-WORD-GLYPH`` file group into
    ``OCR-D-OCR-CALAMARI``, saves the METS, and verifies that the first
    output PAGE file exists and contains the word "verſchuldeten".

    Args:
        workspace: Workspace handed to the processor; its ``directory`` and
            ``save_mets()`` are used here (presumably a pytest fixture
            defined elsewhere -- confirm).
    """
    processor = CalamariRecognize(
        workspace,
        input_file_grp="OCR-D-GT-SEG-WORD-GLYPH",
        output_file_grp="OCR-D-OCR-CALAMARI",
        parameter={"checkpoint_dir": CHECKPOINT_DIR},
    )
    processor.process()
    workspace.save_mets()

    first_page = os.path.join(
        workspace.directory,
        "OCR-D-OCR-CALAMARI/OCR-D-OCR-CALAMARI_0001.xml",
    )
    assert os.path.exists(first_page)
    assertFileContains(first_page, "verſchuldeten")

コード例 #3

ファイルを表示

def test_recognize_should_warn_if_given_rgb_image_and_single_channel_model(
        workspace, caplog):
    """Expect many "Using raw image" log messages during recognition.

    Captures log records at WARNING level and above, runs the processor on
    ``OCR-D-GT-SEG-WORD-GLYPH`` (output group ``OCR-D-OCR-CALAMARI-BROKEN``),
    and requires more than ten captured messages containing
    "Using raw image".

    Args:
        workspace: Workspace handed to the processor (presumably a pytest
            fixture defined elsewhere -- confirm).
        caplog: Log-capturing object providing ``set_level`` and
            ``record_tuples`` (presumably pytest's ``caplog`` fixture).
    """
    caplog.set_level(logging.WARNING)

    processor = CalamariRecognize(
        workspace,
        input_file_grp="OCR-D-GT-SEG-WORD-GLYPH",
        output_file_grp="OCR-D-OCR-CALAMARI-BROKEN",
        parameter={"checkpoint_dir": CHECKPOINT_DIR},
    )
    processor.process()

    # The message text is the third field of each captured record tuple.
    raw_image_messages = [
        message
        for _logger_name, _level, message in caplog.record_tuples
        if "Using raw image" in message
    ]
    assert len(raw_image_messages) > 10  # For every line!

コード例 #4

ファイルを表示

ファイル: test_recognize.py プロジェクト: mikegerber/ocrd_calamari

def test_recognize(workspace):
    """Run line-level recognition and check the first page for a known word.

    Processes the ``OCR-D-GT-SEG-LINE`` file group into
    ``OCR-D-OCR-CALAMARI`` using the ``CHECKPOINT`` model, saves the METS,
    and verifies that the first output PAGE file exists and that its UTF-8
    content includes "verſchuldeten".

    Args:
        workspace: Workspace handed to the processor; its ``directory`` and
            ``save_mets()`` are used here (presumably a pytest fixture
            defined elsewhere -- confirm).
    """
    processor = CalamariRecognize(
        workspace,
        input_file_grp="OCR-D-GT-SEG-LINE",
        output_file_grp="OCR-D-OCR-CALAMARI",
        parameter={"checkpoint": CHECKPOINT},
    )
    processor.process()
    workspace.save_mets()

    first_page = os.path.join(
        workspace.directory,
        "OCR-D-OCR-CALAMARI/OCR-D-OCR-CALAMARI_0001.xml",
    )
    assert os.path.exists(first_page)

    with open(first_page, "r", encoding="utf-8") as page_file:
        page_xml = page_file.read()
    assert "verſchuldeten" in page_xml

コード例 #5

ファイルを表示

ファイル: test_recognize.py プロジェクト: mikegerber/ocrd_calamari

def test_glyphs(workspace):
    """Check that glyph-level recognition produces many Glyph elements.

    Runs ``CalamariRecognize`` on the ``OCR-D-GT-SEG-LINE`` file group with
    ``textequiv_level`` set to ``"glyph"``, saves the METS, and requires the
    first output PAGE file to contain at least 100 ``Glyph`` elements.

    Args:
        workspace: Workspace handed to the processor; its ``directory`` and
            ``save_mets()`` are used here (presumably a pytest fixture
            defined elsewhere -- confirm).
    """
    recognizer_params = {
        "checkpoint": CHECKPOINT,
        # Note that we're going down to glyph level here
        "textequiv_level": "glyph",
    }
    processor = CalamariRecognize(
        workspace,
        input_file_grp="OCR-D-GT-SEG-LINE",
        output_file_grp="OCR-D-OCR-CALAMARI",
        parameter=recognizer_params,
    )
    processor.process()
    workspace.save_mets()

    first_page = os.path.join(
        workspace.directory,
        "OCR-D-OCR-CALAMARI/OCR-D-OCR-CALAMARI_0001.xml",
    )
    assert os.path.exists(first_page)

    # The result should contain a lot of glyphs
    page_tree = etree.parse(first_page)
    glyph_count = len(page_tree.xpath("//pc:Glyph", namespaces=NSMAP))
    assert glyph_count >= 100