def runTest(self):
    """Run ``TesserocrSegmentRegion`` once and save the workspace METS.

    The workspace is resolved from ``METS_HEROLD_SMALL`` with
    ``directory=WORKSPACE_DIR``. The processor is constructed with input
    file group ``INPUT`` and output file group ``OCR-D-SEG-BLOCK``.
    """
    workspace = Resolver().workspace_from_url(
        METS_HEROLD_SMALL, directory=WORKSPACE_DIR)
    region_segmenter = TesserocrSegmentRegion(
        workspace,
        input_file_grp="INPUT",
        output_file_grp="OCR-D-SEG-BLOCK")
    region_segmenter.process()
    workspace.save_mets()
Beispiel #2
0
 def runTest(self):
     """Chain region, line and word segmentation, then save the METS once.

     The workspace is resolved from ``METS_HEROLD_SMALL`` with
     ``dst_dir=WORKSPACE_DIR``. Each stage is constructed with the output
     file group of the previous stage as its input file group.
     """
     workspace = Resolver().workspace_from_url(METS_HEROLD_SMALL,
                                               dst_dir=WORKSPACE_DIR)
     # (processor class, input file group, output file group), in run order.
     stages = (
         (TesserocrSegmentRegion, "OCR-D-IMG", "OCR-D-SEG-BLOCK"),
         (TesserocrSegmentLine, "OCR-D-SEG-BLOCK", "OCR-D-SEG-LINE"),
         (TesserocrSegmentWord, "OCR-D-SEG-LINE", "OCR-D-SEG-WORD"),
     )
     for processor_cls, source_grp, target_grp in stages:
         processor_cls(workspace,
                       input_file_grp=source_grp,
                       output_file_grp=target_grp).process()
     # The METS is written only once, after all stages have run.
     workspace.save_mets()
 def runTest(self):
     """Segment the binarized Kant fixture into regions, lines and words.

     The workspace is resolved from the asset
     ``kant_aufklaerung_1784-binarized/mets.xml`` with
     ``directory=WORKSPACE_DIR``; the METS is saved after the last stage.
     """
     resolver = Resolver()
     # Alternative fixture kept for reference:
     # assets.url_of('SBB0000F29300010000/mets_one_file.xml')
     mets_url = assets.url_of('kant_aufklaerung_1784-binarized/mets.xml')
     workspace = resolver.workspace_from_url(mets_url,
                                             directory=WORKSPACE_DIR)

     region_stage = TesserocrSegmentRegion(
         workspace,
         input_file_grp="OCR-D-IMG",
         output_file_grp="OCR-D-SEG-BLOCK")
     region_stage.process()

     line_stage = TesserocrSegmentLine(
         workspace,
         input_file_grp="OCR-D-SEG-BLOCK",
         output_file_grp="OCR-D-SEG-LINE")
     line_stage.process()

     word_stage = TesserocrSegmentWord(
         workspace,
         input_file_grp="OCR-D-SEG-LINE",
         output_file_grp="OCR-D-SEG-WORD")
     word_stage.process()

     workspace.save_mets()
Beispiel #4
0
 def runTest(self):
     """Run region segmentation, line segmentation and recognition in turn.

     Uses a ``Resolver`` created with ``cache_enabled=True`` and the asset
     ``kant_aufklaerung_1784-page-block-line-word/mets.xml``. The METS is
     saved after every stage. Recognition is invoked with
     ``textequiv_level`` set to ``'word'``.
     """
     resolver = Resolver(cache_enabled=True)
     # Alternative fixture kept for reference:
     # assets.url_of('SBB0000F29300010000/mets_one_file.xml')
     mets_url = assets.url_of(
         'kant_aufklaerung_1784-page-block-line-word/mets.xml')
     workspace = resolver.workspace_from_url(mets_url,
                                             directory=WORKSPACE_DIR)

     region_stage = TesserocrSegmentRegion(
         workspace,
         input_file_grp="OCR-D-IMG",
         output_file_grp="OCR-D-SEG-BLOCK")
     region_stage.process()
     workspace.save_mets()

     line_stage = TesserocrSegmentLine(
         workspace,
         input_file_grp="OCR-D-SEG-BLOCK",
         output_file_grp="OCR-D-SEG-LINE")
     line_stage.process()
     workspace.save_mets()

     recognize_params = {'textequiv_level': 'word'}
     recognize_stage = TesserocrRecognize(
         workspace,
         input_file_grp="OCR-D-SEG-LINE",
         output_file_grp="OCR-D-OCR-TESS",
         parameter=recognize_params)
     recognize_stage.process()
     workspace.save_mets()
Beispiel #5
0
    def runTest(self):
        """Run a five-stage segmentation/recognition pipeline on one workspace.

        The workspace is resolved from ``METS_HEROLD_SMALL`` with
        ``dst_dir=WORKSPACE_DIR``. Stages run in the listed order and the
        METS is saved after each one.
        """
        workspace = Resolver().workspace_from_url(METS_HEROLD_SMALL,
                                                  dst_dir=WORKSPACE_DIR)

        # NOTE: per the original remarks, adding 'model': 'Fraktur' to the
        # recognition parameters would require the additional dependency
        # tesseract-ocr-script-frak.
        pipeline = (
            (TesserocrSegmentRegion, {
                'input_file_grp': "OCR-D-IMG",
                'output_file_grp': "OCR-D-SEG-BLOCK",
            }),
            (TesserocrSegmentLine, {
                'input_file_grp': "OCR-D-SEG-BLOCK",
                'output_file_grp': "OCR-D-SEG-LINE",
            }),
            (TesserocrRecognize, {
                'input_file_grp': "OCR-D-SEG-LINE",
                'output_file_grp': "OCR-D-OCR-TESS",
                'parameter': {'textequiv_level': 'line'},
            }),
            (TesserocrSegmentWord, {
                'input_file_grp': "OCR-D-SEG-LINE",
                'output_file_grp': "OCR-D-SEG-WORD",
            }),
            (TesserocrRecognize, {
                'input_file_grp': "OCR-D-SEG-WORD",
                'output_file_grp': "OCR-D-OCR-TESS-W2C",
                'parameter': {'textequiv_level': 'glyph'},
            }),
        )

        for processor_cls, options in pipeline:
            processor_cls(workspace, **options).process()
            # Persist the METS after every stage, not just at the end.
            workspace.save_mets()
Beispiel #6
0
 def runTest(self):
     """Run the five-stage pipeline on the Kant page-block-line-word fixture.

     The workspace is resolved from the asset
     ``kant_aufklaerung_1784-page-block-line-word/mets.xml`` with
     ``directory=WORKSPACE_DIR``. The METS is saved after each stage.
     """
     resolver = Resolver()
     # Alternative fixture kept for reference:
     # assets.url_of('SBB0000F29300010000/mets_one_file.xml')
     mets_url = assets.url_of(
         'kant_aufklaerung_1784-page-block-line-word/mets.xml')
     workspace = resolver.workspace_from_url(mets_url,
                                             directory=WORKSPACE_DIR)

     def run_stage(processor_cls, **options):
         # Build the processor on the shared workspace, run it, save the METS.
         processor_cls(workspace, **options).process()
         workspace.save_mets()

     run_stage(TesserocrSegmentRegion,
               input_file_grp="OCR-D-IMG",
               output_file_grp="OCR-D-SEG-BLOCK")
     run_stage(TesserocrSegmentLine,
               input_file_grp="OCR-D-SEG-BLOCK",
               output_file_grp="OCR-D-SEG-LINE")
     # NOTE: per the original remarks, adding 'model': 'Fraktur' to either
     # recognition call would require the additional dependency
     # tesseract-ocr-script-frak.
     run_stage(TesserocrRecognize,
               input_file_grp="OCR-D-SEG-LINE",
               output_file_grp="OCR-D-OCR-TESS",
               parameter={'textequiv_level': 'line'})
     run_stage(TesserocrSegmentWord,
               input_file_grp="OCR-D-SEG-LINE",
               output_file_grp="OCR-D-SEG-WORD")
     run_stage(TesserocrRecognize,
               input_file_grp="OCR-D-SEG-WORD",
               output_file_grp="OCR-D-OCR-TESS-W2C",
               parameter={'textequiv_level': 'glyph'})
Beispiel #7
0
 def runTest(self):
     """Run ``TesserocrSegmentRegion`` with default arguments and save the METS.

     Uses a ``Resolver`` created with ``cache_enabled=True`` and the asset
     ``SBB0000F29300010000/mets_one_file.xml``. No target directory and no
     file groups are passed explicitly.
     """
     mets_url = assets.url_of('SBB0000F29300010000/mets_one_file.xml')
     workspace = Resolver(cache_enabled=True).workspace_from_url(mets_url)
     region_segmenter = TesserocrSegmentRegion(workspace)
     region_segmenter.process()
     workspace.save_mets()