Ejemplo n.º 1
0
    def test_validate_sequence(self):
        """
        Check that task sequences with unresolvable input file groups fail.

        A workspace is created from the ``kant_aufklaerung_1784`` METS asset
        inside a temporary directory. Two sequences are then validated, and
        each is expected to raise an exception whose message matches the
        given pattern.
        """
        with TemporaryDirectory() as tempdir:
            mets_url = assets.path_to('kant_aufklaerung_1784/data/mets.xml')
            workspace = Resolver().workspace_from_url(mets_url, dst_dir=tempdir)

            # Parameter file passed to every step via ``-p``.
            params_path = Path(tempdir, 'params.json')
            params_path.write_text('{"param1": true}')

            # Case 1: the second step names FOO as its input group.
            chained_commands = [
                '%s -I OCR-D-IMG -O OUT1 -p %s' %
                (SAMPLE_NAME_REQUIRED_PARAM, params_path),
                '%s -I FOO -O OUT2 -p %s' %
                (SAMPLE_NAME_REQUIRED_PARAM, params_path),
            ]
            foo_not_available = (
                "Input file group not contained in METS or produced by previous steps: FOO'"
            )
            with self.assertRaisesRegex(Exception, foo_not_available):
                # Parsing stays inside the context manager, as any exception
                # raised while parsing is also subject to the regex check.
                validate_tasks(
                    [ProcessorTask.parse(command) for command in chained_commands],
                    workspace)

            # Case 2: the only step names IN as its input group.
            single_command = [
                '%s -I IN -O OUT1 -p %s' %
                (SAMPLE_NAME_REQUIRED_PARAM, params_path),
            ]
            in_not_in_mets = "Input fileGrp.@USE='IN'. not in METS!"
            with self.assertRaisesRegex(Exception, in_not_in_mets):
                validate_tasks(
                    [ProcessorTask.parse(command) for command in single_command],
                    workspace)
Ejemplo n.º 2
0
 def test_overwrite(self):
     """
     Check that an existing output file group is only accepted with overwrite.

     A file is added to ``OCR-D-SEG-WORD`` before validation. The same
     four-step sequence is then validated twice: without ``overwrite`` it
     must raise, with ``overwrite=True`` it must pass.
     """
     # Four chained steps; the third one writes to OCR-D-SEG-WORD.
     commands = [
         "sample-processor -I OCR-D-IMG       -O OCR-D-SEG-BLOCK",
         "sample-processor -I OCR-D-SEG-BLOCK -O OCR-D-SEG-LINE",
         "sample-processor -I OCR-D-SEG-LINE  -O OCR-D-SEG-WORD",
         "sample-processor -I OCR-D-SEG-WORD  -O OCR-D-OCR-TESS",
     ]
     output_exists = r"Invalid task sequence input/output file groups: \[\"Output fileGrp\[@USE='OCR-D-SEG-WORD'\] already in METS!\"\]"

     with TemporaryDirectory() as tempdir:
         mets_url = assets.path_to('kant_aufklaerung_1784/data/mets.xml')
         workspace = Resolver().workspace_from_url(mets_url, dst_dir=tempdir)

         # Register a file in OCR-D-SEG-WORD so that step 3's output
         # group is already present in the METS.
         workspace.mets.add_file(
             'OCR-D-SEG-WORD',
             url='foo/bar',
             ID='foo',
             pageId='page1',
             mimetype='image/tif')

         # Without overwrite, validation is expected to fail at step 3.
         with self.assertRaisesRegex(Exception, output_exists):
             validate_tasks(
                 [ProcessorTask.parse(command) for command in commands],
                 workspace)

         # With overwrite enabled, the same sequence is expected to pass.
         validate_tasks(
             [ProcessorTask.parse(command) for command in commands],
             workspace,
             overwrite=True)
Ejemplo n.º 3
0
 def test_422(self):
     """
     Regression test for OCR-D/core#422.

     Validates a four-step sequence in which every step reads the file
     group written by the step before it. The test passes when
     ``validate_tasks`` raises nothing.
     """
     commands = [
         "sample-processor -I OCR-D-IMG       -O OCR-D-SEG-BLOCK",
         "sample-processor -I OCR-D-SEG-BLOCK -O OCR-D-SEG-LINE",
         "sample-processor -I OCR-D-SEG-LINE  -O OCR-D-SEG-WORD",
         "sample-processor -I OCR-D-SEG-WORD  -O OCR-D-OCR-TESS",
     ]
     with TemporaryDirectory() as tempdir:
         mets_url = assets.path_to('kant_aufklaerung_1784/data/mets.xml')
         workspace = Resolver().workspace_from_url(mets_url, dst_dir=tempdir)
         parsed_tasks = [ProcessorTask.parse(command) for command in commands]
         validate_tasks(parsed_tasks, workspace)
Ejemplo n.º 4
0
def validate_process(tasks, workspace):
    '''
    Validate a sequence of tasks passable to 'ocrd process'
    '''
    # NOTE(review): docstring kept verbatim in case it is surfaced as CLI
    # help text by a decorator outside this view -- confirm before editing.

    # Every task string is parsed up front, before any validation result
    # is reported.
    parsed_tasks = [ProcessorTask.parse(task) for task in tasks]

    if not workspace:
        # No workspace given: validate each task on its own and report
        # the results one by one.
        for parsed in parsed_tasks:
            _inform_of_result(parsed.validate())
        return

    # Workspace given: validate the whole sequence against it and report
    # the single combined result.
    target = Workspace(Resolver(), directory=workspace)
    _inform_of_result(validate_tasks(parsed_tasks, target))