def test_discover_task_metadata(monkeypatch, tmpdir):
    """Check the task metadata produced for a single discovered image.

    The workflow is built with a placeholder tessdata path while
    ``os.path.exists`` is patched to always report True. One image path is
    then fed in as the result of ``FindImagesTask`` and exactly one task
    description is expected back.
    """
    global_settings = {"tessdata": "/some/path"}
    # Patch before constructing the workflow so the placeholder path is
    # reported as existing.
    monkeypatch.setattr(os.path, "exists", lambda args: True)
    workflow = workflow_ocr.OCRWorkflow(global_settings=global_settings)

    tessdata_dir = tmpdir / "tessdata"
    image_dir = tmpdir / "images"
    tessdata_dir.ensure_dir()

    source_image = (image_dir / "dummy.jp2").strpath
    job_options = {
        "tessdata": tessdata_dir.strpath,
        "Image File Type": "JPEG 2000",
        "Language": "English",
        "Path": image_dir.strpath,
    }
    found_images = [
        Result(source=workflow_ocr.FindImagesTask, data=[source_image]),
    ]

    new_tasks = workflow.discover_task_metadata(
        found_images,
        None,
        **job_options
    )

    expected_task = {
        "source_file_path": source_image,
        "destination_path": image_dir.strpath,
        "output_file_name": "dummy.txt",
        "lang_code": "eng",
    }
    assert len(new_tasks) == 1
    assert new_tasks[0] == expected_task
def test_discover_task_metadata_raises_with_no_tessdata(monkeypatch):
    """Check that a missing tessdata option is rejected.

    The workflow is constructed while ``os.path.exists`` reports True.
    The patch is then flipped to report False and
    ``discover_task_metadata`` is called with ``tessdata`` set to None,
    which is expected to raise ``SpeedwagonException``.
    """
    global_settings = {"tessdata": "/some/path"}
    monkeypatch.setattr(os.path, "exists", lambda args: True)
    workflow = workflow_ocr.OCRWorkflow(global_settings=global_settings)

    # From here on, every path is reported as missing.
    monkeypatch.setattr(os.path, "exists", lambda args: False)
    job_options = {"tessdata": None}

    with pytest.raises(SpeedwagonException):
        workflow.discover_task_metadata([], None, **job_options)
def test_no_config():
    """Building ``OCRWorkflow`` with no arguments must raise
    ``MissingConfiguration``."""
    build_without_settings = workflow_ocr.OCRWorkflow
    with pytest.raises(MissingConfiguration):
        build_without_settings()
def workflow(self, monkeypatch):
    """Return an ``OCRWorkflow`` built from a placeholder tessdata path.

    ``workflow_ocr.os.path.exists`` is patched so that only the configured
    tessdata path is reported as existing.
    """
    global_settings = {"tessdata": os.path.join("some", "path")}

    def _only_tessdata_exists(path):
        # Look the value up at call time, exactly as the settings dict
        # holds it at that moment.
        return path == global_settings["tessdata"]

    monkeypatch.setattr(
        workflow_ocr.os.path,
        "exists",
        _only_tessdata_exists,
    )
    return workflow_ocr.OCRWorkflow(global_settings)