Ejemplo n.º 1
0
def test_generate_task_creates_a_file(monkeypatch, tmpdir):
    """Run GenerateOCRFileTask with a patched reader and check its output.

    ``reader.Reader.read`` is replaced with a stub returning a fixed string
    and ``tesseractwrap.Reader`` with a ``Mock``. After ``work()`` runs, the
    output text file must exist and contain exactly that string.
    """
    expected_text = "Spam bacon eggs"
    image_path = tmpdir.join("dummy.jp2")
    text_path = tmpdir.join("dummy.txt")

    # Create a tessdata directory holding two empty trained-data files.
    tessdata = tmpdir.join("tessdata")
    tessdata.ensure_dir()
    for trained_data in ("eng.traineddata", "osd.traineddata"):
        tessdata.join(trained_data).ensure()

    def fake_read(*_args, **_kwargs):
        return expected_text

    reader_double = Mock()

    # The patches only live for the duration of this context.
    with monkeypatch.context() as ctx:
        ctx.setattr(reader.Reader, "read", fake_read)
        ctx.setattr(tesseractwrap, "Reader", reader_double)
        ocr_task = workflow_ocr.GenerateOCRFileTask(
            source_image=image_path.strpath,
            out_text_file=text_path.strpath,
            tesseract_path=tessdata.strpath,
        )
        ocr_task.log = MagicMock()

        ocr_task.work()

    assert os.path.exists(text_path.strpath)
    with open(text_path.strpath, "r") as written:
        contents = written.read()
    assert contents == expected_text
Ejemplo n.º 2
0
 def test_work(self, monkeypatch):
     """Check that ``work()`` returns True and calls the patched ``open``.

     ``GenerateOCRFileTask.set_tess_path`` and ``GenerateOCRFileTask.engine``
     are replaced with mocks for the duration of this test only, and the
     ``open`` looked up by ``speedwagon.workflows.workflow_ocr`` is swapped
     for a ``mock_open`` so no real file is touched.
     """
     source_image = os.path.join("12345", "sample.jp2")
     out_text_file = os.path.join("12345", "sample.txt")

     # Patch via the monkeypatch fixture instead of assigning to the class
     # directly: a bare assignment would leave the mocks on the class and
     # leak into every test that runs afterwards.
     # raising=False mirrors plain assignment, which never fails when the
     # attribute is not already present.
     monkeypatch.setattr(
         workflow_ocr.GenerateOCRFileTask, "set_tess_path", Mock(),
         raising=False)
     monkeypatch.setattr(
         workflow_ocr.GenerateOCRFileTask, "engine", Mock(),
         raising=False)

     task = workflow_ocr.GenerateOCRFileTask(
         source_image=source_image,
         out_text_file=out_text_file,
         lang="eng",
         tesseract_path="tesspath")

     m = mock_open()
     with patch('speedwagon.workflows.workflow_ocr.open', m):
         assert task.work() is True
     assert m.called is True
Ejemplo n.º 3
0
    def test_read_image(self, monkeypatch):
        """Check that ``read_image`` calls ``read`` on the engine's reader.

        ``GenerateOCRFileTask.set_tess_path`` and ``.engine`` are replaced
        with mocks for this test only. The mocked engine's ``get_reader``
        hands back a mock reader, whose ``read`` must have been called once
        ``read_image`` returns.
        """
        source_image = os.path.join("12345", "sample.jp2")
        out_text_file = os.path.join("12345", "sample.txt")
        lang = "eng"

        # Named mock_reader so it cannot be confused with (or shadow) any
        # module-level ``reader`` name.
        mock_reader = Mock()
        engine = Mock()
        engine.get_reader = Mock(return_value=mock_reader)

        # Patch via the monkeypatch fixture instead of assigning to the
        # class directly: a bare assignment would leave the mocks on the
        # class and leak into every test that runs afterwards.
        # raising=False mirrors plain assignment, which never fails when
        # the attribute is not already present.
        monkeypatch.setattr(
            workflow_ocr.GenerateOCRFileTask, "set_tess_path", Mock(),
            raising=False)
        monkeypatch.setattr(
            workflow_ocr.GenerateOCRFileTask, "engine", engine,
            raising=False)

        task = workflow_ocr.GenerateOCRFileTask(
            source_image=source_image,
            out_text_file=out_text_file,
            lang=lang,
            tesseract_path="tesspath")
        task.read_image(source_image, lang)
        assert mock_reader.read.called is True
Ejemplo n.º 4
0
    def test_read_image(self, monkeypatch):
        """Check that ``read_image`` calls ``read`` on the engine's reader.

        ``GenerateOCRFileTask.set_tess_path`` and ``.engine`` are replaced
        with mocks for this test only. The mocked engine's ``get_reader``
        hands back a mock reader, whose ``read`` must have been called once
        ``read_image`` returns.
        """
        source_image = os.path.join("12345", "sample.jp2")
        out_text_file = os.path.join("12345", "sample.txt")
        lang = "eng"

        # Named mock_reader so it cannot be confused with (or shadow) any
        # module-level ``reader`` name.
        mock_reader = Mock()
        engine = Mock()
        engine.get_reader = Mock(return_value=mock_reader)

        # Patch via the monkeypatch fixture instead of assigning to the
        # class directly: a bare assignment would leave the mocks on the
        # class and leak into every test that runs afterwards.
        # raising=False mirrors plain assignment, which never fails when
        # the attribute is not already present.
        monkeypatch.setattr(
            workflow_ocr.GenerateOCRFileTask, "set_tess_path", Mock(),
            raising=False)
        monkeypatch.setattr(
            workflow_ocr.GenerateOCRFileTask, "engine", engine,
            raising=False)

        task = workflow_ocr.GenerateOCRFileTask(
            source_image=source_image,
            out_text_file=out_text_file,
            lang=lang,
            tesseract_path="tesspath")
        task.read_image(source_image, lang)
        assert mock_reader.read.called is True


@pytest.mark.parametrize(
    "task",
    [
        workflow_ocr.GenerateOCRFileTask(
            source_image="source_image",
            out_text_file="out_text_file",
            lang="lang",
            tesseract_path="tesseract_path",
        ),
        workflow_ocr.FindImagesTask(root="root", file_extension=".tif"),
    ],
)
def test_tasks_have_description(task):
    """Each parametrized task must report a description that is not None."""
    description = task.task_description()
    assert description is not None