Example #1
0
 def generate_hocr(input_file, output_hocr, output_text, options):
     """Delegate hOCR generation to ``tesseract.generate_hocr``.

     The three path arguments are forwarded unchanged; every other keyword
     passed to the backend is read from an attribute of ``options``.

     Args:
         input_file: Forwarded as ``input_file``.
         output_hocr: Forwarded as ``output_hocr``.
         output_text: Forwarded as ``output_text``.
         options: Object providing ``languages``, ``tesseract_oem``,
             ``tesseract_config``, ``tesseract_timeout``,
             ``tesseract_pagesegmode``, ``user_words`` and ``user_patterns``.

     Returns:
         None. The backend's return value is not propagated.
     """
     run_backend = tesseract.generate_hocr

     # Backend keyword name -> value taken from the options object.
     option_keywords = {
         'languages': options.languages,
         'engine_mode': options.tesseract_oem,
         'tessconfig': options.tesseract_config,
         'timeout': options.tesseract_timeout,
         'pagesegmode': options.tesseract_pagesegmode,
         'user_words': options.user_words,
         'user_patterns': options.user_patterns,
     }

     run_backend(
         input_file=input_file,
         output_hocr=output_hocr,
         output_text=output_text,
         **option_keywords,
     )
Example #2
0
def test_image_too_large_hocr(monkeypatch, resources, outdir):
    """Check that an hOCR file is still written when ``tesseract.run`` fails.

    ``tesseract.run`` is swapped for a stub that always raises
    ``subprocess.CalledProcessError`` with exit status 1 and the output
    ``b'Image too large'``. After ``tesseract.generate_hocr`` is called, the
    hOCR output file must contain the text ``name='ocr-capabilities'``.
    """

    def fail_with_image_too_large(args, *, env=None, **kwargs):
        # Stand-in for tesseract.run: ignores its arguments and always raises.
        failure = subprocess.CalledProcessError(
            1, 'tesseract', output=b'Image too large'
        )
        raise failure

    monkeypatch.setattr(tesseract, 'run', fail_with_image_too_large)

    source_image = resources / 'crom.png'
    hocr_path = outdir / 'out.hocr'
    text_path = outdir / 'out.txt'

    tesseract.generate_hocr(
        input_file=source_image,
        output_hocr=hocr_path,
        output_text=text_path,
        languages=['eng'],
        engine_mode=None,
        tessconfig=[],
        timeout=180.0,
        pagesegmode=None,
        user_words=None,
        user_patterns=None,
    )

    hocr_contents = Path(hocr_path).read_text()
    assert "name='ocr-capabilities'" in hocr_contents