def generate_hocr(input_file, output_hocr, output_text, options):
    """Invoke ``tesseract.generate_hocr`` using settings read from *options*.

    The three path arguments are forwarded unchanged under the same keyword
    names; every other keyword argument is taken from an attribute of
    *options*.

    Args:
        input_file: Forwarded as ``input_file``.
        output_hocr: Forwarded as ``output_hocr``.
        output_text: Forwarded as ``output_text``.
        options: Object providing ``languages``, ``tesseract_oem``,
            ``tesseract_config``, ``tesseract_timeout``,
            ``tesseract_pagesegmode``, ``user_words`` and ``user_patterns``.

    Returns:
        None. Whatever the underlying call returns is discarded.
    """
    # Resolve the callee before touching ``options`` so that name and
    # attribute lookups happen in the same order as a direct call would.
    run_hocr = tesseract.generate_hocr

    # Keyword name expected by the tesseract wrapper -> value from options.
    engine_settings = {
        'languages': options.languages,
        'engine_mode': options.tesseract_oem,
        'tessconfig': options.tesseract_config,
        'timeout': options.tesseract_timeout,
        'pagesegmode': options.tesseract_pagesegmode,
        'user_words': options.user_words,
        'user_patterns': options.user_patterns,
    }

    run_hocr(
        input_file=input_file,
        output_hocr=output_hocr,
        output_text=output_text,
        **engine_settings,
    )
def test_image_too_large_hocr(monkeypatch, resources, outdir):
    """hOCR output is still written when the tesseract run fails.

    ``tesseract.run`` is replaced with a stub that always raises
    ``subprocess.CalledProcessError`` carrying the output
    ``b'Image too large'``. After calling ``tesseract.generate_hocr`` the
    test expects the hOCR file to exist and to contain the
    ``name='ocr-capabilities'`` marker.
    """

    def fail_with_image_too_large(args, *, env=None, **kwargs):
        # Simulate the tesseract executable exiting with status 1.
        failure = subprocess.CalledProcessError(
            1, 'tesseract', output=b'Image too large'
        )
        raise failure

    monkeypatch.setattr(tesseract, 'run', fail_with_image_too_large)

    source_image = resources / 'crom.png'
    hocr_path = outdir / 'out.hocr'
    text_path = outdir / 'out.txt'

    tesseract.generate_hocr(
        input_file=source_image,
        output_hocr=hocr_path,
        output_text=text_path,
        languages=['eng'],
        engine_mode=None,
        tessconfig=[],
        timeout=180.0,
        pagesegmode=None,
        user_words=None,
        user_patterns=None,
    )

    hocr_contents = Path(hocr_path).read_text()
    assert "name='ocr-capabilities'" in hocr_contents