def tess4_available():
    """Check if a tesseract 4 binary is available, even if it's not the
    official "tesseract" on PATH.

    While probing, ``OCRMYPDF_TESSERACT`` is temporarily set to whatever
    ``tess4_possible_location()`` reports. The environment is put back to
    its previous contents before returning, whether or not the probe raised.

    Returns:
        The value of ``tesseract.v4() and tesseract.has_textonly_pdf()``
        evaluated under the temporary environment.
    """
    old_environ = os.environ.copy()
    try:
        os.environ['OCRMYPDF_TESSERACT'] = tess4_possible_location()
        return tesseract.v4() and tesseract.has_textonly_pdf()
    finally:
        # Restore *in place*. Rebinding ``os.environ`` to the plain dict
        # returned by ``.copy()`` would leave the real process environment
        # (already modified through putenv above) untouched, and would make
        # every later ``os.environ[...] = ...`` a plain dict write that never
        # reaches child processes.
        os.environ.clear()
        os.environ.update(old_environ)
def tess4_available():
    """Report whether a usable tesseract 4 binary can be found.

    The binary does not have to be the official "tesseract" on PATH: the
    environment produced by ``ensure_tess4()`` is applied for the duration
    of the check.

    Returns:
        The value of ``tesseract.v4() and tesseract.has_textonly_pdf()``
        under that environment, or ``False`` when an ``EnvironmentError``
        is raised along the way.
    """
    available = False
    try:
        # ensure_tess4 picks the environment that selects the candidate
        # binary; the checks below confirm it really is Tesseract 4.
        with modified_os_environ(ensure_tess4()):
            available = tesseract.v4() and tesseract.has_textonly_pdf()
    except EnvironmentError:
        available = False
    return available
def ensure_tess4():
    """Return a copy of the environment under which Tesseract 4 is selected.

    Returns:
        A dict copy of ``os.environ``. When the hint variable
        ``OCRMYPDF_TESS4`` had to be used, ``OCRMYPDF_TESSERACT`` in the copy
        is set to the hint's value.

    Raises:
        EnvironmentError: ``tesseract.v4()`` is false and ``OCRMYPDF_TESS4``
            is unset or empty.
    """
    if not tesseract.v4():
        if not os.environ.get('OCRMYPDF_TESS4'):
            raise EnvironmentError("Can't find Tesseract 4")

        # OCRMYPDF_TESS4 is only a hint: it is consulted if and only if the
        # default binary is not already v4. That lets a machine with both
        # versions keep tess3 on PATH while still testing tess4.
        tess4_env = os.environ.copy()
        tess4_env['OCRMYPDF_TESSERACT'] = tess4_env['OCRMYPDF_TESS4']
        return tess4_env

    # Either "tesseract" on $PATH is already v4, or OCRMYPDF_TESSERACT
    # already points at a v4 binary; nothing needs to change.
    return os.environ.copy()
def _ensure_tess4():
    """Return a copy of the environment whose PATH resolves to Tesseract 4.

    Returns:
        A dict copy of ``os.environ``. When the hint variable
        ``OCRMYPDF_TESS4`` had to be used, the directory containing that
        binary is prepended to ``PATH`` in the copy.

    Raises:
        EnvironmentError: ``tesseract.v4()`` is false and ``OCRMYPDF_TESS4``
            is unset or empty.
        AssertionError: ``OCRMYPDF_TESS4`` is set but is not an existing file.
    """
    if tesseract.v4():
        # "tesseract" on $PATH is already v4
        return os.environ.copy()

    if os.environ.get('OCRMYPDF_TESS4'):
        # OCRMYPDF_TESS4 is a hint environment variable that tells us to look
        # somewhere special for tess4 if and only if we need it. This allows
        # setting OCRMYPDF_TESS4 to test tess4 and PATH to point to tess3
        # on a system with both installed.
        env = os.environ.copy()
        tess4 = Path(os.environ['OCRMYPDF_TESS4'])
        # A hint that points nowhere is a setup mistake; fail loudly rather
        # than report "tess4 not found".
        assert tess4.is_file(), "OCRMYPDF_TESS4 does not point to a file"
        # Path objects do not support "+", so convert explicitly. Use
        # os.pathsep instead of a literal ':' and tolerate an unset or empty
        # PATH without leaving a dangling separator.
        search_dirs = [str(tess4.parent)]
        if env.get('PATH'):
            search_dirs.append(env['PATH'])
        env['PATH'] = os.pathsep.join(search_dirs)
        return env

    raise EnvironmentError("Can't find Tesseract 4")
def _ensure_tess4():
    """Return a copy of the environment whose PATH resolves to Tesseract 4.

    Returns:
        A dict copy of ``os.environ``. When the hint variable
        ``OCRMYPDF_TESS4`` had to be used, the directory containing that
        binary is prepended to ``PATH`` in the copy and the hint itself is
        carried over.

    Raises:
        EnvironmentError: ``tesseract.v4()`` is false and ``OCRMYPDF_TESS4``
            is unset or empty.
        AssertionError: ``OCRMYPDF_TESS4`` is set but is not an existing file.
    """
    if tesseract.v4():
        # "tesseract" on $PATH is already v4
        return os.environ.copy()

    if os.environ.get('OCRMYPDF_TESS4'):
        # OCRMYPDF_TESS4 is a hint environment variable that tells us to look
        # somewhere special for tess4 if and only if we need it. This allows
        # setting OCRMYPDF_TESS4 to test tess4 and PATH to point to tess3
        # on a system with both installed.
        env = os.environ.copy()
        tess4 = Path(os.environ['OCRMYPDF_TESS4'])
        # A hint that points nowhere is a setup mistake; fail loudly rather
        # than report "tess4 not found".
        assert tess4.is_file(), "OCRMYPDF_TESS4 does not point to a file"
        # Path objects do not support "+", so convert explicitly. Use
        # os.pathsep instead of a literal ':' and tolerate an unset or empty
        # PATH without leaving a dangling separator.
        search_dirs = [str(tess4.parent)]
        if env.get('PATH'):
            search_dirs.append(env['PATH'])
        env['PATH'] = os.pathsep.join(search_dirs)
        # The copy already holds this value; the assignment is kept so the
        # hint is visibly part of the returned environment.
        env['OCRMYPDF_TESS4'] = os.environ['OCRMYPDF_TESS4']
        return env

    raise EnvironmentError("Can't find Tesseract 4")
def test_tesseract_v4():
    """The tesseract in use must identify itself as version 4."""
    is_v4 = tesseract.v4()
    assert is_v4
def test_tesseract_config_invalid(renderer, resources, outdir):
    """An unparseable tesseract config file must be reported as invalid.

    Writes a config file whose content is not a valid setting, runs ocrmypdf
    with ``--tesseract-config`` pointing at it, and checks both the error
    text on stderr and the exit code.
    """
    cfg_file = outdir / 'test.cfg'
    with cfg_file.open('w') as f:
        f.write('''\
THIS FILE IS INVALID
''')

    p, out, err = run_ocrmypdf(
        resources / 'ccitt.pdf', outdir / 'out.pdf',
        '--pdf-renderer', renderer,
        '--tesseract-config', cfg_file)
    # Case-insensitive match on the error text.
    assert "parameter not found" in err.lower(), "No error message"
    assert p.returncode == ExitCode.invalid_config


@pytest.mark.skipif(tesseract.v4(), reason='arg has no effect in 4.0-beta1')
def test_user_words(resources, outdir):
    """Exercise the user word list option (skipped on tesseract 4).

    NOTE(review): ``word_list`` and ``sidecar_after`` are not used in the
    portion visible here; the test may continue beyond this excerpt.
    """
    word_list = outdir / 'wordlist.txt'
    sidecar_before = outdir / 'sidecar_before.txt'
    sidecar_after = outdir / 'sidecar_after.txt'

    # Don't know how to make this test pass on various versions and platforms
    # so weaken to merely testing that the argument is accepted
    consistent = False

    # With ``consistent`` hard-coded to False, the baseline run below never
    # executes.
    if consistent:
        check_ocrmypdf(
            resources / 'crom.png', outdir / 'out.pdf',
            '--image-dpi', 150,
            '--sidecar', sidecar_before
        )
#!/usr/bin/env python3
# © 2017 James R. Barlow: github.com/jbarlow83

import pytest
from ocrmypdf.exceptions import ExitCode
from ocrmypdf.exec import tesseract


# Everything in this module targets tesseract 3.x; skip it all when the
# tesseract in use is version 4.
pytestmark = pytest.mark.skipif(tesseract.v4(),
                                reason="tesseract 3.x required")


def test_textonly_pdf_on_tess3(resources, no_outpdf):
    """Requesting the 'tess4' renderer must fail as a missing dependency."""
    proc, _stdout, _stderr = pytest.helpers.run_ocrmypdf(
        resources / 'linn.pdf',
        no_outpdf,
        '--pdf-renderer', 'tess4',
    )
    assert proc.returncode == ExitCode.missing_dependency


def test_oem_on_tess3(resources, no_outpdf):
    """``--tesseract-oem`` is accepted but reported as ignored on stderr."""
    proc, _stdout, stderr = pytest.helpers.run_ocrmypdf(
        resources / 'aspect.pdf',
        no_outpdf,
        '--tesseract-oem', '1',
    )
    assert proc.returncode == ExitCode.ok
    assert 'argument ignored' in stderr
# © 2017 James R. Barlow: github.com/jbarlow83

import pytest
from ocrmypdf.exceptions import ExitCode
from ocrmypdf.exec import tesseract


# Everything in this module targets tesseract 3.x; skip it all when the
# tesseract in use is version 4.
pytestmark = pytest.mark.skipif(tesseract.v4(),
                                reason="tesseract 3.x required")


@pytest.mark.skipif(
    tesseract.has_textonly_pdf(),
    reason="check that missing dep is reported on old tess3")
def test_textonly_pdf_on_older_tess3(resources, no_outpdf):
    """Without text-only PDF support, 'sandwich' is a missing dependency."""
    proc, _stdout, _stderr = pytest.helpers.run_ocrmypdf(
        resources / 'linn.pdf',
        no_outpdf,
        '--pdf-renderer', 'sandwich',
    )
    assert proc.returncode == ExitCode.missing_dependency


@pytest.mark.skipif(
    not tesseract.has_textonly_pdf(),
    reason="check that feature is exercised on new test3")
def test_textonly_pdf_on_newer_tess3(resources, no_outpdf):
    """With text-only PDF support, the 'sandwich' renderer succeeds."""
    proc, _stdout, _stderr = pytest.helpers.run_ocrmypdf(
        resources / 'linn.pdf',
        no_outpdf,
        '--pdf-renderer', 'sandwich',
    )
    assert proc.returncode == ExitCode.ok