def _gs_version_key(version):
    """Convert a dotted version string into a tuple of ints for comparison.

    Plain string comparison is lexicographic, so ``'10.0' < '9.20'`` is true
    even though 10.0 is the newer release. Comparing tuples of integers
    orders versions numerically, component by component.

    Parsing stops at the first component that does not start with a digit;
    trailing non-digit text in a component (e.g. ``'27rc1'``) is ignored.
    """
    key = []
    for piece in str(version).split('.'):
        digits = ''
        for char in piece:
            if char not in '0123456789':
                break
            digits += char
        if not digits:
            break
        key.append(int(digits))
    return tuple(key)


def check_options(options):
    """Validate the installed Ghostscript against the requested options.

    Reads ``options.languages`` and ``options.output_type``. As a side
    effect, an ``output_type`` of ``'pdfa'`` is rewritten to ``'pdfa-2'``.

    The minimum-version requirement (9.15) is delegated to
    ``check_external_program``.

    Raises:
        MissingDependencyError: if the installed Ghostscript is 9.24 or 9.51,
            or if ``pdfa-3`` output is requested with Ghostscript older
            than 9.19.
    """
    gs_version = ghostscript.version()
    check_external_program(
        program='gs',
        package='ghostscript',
        version_checker=gs_version,
        need_version='9.15',  # limited by Travis CI / Ubuntu 14.04 backports
    )
    # Numeric key so that e.g. 10.x is not mistaken for something < 9.20.
    gs_key = _gs_version_key(gs_version)

    if gs_version in ('9.24', '9.51'):
        raise MissingDependencyError(
            f"Ghostscript {gs_version} contains serious regressions and is not "
            "supported. Please upgrade to a newer version, or downgrade to the "
            "previous version."
        )

    # We have these constraints to check for.
    # 1. Ghostscript < 9.20 mangles multibyte Unicode
    # 2. hocr doesn't work on non-Latin languages (so don't select it)
    is_latin = options.languages.issubset(HOCR_OK_LANGS)
    if (
        gs_key < _gs_version_key('9.20')
        and options.output_type != 'pdf'
        and not is_latin
    ):
        # https://bugs.ghostscript.com/show_bug.cgi?id=696874
        # Ghostscript < 9.20 fails to encode multibyte characters properly
        log.warning(
            f"The installed version of Ghostscript ({gs_version}) does not work "
            "correctly with the OCR languages you specified. Use --output-type pdf or "
            "upgrade to Ghostscript 9.20 or later to avoid this issue."
        )

    # Plain 'pdfa' is an alias for PDF/A-2.
    if options.output_type == 'pdfa':
        options.output_type = 'pdfa-2'

    if options.output_type == 'pdfa-3' and gs_key < _gs_version_key('9.19'):
        raise MissingDependencyError(
            "--output-type pdfa-3 requires Ghostscript 9.19 or later"
        )
def _numeric_version(version):
    """Return the leading numeric components of a dotted version as ints.

    Used instead of comparing version strings directly, because string
    comparison is lexicographic and would order ``'10.0'`` before ``'9.19'``.
    Parsing stops at the first component that does not start with a digit.
    """
    numbers = []
    for piece in str(version).split('.'):
        digits = piece[: len(piece) - len(piece.lstrip('0123456789'))]
        if not digits:
            break
        numbers.append(int(digits))
    return tuple(numbers)


def test_pdfa_n(pdfa_level, resources, outpdf):
    """Check that ``--output-type pdfa-N`` yields a file claiming PDF/A-NB.

    The PDF/A-3 case is marked xfail when the installed Ghostscript is older
    than 9.19.
    """
    gs_too_old = _numeric_version(ghostscript.version()) < _numeric_version('9.19')
    if pdfa_level == '3' and gs_too_old:
        pytest.xfail(reason='Ghostscript >= 9.19 required')

    check_ocrmypdf(
        resources / 'ccitt.pdf',
        outpdf,
        '--output-type',
        'pdfa-' + pdfa_level,
        '--plugin',
        'tests/plugins/tesseract_cache.py',
    )

    pdfa_info = file_claims_pdfa(outpdf)
    assert pdfa_info['conformance'] == f'PDF/A-{pdfa_level}B'