p, out, err = run_ocrmypdf(
    resources / 'blank.pdf', outpdf, env=spoof_no_tess_gs_render_fail)
print(err)
assert p.returncode == ExitCode.child_process_error


def test_gs_raster_failure(spoof_no_tess_gs_raster_fail, resources, outpdf):
    """Expect ``ExitCode.child_process_error`` when processing ``ccitt.pdf``.

    Calls ``run_ocrmypdf`` with the ``spoof_no_tess_gs_raster_fail``
    environment, prints the third returned value (error output, judging by
    its original name ``err``) to aid debugging, and checks the return code.
    """
    source_pdf = resources / 'ccitt.pdf'
    proc, _stdout, err_text = run_ocrmypdf(
        source_pdf, outpdf, env=spoof_no_tess_gs_raster_fail)
    print(err_text)
    assert proc.returncode == ExitCode.child_process_error


@pytest.mark.skipif(
    '8.0.0' <= qpdf.version() <= '8.0.1',
    reason="qpdf regression on pages with no contents",
)
def test_no_contents(spoof_tesseract_noop, resources, outpdf):
    """Run ``check_ocrmypdf`` on ``no_contents.pdf`` with ``--force-ocr``.

    Skipped when ``qpdf.version()`` compares between ``'8.0.0'`` and
    ``'8.0.1'`` inclusive.
    """
    source_pdf = resources / 'no_contents.pdf'
    check_ocrmypdf(
        source_pdf, outpdf, '--force-ocr', env=spoof_tesseract_noop)


@pytest.mark.parametrize('image', [
    'baiona.png',
    'baiona_gray.png',
    'baiona_alpha.png',
    'congress.jpg'
])
def test_compression_preserved(spoof_tesseract_noop, ocrmypdf_exec,
                               resources, image, outpdf):
    input_file = str(resources / image)
outpdf, env=spoof_no_tess_gs_render_fail) print(err) assert p.returncode == ExitCode.child_process_error def test_gs_raster_failure(spoof_no_tess_gs_raster_fail, resources, outpdf): p, out, err = run_ocrmypdf(resources / 'ccitt.pdf', outpdf, env=spoof_no_tess_gs_raster_fail) print(err) assert p.returncode == ExitCode.child_process_error @pytest.mark.skipif( '8.0.0' <= qpdf.version() <= '8.0.1', reason="qpdf regression on pages with no contents", ) def test_no_contents(spoof_tesseract_noop, resources, outpdf): check_ocrmypdf(resources / 'no_contents.pdf', outpdf, '--force-ocr', env=spoof_tesseract_noop) @pytest.mark.parametrize( 'image', ['baiona.png', 'baiona_gray.png', 'baiona_alpha.png', 'congress.jpg']) def test_compression_preserved(spoof_tesseract_noop, ocrmypdf_exec, resources, image, outpdf): input_file = str(resources / image)
# © 2017 James R. Barlow: github.com/jbarlow83 import logging import resource import pytest from ocrmypdf.exec import ghostscript, tesseract, qpdf from ocrmypdf.pdfinfo import PdfInfo @pytest.mark.skipif( qpdf.version() < '7.0.0', reason="negzero.pdf crashes earlier versions") def test_qpdf_negative_zero(resources, outpdf): negzero = resources / 'negzero.pdf' hugemono = resources / 'hugemono.pdf' # raises exception on err qpdf.merge([str(negzero), str(hugemono)], outpdf, log=logging.getLogger()) @pytest.mark.timeout(15) @pytest.mark.parametrize('max_files,skip', [ (2, 0), # Can we merge correctly without opening more than 2 files at once? (16, 0), # And does this work properly when we can one-shot it? (2, 1), # Or playing with even/odd (3, 0) # Or odd step size ]) def test_qpdf_merge_correctness(resources, outpdf, max_files, skip): # All of these must be only one page long inputs = [ '2400dpi.pdf', 'aspect.pdf', 'blank.pdf', 'ccitt.pdf',
# © 2017 James R. Barlow: github.com/jbarlow83 import logging import resource import pytest from ocrmypdf.exec import ghostscript, tesseract, qpdf from ocrmypdf.pdfinfo import PdfInfo @pytest.mark.skipif(qpdf.version() < '7.0.0', reason="negzero.pdf crashes earlier versions") def test_qpdf_negative_zero(resources, outpdf): negzero = resources / 'negzero.pdf' hugemono = resources / 'hugemono.pdf' # raises exception on err qpdf.merge([str(negzero), str(hugemono)], outpdf, log=logging.getLogger()) @pytest.mark.timeout(15) @pytest.mark.parametrize( 'max_files,skip', [ ( 2, 0 ), # Can we merge correctly without opening more than 2 files at once? (16, 0), # And does this work properly when we can one-shot it? (2, 1), # Or playing with even/odd (3, 0) # Or odd step size ]) def test_qpdf_merge_correctness(resources, outpdf, max_files, skip):
p, out, err = run_ocrmypdf(
    resources / 'blank.pdf', outpdf, env=spoof_no_tess_gs_render_fail)
print(err)
assert p.returncode == ExitCode.child_process_error


def test_gs_raster_failure(spoof_no_tess_gs_raster_fail, resources, outpdf):
    """Expect ``ExitCode.child_process_error`` when processing ``ccitt.pdf``.

    Calls ``run_ocrmypdf`` with the ``spoof_no_tess_gs_raster_fail``
    environment, prints the third returned value (error output, judging by
    its original name ``err``) to aid debugging, and checks the return code.
    """
    source_pdf = resources / 'ccitt.pdf'
    proc, _stdout, err_text = run_ocrmypdf(
        source_pdf, outpdf, env=spoof_no_tess_gs_raster_fail)
    print(err_text)
    assert proc.returncode == ExitCode.child_process_error


@pytest.mark.skipif(
    '8.0.0' <= qpdf.version() <= '8.0.1',
    reason="qpdf regression",
)
def test_no_contents(spoof_tesseract_noop, resources, outpdf):
    """Run ``check_ocrmypdf`` on ``no_contents.pdf`` with ``--force-ocr``.

    Skipped when ``qpdf.version()`` compares between ``'8.0.0'`` and
    ``'8.0.1'`` inclusive.
    """
    source_pdf = resources / 'no_contents.pdf'
    check_ocrmypdf(
        source_pdf, outpdf, '--force-ocr', env=spoof_tesseract_noop)


@pytest.mark.parametrize('image', [
    'baiona.png',
    'baiona_gray.png',
    'congress.jpg'
])
def test_compression_preserved(spoof_tesseract_noop, ocrmypdf_exec,
                               resources, image, outpdf):
    from PIL import Image