def test_old_tesseract_error():
    """Expect MissingDependencyError when Tesseract reports '4.00.00alpha'."""
    fake_version = patch(
        'ocrmypdf._exec.tesseract.version', return_value='4.00.00alpha'
    )
    # The option construction stays inside the raises-context, so an error
    # raised by any of the three statements satisfies the expectation.
    with fake_version, pytest.raises(MissingDependencyError):
        options = make_opts(pdf_renderer='sandwich', language='eng')
        pm = get_plugin_manager(options.plugins)
        vd._check_options(options, pm, {'eng'})
def test_hocr_notlatin_warning(caplog):
    """Check that hocr rendering of a non-Latin language logs a warning."""
    # We do not care whether 'chi_sim' is really installed: handing it to
    # _check_options directly lets us pretend a non-Latin language is present.
    pretend_installed = {'chi_sim'}
    opts_and_pm = make_opts_pm(
        language='chi_sim', pdf_renderer='hocr', output_type='pdfa'
    )
    vd._check_options(*opts_and_pm, pretend_installed)
    assert 'PDF renderer is known to cause' in caplog.text
def test_no_progress_bar(progress_bar, resources):
    """Check that tqdm is called with ``disable`` differing from ``progress_bar``."""
    # NOTE(review): a later definition in this file reuses this function name,
    # which would shadow this one at import time -- confirm which is intended.
    input_pdf = resources / 'trivial.pdf'
    options = make_opts(progress_bar=progress_bar, input_file=input_pdf)
    pm = get_plugin_manager(options.plugins)
    with patch('ocrmypdf._concurrent.tqdm', autospec=True) as tqdm_mock:
        vd._check_options(options, pm, set())
        info = PdfInfo(options.input_file, progbar=options.progress_bar)
        assert info is not None
        assert tqdm_mock.called
        # call_args is an (args, kwargs) pair; only the keywords matter here.
        call_kwargs = tqdm_mock.call_args[1]
        assert call_kwargs['disable'] != progress_bar
def test_user_words(caplog):
    """Check when the 'ignores --user-words' warning is and is not logged."""
    target = 'ocrmypdf._exec.tesseract.has_user_words'
    warning = '4.0 ignores --user-words'

    # has_user_words() patched to False, user_words supplied: warning expected.
    with patch(target, return_value=False):
        options = make_opts(user_words='foo')
        vd._check_options(options, get_plugin_manager(options.plugins), set())
        assert warning in caplog.text

    # Drop captured records so the first warning cannot leak into the next check.
    caplog.clear()

    # has_user_words() patched to True, user_patterns supplied: no warning.
    with patch(target, return_value=True):
        options = make_opts(user_patterns='foo')
        vd._check_options(options, get_plugin_manager(options.plugins), set())
        assert warning not in caplog.text
def test_no_progress_bar(progress_bar, resources):
    """Check that the progress bar class receives ``disable`` differing from ``progress_bar``."""
    # NOTE(review): an earlier definition in this file uses the same function
    # name; this one shadows it at import time -- confirm that is intended.
    input_pdf = resources / 'trivial.pdf'
    options = make_opts(progress_bar=progress_bar, input_file=input_pdf)
    pm = get_plugin_manager(options.plugins)
    vd._check_options(options, pm, set())

    observed_disable = None

    class RecordingProgressBar(NullProgressBar):
        """Progress bar stand-in that records the ``disable`` value it receives."""

        def __init__(self, disable, **kwargs):
            nonlocal observed_disable
            observed_disable = disable
            super().__init__(disable=disable, **kwargs)

    serial = SerialExecutor(pbar_class=RecordingProgressBar)
    info = PdfInfo(
        options.input_file, progbar=options.progress_bar, executor=serial
    )

    assert info is not None
    # None here means the progress bar class was never given a disable value.
    assert observed_disable is not None
    assert observed_disable != progress_bar
def test_old_ghostscript(caplog):
    """Check the warning and errors produced for several Ghostscript versions."""
    gs_version = 'ocrmypdf._exec.ghostscript.version'

    # Version 9.19 with a 'chi_sim' PDF/A job: validation passes but warns.
    with patch(gs_version, return_value='9.19'):
        opts_and_pm = make_opts_pm(language='chi_sim', output_type='pdfa')
        vd._check_options(*opts_and_pm, {'chi_sim'})
        assert 'does not work correctly' in caplog.text

    # Each (version, options) pair below must fail with MissingDependencyError.
    failing_cases = (
        ('9.18', {'output_type': 'pdfa-3'}),
        ('9.24', {}),
    )
    for version, opt_kwargs in failing_cases:
        with patch(gs_version, return_value=version), pytest.raises(MissingDependencyError):
            vd._check_options(*make_opts_pm(**opt_kwargs), set())
def test_two_languages():
    """Check that a '+'-joined pair of languages passes option validation."""
    # NOTE(review): a later definition in this file reuses this function name,
    # which would shadow this one at import time -- confirm which is intended.
    available = {'fakelang1', 'fakelang2'}
    opts_and_pm = make_opts_pm(language='fakelang1+fakelang2')
    vd._check_options(*opts_and_pm, available)
def test_pagesegmode_warning(caplog):
    """Check that tesseract_pagesegmode='0' logs a 'disable OCR' warning."""
    options = make_opts(tesseract_pagesegmode='0')
    vd._check_options(options, get_plugin_manager(options.plugins), set())
    assert 'disable OCR' in caplog.text
def test_two_languages():
    """Check that two '+'-joined languages validate when has_textonly_pdf is True."""
    # NOTE(review): an earlier definition in this file uses the same function
    # name; this one shadows it at import time -- confirm that is intended.
    available = {'fakelang1', 'fakelang2'}
    textonly_patch = patch(
        'ocrmypdf._exec.tesseract.has_textonly_pdf', return_value=True
    )
    with textonly_patch:
        # Options are built while the patch is active, as in the original.
        opts_and_pm = make_opts_pm(language='fakelang1+fakelang2')
        vd._check_options(*opts_and_pm, available)