def test_tesseract_presence(self, capsys):
    """A misspelled binary name must abort the version check with TS_MISSING."""
    tess = P.PyTesseract({})
    # Deliberately misspelled so the version check has to report an error.
    tess.binary = "tesserac"
    with pytest.raises(SystemExit):
        tess._is_version_uptodate()
    stdout, _ = capsys.readouterr()
    assert tess.msgs['TS_MISSING'] in stdout
def test_tesseract_version(self, capsys):
    """Requiring version "100" must make make_hocr_from_pnms exit with TS_VERSION."""
    tess = P.PyTesseract({})
    tess.required = "100"
    with pytest.raises(SystemExit):
        tess.make_hocr_from_pnms("")
    stdout, _ = capsys.readouterr()
    assert tess.msgs['TS_VERSION'] in stdout
def test_version_major_equal(self):
    """A reported version equal to the required one counts as up to date."""
    with patch("subprocess.check_output") as fake_check_output:
        # Canned output for the patched subprocess.check_output.
        fake_check_output.return_value = """tesseract 3.02.02"""
        tess = P.PyTesseract({})
        tess.required = "3.02.02"
        is_current, _ = tess._is_version_uptodate()
        assert is_current
def test_tesseract_version_nt(self, monkeypatch):
    """assert_version must not raise for the two-part version "3.02".

    Windows Tesseract only reports 3.02 instead of 3.02.02.
    """
    fake_check_output = mock.Mock(return_value="tesseract 3.02")
    monkeypatch.setattr('os.name', 'nt')
    monkeypatch.setattr('subprocess.check_output', fake_check_output)
    wrapper = pypdfocr_tesseract.PyTesseract({})
    wrapper.assert_version()
def test_tesseract_version_nt(self, mock_subprocess, mock_os_name):
    """A reported "3.02" must satisfy a required "3.02.02".

    Windows Tesseract only returns 3.02 instead of 3.02.02.
    """
    # NOTE(review): the mock arguments are presumably injected by patch
    # decorators that are not visible here -- confirm.
    # NOTE(review): another test in this file has the same name; if both live
    # in one class, the later definition silently replaces the earlier one.
    mock_os_name.__str__.return_value = 'nt'
    mock_subprocess.return_value = """tesseract 3.02"""
    tess = P.PyTesseract({})
    tess.required = "3.02.02"
    is_current, _ = tess._is_version_uptodate()
    assert is_current
def test_force_Nt(self, monkeypatch, tmpdir):
    """With os.name set to 'nt', a bogus binary makes make_hocr_from_pnm exit."""
    monkeypatch.setattr('os.name', 'nt')
    monkeypatch.setattr('os.path.exists', mock.Mock(return_value=True))
    wrapper = pypdfocr_tesseract.PyTesseract({})
    wrapper._ts_version = "4.01"
    assert 'tesseract.exe' in wrapper.binary

    # Force a bad tesseract on Windows.
    wrapper.binary = "blah"
    image_path = str(tmpdir.join('blah.tiff'))
    with pytest.raises(SystemExit):
        wrapper.make_hocr_from_pnm(image_path)
def test_tesseract_old_output(self, version, ext, monkeypatch):
    """Check that the output file extension follows the tesseract version.

    Old versions of tesseract (before 3.03) created .html files, whereas
    more recent versions create .hocr files.
    """
    # NOTE(review): `version` and `ext` are presumably supplied by a
    # parametrize decorator that is not visible here -- confirm.
    always_true = mock.Mock(return_value=True)
    monkeypatch.setattr('os.path.exists', always_true)
    monkeypatch.setattr('os.path.isfile', mock.Mock(return_value=True))
    monkeypatch.setattr('subprocess.call', mock.Mock(return_value=0))
    wrapper = pypdfocr_tesseract.PyTesseract({})
    wrapper._ts_version = version
    expected_name = 'foo.{}'.format(ext)
    assert wrapper.make_hocr_from_pnm('foo.tiff') == expected_name
def test_force_Nt(self, mock_os_path_exists, mock_os_name, mock_uptodate, capsys):
    """With os.name mocked as 'nt', a bogus binary makes make_hocr_from_pnm exit.

    The default binary is first checked to mention ``tesseract.exe``; the
    path-exists and version checks are then mocked to succeed so that only
    the bad binary can cause the SystemExit.
    """
    # NOTE(review): the mock arguments are presumably injected by patch
    # decorators that are not visible here -- confirm.
    # NOTE(review): another test in this file has the same name; if both live
    # in one class, the later definition silently replaces the earlier one.
    mock_os_name.__str__.return_value = 'nt'
    p = P.PyTesseract({})
    assert 'tesseract.exe' in p.binary
    mock_os_path_exists.return_value = True
    mock_uptodate.return_value = (True, "")
    # Force a bad tesseract on Windows.
    p.binary = "blah"
    with pytest.raises(SystemExit):
        p.make_hocr_from_pnm('blah.tiff')
def test_tesseract_fail(self, caplog, monkeypatch, tmpdir):
    """Get every check to pass, then verify a failing tesseract run is reported.

    subprocess.check_output is replaced with a mock that raises
    CalledProcessError, and the TS_FAILED message is expected in the log.
    """
    boom = subprocess.CalledProcessError(-1, 'Boom')
    monkeypatch.setattr('os.name', 'nt')
    monkeypatch.setattr('os.path.exists', mock.Mock(return_value=True))
    monkeypatch.setattr('subprocess.check_output', mock.Mock(side_effect=boom))
    wrapper = pypdfocr_tesseract.PyTesseract({})
    wrapper._ts_version = "4.01"
    assert 'tesseract.exe' in wrapper.binary

    image_path = str(tmpdir.join('blah.tiff'))
    with pytest.raises(SystemExit):
        wrapper.make_hocr_from_pnm(image_path)
    assert wrapper.msgs['TS_FAILED'] in caplog.text
def test_tesseract_fail(self, mock_os_path_exists, mock_os_name, mock_uptodate, mock_subprocess_call, capsys):
    """Get every check to pass, then verify a non-zero tesseract status is reported.

    The mocked subprocess call returns -1 and the TS_FAILED message is
    expected on stdout.
    """
    # NOTE(review): the mock arguments are presumably injected by patch
    # decorators that are not visible here -- confirm.
    # NOTE(review): another test in this file has the same name; if both live
    # in one class, the later definition silently replaces the earlier one.
    mock_os_name.__str__.return_value = 'nt'
    tess = P.PyTesseract({})
    assert 'tesseract.exe' in tess.binary

    mock_os_path_exists.return_value = True
    mock_uptodate.return_value = (True, "")
    mock_subprocess_call.return_value = -1
    with pytest.raises(SystemExit):
        tess.make_hocr_from_pnm('blah.tiff')
    stdout, _ = capsys.readouterr()
    assert tess.msgs['TS_FAILED'] in stdout
def test_tiff_file_check(self, capsys):
    """A non-existent image path must make make_hocr_from_pnm exit with TS_img_MISSING."""
    tess = P.PyTesseract({})
    with pytest.raises(SystemExit):
        tess.make_hocr_from_pnm("DUMMY_NOTPRESENT.tiff")
    stdout, _ = capsys.readouterr()
    assert tess.msgs['TS_img_MISSING'] in stdout
def test_override_binary_nt(self, monkeypatch):
    """On 'nt', a binary override comes back double-quoted with backslashes doubled."""
    monkeypatch.setattr("os.name", "nt")
    config = {'binary': '\\foo\\bar\\bin'}
    wrapper = pypdfocr_tesseract.PyTesseract(config)
    expected = '"\\\\foo\\\\bar\\\\bin"'
    assert wrapper.binary == expected
def test_override_binary(self):
    """A 'binary' config entry must show up in the wrapper's binary attribute."""
    config = {'binary': '/foo/bar/bin'}
    wrapper = pypdfocr_tesseract.PyTesseract(config)
    assert '/foo/bar/bin' in wrapper.binary
def pyts(self):
    """Return a PyTesseract built from an empty config dict."""
    # NOTE(review): presumably registered as a pytest fixture; the decorator
    # is not visible here -- confirm.
    wrapper = pypdfocr_tesseract.PyTesseract({})
    return wrapper
def test_tesseract_4alpha(self, monkeypatch, tmpdir):
    """assert_version must not raise when tesseract reports "4.00.00alpha"."""
    fake_check_output = mock.Mock(return_value="tesseract 4.00.00alpha")
    monkeypatch.setattr('subprocess.check_output', fake_check_output)
    wrapper = pypdfocr_tesseract.PyTesseract({})
    wrapper.assert_version()