Exemple #1
0
 def test_tesseract_presence(self, capsys):
     """Expect an exit and the TS_MISSING message for a misspelled binary."""
     tesseract = P.PyTesseract({})
     # Deliberately misspelled binary name; the version check must fail.
     tesseract.binary = "tesserac"
     with pytest.raises(SystemExit):
         tesseract._is_version_uptodate()
     stdout_text, _ = capsys.readouterr()
     assert tesseract.msgs['TS_MISSING'] in stdout_text
Exemple #2
0
 def test_tesseract_version(self, capsys):
     """Expect an exit and the TS_VERSION message when "100" is required."""
     tesseract = P.PyTesseract({})
     # Presumably higher than any installed tesseract version -- confirm.
     tesseract.required = "100"
     with pytest.raises(SystemExit):
         tesseract.make_hocr_from_pnms("")
     stdout_text, _ = capsys.readouterr()
     assert tesseract.msgs['TS_VERSION'] in stdout_text
Exemple #3
0
 def test_version_major_equal(self):
     """A reported version equal to the required one counts as up to date."""
     with patch("subprocess.check_output") as fake_check_output:
         tesseract = P.PyTesseract({})
         tesseract.required = "3.02.02"
         # The patched subprocess call reports exactly the required version.
         fake_check_output.return_value = "tesseract 3.02.02"
         is_current, _ = tesseract._is_version_uptodate()
     assert is_current
Exemple #4
0
 def test_tesseract_version_nt(self, monkeypatch):
     """Run the version assertion with ``os.name`` patched to ``'nt'``.

     Tesseract on Windows only reports ``3.02`` instead of ``3.02.02``; the
     test passes as long as ``assert_version()`` does not raise for that
     shorter version string.
     """
     fake_check_output = mock.Mock(return_value="tesseract 3.02")
     monkeypatch.setattr('os.name', 'nt')
     monkeypatch.setattr('subprocess.check_output', fake_check_output)
     tesseract = pypdfocr_tesseract.PyTesseract({})
     tesseract.assert_version()
Exemple #5
0
    def test_tesseract_version_nt(self, mock_subprocess, mock_os_name):
        """A reported ``3.02`` must satisfy a required ``3.02.02``.

        Tesseract on Windows only reports ``3.02`` instead of ``3.02.02``.

        NOTE(review): the two mock arguments are presumably injected by
        ``patch`` decorators that are outside this view -- confirm.
        """
        mock_os_name.__str__.return_value = 'nt'
        tesseract = P.PyTesseract({})
        tesseract.required = "3.02.02"

        # Report the shortened version string.
        mock_subprocess.return_value = "tesseract 3.02"
        is_current, _ = tesseract._is_version_uptodate()
        assert is_current
Exemple #6
0
 def test_force_Nt(self, monkeypatch, tmpdir):
     """With ``os.name`` patched to ``'nt'``, a bogus binary forces an exit."""
     path_always_exists = mock.Mock(return_value=True)
     monkeypatch.setattr('os.name', 'nt')
     monkeypatch.setattr('os.path.exists', path_always_exists)

     tesseract = pypdfocr_tesseract.PyTesseract({})
     tesseract._ts_version = "4.01"
     assert 'tesseract.exe' in tesseract.binary

     # Force a bad tesseract binary on (simulated) Windows.
     tesseract.binary = "blah"
     image_path = str(tmpdir.join('blah.tiff'))
     with pytest.raises(SystemExit):
         tesseract.make_hocr_from_pnm(image_path)
Exemple #7
0
    def test_tesseract_old_output(self, version, ext, monkeypatch):
        """Check that the output extension follows the tesseract version.

        Old versions of tesseract (before 3.03) created .html files, whereas
        more recent versions create .hocr files.

        NOTE(review): ``version`` and ``ext`` are presumably supplied by a
        parametrize decorator outside this view -- confirm.
        """
        # Stub out file-system checks and the external tesseract call.
        stubs = (
            ('os.path.exists', mock.Mock(return_value=True)),
            ('os.path.isfile', mock.Mock(return_value=True)),
            ('subprocess.call', mock.Mock(return_value=0)),
        )
        for target, stub in stubs:
            monkeypatch.setattr(target, stub)

        tesseract = pypdfocr_tesseract.PyTesseract({})
        tesseract._ts_version = version
        produced_name = tesseract.make_hocr_from_pnm('foo.tiff')
        assert produced_name == 'foo.{}'.format(ext)
Exemple #8
0
    def test_force_Nt(self, mock_os_path_exists, mock_os_name, mock_uptodate,
                      capsys):
        """A bogus binary must make ``make_hocr_from_pnm`` exit.

        The test sets ``mock_os_name`` to render as ``'nt'`` and checks that
        the default binary then contains ``tesseract.exe``. It next makes
        the path-existence and version mocks report success, so the exit is
        triggered only by the bad binary name.

        NOTE(review): the mock arguments are presumably injected by
        ``patch`` decorators outside this view -- confirm. ``capsys`` is
        unused but kept so the signature is unchanged.
        """
        mock_os_name.__str__.return_value = 'nt'
        p = P.PyTesseract({})
        assert 'tesseract.exe' in p.binary

        mock_os_path_exists.return_value = True
        mock_uptodate.return_value = (True, "")
        # Force a bad tesseract binary on (simulated) Windows.
        p.binary = "blah"
        with pytest.raises(SystemExit):
            p.make_hocr_from_pnm('blah.tiff')
Exemple #9
0
    def test_tesseract_fail(self, caplog, monkeypatch, tmpdir):
        """Report the case where the tesseract call itself fails.

        All preliminary checks are made to pass; the patched
        ``subprocess.check_output`` then raises ``CalledProcessError``, and
        the TS_FAILED message is expected in the captured log text.
        """
        failure = subprocess.CalledProcessError(-1, 'Boom')
        monkeypatch.setattr('os.name', 'nt')
        monkeypatch.setattr('os.path.exists', mock.Mock(return_value=True))
        monkeypatch.setattr('subprocess.check_output',
                            mock.Mock(side_effect=failure))

        tesseract = pypdfocr_tesseract.PyTesseract({})
        tesseract._ts_version = "4.01"
        assert 'tesseract.exe' in tesseract.binary

        image_path = str(tmpdir.join('blah.tiff'))
        with pytest.raises(SystemExit):
            tesseract.make_hocr_from_pnm(image_path)
        assert tesseract.msgs['TS_FAILED'] in caplog.text
Exemple #10
0
    def test_tesseract_fail(self, mock_os_path_exists, mock_os_name,
                            mock_uptodate, mock_subprocess_call, capsys):
        """Report the case where tesseract returns a non-zero status.

        Every earlier check is made to pass so that the only failure is the
        ``-1`` status from the mocked subprocess call.

        NOTE(review): the mock arguments are presumably injected by
        ``patch`` decorators outside this view -- confirm.
        """
        mock_os_name.__str__.return_value = 'nt'
        tesseract = P.PyTesseract({})
        assert 'tesseract.exe' in tesseract.binary

        mock_os_path_exists.return_value = True
        mock_uptodate.return_value = (True, "")
        mock_subprocess_call.return_value = -1
        with pytest.raises(SystemExit):
            tesseract.make_hocr_from_pnm('blah.tiff')

        stdout_text, _ = capsys.readouterr()
        assert tesseract.msgs['TS_FAILED'] in stdout_text
Exemple #11
0
 def test_tiff_file_check(self, capsys):
     """Expect an exit and the TS_img_MISSING message for a missing image."""
     tesseract = P.PyTesseract({})
     missing_image = "DUMMY_NOTPRESENT.tiff"
     with pytest.raises(SystemExit):
         tesseract.make_hocr_from_pnm(missing_image)
     stdout_text, _ = capsys.readouterr()
     assert tesseract.msgs['TS_img_MISSING'] in stdout_text
Exemple #12
0
 def test_override_binary_nt(self, monkeypatch):
     """With ``os.name`` patched to ``'nt'``, a configured binary path is
     expected to come back double-quoted with its backslashes doubled."""
     monkeypatch.setattr("os.name", "nt")
     config = {'binary': '\\foo\\bar\\bin'}
     expected_binary = '"\\\\foo\\\\bar\\\\bin"'
     tesseract = pypdfocr_tesseract.PyTesseract(config)
     assert tesseract.binary == expected_binary
Exemple #13
0
 def test_override_binary(self):
     """A ``binary`` entry in the config must show up in ``binary``."""
     custom_path = '/foo/bar/bin'
     tesseract = pypdfocr_tesseract.PyTesseract({'binary': custom_path})
     assert custom_path in tesseract.binary
Exemple #14
0
 def pyts(self):
     """Build a ``PyTesseract`` wrapper from an empty configuration dict.

     NOTE(review): presumably registered as a pytest fixture by a decorator
     outside this view -- confirm.
     """
     empty_config = {}
     return pypdfocr_tesseract.PyTesseract(empty_config)
Exemple #15
0
 def test_tesseract_4alpha(self, monkeypatch, tmpdir):
     """The version assertion must not raise for ``4.00.00alpha``.

     ``tmpdir`` is unused here; it is kept so the signature is unchanged.
     """
     fake_check_output = mock.Mock(return_value="tesseract 4.00.00alpha")
     monkeypatch.setattr('subprocess.check_output', fake_check_output)
     tesseract = pypdfocr_tesseract.PyTesseract({})
     tesseract.assert_version()