def test_version_error(self, popen):
    """
    When the mocked Tesseract process exits with status 2, get_version()
    must raise a TesseractError exposing that status and a message equal
    to self.message.
    """
    # Drop the cached version: otherwise get_version() may return a value
    # cached by a previously-run test and never raise.
    tesseract.g_version = None
    self.stdout.wait.return_value = 2
    popen.return_value = self.stdout
    with self.assertRaises(tesseract.TesseractError) as te:
        tesseract.get_version()
    self.assertEqual(te.exception.status, 2)
    self.assertEqual(te.exception.message, self.message)
def test_version_error_nan(self, popen):
    """
    When the reported version is "tesseract A.B.C", get_version() must
    raise a TesseractError with status 0 and a "not a number" message.
    """
    # Drop the cached version: otherwise get_version() may return a value
    # cached by a previously-run test and never raise.
    tesseract.g_version = None
    message = self.message.replace("tesseract 4.0.0", "tesseract A.B.C")
    self.stdout.stdout.read.return_value = message.encode()
    popen.return_value = self.stdout
    with self.assertRaises(tesseract.TesseractError) as te:
        tesseract.get_version()
    self.assertEqual(te.exception.status, 0)
    self.assertIn(
        "Unable to parse Tesseract version (not a number): ",
        te.exception.message
    )
def test_version_error_splitting(self, popen):
    """
    When the reported version is just "tesseract 3", get_version() must
    raise a TesseractError with status 0 whose message reports that
    splitting the version failed.
    """
    # drop cached version
    tesseract.g_version = None

    popen.return_value = self.stdout
    self.stdout.stdout.read.return_value = self.message.replace(
        b"tesseract 4.0.0", b"tesseract 3"
    )

    with self.assertRaises(tesseract.TesseractError) as raised:
        tesseract.get_version()

    error = raised.exception
    self.assertEqual(error.status, 0)
    self.assertIn(
        "Unable to parse Tesseract version (spliting failed): ",
        error.message
    )
def test_version_cache(self, popen):
    """
    Make sure Tesseract is not called every time we need the version.
    We need the version *often* in the code, and calling Tesseract
    every time wouldn't be wise.
    """
    tesseract.g_version = None  # drop cached version
    self.stdout.stdout.read.return_value = self.message
    popen.return_value = self.stdout
    self.assertSequenceEqual(tesseract.get_version(), (4, 0, 0))

    # From now on the mocked process would only produce garbage: getting
    # the same version back means the cached value was used.
    self.stdout.stdout.read.return_value = "garbage"
    popen.return_value = self.stdout
    self.assertSequenceEqual(tesseract.get_version(), (4, 0, 0))

    # State the intent explicitly: the second call must not have spawned
    # another process.
    popen.assert_called_once()
class TestContext(unittest.TestCase):
    """
    These tests make sure the requirements for the tests are met.
    """

    def test_available(self):
        """Tesseract must be reported as available."""
        self.assertTrue(tesseract.is_available(),
                        "Tesseract not found. Is it installed ?")

    @unittest.skipIf(tesseract.get_version() != (3, 2, 1),
                     "This test only works with Tesseract 3.02.1")
    def test_version(self):
        """The detected Tesseract version must be (3, 2, 1)."""
        self.assertEqual(tesseract.get_version(), (3, 2, 1),
                         ("Tesseract does not have the expected version"
                          " (3.02.1) ! Tests will fail !"))

    def test_langs(self):
        """
        The "eng", "fra" and "jpn" languages must be listed by
        get_available_languages().
        """
        langs = tesseract.get_available_languages()
        # assertIn lets unittest describe the missing item and the
        # container on failure, which assertTrue(x in y) cannot do.
        self.assertIn("eng", langs,
                      ("English training does not appear to be installed."
                       " (required for the tests)"))
        self.assertIn("fra", langs,
                      ("French training does not appear to be installed."
                       " (required for the tests)"))
        self.assertIn("jpn", langs,
                      ("Japanese training does not appear to be installed."
                       " (required for the tests)"))
def test_version_windows(self, popen):
    """
    A version reported as "tesseract v4.0.0.20181030" must be parsed as
    (4, 0, 0).
    """
    # drop cached version
    tesseract.g_version = None

    popen.return_value = self.stdout
    self.stdout.stdout.read.return_value = self.message.replace(
        b"tesseract 4.0.0", b"tesseract v4.0.0.20181030"
    )

    detected = tesseract.get_version()
    self.assertSequenceEqual(detected, (4, 0, 0))
def test_version(self):
    """
    Check that the detected Tesseract version is one of the expected ones.
    """
    # assertIn (instead of assertTrue(x in y)) lets unittest include the
    # version actually found in the failure report (with Python 3's
    # default longMessage setting).
    self.assertIn(
        tesseract.get_version(),
        (
            (3, 2, 1),
            (3, 2, 2),
            (3, 3, 0),
        ),
        ("Tesseract does not have the expected version"
         " (3.3.0) ! Some tests will be skipped !")
    )
class TestTxt(unittest.TestCase):
    """
    These tests make sure the "usual" OCR works fine.
    (the one generating a .txt file)
    """

    # Tesseract versions for which the version-dependent tests below are
    # run; with any other version they are skipped.
    _VERSION_DEPENDENT_TESTS_VERSIONS = (
        (3, 2, 1),
        (3, 2, 2),
        (3, 3, 0),
    )

    def __test_txt(self, image_file, expected_output_file, lang='eng'):
        """
        Run the OCR on an image and compare the result with a reference file.

        image_file -- file name, relative to tests/data/
        expected_output_file -- file name, relative to tests/tesseract/
        lang -- language passed to tesseract.image_to_string()
        """
        image_file = "tests/data/" + image_file
        expected_output_file = "tests/tesseract/" + expected_output_file

        # Read the reference text in one go instead of concatenating it
        # line by line.
        with codecs.open(expected_output_file, 'r', encoding='utf-8') \
                as file_descriptor:
            expected_output = file_descriptor.read().strip()

        output = tesseract.image_to_string(Image.open(image_file), lang=lang)

        self.assertEqual(output, expected_output)

    def test_basic(self):
        self.__test_txt('test.png', 'test.txt')

    @unittest.skipIf(
        tesseract.get_version() not in _VERSION_DEPENDENT_TESTS_VERSIONS,
        "This test only works with Tesseract 3.02.1, 3.02.2 or 3.03.0")
    def test_european(self):
        self.__test_txt('test-european.jpg', 'test-european.txt')

    @unittest.skipIf(
        tesseract.get_version() not in _VERSION_DEPENDENT_TESTS_VERSIONS,
        "This test only works with Tesseract 3.02.1, 3.02.2 or 3.03.0")
    def test_french(self):
        self.__test_txt('test-french.jpg', 'test-french.txt', 'fra')

    def test_japanese(self):
        self.__test_txt('test-japanese.jpg', 'test-japanese.txt', 'jpn')
def test_version(self):
    """
    Check that the detected Tesseract version is one of the expected ones.
    """
    # assertIn (instead of assertTrue(x in y)) lets unittest include the
    # version actually found in the failure report (with Python 3's
    # default longMessage setting).
    self.assertIn(
        tesseract.get_version(),
        (
            (3, 2, 1),
            (3, 2, 2),
            (3, 3, 0),
            (3, 4, 0),
        ),
        ("Tesseract does not have the expected version"
         " (3.4.0) ! Some tests will be skipped !")
    )
def test_version(self):
    """
    Check that the detected Tesseract version is one of the expected ones.
    """
    # assertIn (instead of assertTrue(x in y)) lets unittest include the
    # version actually found in the failure report (with Python 3's
    # default longMessage setting).
    self.assertIn(
        tesseract.get_version(),
        (
            (3, 2, 1),
            (3, 2, 2),
            (3, 3, 0),
            (3, 4, 0),
            (3, 4, 1),
            (3, 5, 0),
        ),
        ("Tesseract does not have the expected version")
    )
def test_version_tesseract4(self, popen):
    """
    The default mocked output must be parsed as version (4, 0, 0), with a
    single Popen call that receives no 'stderr' keyword argument.
    """
    # drop cached version
    tesseract.g_version = None
    popen.return_value = self.stdout

    detected = tesseract.get_version()
    self.assertSequenceEqual(detected, (4, 0, 0))

    # stderr must be explicitly ignored when calling 'tesseract -v'.
    # See https://gitlab.gnome.org/World/OpenPaperwork/pyocr/-/issues/118
    popen.assert_called_once()
    # call_args is an (args, kwargs) pair; only the keyword arguments
    # matter here.
    call_kwargs = popen.call_args[1]
    self.assertNotIn('stderr', call_kwargs)
def test_version(self):
    """
    Check that the detected Tesseract version is one of the expected ones.
    """
    # assertIn (instead of assertTrue(x in y)) lets unittest include the
    # version actually found in the failure report (with Python 3's
    # default longMessage setting).
    self.assertIn(
        tesseract.get_version(),
        (
            (3, 2, 1),
            (3, 2, 2),
            (3, 3, 0),
            (3, 4, 0),
            (3, 4, 1),
            (3, 5, 0),
        ),
        ("Tesseract does not have the expected version")
    )
def test_version_windows(self, popen):
    """
    A version reported as "tesseract v4.0.0.20181030" must be parsed as
    (4, 0, 0).
    """
    # Drop the cached version: otherwise get_version() may return a value
    # cached by a previously-run test instead of parsing the mocked output.
    tesseract.g_version = None
    message = self.message.replace("tesseract 4.0.0",
                                   "tesseract v4.0.0.20181030")
    self.stdout.stdout.read.return_value = message.encode()
    popen.return_value = self.stdout
    self.assertSequenceEqual(tesseract.get_version(), (4, 0, 0))
def test_version_tesseract3_no_minor(self, popen):
    """
    A two-component version ("tesseract 3.0") must be parsed as (3, 0, 0).
    """
    # Drop the cached version: otherwise get_version() may return a value
    # cached by a previously-run test instead of parsing the mocked output.
    tesseract.g_version = None
    message = self.message.replace("tesseract 4.0.0", "tesseract 3.0")
    self.stdout.stdout.read.return_value = message.encode()
    popen.return_value = self.stdout
    self.assertSequenceEqual(tesseract.get_version(), (3, 0, 0))
def test_version_tesseract4(self, popen):
    """
    The default mocked output must be parsed as version (4, 0, 0).
    """
    # Drop the cached version: otherwise get_version() may return a value
    # cached by a previously-run test instead of parsing the mocked output.
    tesseract.g_version = None
    popen.return_value = self.stdout
    self.assertSequenceEqual(tesseract.get_version(), (4, 0, 0))
class TestCharBox(unittest.TestCase):
    """
    These tests make sure that Tesseract box handling works fine.
    """

    def setUp(self):
        self.builder = tesseract.CharBoxBuilder()

    def __test_txt(self, image_file, expected_box_file, lang='eng'):
        """
        Run the OCR with the char box builder on an image and compare the
        sorted result with the sorted content of a reference box file.

        image_file -- file name, relative to tests/data/
        expected_box_file -- file name, relative to tests/tesseract/
        lang -- language passed to tesseract.image_to_string()
        """
        image_file = "tests/data/" + image_file
        expected_box_file = "tests/tesseract/" + expected_box_file

        with codecs.open(expected_box_file, 'r', encoding='utf-8') \
                as file_descriptor:
            expected_boxes = self.builder.read_file(file_descriptor)
        expected_boxes.sort()

        boxes = tesseract.image_to_string(Image.open(image_file), lang=lang,
                                          builder=self.builder)
        boxes.sort()

        self.assertEqual(len(boxes), len(expected_boxes))

        # Lengths are known to match at this point, so a pairwise walk
        # covers every box.
        for (box, expected_box) in zip(boxes, expected_boxes):
            self.assertEqual(box, expected_box)

    def test_basic(self):
        self.__test_txt('test.png', 'test.box')

    def test_european(self):
        self.__test_txt('test-european.jpg', 'test-european.box')

    def test_french(self):
        self.__test_txt('test-french.jpg', 'test-french.box', 'fra')

    @unittest.skipIf(tesseract.get_version() not in (
        (3, 2, 1),
        (3, 2, 2),
        (3, 3, 0),
    ), "This test requires Tesseract 3.02.1, 3.02.2 or 3.03.0")
    def test_japanese(self):
        self.__test_txt('test-japanese.jpg', 'test-japanese.box', 'jpn')

    def test_write_read(self):
        """
        Boxes written with the builder and read back from the file must be
        equal to the original boxes, in the same order.
        """
        original_boxes = tesseract.image_to_string(
            Image.open("tests/data/test.png"), builder=self.builder)
        self.assertGreater(len(original_boxes), 0)

        (file_descriptor, tmp_path) = tempfile.mkstemp()
        try:
            # we must open the file with codecs.open() for utf-8 support
            os.close(file_descriptor)

            with codecs.open(tmp_path, 'w', encoding='utf-8') as fdescriptor:
                self.builder.write_file(fdescriptor, original_boxes)

            with codecs.open(tmp_path, 'r', encoding='utf-8') as fdescriptor:
                new_boxes = self.builder.read_file(fdescriptor)

            self.assertEqual(len(new_boxes), len(original_boxes))
            for (new_box, original_box) in zip(new_boxes, original_boxes):
                self.assertEqual(new_box, original_box)
        finally:
            # Always remove the temporary file, even when an assertion fails.
            os.remove(tmp_path)
def test_version_tesseract3_no_minor(self, popen):
    """
    A two-component version ("tesseract 3.0") must be parsed as (3, 0, 0).
    """
    # drop cached version
    tesseract.g_version = None

    popen.return_value = self.stdout
    self.stdout.stdout.read.return_value = self.message.replace(
        b"tesseract 4.0.0", b"tesseract 3.0"
    )

    detected = tesseract.get_version()
    self.assertSequenceEqual(detected, (3, 0, 0))
def test_version(self):
    """
    Check that the detected Tesseract version is exactly (3, 2, 1).
    """
    expected_version = (3, 2, 1)
    failure_message = (
        "Tesseract does not have the expected version"
        " (3.02.1) ! Tests will fail !"
    )
    self.assertEqual(
        tesseract.get_version(), expected_version, failure_message
    )