def __test_txt(self, image_file, expected_box_file, lang='eng'):
    """Compare the boxes found in an image against a reference box file.

    Args:
        image_file: image file name, resolved under ``tests/data/``.
        expected_box_file: reference file name, resolved under
            ``tests/cuneiform/`` and parsed with
            ``self.builder.read_file()``.
        lang: language forwarded to ``cuneiform.image_to_string()``.

    Both box lists are sorted before being compared, so the order in
    which boxes are produced does not affect the result.
    """
    image_file = "tests/data/" + image_file
    expected_box_file = "tests/cuneiform/" + expected_box_file

    # Box content must be text: `unicode` on Python 2.7, `str` on
    # Python 3.x. Resolve the type once instead of once per box.
    try:
        text_type = unicode  # noqa: F821 -- only defined on Python 2
    except NameError:
        text_type = str

    with codecs.open(expected_box_file, 'r', encoding='utf-8') \
            as file_descriptor:
        expected_boxes = self.builder.read_file(file_descriptor)
    expected_boxes.sort()

    boxes = cuneiform.image_to_string(Image.open(image_file), lang=lang,
                                      builder=self.builder)
    boxes.sort()

    # The length check comes first so that zip() below cannot silently
    # hide missing or extra boxes.
    self.assertEqual(len(boxes), len(expected_boxes))
    for box, expected_box in zip(boxes, expected_boxes):
        self.assertEqual(type(expected_box.content), text_type)
        self.assertEqual(type(box.content), text_type)
        self.assertEqual(box, expected_box)
def __test_txt(self, image_file, expected_output_file, lang='eng'):
    """Compare the text extracted from an image against a reference file.

    Args:
        image_file: image file name, resolved under ``tests/data/``.
        expected_output_file: reference text file name, resolved under
            ``tests/cuneiform/`` and read as UTF-8.
        lang: language forwarded to ``cuneiform.image_to_string()``.

    Leading and trailing whitespace of the reference text is stripped
    before the comparison.
    """
    image_file = "tests/data/" + image_file
    expected_output_file = "tests/cuneiform/" + expected_output_file

    # Read the whole file in one call rather than concatenating it
    # line by line.
    with codecs.open(expected_output_file, 'r', encoding='utf-8') \
            as file_descriptor:
        expected_output = file_descriptor.read().strip()

    output = cuneiform.image_to_string(Image.open(image_file), lang=lang)
    self.assertEqual(output, expected_output)
def test_write_read(self):
    """Round-trip boxes through ``write_file()`` and ``read_file()``.

    Boxes obtained from ``tests/data/test.png`` are written to a
    temporary file with ``self.builder.write_file()``, read back with
    ``self.builder.read_file()``, and must compare equal one by one.
    The temporary file is removed whether or not the test passes.
    """
    source_image = Image.open("tests/data/test.png")
    original_boxes = cuneiform.image_to_string(source_image,
                                               builder=self.builder)
    self.assertTrue(len(original_boxes) > 0)

    raw_fd, tmp_path = tempfile.mkstemp()
    try:
        # mkstemp() returns an OS-level descriptor; close it and reopen
        # the path with codecs.open() to get utf-8 support.
        os.close(raw_fd)

        with codecs.open(tmp_path, 'w', encoding='utf-8') as writer:
            self.builder.write_file(writer, original_boxes)

        with codecs.open(tmp_path, 'r', encoding='utf-8') as reader:
            new_boxes = self.builder.read_file(reader)

        self.assertEqual(len(new_boxes), len(original_boxes))
        for reloaded, original in zip(new_boxes, original_boxes):
            self.assertEqual(reloaded, original)
    finally:
        os.remove(tmp_path)