def test_get_output(self):
    """Check that get_output() returns every added word, in order.

    Each expected box is fed to the builder through add_word(). The
    output must then hold exactly one builders.Box per added word, each
    equal to the box it was created from.
    """
    boxes = [
        builders.Box("word1", ((10, 11), (12, 13)), 95),
        builders.Box("word2", ((11, 12), (13, 14))),
        builders.Box("word3", ((12, 13), (14, 15))),
        builders.Box("word4", ((13, 14), (15, 16)), 87),
    ]
    for box in boxes:
        self.builder.add_word(box.content, box.position, box.confidence)

    output = list(self.builder.get_output())

    # zip() stops at the shorter sequence, so without this check an empty
    # or truncated output would go through the loop below without a single
    # assertion being evaluated.
    self.assertEqual(len(output), len(boxes))
    for box, box_expected in zip(output, boxes):
        self.assertIsInstance(box, builders.Box)
        self.assertEqual(box, box_expected)
        self.assertEqual(box.content, box_expected.content)
def setUp(self):
    """Build the line boxes shared by the tests.

    Five word boxes are created and combined into several line boxes:
    a full line, a shorter line at the same position, a line at another
    position, a duplicate of the full line, and a line holding a
    non-ASCII word.
    """
    words = (
        builders.Box("word1", ((15, 22), (23, 30))),
        builders.Box("word2", ((25, 23), (30, 32))),
        builders.Box("word3", ((32, 25), (40, 32)), 95),
        builders.Box("word4", ((41, 18), (44, 33)), 98),
    )
    accented_word = builders.Box("\xe9", ((1, 2), (3, 4)), 98)

    main_position = ((14, 15), (45, 33))

    # Every LineBox receives its own list so no two lines share one.
    self.line1 = builders.LineBox(list(words), main_position)
    self.line1_bis = builders.LineBox(list(words[:2]), main_position)
    self.line2 = builders.LineBox(list(words[2:]), ((30, 5), (53, 20)))
    self.line1_dupl = builders.LineBox(list(words), main_position)
    self.line_unicode = builders.LineBox(
        [words[0], accented_word], ((1, 2), (3, 4))
    )
def test_write_file(self):
    """Check that write_file() emits every word of every line.

    Four lines of four words each are generated, with a random
    confidence per word. The written text must contain each word's
    content, its four coordinates separated by spaces, and its
    confidence.
    """
    buffer = StringIO()

    lines = []
    for line_idx in range(4):
        first = 4 * line_idx
        words = []
        for word_idx in range(4):
            n = first + word_idx
            words.append(
                builders.Box(
                    "word" + str(n),
                    ((n, n + 1), (n + 2, n + 3)),
                    randint(0, 100),
                )
            )
        line_position = ((first, first + 4), (first + 2, first + 6))
        lines.append(builders.LineBox(words, line_position))

    self.builder.write_file(buffer, lines)
    buffer.seek(0)
    written = buffer.read()

    for line in lines:
        for box in line.word_boxes:
            self.assertIn(box.content, written)
            top_left = box.position[0]
            bottom_right = box.position[1]
            coordinates = "{} {} {} {}".format(
                top_left[0], top_left[1], bottom_right[0], bottom_right[1]
            )
            self.assertIn(coordinates, written)
            self.assertIn(str(box.confidence), written)
def test_add_word(self):
    """Check that add_word() appends words to the current line's text.

    After one call the first entry of built_text is the word itself;
    after a second call it is the two words joined by a single space.
    """
    word_box = builders.Box("word", ((10, 11), (12, 13)))
    self.builder.start_line(word_box)

    self.builder.add_word(word_box.content, word_box)
    self.assertEqual(self.builder.built_text[0], word_box.content)

    self.builder.add_word(word_box.content, word_box)
    expected_text = " ".join([word_box.content, word_box.content])
    self.assertEqual(self.builder.built_text[0], expected_text)
def test_get_output(self):
    """Check that get_output() joins words by spaces and lines by newlines."""
    anchor = builders.Box("word", ((10, 11), (12, 13)))
    words_per_line = (("word1", "word2"), ("word3", "word4"))

    for line_words in words_per_line:
        self.builder.start_line(anchor)
        for word in line_words:
            self.builder.add_word(word, anchor)

    self.assertEqual(self.builder.get_output(), "word1 word2\nword3 word4")
def test_write_file(self):
    """Check that write_file() emits every word box.

    The written text must contain each box's content, its four
    coordinates separated by spaces, and its confidence.
    """
    boxes = [
        builders.Box("word1", ((10, 11), (12, 13)), 95),
        builders.Box("word2", ((11, 12), (13, 14))),
        builders.Box("word3", ((12, 13), (14, 15))),
        builders.Box("word4", ((13, 14), (15, 16)), 87),
    ]

    # Keep the file object and its text under separate names.
    buffer = StringIO()
    self.builder.write_file(buffer, boxes)
    buffer.seek(0)
    written = buffer.read()

    for box in boxes:
        self.assertIn(box.content, written)
        top_left = box.position[0]
        bottom_right = box.position[1]
        coordinates = "{} {} {} {}".format(
            top_left[0], top_left[1], bottom_right[0], bottom_right[1]
        )
        self.assertIn(coordinates, written)
        self.assertIn(str(box.confidence), written)
def test_write_file(self):
    """Check that tesseract.CharBoxBuilder.write_file() emits every box.

    The boxes include a non-ASCII character. The written text must
    contain each box's content and its four coordinates separated by
    spaces.
    """
    char_builder = tesseract.CharBoxBuilder()
    boxes = [
        builders.Box("a", ((10, 11), (12, 13)), 95),
        builders.Box("b", ((11, 12), (13, 14))),
        builders.Box("c", ((12, 13), (14, 15))),
        builders.Box("d", ((13, 14), (15, 16)), 87),
        builders.Box(u"\xe9", ((14, 15), (16, 17)), 88),
    ]

    # Keep the file object and its text under separate names.
    buffer = StringIO()
    char_builder.write_file(buffer, boxes)
    buffer.seek(0)
    written = buffer.read()

    for box in boxes:
        self.assertIn(box.content, written)
        top_left = box.position[0]
        bottom_right = box.position[1]
        coordinates = u"{} {} {} {}".format(
            top_left[0], top_left[1], bottom_right[0], bottom_right[1]
        )
        self.assertIn(coordinates, written)
def test_get_output(self):
    """Check that get_output() returns every built line, in order.

    Four lines of four words each are generated, with a random
    confidence per word, and replayed into the builder through
    start_line() / add_word() / end_line(). The output must then hold
    exactly one builders.LineBox per line, each equal to the line it was
    replayed from.
    """
    lines = []
    for l in range(4):
        boxes = []
        for b in range(4):
            word = "word" + str(4 * l + b)
            position = ((4 * l + b, 0), (0, 0))
            boxes.append(builders.Box(word, position, randint(0, 100)))
        line_position = ((4 * l, 4 * (l + 1)), (4 * l + 2, 4 * (l + 1) + 2))
        lines.append(builders.LineBox(boxes, line_position))

    for line in lines:
        self.builder.start_line(line.position)
        for word in line.word_boxes:
            self.builder.add_word(word.content, word.position, word.confidence)
        self.builder.end_line()  # could be useful in future

    output = list(self.builder.get_output())

    # zip() stops at the shorter sequence, so without this check an empty
    # or truncated output would go through the loop below without a single
    # assertion being evaluated.
    self.assertEqual(len(output), len(lines))
    for line, line_expected in zip(output, lines):
        self.assertIsInstance(line, builders.LineBox)
        self.assertEqual(line, line_expected)
def setUp(self):
    """Create the boxes used as fixtures by the tests.

    box1 and box1_bis share one position but differ in content; box2 has
    another position and an explicit confidence; box_unicode holds a
    non-ASCII character.
    """
    shared_position = ((15, 22), (23, 42))

    self.box1 = builders.Box("word1", shared_position)
    self.box1_bis = builders.Box("word1_bis", shared_position)
    self.box2 = builders.Box("word2", ((30, 5), (40, 15)), 95)
    self.box_unicode = builders.Box("\xe9", ((1, 2), (3, 4)))
def test_add_word_no_line(self):
    """Check that add_word() raises IndexError when no line was started."""
    word_box = builders.Box("word", ((10, 11), (12, 13)))
    self.assertRaises(
        IndexError, self.builder.add_word, word_box.content, word_box
    )
def test_start_line(self):
    """Check that start_line() leaves built_text holding one empty string."""
    line_box = builders.Box("word", ((10, 11), (12, 13)))
    expected_text = [""]

    self.builder.start_line(line_box)

    self.assertListEqual(self.builder.built_text, expected_text)
def test_add_word_no_line(self):
    """Check that add_word() without a started line raises IndexError.

    The builder's lines must still be an empty list after the failed
    call.
    """
    word_box = builders.Box("word", ((1, 2), (3, 4)), 42)

    self.assertRaises(
        IndexError,
        self.builder.add_word,
        word_box.content,
        word_box.position,
        word_box.confidence,
    )

    self.assertListEqual(self.builder.lines, [])
def test_add_word(self):
    """Check that add_word() stores the word as a builders.Box.

    Every entry of word_boxes must be a builders.Box, and the first one
    must equal the box whose content, position and confidence were
    passed to add_word().
    """
    expected = builders.Box("word", ((1, 2), (3, 4)), 42)
    self.builder.add_word(
        expected.content, expected.position, expected.confidence
    )

    # The loop variable must not reuse the expected box's name: doing so
    # rebinds it to an element of word_boxes and turns the final equality
    # check into a comparison of the builder's output with itself.
    for built in self.builder.word_boxes:
        self.assertIsInstance(built, builders.Box)
    self.assertEqual(self.builder.word_boxes[0], expected)
def test_start_line(self):
    """Check that start_line() leaves word_boxes unchanged."""
    line_box = builders.Box("word", ((1, 2), (3, 4)))
    # Copy the contents so later changes to word_boxes cannot alter it.
    snapshot = list(self.builder.word_boxes)

    self.builder.start_line(line_box)

    self.assertEqual(self.builder.word_boxes, snapshot)