def test_extract_lines_from_multiple_boxes(self):
    """Check the line type when a page's textboxes are passed together.

    For each page from ``extract_pages(self.test_pdf)``, the result of
    ``extract_textboxes`` is handed to ``extract_lines`` in one call, and
    every item it produces must be an ``LTTextLineHorizontal``.

    NOTE(review): no assertion runs if the helpers produce nothing, so the
    test passes vacuously in that case -- confirm the fixture has text.
    """
    for layout in extract_pages(self.test_pdf):
        page_boxes = extract_textboxes(layout)
        for text_line in extract_lines(page_boxes):
            self.assertIsInstance(text_line, LTTextLineHorizontal)
def test_extract_lines_from_single_textboxs(self):
    """Check the line type when textboxes are passed one at a time.

    For each page from ``extract_pages(self.test_pdf)``, every textbox
    from ``extract_textboxes`` is passed individually to ``extract_lines``,
    and each item it produces must be an ``LTTextLineHorizontal``.

    NOTE(review): no assertion runs if the helpers produce nothing, so the
    test passes vacuously in that case -- confirm the fixture has text.
    """
    for layout in extract_pages(self.test_pdf):
        for box in extract_textboxes(layout):
            # One call per textbox, unlike the multi-box variant.
            for text_line in extract_lines(box):
                self.assertIsInstance(text_line, LTTextLineHorizontal)
def test_extract_characters_from_multiple_lines(self):
    """Check the character type when a textbox's lines are passed together.

    For each textbox on each page of ``self.test_pdf``, the result of
    ``extract_lines`` is handed to ``extract_characters`` in one call, and
    every item it produces must be an ``LTChar``.

    NOTE(review): no assertion runs if the helpers produce nothing, so the
    test passes vacuously in that case -- confirm the fixture has text.
    """
    for layout in extract_pages(self.test_pdf):
        for box in extract_textboxes(layout):
            box_lines = extract_lines(box)
            for glyph in extract_characters(box_lines):
                self.assertIsInstance(glyph, LTChar)