def test_extract_characters_from_multiple_textboxes(self):
    """Check character extraction when a page's textboxes are passed together.

    For each page of ``self.test_pdf``, the complete result of
    ``extract_textboxes`` is handed to ``extract_characters`` in one call,
    and every item it produces must be an ``LTChar``.
    """
    for layout_page in extract_pages(self.test_pdf):
        page_boxes = extract_textboxes(layout_page)
        for glyph in extract_characters(page_boxes):
            self.assertIsInstance(glyph, LTChar)
def test_extract_lines_from_multiple_boxes(self):
    """Check line extraction when a page's textboxes are passed together.

    For each page of ``self.test_pdf``, the complete result of
    ``extract_textboxes`` is handed to ``extract_lines`` in one call, and
    every item it produces must be an ``LTTextLineHorizontal``.
    """
    for layout_page in extract_pages(self.test_pdf):
        page_boxes = extract_textboxes(layout_page)
        for text_line in extract_lines(page_boxes):
            self.assertIsInstance(text_line, LTTextLineHorizontal)
def test_extract_lines_from_single_textboxs(self):
    """Check line extraction when textboxes are passed one at a time.

    Each textbox produced by ``extract_textboxes`` for each page of
    ``self.test_pdf`` is given to ``extract_lines`` individually, and every
    item it produces must be an ``LTTextLineHorizontal``.
    """
    for layout_page in extract_pages(self.test_pdf):
        for box in extract_textboxes(layout_page):
            for text_line in extract_lines(box):
                self.assertIsInstance(text_line, LTTextLineHorizontal)
def test_extract_characters_from_single_textboxes(self):
    """Check character extraction when textboxes are passed one at a time.

    Each textbox produced by ``extract_textboxes`` for each page of
    ``self.test_pdf`` is given to ``extract_characters`` individually, and
    every item it produces must be an ``LTChar``.
    """
    for layout_page in extract_pages(self.test_pdf):
        for box in extract_textboxes(layout_page):
            for glyph in extract_characters(box):
                self.assertIsInstance(glyph, LTChar)
def test_extract_textboxes_from_multiple_pages(self):
    """Check textbox extraction when all pages are passed in a single call.

    The whole result of ``extract_pages`` is handed to ``extract_textboxes``
    at once, and every item it produces must be an ``LTTextBoxHorizontal``.
    """
    all_boxes = extract_textboxes(extract_pages(self.test_pdf))
    for box in all_boxes:
        self.assertIsInstance(box, LTTextBoxHorizontal)
def test_extract_textboxes_from_single_pages(self):
    """Check textbox extraction when pages are passed one at a time.

    Each page of ``self.test_pdf`` is given to ``extract_textboxes``
    individually, and every item it produces must be an
    ``LTTextBoxHorizontal``.
    """
    for layout_page in extract_pages(self.test_pdf):
        for box in extract_textboxes(layout_page):
            self.assertIsInstance(box, LTTextBoxHorizontal)
def test_extract_pages_from_pdf(self):
    """Check that every item ``extract_pages`` produces is an ``LTPage``."""
    for layout_page in extract_pages(self.test_pdf):
        self.assertIsInstance(layout_page, LTPage)