Esempio n. 1
0
def load_lxml_structured_document(filename, page_range=None):
    """Load an XML file into an ``LxmlStructuredDocument``.

    The file is opened through ``FileSystems.open`` and parsed with
    ``etree.parse``; the parsed root element backs the returned document.

    Args:
        filename: Location of the XML file, passed to ``FileSystems.open``.
        page_range: Optional pair ``(first, last)`` of 1-based page numbers,
            both inclusive. A falsy value (``None``, empty) keeps every
            page. A ``first`` below 1 is clamped to the first page.

    Returns:
        The whole document when ``page_range`` is falsy; otherwise a new
        document whose ``DOCUMENT`` root holds only the selected pages.
    """
    with FileSystems.open(filename) as source:
        root = etree.parse(source).getroot()
        document = LxmlStructuredDocument(root)
        if not page_range:
            return document

        # NOTE(review): slicing assumes get_pages() returns a sequence
        # (e.g. a list) rather than a one-shot iterator -- confirm.
        all_pages = document.get_pages()
        # Convert the 1-based first page to a 0-based index; the inclusive
        # 1-based last page is already the exclusive 0-based slice end.
        start_index = max(0, page_range[0] - 1)
        selected_pages = all_pages[start_index:page_range[1]]
        return LxmlStructuredDocument(E.DOCUMENT(*selected_pages))
Esempio n. 2
0
 def test_should_find_pages(self):
     """get_pages() should return the PAGE children of DOCUMENT, in order."""
     expected_pages = [E.PAGE() for _ in range(2)]
     root = E.DOCUMENT(*expected_pages)
     structured_document = LxmlStructuredDocument(root)
     found_pages = list(structured_document.get_pages())
     assert found_pages == expected_pages