def __init__(self, filepath, orientation="P", layout="letter", font_list=None, font_dir=None):
    """Prepare a new PDF writer bound to ``filepath``.

    Fonts are loaded before anything else:

    * from ``font_dir`` when it is not None (``font_list`` is then ignored),
    * otherwise from ``font_list`` when it is not None,
    * otherwise through ``FontLoader.load_fonts()`` with no arguments.

    ``filepath`` is stored unchanged on the instance. ``self.destination``
    is derived from it:

    * the ``filepath`` object itself when it has a ``write`` attribute,
    * the literal ``'string'`` when ``filepath == 'string'``,
    * ``None`` in every other case.

    ``orientation`` and ``layout`` are passed straight to ``PDFDocument``.
    """
    # Font source precedence: directory, then explicit list, then loader default.
    if font_dir is None and font_list is None:
        FontLoader.load_fonts()
    elif font_dir is None:
        FontLoader.load_from_list(font_list)
    else:
        FontLoader.load_from_dir(font_dir)

    self.filepath = filepath

    # Work out where output goes; the writable-object test comes first.
    if hasattr(filepath, 'write'):
        destination = filepath
    elif filepath == 'string':
        destination = 'string'
    else:
        destination = None
    self.destination = destination

    # The session is created first because the document is built on top of it.
    self.session = _Session(self)
    self.document = PDFDocument(self.session, orientation, layout)

    # Display mode is set with no arguments, i.e. whatever
    # set_display_mode() defaults to (noted as "full width" by the
    # original author).
    self.set_display_mode()

    # Default PDF version number.
    self.pdf_version = '1.7'

    # Information and compression are initialised with their defaults.
    self.set_information()
    self.set_compression()
def get_pages(klass, fp, pagenos=None, maxpages=0, password='', caching=True, check_extractable=True):
    """Lazily yield page objects produced by ``klass.create_pages``.

    This is a generator: nothing below runs (parsing, password
    initialisation, the extractability check) until the first item is
    requested.

    Parameters:
        fp: object handed to ``PDFParser``; also interpolated with ``%r``
            into the error message.
        pagenos: optional container of zero-based page indices. When it is
            truthy, only pages whose index is in it are yielded; when it is
            falsy (None, empty) every page is eligible.
        maxpages: when truthy, iteration stops right after yielding a page
            whose position ``index + 1`` is at or beyond ``maxpages``. The
            comparison uses the position in the document, not the number of
            pages yielded so far, and it is only evaluated after a page has
            been yielded (skipped pages never trigger the stop).
        password: passed to ``doc.initialize``; the empty string is the
            default for documents without a password.
        caching: forwarded to ``PDFDocument`` as the ``caching`` keyword.
        check_extractable: when truthy, a document whose ``is_extractable``
            attribute is falsy is rejected.

    Raises:
        klass.PDFTextExtractionNotAllowed: if ``check_extractable`` is truthy
            and ``doc.is_extractable`` is falsy.
    """
    # Build the parser on the input, then the document on the parser.
    parser = PDFParser(fp)
    doc = PDFDocument(parser, caching=caching)

    # The password is supplied unconditionally (empty string by default).
    doc.initialize(password)

    # Stop here when extraction is disallowed and the caller asked to check.
    if check_extractable and not doc.is_extractable:
        raise klass.PDFTextExtractionNotAllowed(
            'Text extraction is not allowed: %r' % fp)

    for index, page in enumerate(klass.create_pages(doc)):
        # A falsy ``pagenos`` means "no filter"; otherwise keep listed indices.
        if not pagenos or index in pagenos:
            yield page
            # The page cap is only checked after a page was actually yielded.
            if maxpages and maxpages <= index + 1:
                break