Exemple #1
0
    def __init__(self,
                 filepath,
                 orientation="P",
                 layout="letter",
                 font_list=None,
                 font_dir=None):
        """Set up the writer: load fonts, resolve the output, build the document.

        Args:
            filepath: Output target. Stored as ``self.filepath``. When it has
                a ``write`` attribute it also becomes ``self.destination``;
                when it equals ``'string'`` the destination is that literal;
                otherwise the destination is ``None``.
            orientation: Forwarded unchanged to ``PDFDocument``.
            layout: Forwarded unchanged to ``PDFDocument``.
            font_list: Passed to ``FontLoader.load_from_list`` when given and
                ``font_dir`` is not.
            font_dir: Passed to ``FontLoader.load_from_dir`` when given; takes
                precedence over ``font_list``.
        """
        # Font source precedence: directory first, then explicit list, and
        # only when neither is supplied fall back to the loader's defaults.
        if font_dir is None and font_list is None:
            FontLoader.load_fonts()
        elif font_dir is not None:
            FontLoader.load_from_dir(font_dir)
        else:
            FontLoader.load_from_list(font_list)

        self.filepath = filepath

        # A writable object is used directly; the sentinel 'string' is kept
        # verbatim; anything else leaves the destination unset.
        if hasattr(self.filepath, 'write'):
            self.destination = self.filepath
        elif self.filepath == 'string':
            self.destination = 'string'
        else:
            self.destination = None

        # The session is created first because the document is built on it.
        self.session = _Session(self)
        self.document = PDFDocument(self.session, orientation, layout)

        # Apply the default display mode before anything else is configured.
        self.set_display_mode()
        # Default PDF version number.
        self.pdf_version = '1.7'

        # Populate the information and compression settings with defaults.
        self.set_information()
        self.set_compression()
Exemple #2
0
 def get_pages(klass,
               fp,
               pagenos=None,
               maxpages=0,
               password='',
               caching=True,
               check_extractable=True):
     """Lazily yield the pages of the PDF read from ``fp``.

     Args:
         fp: Object handed to ``PDFParser``.
         pagenos: Container of zero-based page indices to yield. A falsy
             value (``None``, empty) disables the filter.
         maxpages: When non-zero, iteration stops after yielding a page
             whose one-based position is at least this value. The limit is
             only checked right after a page is yielded.
         password: Passed to ``doc.initialize``; empty string by default.
         caching: Forwarded to ``PDFDocument`` as ``caching``.
         check_extractable: When true, a document whose ``is_extractable``
             is falsy is rejected.

     Yields:
         Each selected page produced by ``klass.create_pages``.

     Raises:
         klass.PDFTextExtractionNotAllowed: If ``check_extractable`` is true
             and the document does not allow extraction.
     """
     # Build the parser on the file object, then the document on the parser.
     parser = PDFParser(fp)
     doc = PDFDocument(parser, caching=caching)
     # The password is always supplied, even when it is the empty default.
     doc.initialize(password)

     # Refuse to continue when the caller asked for the extraction check
     # and the document reports that extraction is not permitted.
     if check_extractable:
         if not doc.is_extractable:
             raise klass.PDFTextExtractionNotAllowed(
                 'Text extraction is not allowed: %r' % fp)

     # Walk the pages in order, yielding those that pass the filter.
     for index, current in enumerate(klass.create_pages(doc)):
         if not pagenos or index in pagenos:
             yield current
             # index is zero-based, so index + 1 is the page's position.
             if maxpages and index + 1 >= maxpages:
                 break