コード例 #1
0
 def get_pages(klass,
               fp,
               pagenos=None,
               maxpages=0,
               password='',
               caching=True,
               check_extractable=True):
     """Lazily yield the pages of the PDF document read from ``fp``.

     This is a generator: nothing is parsed until the first page is
     requested.

     NOTE(review): ``klass`` is presumably the class itself (i.e. this is
     bound as a classmethod) -- the decorator is not visible here.

     Args:
         fp: File object handed to ``PDFParser``.
         pagenos: Container of zero-based page indices to yield. A falsy
             value (``None``, empty) disables filtering.
         maxpages: Stop once a yielded page's one-based position in the
             document reaches this value. The comparison uses the page's
             position, not the number of pages yielded so far, and is only
             evaluated after a page is yielded. ``0`` means no limit.
         password: Passed to ``doc.initialize``; empty string if none.
         caching: Forwarded to ``PDFDocument``.
         check_extractable: When true, refuse documents whose
             ``is_extractable`` attribute is false.

     Yields:
         Each selected page produced by ``klass.create_pages``.

     Raises:
         klass.PDFTextExtractionNotAllowed: If ``check_extractable`` is
             true and the document does not allow extraction.
     """
     pdf_parser = PDFParser(fp)
     document = PDFDocument(pdf_parser, caching=caching)
     # An empty password is supplied when the document has none.
     document.initialize(password)

     if check_extractable:
         if not document.is_extractable:
             message = 'Text extraction is not allowed: %r' % fp
             raise klass.PDFTextExtractionNotAllowed(message)

     for index, page in enumerate(klass.create_pages(document)):
         if not pagenos or index in pagenos:
             yield page
             # The limit is checked only after a page has been yielded,
             # against the page's one-based position in the document.
             position = index + 1
             if maxpages and maxpages <= position:
                 break
コード例 #2
0
ファイル: pdfpage.py プロジェクト: andrewdefries/pdfminer
 def get_pages(klass, fp,
               pagenos=None, maxpages=0, password='',
               caching=True, check_extractable=True):
     """Generate pages from the PDF document behind the file object ``fp``.

     Being a generator, the function does no work until iteration starts.

     NOTE(review): ``klass`` looks like the owning class (classmethod
     style) -- the decorator is outside this view, so confirm.

     Args:
         fp: File object passed to ``PDFParser``.
         pagenos: Zero-based page indices to keep; any falsy value keeps
             every page.
         maxpages: When non-zero, iteration stops after yielding a page
             whose one-based document position is at least this value.
             Skipped pages never trigger the stop.
         password: Given to ``doc.initialize`` (empty string for none).
         caching: Forwarded to the ``PDFDocument`` constructor.
         check_extractable: If true, a document whose ``is_extractable``
             is false causes an exception instead of yielding pages.

     Yields:
         The selected pages, in the order ``klass.create_pages`` returns
         them.

     Raises:
         klass.PDFTextExtractionNotAllowed: When extraction is checked and
             the document disallows it.
     """
     reader = PDFParser(fp)
     pdf_doc = PDFDocument(reader, caching=caching)
     # Initialization takes the password; '' stands for "no password".
     pdf_doc.initialize(password)

     extraction_blocked = check_extractable and not pdf_doc.is_extractable
     if extraction_blocked:
         raise klass.PDFTextExtractionNotAllowed(
             'Text extraction is not allowed: %r' % fp)

     # Enumerate from 1 so ``ordinal`` is the page's one-based position;
     # the zero-based index used by ``pagenos`` is ``ordinal - 1``.
     for ordinal, page in enumerate(klass.create_pages(pdf_doc), 1):
         if pagenos and (ordinal - 1) not in pagenos:
             continue
         yield page
         if maxpages and maxpages <= ordinal:
             break