Python PdfFileReader 예제들, pdf.PdfFileReader Python 예제들

예제 #1

0

파일 보기

def printMeta(fileName):
    pdfFile = PdfFileReader(file(fileName, 'rb'))
    docInfo = pdfFile.getDocumentInfo()
    print '[*] PDF MetaData for: ' + str(fileName)

    for metaItem in docInfo:
        print '[+] ' + metaItem + ':' + docInfo[metaItem]

예제 #2

0

파일 보기

파일: PDFCropper.py 프로젝트: jebbard/jMeta

def main():
    """
        Script main function.
    """

    if len(argv) < 3:
        print "Usage: python PDFCropper <output_folder> <file_path>"
        exit(-1)

    outputFolderBase = argv[1]

    print "#####################"
    print "   Cropping PDFs!"
    print "#####################"
    print ""
    print "[INFO] Output Folder:", outputFolderBase

    for index in range(2, len(argv)):
        print "[INFO] Mangling file:", argv[index]

        input = PdfFileReader(file(argv[index], "rb"))

        for currentPageIndex in range(input.numPages):
            currentPage = input.getPage(currentPageIndex)

            pageText = currentPage.extractText()

            pageWidthAmount, pageSizeAmount = getPageSizingAmounts(pageText)

            print "[INFO] - ( Page", currentPageIndex + 1, "of", input.numPages, ") of file", argv[
                index]

            if pageWidthAmount is None:
                print "[INFO] - Skipping page, as no size definition found"

            else:
                pageFileName = getFileNameFromPage(pageText)

                if pageFileName is None:
                    print "[INFO] - Skipping page, as no valid filename found"

                else:
                    print "[INFO] - Output filename of current page:", pageFileName

                    cropPDFPagePowerpoint(currentPage, pageWidthAmount,
                                          pageSizeAmount)
                    outputPath = getOutputPath(outputFolderBase, pageFileName)
                    writePDFPage(outputPath, currentPage)
                    print "[INFO] - Done current page:", pageFileName
                    print

예제 #3

0

파일 보기

파일: merger.py 프로젝트: jamma313/PyPDF2

    def merge(self, position, fileobj, bookmark=None, pages=None, import_bookmarks=True):
        """
        >>> merge(position, file, bookmark=None, pages=None, import_bookmarks=True)
        
        Merges the pages from the source document specified by "file" into the output
        file at the page number specified by "position".
        
        Optionally, you may specify a bookmark to be applied at the beginning of the 
        included file by supplying the text of the bookmark in the "bookmark" parameter.
        
        You may prevent the source document's bookmarks from being imported by
        specifying "import_bookmarks" as False.
        
        You may also use the "pages" parameter to merge only the specified range of 
        pages from the source document into the output document.
        """
        
        # This parameter is passed to self.inputs.append and means
        # that the stream used was created in this method.
        my_file = False
        
        # If the fileobj parameter is a string, assume it is a path
        # and create a file object at that location. If it is a file,
        # copy the file's contents into a StringIO stream object; if 
        # it is a PdfFileReader, copy that reader's stream into a 
        # StringIO stream.
        # If fileobj is none of the above types, it is not modified
        if type(fileobj) in (str, unicode):
            fileobj = file(fileobj, 'rb')
            my_file = True
        elif type(fileobj) == file:
            fileobj.seek(0)
            filecontent = fileobj.read()
            fileobj = StringIO(filecontent)
            my_file = True
        elif type(fileobj) == PdfFileReader:
            orig_tell = fileobj.stream.tell()   
            fileobj.stream.seek(0)
            filecontent = StringIO(fileobj.stream.read())
            fileobj.stream.seek(orig_tell) # reset the stream to its original location
            fileobj = filecontent
            my_file = True
            
        # Create a new PdfFileReader instance using the stream
        # (either file or StringIO) created above
        pdfr = PdfFileReader(fileobj, strict=self.strict)
        
        # Find the range of pages to merge
        if pages == None:
            pages = (0, pdfr.getNumPages())
        elif type(pages) in (int, float, str, unicode):
            raise TypeError('"pages" must be a tuple of (start, end)')
        
        srcpages = []
        if bookmark:
            bookmark = Bookmark(TextStringObject(bookmark), NumberObject(self.id_count), NameObject('/Fit'))
        
        outline = []
        if import_bookmarks:
            outline = pdfr.getOutlines()
            outline = self._trim_outline(pdfr, outline, pages)
        
        if bookmark:
            self.bookmarks += [bookmark, outline]
        else:
            self.bookmarks += outline
        
        dests = pdfr.namedDestinations
        dests = self._trim_dests(pdfr, dests, pages)
        self.named_dests += dests
        
        # Gather all the pages that are going to be merged
        for i in range(*pages):
            pg = pdfr.getPage(i)
            
            id = self.id_count
            self.id_count += 1
            
            mp = _MergedPage(pg, pdfr, id)
            
            srcpages.append(mp)

        self._associate_dests_to_pages(srcpages)
        self._associate_bookmarks_to_pages(srcpages)
            
        
        # Slice to insert the pages at the specified position
        self.pages[position:position] = srcpages
        
        # Keep track of our input files so we can close them later
        self.inputs.append((fileobj, pdfr, my_file))

예제 #4

0

파일 보기

파일: merger.py 프로젝트: KanorUbu/PyPDF2

    def merge(self, position, fileobj, bookmark=None, pages=None, import_bookmarks=True):
        """
        >>> merge(position, file, bookmark=None, pages=None, import_bookmarks=True)
        
        Merges the pages from the source document specified by "file" into the output
        file at the page number specified by "position".
        
        Optionally, you may specify a bookmark to be applied at the beginning of the 
        included file by supplying the text of the bookmark in the "bookmark" parameter.
        
        You may prevent the source document's bookmarks from being imported by
        specifying "import_bookmarks" as False.
        
        You may also use the "pages" parameter to merge only the specified range of 
        pages from the source document into the output document.
        """
        
        my_file = False
        if type(fileobj) in (str, unicode):
            fileobj = file(fileobj, 'rb')
            my_file = True
            
        if type(fileobj) == PdfFileReader:
            pdfr = fileobj
            fileobj = pdfr.file
        else:
            pdfr = PdfFileReader(fileobj)
        
        # Find the range of pages to merge
        if pages == None:
            pages = (0, pdfr.getNumPages())
        elif type(pages) in (int, float, str, unicode):
            raise TypeError('"pages" must be a tuple of (start, end)')
        
        srcpages = []
        
        if bookmark:
            bookmark = Bookmark(TextStringObject(bookmark), NumberObject(self.id_count), NameObject('/Fit'))
        
        outline = []
        if import_bookmarks:
            outline = pdfr.getOutlines()
            outline = self._trim_outline(pdfr, outline, pages)
        
        if bookmark:
            self.bookmarks += [bookmark, outline]
        else:
            self.bookmarks += outline
        
        dests = pdfr.namedDestinations
        dests = self._trim_dests(pdfr, dests, pages)
        self.named_dests += dests
        
        # Gather all the pages that are going to be merged
        for i in range(*pages):
            pg = pdfr.getPage(i)
            
            id = self.id_count
            self.id_count += 1
            
            mp = _MergedPage(pg, pdfr, id)
            
            srcpages.append(mp)

        self._associate_dests_to_pages(srcpages)
        self._associate_bookmarks_to_pages(srcpages)
            
        
        # Slice to insert the pages at the specified position
        self.pages[position:position] = srcpages
        
        # Keep track of our input files so we can close them later
        self.inputs.append((fileobj, pdfr, my_file))

예제 #5

0

파일 보기

파일: merger.py 프로젝트: codecats-old/kitty

    def merge(self, position, fileobj, bookmark=None, pages=None, import_bookmarks=True):
        """
        >>> merge(position, file, bookmark=None, pages=None, import_bookmarks=True)
        
        Merges the pages from the source document specified by "file" into the output
        file at the page number specified by "position".
        
        Optionally, you may specify a bookmark to be applied at the beginning of the 
        included file by supplying the text of the bookmark in the "bookmark" parameter.
        
        You may prevent the source document's bookmarks from being imported by
        specifying "import_bookmarks" as False.
        
        You may also use the "pages" parameter to merge only the specified range of 
        pages from the source document into the output document.
        """
        
        # This parameter is passed to self.inputs.append and means
        # that the stream used was created in this method.
        my_file = False
        
        # If the fileobj parameter is a string, assume it is a path
        # and create a file object at that location. If it is a file,
        # copy the file's contents into a StringIO stream object; if 
        # it is a PdfFileReader, copy that reader's stream into a 
        # StringIO stream.
        # If fileobj is none of the above types, it is not modified
        if type(fileobj) in (str, unicode):
            fileobj = file(fileobj, 'rb')
            my_file = True
        elif type(fileobj) == file:
            fileobj.seek(0)
            filecontent = fileobj.read()
            fileobj = StringIO(filecontent)
            my_file = True
        elif type(fileobj) == PdfFileReader:
            orig_tell = fileobj.stream.tell()   
            fileobj.stream.seek(0)
            filecontent = StringIO(fileobj.stream.read())
            fileobj.stream.seek(orig_tell) # reset the stream to its original location
            fileobj = filecontent
            my_file = True
            
        # Create a new PdfFileReader instance using the stream
        # (either file or StringIO) created above
        pdfr = PdfFileReader(fileobj, strict=self.strict)
        
        # Find the range of pages to merge
        if pages == None:
            pages = (0, pdfr.getNumPages())
        elif type(pages) in (int, float, str, unicode):
            raise TypeError('"pages" must be a tuple of (start, end)')
        
        srcpages = []
        if bookmark:
            bookmark = Bookmark(TextStringObject(bookmark), NumberObject(self.id_count), NameObject('/Fit'))
        
        outline = []
        if import_bookmarks:
            outline = pdfr.getOutlines()
            outline = self._trim_outline(pdfr, outline, pages)
        
        if bookmark:
            self.bookmarks += [bookmark, outline]
        else:
            self.bookmarks += outline
        
        dests = pdfr.namedDestinations
        dests = self._trim_dests(pdfr, dests, pages)
        self.named_dests += dests
        
        # Gather all the pages that are going to be merged
        for i in range(*pages):
            pg = pdfr.getPage(i)
            
            id = self.id_count
            self.id_count += 1
            
            mp = _MergedPage(pg, pdfr, id)
            
            srcpages.append(mp)

        self._associate_dests_to_pages(srcpages)
        self._associate_bookmarks_to_pages(srcpages)
            
        
        # Slice to insert the pages at the specified position
        self.pages[position:position] = srcpages
        
        # Keep track of our input files so we can close them later
        self.inputs.append((fileobj, pdfr, my_file))

예제 #6

0

파일 보기

    def merge(self,
              position,
              fileobj,
              bookmark=None,
              pages=None,
              import_bookmarks=True):
        """
        >>> merge(position, file, bookmark=None, pages=None, import_bookmarks=True)
        
        Merges the pages from the source document specified by "file" into the output
        file at the page number specified by "position".
        
        Optionally, you may specify a bookmark to be applied at the beginning of the 
        included file by supplying the text of the bookmark in the "bookmark" parameter.
        
        You may prevent the source document's bookmarks from being imported by
        specifying "import_bookmarks" as False.
        
        You may also use the "pages" parameter to merge only the specified range of 
        pages from the source document into the output document.
        """

        my_file = False
        if type(fileobj) in (str, unicode):
            fileobj = file(fileobj, 'rb')
            my_file = True

        if type(fileobj) == PdfFileReader:
            pdfr = fileobj
            fileobj = pdfr.file
        else:
            pdfr = PdfFileReader(fileobj)

        # Find the range of pages to merge
        if pages == None:
            pages = (0, pdfr.getNumPages())
        elif type(pages) in (int, float, str, unicode):
            raise TypeError('"pages" must be a tuple of (start, end)')

        srcpages = []

        if bookmark:
            bookmark = Bookmark(TextStringObject(bookmark),
                                NumberObject(self.id_count),
                                NameObject('/Fit'))

        outline = []
        if import_bookmarks:
            outline = pdfr.getOutlines()
            outline = self._trim_outline(pdfr, outline, pages)

        if bookmark:
            self.bookmarks += [bookmark, outline]
        else:
            self.bookmarks += outline

        dests = pdfr.namedDestinations
        dests = self._trim_dests(pdfr, dests, pages)
        self.named_dests += dests

        # Gather all the pages that are going to be merged
        for i in range(*pages):
            pg = pdfr.getPage(i)

            id = self.id_count
            self.id_count += 1

            mp = _MergedPage(pg, pdfr, id)

            srcpages.append(mp)

        self._associate_dests_to_pages(srcpages)
        self._associate_bookmarks_to_pages(srcpages)

        # Slice to insert the pages at the specified position
        self.pages[position:position] = srcpages

        # Keep track of our input files so we can close them later
        self.inputs.append((fileobj, pdfr, my_file))

예제 #7

0

파일 보기

파일: python_combine_pdfs_yuva_65.py 프로젝트: agalunstop/Errored_Repo

(options, args) = parser.parse_args()

path=options.path
design=options.module

#Combine the 2 reports
doc = SimpleDocTemplate("%s/taxonomy_report_all_%s.pdf" %(path,design), pagesize=letter)

#####################Merge the gate and FF pdfs into one final pdf##################


output = PdfFileWriter()

if (os.path.isdir('%s' %path)):
	#print "Inside if"		
	input1 = PdfFileReader(file("%s/taxonomy_report_gates_%s.pdf" %(path,design), "rb"))
	# print how many pages input1 has:
	print "\n%s/taxonomy_report_gates_%s.pdf has %s pages." % (path,design,input1.getNumPages())
	# add page 1 from input1 to output document, unchanged. Each pdf has only one page
	output.addPage(input1.getPage(0))

	input2 = PdfFileReader(file("%s/taxonomy_report_FFs_%s.pdf" %(path,design), "rb"))
	# print how many pages input1 has:
	print "\n%s/taxonomy_report_FFs_%s.pdf has %s pages." % (path,design,input1.getNumPages())
	# add page 1 from input1 to output document, unchanged. Each pdf has only one page
	output.addPage(input2.getPage(0))

# finally, write "output" to document-output.pdf
outputStream = file("%s/taxonomy_report_all_%s.pdf" %(path,design), "wb")
output.write(outputStream)
outputStream.close()