Python PdfFileReader.getDocumentInfo Examples

Programming Language: Python

Namespace/Package Name: PyPDF4

Class/Type: PdfFileReader

Method/Function: getDocumentInfo

Examples at hotexamples.com: 2

Python PdfFileReader.getDocumentInfo - 2 examples found. These are the top-rated real-world Python examples of PyPDF4.PdfFileReader.getDocumentInfo, extracted from open source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

PdfFileReader(30)

getNumPages(30)

getPage(30)

decrypt(6)

getDocumentInfo(2)

close(1)

getDestinationPageNumber(1)

getPageLayout(1)

getPageMode(1)

getPageNumber(1)

getXmpMetadata(1)

Example #1

Show file

def printMeta(filename):
    """Print every entry of a PDF's document-information dictionary.

    A header line naming the file is printed first, followed by one
    ``[+] key : value`` line per metadata entry.

    Args:
        filename: Path of the PDF file to inspect. It is opened in binary
            mode and closed again before the function returns.
    """
    # The reader is built on top of the open stream, so every access to the
    # reader and to the metadata values happens inside the ``with`` block;
    # the context manager then closes the handle (it used to be leaked).
    with open(filename, 'rb') as pdfStream:
        pdfFile = PdfFileReader(pdfStream)
        docInfo = pdfFile.getDocumentInfo()
        print('[*] PDF MetaData For: {}'.format(filename))
        # getDocumentInfo() is documented to return None when the PDF has no
        # information dictionary; treat that as "no entries" rather than
        # failing with a TypeError while iterating.
        for metaItem in docInfo or {}:
            print('[+] {0} : {1}'.format(metaItem, docInfo[metaItem]))

Example #2

Show file

def readPDF(inFileName, outFileName):
    """Mask a fixed area of page 1 and blank out "COPY-ONLY" text operands.

    The function works in three steps:

    1. Draw a borderless white rectangle on a one-page overlay PDF.
    2. Merge that overlay onto the first page of ``inFileName`` and serialise
       all pages into an in-memory interim PDF.
    3. Re-read the interim PDF, replace the operand of every ``Tj`` operator
       whose single operand is the byte string spelling "COPY-ONLY" (each
       letter preceded by a NUL byte) with an empty text string, and write
       all pages to ``outFileName``.

    Args:
        inFileName: Path of the PDF to read.
        outFileName: Path of the PDF to create or overwrite.

    Returns:
        ``outFileName`` formatted as a string on success. If any step raises,
        a one-line diagnostic (exception type, message, source file and line)
        is printed and ``None`` is returned; the exception is not propagated.
    """
    try:
        # --- Step 1: overlay page with a borderless white rectangle -------
        overlayBuffer = io.BytesIO()
        can = canvas.Canvas(overlayBuffer, pagesize=letter)
        # reportlab expects colour components in the range 0..1, so white is
        # (1, 1, 1) rather than the 0..255 style (255, 255, 255).
        can.setFillColorRGB(1, 1, 1)
        can.rect(450, 550, 100, 40, fill=1, stroke=0)
        can.save()

        # Rewind so the freshly written overlay can be parsed as a PDF.
        overlayBuffer.seek(0)
        overlayPdf = PdfFileReader(overlayBuffer)

        # --- Step 2: stamp the overlay onto page 1 ------------------------
        # The interim PDF is held in memory. Writing it to a fixed
        # "output.pdf" in the working directory could clobber an unrelated
        # file and would collide with an ``outFileName`` of the same name.
        interimBuffer = io.BytesIO()
        with open(inFileName, 'rb') as inStream:
            existingPdf = PdfFileReader(inStream)
            existingPdf.getPage(0).mergePage(overlayPdf.getPage(0))

            interimOutput = PdfFileWriter()
            for n in range(existingPdf.getNumPages()):
                interimOutput.addPage(existingPdf.getPage(n))

            # Serialise while the input file is still open, because the pages
            # being written belong to a reader backed by that stream.
            interimOutput.write(interimBuffer)

        # --- Step 3: blank the "COPY-ONLY" text and write the result ------
        interimBuffer.seek(0)
        pdfReader = PdfFileReader(interimBuffer)
        pdfInfo = pdfReader.getDocumentInfo()
        numPages = pdfReader.getNumPages()

        # NOTE(review): this metadata snapshot is collected but not used or
        # returned anywhere in this function; kept so the same metadata reads
        # still take place.
        PDFAllInfo = {
            'author': pdfInfo.author,
            'creator': pdfInfo.creator,
            'producer': pdfInfo.producer,
            'subject': pdfInfo.subject,
            'title': pdfInfo.title,
            'num_pages': numPages,
        }

        # Loop invariants: the operator to look for and the exact operand
        # list that marks the text to be blanked.
        textShowOperator = b_("Tj")
        copyOnlyOperands = [
            b'\x00C\x00O\x00P\x00Y\x00-\x00O\x00N\x00L\x00Y'
        ]

        pdfWriter = PdfFileWriter()
        for pageNum in range(numPages):
            pageObject = pdfReader.getPage(pageNum)
            pdfWriter.addPage(pageObject)

            # A page without a /Contents entry has nothing to rewrite; it is
            # passed through unchanged instead of aborting the whole run.
            if '/Contents' not in pageObject:
                continue

            # Parse the page's /Contents into operations that can be edited.
            pageContent = ContentStream(pageObject['/Contents'], pdfReader)

            for operands, operator in pageContent.operations:
                if operator == textShowOperator and operands == copyOnlyOperands:
                    operands[0] = TextStringObject('')

            # Point the page at the edited content stream. The writer holds
            # this same page object, so the change is part of the output.
            pageObject[NameObject('/Contents')] = pageContent

        with open(outFileName, "wb") as outStream:
            pdfWriter.write(outStream)

        return f"{outFileName}"

    except Exception as e:
        # Best-effort contract: report where the failure happened and signal
        # it to the caller with None instead of raising.
        exc_tb = e.__traceback__
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        print(f"{type(e)}, {e} , {fname} : {exc_tb.tb_lineno}")
        return None