Ejemplos de PdfFileReader.getDocumentInfo en Python

Lenguaje de programación: Python

Namespace/Package Name: PyPDF3

Clase / Tipo: PdfFileReader

Método / Función: getDocumentInfo

Ejemplos en hotexamples.com: 6

Python PdfFileReader.getDocumentInfo: 6 ejemplos encontrados. Estos son los ejemplos reales en Python mejor valorados de PyPDF3.PdfFileReader.getDocumentInfo, extraídos de proyectos de código abierto. Puedes valorar los ejemplos para ayudarnos a mejorar su calidad.

Métodos usados con frecuencia

Mostrar / Ocultar

PdfFileReader(30)

getPage(30)

getNumPages(20)

getDocumentInfo(6)

decrypt(5)

getXmpMetadata(3)

getFields(1)

Ejemplo n.º 1

Mostrar archivo

def extract_information(pdf_path, page_number=92, output_file="file.mp3",
                        lang='en'):
    """Return a PDF's document info and save one page of it as speech.

    The text of page ``page_number`` is extracted, printed, and (when it is
    not empty) synthesized with gTTS into ``output_file``.

    Args:
        pdf_path: Path of the PDF file to read.
        page_number: Zero-based index passed to ``getPage``. Defaults to 92,
            the page the original implementation hard-coded.
        output_file: Path the generated MP3 is saved to.
        lang: Language code handed to gTTS.

    Returns:
        Whatever ``PdfFileReader.getDocumentInfo()`` returned for the file.

    Raises:
        Any exception raised by ``open``, ``PdfFileReader``, ``getPage`` or
        gTTS is propagated unchanged (e.g. a page index past the end of the
        document).
    """
    with open(pdf_path, 'rb') as f:
        pdf = PdfFileReader(f)
        information = pdf.getDocumentInfo()
        # Extract the page text a single time and reuse it; the original
        # ran the extraction twice just to print the same string.
        page_text = pdf.getPage(page_number).extractText().strip()

    print(page_text)

    # Skip synthesis when the page produced no text, since there is nothing
    # to speak.
    if page_text:
        # The language must be passed by keyword: in current gTTS releases
        # the second positional parameter is `tld`, not `lang`.
        tts = gTTS(page_text, lang=lang)
        tts.save(output_file)

    return information

Ejemplo n.º 2

Mostrar archivo

def pdf_meta(tmp_file_path, original_file_name, original_file_extension):
    """Build a ``BookMeta`` for a PDF from its XMP and Info metadata.

    When the module-level ``use_pdf_meta`` flag is truthy the file is opened
    and both the document Info dictionary and the XMP data (via
    ``parse_xmp``) are read. XMP values take precedence; empty fields are
    then filled from the Info dictionary; finally the author falls back to
    ``'Unknown'`` and the title to ``original_file_name``.

    Args:
        tmp_file_path: Path of the PDF file on disk.
        original_file_name: Name used as the title of last resort and passed
            to ``pdf_preview``.
        original_file_extension: Stored as ``extension`` on the result.

    Returns:
        A ``BookMeta`` instance describing the file.
    """
    doc_info = None
    xmp_info = None

    if use_pdf_meta:
        with open(tmp_file_path, 'rb') as f:
            pdf_file = PdfFileReader(f)
            doc_info = pdf_file.getDocumentInfo()
            xmp_info = parse_xmp(pdf_file)

    if xmp_info:
        author = ' & '.join(split_authors(xmp_info['author']))
        title = xmp_info['title']
        subject = xmp_info['subject']
        tags = xmp_info['tags']
        languages = xmp_info['languages']
        publisher = xmp_info['publisher']
    else:
        # Start with an empty author so the Info-dictionary fallback below
        # can actually run; seeding it with 'Unknown' made that branch
        # unreachable.
        author = ''
        title = ''
        subject = ""
        tags = ""
        languages = [""]
        publisher = ""

    if doc_info:
        if author == '' and doc_info.author:
            author = ' & '.join(split_authors([doc_info.author]))
        if title == '' and doc_info.title:
            title = doc_info.title
        if subject == '':
            subject = doc_info.subject or ""
        if tags == '' and '/Keywords' in doc_info:
            keywords = doc_info['/Keywords']
            # Keywords may come back as raw bytes; normalise to text.
            if isinstance(keywords, bytes):
                tags = keywords.decode('utf-8')
            else:
                tags = keywords

    # Last-resort defaults. They apply only when nothing better was found,
    # so a title obtained from XMP is no longer overwritten by the file name
    # just because the Info dictionary is missing.
    if not author:
        author = u'Unknown'
    if not title:
        title = original_file_name

    return BookMeta(
        file_path=tmp_file_path,
        extension=original_file_extension,
        title=title,
        author=author,
        cover=pdf_preview(tmp_file_path, original_file_name),
        description=subject,
        tags=tags,
        series="",
        series_id="",
        languages=','.join(languages),
        publisher=publisher,
        pubdate="",
        identifiers=[])

Ejemplo n.º 3

Mostrar archivo

def pdf_parser(s):
    """Extract the document Info dictionary from raw PDF bytes.

    Args:
        s: The PDF file content as bytes; surrounding whitespace is
            stripped before parsing.

    Returns:
        A dict mapping each Info key with its first character removed
        (the keys are looked up as e.g. ``/Keywords``) to its value. An
        empty dict is returned when the document has no Info dictionary,
        when decryption is not implemented for the file, or when the empty
        password does not unlock it.
    """
    s = s.strip()
    # The reader is given `fp` as its warning destination, so the handle
    # has to stay open for as long as the reader is used, not only while
    # it is constructed.
    with open(os.devnull, 'w') as fp:
        pdf = PdfFileReader(BytesIO(s), strict=False, warndest=fp)
        if pdf.isEncrypted:
            try:
                unlocked = pdf.decrypt('')
            except NotImplementedError:
                return {}
            # decrypt() reports a rejected password with a falsy result;
            # treat it like the unsupported-encryption case.
            if not unlocked:
                return {}
        meta = pdf.getDocumentInfo() or {}
        return {key[1:]: meta.get(key) for key in meta}

Ejemplo n.º 4

Mostrar archivo

Archivo: pdfmeta.py Proyecto: c002/BabySploit

def start():
    """Interactively print the Info metadata of a PDF stored in ``pdfs/``.

    Lists the entries of the ``pdfs/`` directory (skipping any whose path
    contains ``emptyfile``), asks the user for a file name, appends
    ``.pdf`` when it is missing, and prints every key/value pair of the
    document's Info dictionary.
    """
    from PyPDF3 import PdfFileReader
    import glob
    import os
    print("Put PDF file in pdfs/")
    print("Which PDF file would you like to read the meta data for?")
    for d in glob.iglob("pdfs/*"):
        if "emptyfile" not in d:
            # Show only the file name; the original call to str.replace was
            # missing its replacement argument and raised TypeError.
            print(os.path.basename(d))
    ans = str(input("> "))
    if not ans.lower().endswith(".pdf"):
        ans = ans + ".pdf"
    # The listing above shows bare file names, so resolve the answer inside
    # pdfs/ (basename also tolerates an answer typed with the directory).
    pdf_path = os.path.join("pdfs", os.path.basename(ans))
    # PdfFileReader needs an open binary stream; the original passed a
    # (name, mode) tuple through a non-existent `file` keyword.
    with open(pdf_path, 'rb') as fh:
        pdffile = PdfFileReader(fh)
        # A document without an Info dictionary yields None.
        docInfo = pdffile.getDocumentInfo() or {}
        for metaItem in docInfo:
            # format() copes with values that are not plain strings.
            print("- {}:{}".format(metaItem, docInfo[metaItem]))
    print("\n")

Ejemplo n.º 5

Mostrar archivo

def pdfHandler(file_dir):
    """Print page count, form fields type, Info entries and XMP relations.

    Args:
        file_dir: Path of the PDF file to inspect.

    The function only prints; it returns ``None``.
    """
    # Open the file in a context manager so the handle is closed once the
    # inspection is finished (the original left it open).
    with open(file_dir, 'rb') as pdf_stream:
        input1 = PdfFileReader(pdf_stream)

        # NOTE(review): the message names 'document1.pdf' regardless of
        # file_dir; kept unchanged to preserve the existing output.
        print('document1.pdf has {} pages.'.format(str(input1.getNumPages())))

        fields = input1.getFields()
        print(type(fields))

        documentInfo = input1.getDocumentInfo()
        print(type(documentInfo))
        if documentInfo is not None:
            for key in documentInfo.keys():
                print('{} : {}'.format(key, documentInfo.get(key)))

        metaData = input1.getXmpMetadata()
        print(type(metaData))
        if metaData is not None:
            for relation in metaData.dc_relation:
                print('relation: {}'.format(relation))

Ejemplo n.º 6

Mostrar archivo

Archivo: admin_views.py Proyecto: bikeshedder/shark

def invoice_pdf(request, number, correction=False):
    """Render an invoice (or its correction) as a PDF HTTP response.

    Args:
        request: The incoming HTTP request; a ``download`` key in
            ``request.GET`` adds an attachment ``Content-Disposition``.
        number: Invoice number used to look up the ``Invoice``.
        correction: When true, the invoice's ``correction`` object is
            rendered instead of the invoice itself.

    Returns:
        An ``HttpResponse`` with content type ``application/pdf``. When
        ``settings.SHARK['INVOICE']['BACKGROUND']`` is set, every content
        page is merged onto a background page (the first page uses
        ``FIRST_PAGE``, all later pages use ``LATER_PAGE``).
    """
    invoice = get_object_or_404(Invoice, number=number)
    if correction:
        invoice = invoice.correction
    from reportlab.lib.units import mm
    from reportlab.platypus import Paragraph
    from reportlab.platypus.flowables import Spacer
    from reportlab.platypus.flowables import KeepTogether

    from dinbrief.document import Document
    from dinbrief.invoice import ItemTable, TotalTable
    from dinbrief.styles import styles
    from dinbrief.template import BriefTemplate

    with trans_override(invoice.language):

        response = HttpResponse(content_type='application/pdf')
        if 'download' in request.GET:
            filename = '%s.pdf' % invoice.number
            response[
                'Content-Disposition'] = 'attachment; filename=%s' % filename

        if invoice.type == Invoice.TYPE_INVOICE:
            # INVOICE_TERMS is either a callable producing the flowables or
            # an iterable of strings to wrap in Paragraphs.
            if callable(INVOICE_TERMS):
                terms = INVOICE_TERMS(invoice)
            else:
                terms = [
                    Paragraph(term, styles['Terms']) for term in INVOICE_TERMS
                ]
        else:
            terms = []

        template = BriefTemplate()
        document = Document(
            sender=invoice.sender_lines,
            recipient=invoice.recipient_lines,
            date=date_format(invoice.created, 'SHORT_DATE_FORMAT'),
            content=[
                Paragraph(
                    '%s %s' %
                    (invoice.get_type_display() if not correction else
                     gettext(u'Correction of invoice'), invoice.number),
                    styles['Subject']),
                Spacer(template.CONTENT_WIDTH, 2 * mm),
                ItemTable(template, invoice),
                KeepTogether(TotalTable(template, invoice)),
                Spacer(template.CONTENT_WIDTH, 10 * mm),
            ] + terms)

        background = settings.SHARK['INVOICE']['BACKGROUND']
        if background:
            with tempfile.TemporaryFile() as tmp:
                # Create content in a temporary file
                template.render(document, tmp)
                # Combine background with the content
                writer = PdfFileWriter()
                content = PdfFileReader(tmp)
                info_dict = writer._info.getObject()
                # Guard against a rendered PDF without an Info dictionary,
                # for which getDocumentInfo() returns None.
                content_info = content.getDocumentInfo()
                if content_info:
                    info_dict.update(content_info)
                # Keep both background files open until the merged PDF has
                # been written, then close them; the original never closed
                # these handles.
                with open(background['FIRST_PAGE'], 'rb') as first_fh, \
                        open(background['LATER_PAGE'], 'rb') as later_fh:
                    first_bg = PdfFileReader(first_fh)
                    later_bg = PdfFileReader(later_fh)
                    bg = [first_bg.getPage(0), later_bg.getPage(0)]
                    for i, page in enumerate(content.pages):
                        # Index 0 for the first page, 1 for every later one.
                        page.mergePage(bg[min(i, 1)])
                        page.compressContentStreams()
                        writer.addPage(page)
                    writer.write(response)
        else:
            # Render content directly to the HTTP response object if no
            # background images are configured.
            template.render(document, response)

    return response