Python PdfFileReader Examples, pdfrw.PdfFileReader Python Examples

Example #1

0

Show file

def fill_form(targetfile, outputfile, row_data, mapping):
    '''
    Fill the form widgets of a PDF from one row of data and write the result.

    Widgets whose field name contains 'Check Box' get their /AS entry set to
    Yes or No when the row value is 'yes' or 'no' (any other value leaves the
    widget untouched). Every other mapped widget gets the stringified row
    value as its /V entry. Pages that carry no annotations are skipped.

    :param targetfile: Path to pdf file that needs to be filled
    :param outputfile: Output file name, joined onto OUTPUT_FOLDER
    :param row_data: Row data from excel file, indexed by the values of mapping
    :param mapping: Dictionary that maps a pdf form field name to the
        row_data key holding its value
    '''
    logger = logging.getLogger(__name__ + '.fill_form')
    template = pdfrw.PdfFileReader(targetfile)
    for page in template.pages:
        # pdfrw returns None for a missing key, so a page without any
        # annotations must be treated as an empty list instead of iterated.
        for item in page['/Annots'] or []:
            if item['/Subtype'] != '/Widget' or not item['/T']:
                continue
            # Drop the first and last character of the raw field name
            # (presumably the PDF string delimiters -- confirm for hex names).
            key = item['/T'][1:-1]
            if key not in mapping:
                continue
            value = row_data[mapping[key]]
            if 'Check Box' in key:
                if value == 'yes':
                    item.update(pdfrw.PdfDict(AS=pdfrw.PdfName('Yes')))
                elif value == 'no':
                    item.update(pdfrw.PdfDict(AS=pdfrw.PdfName('No')))
            else:
                logger.debug('Textbox= {0}  Value= {1}'.format(
                    str(key), str(value)))
                item.update(pdfrw.PdfDict(V=str(value)))
    output_path = path.join(OUTPUT_FOLDER, outputfile)
    logger.info('Writing to {}'.format(output_path))
    pdfrw.PdfWriter().write(output_path, template)

Example #2

0

Show file

def simplify_pdf(pdf: str, select: str, outfile: typing.TextIO):
    """
    Write a reduced copy of a PDF that keeps only the pages chosen by
    ``calculate_pages``.

    ``select`` is handed to ``calculate_pages`` unchanged; per the original
    description it names whether the first or the last page of each run of
    same-named pages is kept. The surviving pages are written to ``outfile``.
    """
    source = pdfrw.PdfFileReader(pdf)
    writer = pdfrw.PdfFileWriter(trailer=source)
    kept_indices = calculate_pages(source, select)
    for page_index in kept_indices:
        kept_page = source.getPage(page_index)
        writer.addPage(kept_page)
    writer.write(outfile)

Example #3

0

Show file

def list_pdf_fields():
    """Print every named form widget found in the PDF template.

    For each widget annotation that has a field name, one line is printed
    with the 1-based page number, the field name, the entry stored under
    ANNOT_FORM_type and the entry stored under ANNOT_VAL_KEY. Pages that
    carry no annotations are skipped.
    """
    template = pdfrw.PdfFileReader(PDF_TEMPLATE_PATH)

    for page_number, page in enumerate(template.pages, start=1):
        # pdfrw returns None for a missing key; treat a page without
        # annotations as an empty list rather than iterating None.
        for annotation in page[ANNOT_KEY] or []:
            if annotation[SUBTYPE_KEY] != WIDGET_SUBTYPE_KEY:
                continue
            if not annotation[ANNOT_FIELD_KEY]:
                continue
            # Drop the first and last character of the raw field name
            # (presumably the PDF string delimiters).
            key = annotation[ANNOT_FIELD_KEY][1:-1]
            print(f' Page {page_number}: {key}, '
                  f'type {annotation[ANNOT_FORM_type]}, '
                  f'current value: {annotation[ANNOT_VAL_KEY]}')

Example #4

0

Show file

 def create_watermark(cls, doc_pdf: bytes) -> bytes:
     """Merge the class-level watermark into every page of a PDF.

     :param doc_pdf: the source document as raw PDF bytes
     :return: ``doc_pdf`` itself when ``cls._watermark`` is None, otherwise
         the bytes of the re-written document with the watermark merged
         into each page (``cls._underneath`` is passed as ``prepend``)
     """
     watermark = cls._watermark
     if watermark is None:
         return doc_pdf

     document = pdfrw.PdfFileReader(fdata=doc_pdf)
     for page in document.pages:
         merged = pdfrw.PageMerge(page).add(
             watermark, prepend=cls._underneath)
         merged.render()

     # Serialise into memory; the buffer is closed once its content is taken.
     with io.BytesIO() as buffer:
         pdfrw.PdfWriter(buffer, trailer=document).write()
         return buffer.getvalue()

Example #5

0

Show file

def fill_pdf():
    """Fill the PDF template once per data entry and write one PDF each.

    ``transform_data()`` supplies a mapping of output name -> raw row;
    ``transform_data_dict`` turns a raw row into a mapping keyed by the page
    number as a string. For every page that has data, text-type widgets whose
    field name matches a data key get that value written to both their V and
    AP entries. Pages without data, and pages without annotations, are
    skipped. Each result is written to OUTPUT_FOLDER as ``<key>.pdf``.
    """
    data_dict = transform_data()
    template = pdfrw.PdfFileReader(PDF_TEMPLATE_PATH)
    pages = template.pages

    # NOTE(review): the same template object is mutated for every key, so a
    # field filled for one output keeps its value in later outputs unless it
    # is overwritten -- confirm whether the template should be re-read per key.
    for key, raw_row in data_dict.items():
        data_row = transform_data_dict(raw_row)
        for page_number, page in enumerate(pages, start=1):
            page_data = data_row.get(str(page_number))
            if page_data is None:
                print(f'Skipping page {page_number} in file {key}...')
                continue
            # Keep only the part of each data key after its first dot.
            page_data = {k.split('.')[1]: v for k, v in page_data.items()}
            # pdfrw returns None for a missing key; a page without
            # annotations must not be iterated as None.
            for annotation in page[ANNOT_KEY] or []:
                if annotation[SUBTYPE_KEY] != WIDGET_SUBTYPE_KEY:
                    continue
                if not annotation[ANNOT_FIELD_KEY]:
                    continue
                pdf_key = annotation[ANNOT_FIELD_KEY][1:-1]
                if pdf_key not in page_data:
                    continue
                # Only text fields are filled; button fields are left as-is.
                if annotation[ANNOT_FORM_type] != ANNOT_FORM_text:
                    continue
                new_value = page_data[pdf_key]
                print(f'Updating file {key}:'
                      f' page {page_number}:'
                      f'field {pdf_key},'
                      f' new value: {new_value}')
                annotation.update(
                    pdfrw.PdfDict(V=new_value, AP=new_value))

        # Sets the NeedAppearances flag on the form so viewers regenerate
        # the field appearances from the new values.
        template.Root.AcroForm.update(
            pdfrw.PdfDict(NeedAppearances=pdfrw.PdfObject('true')))
        output_pdf_path = f'{OUTPUT_FOLDER}{PATH_SEPARATOR}{key}.pdf'
        pdfrw.PdfWriter().write(output_pdf_path, template)

Example #6

0

Show file

File: pdf-sign-cms-pdfrw.py Project: tmudunuri/endesive

 def __init__(self, fname, password):
     """Load a PDF file into memory and parse it.

     Reads the whole file into ``self.datau``, extracts the integer that
     follows the last ``startxref`` keyword into ``self.startprev``, and
     parses the bytes with ``pdf.PdfFileReader`` into ``self.prev``.

     :param fname: path of the PDF file to open
     :param password: document password; an empty string is stored as None
         on ``self.password``, while the reader receives the value as given
     """
     self.fname = fname
     self.password = None if password == "" else password
     self.compress = False
     self.objects = []
     with open(fname, "rb") as source:
         self.datau = source.read()
     self.startdata = len(self.datau)
     self.annotbutton = False

     # Locate the run of decimal digits after the last "startxref" keyword.
     digits = b"0123456789"
     keyword = b"startxref"
     keyword_at = self.datau.rfind(keyword)
     assert keyword_at != -1
     first = keyword_at + len(keyword)
     while self.datau[first] not in digits:
         first += 1
     past_last = first
     while self.datau[past_last] in digits:
         past_last += 1
     offset_text = self.datau[first:past_last].decode()
     self.startprev = int(offset_text, 10)

     # Decryption is requested whenever the caller passed a non-None password.
     self.prev = pdf.PdfFileReader(
         fdata=self.datau,
         decrypt=(password is not None),
         password=password,
     )

Example #7

0

Show file

 def get_page_dimensions(self):
     """Read the page count and first-page size of ``self.filename``.

     Sets ``self.num_pages``, ``self.page_width`` and ``self.page_height``.
     The size is derived from the first page's MediaBox, which is a
     rectangle ``[left, bottom, right, top]``; width and height are the
     differences of its corners so a box that does not start at the origin
     is measured correctly.
     """
     pdf = pdfrw.PdfFileReader(self.filename)
     pages = pdf.pages
     self.num_pages = len(pages)
     first_page = pages[0]
     # MediaBox may be defined on an ancestor page-tree node instead of the
     # page itself; fall back to the inherited value in that case.
     media_box = first_page.MediaBox or first_page.inheritable.MediaBox
     left, bottom, right, top = (float(coord) for coord in media_box[:4])
     self.page_width = right - left
     self.page_height = top - bottom

Example #8

0

Show file

File: mitpay.py Project: aleksejspopovs/mitpay

def main(filename):
    """Print a plain-text rendering of the billing statement inside a PDF.

    The statement data is read from the XFA form stream of the PDF, parsed
    as XML, and printed with the module's ``print_*`` / ``format_*`` helpers:
    a header, the student and statement summary, every line item, and the
    amount due. On failure to open the file or to locate the statement data,
    an explanatory message and the traceback are printed and the function
    returns without raising.

    :param filename: path of the statement PDF
    """
    try:
        pdf = pdfrw.PdfFileReader(filename)
    except Exception:
        print('Error: couldn\'t open statement.')
        print(
            'Are you sure the file "{}" exists? Is it a PDF?'.format(filename))
        print()
        print('Technical information:')
        traceback.print_exc()
        return

    try:
        xfa = pdf.Root.AcroForm.XFA.stream

        # if you want to explore the format yourself, I suggest printing
        # the contents of the xfa variable --- it's just a string containing
        # XML, some of which contains statement information and some of which
        # contains JavaScript describing how the data is to be displayed

        tree = ET.fromstring(xfa)
        datasets = tree.find(
            '{http://www.xfa.org/schema/xfa-data/1.0/}datasets')
        statement = datasets.find('*/TouchNet/BillingStmt')
        # find() returns None instead of raising when nothing matches; turn
        # that into an error here so it is reported by the handler below
        # rather than crashing later on the first statement.find() call.
        if statement is None:
            raise ValueError(
                'no TouchNet/BillingStmt element in the XFA datasets')
    except Exception:
        print('Error: couldn\'t find statement data in PDF.')
        print('Are you sure this PDF is a statement from the new MITPAY?')
        print()
        print('Technical information:')
        traceback.print_exc()
        return

    student_id = statement.find('Student/StuID').text
    student_name = statement.find('Student/FullName').text
    statement_term = format_term(statement.find('TermCode').text)

    statement_date = statement.find('StmtDt').text
    statement_due = statement.find('DueDt').text

    amount_due = statement.find('AmtDue').text

    print_line()
    print_center('MIT BILLING STATEMENT')
    print_center('UNOFFICIAL. Contact SFS in case of discrepancies.')
    print_center(
        'This report should hopefully include all personalized information')
    print_center('from the PDF except the student\'s address. No guarantees.')
    print_center(
        'When paying by mailing a check, you MUST use the original PDF.')
    print_center('If amount due is negative, you are due a refund.')

    print_line()
    print_center('{} (ID: {})'.format(student_name, format_id(student_id)))
    print_pair('Statement generated on {}'.format(format_date(statement_date)),
               'Due on {}'.format(format_date(statement_due)))
    print_pair('Term: {}'.format(statement_term),
               'Amount due: {}'.format(format_amount(amount_due, False)))

    print_line()

    for line in statement.findall('LineItem'):
        kind = line.find('Type').text
        description = line.find('Desc').text
        amount = line.find('Amt').text  # might be None

        # Transaction date and term are optional on a line item.
        date_node = line.find('TransDt')
        date = date_node.text if date_node is not None else None
        term_node = line.find('TermCode')
        term = term_node.text if term_node is not None else None

        formatted_date = ''
        if (date is not None) and (term is not None):
            formatted_date = ' ({}, {})'.format(format_term(term),
                                                format_date(date))
        elif date is not None:
            formatted_date = ' ({})'.format(format_date(date))
        elif term is not None:
            formatted_date = ' ({})'.format(format_term(term))

        complete_description = '{desc}{date}'.format(desc=description,
                                                     date=formatted_date)

        if amount is None:
            print_center(complete_description)
        else:
            # NB: sometimes Charges have negative amounts
            # e.g. "Initial Housing Average Fee Cr" (freshmen getting their
            # estimated housing fee back) is a charge for some reason.
            invert_sign = (kind == 'Charge') or (kind == 'FutureChgs') \
               or kind.endswith('Chg')

            print_pair(complete_description,
                       format_amount(amount, True, invert_sign))

    print_line()
    print_pair('Amount due', format_amount(amount_due, False))