def fill_form(targetfile, outputfile, row_data, mapping):
    '''
    Fill the form fields of a PDF from one row of spreadsheet data.

    Every widget annotation whose field name is a key of ``mapping`` is
    updated. Fields whose name contains ``'Check Box'`` get their appearance
    state (``/AS``) set to ``Yes`` or ``No`` when the row value is ``'yes'``
    or ``'no'``; any other row value leaves the box untouched. All other
    mapped fields get the stringified row value as ``/V``. The filled
    document is written to ``OUTPUT_FOLDER/outputfile``.

    :param targetfile: Path to pdf file that needs to be filled
    :param outputfile: Name of the output file, joined onto OUTPUT_FOLDER
    :param row_data: Row data from excel file, indexed by the values of
        ``mapping``
    :param mapping: Dictionary that maps a pdf form field name to the
        key of the matching entry in ``row_data``
    '''
    logger = logging.getLogger(__name__ + '.fill_form')
    template = pdfrw.PdfFileReader(targetfile)

    for page in template.pages:
        # pdfrw returns None for a missing key, so a page that carries no
        # annotations at all must not be iterated directly.
        for item in page['/Annots'] or ():
            if item['/Subtype'] != '/Widget' or not item['/T']:
                continue
            # Drop the first and last character of the field name (pdfrw
            # keeps the PDF string delimiters, e.g. '(Name)').
            key = item['/T'][1:-1]
            if key not in mapping:
                continue

            value = row_data[mapping[key]]
            if 'Check Box' in key:
                if value == 'yes':
                    item.update(pdfrw.PdfDict(AS=pdfrw.PdfName('Yes')))
                elif value == 'no':
                    item.update(pdfrw.PdfDict(AS=pdfrw.PdfName('No')))
            else:
                logger.debug('Textbox= {0} Value= {1}'.format(
                    str(key), str(value)))
                item.update(pdfrw.PdfDict(V=str(value)))

    output_path = path.join(OUTPUT_FOLDER, outputfile)
    logger.info('Writing to {}'.format(output_path))
    pdfrw.PdfWriter().write(output_path, template)
def simplify_pdf(pdf: str, select: str, outfile: typing.TextIO):
    """
    Write a copy of a PDF that keeps only the pages chosen by ``select``.

    The page indices to keep come from ``calculate_pages(in_pdf, select)``;
    per the original description, ``select`` picks the first or the last
    page out of each run of pages sharing the same name. The kept pages are
    added, in the order yielded, to a writer that reuses the input document
    as its trailer, and the result is written to ``outfile``.
    """
    source = pdfrw.PdfFileReader(pdf)
    writer = pdfrw.PdfFileWriter(trailer=source)

    for page_index in calculate_pages(source, select):
        kept_page = source.getPage(page_index)
        writer.addPage(kept_page)

    writer.write(outfile)
def list_pdf_fields():
    """
    Print every named form field found in the template PDF.

    Reads ``PDF_TEMPLATE_PATH`` and, for each widget annotation that has a
    field name, prints the 1-based page number, the field name (with its
    first and last character removed), its form type and its current value.
    Pages that have no annotations are skipped.
    """
    template = pdfrw.PdfFileReader(PDF_TEMPLATE_PATH)

    for page, page_object in enumerate(template.pages, start=1):
        # pdfrw yields None for a missing key; a page without annotations
        # would otherwise raise TypeError when iterated.
        for annotation in page_object[ANNOT_KEY] or ():
            if annotation[SUBTYPE_KEY] != WIDGET_SUBTYPE_KEY:
                continue
            field_name = annotation[ANNOT_FIELD_KEY]
            if not field_name:
                continue
            key = field_name[1:-1]
            print(f' Page {page}: {key}, '
                  f'type {annotation[ANNOT_FORM_type]}, '
                  f'current value: {annotation[ANNOT_VAL_KEY]}')
def create_watermark(cls, doc_pdf: bytes) -> bytes:
    """
    Merge the class watermark into every page of an in-memory PDF.

    When ``cls._watermark`` is ``None`` the input bytes are returned
    unchanged. Otherwise the document is parsed from ``doc_pdf``, the
    watermark is merged into each page with ``cls._underneath`` passed as
    PageMerge's ``prepend`` flag (presumably placing it beneath the page
    content when true), and the re-serialized PDF is returned as bytes.
    """
    watermark = cls._watermark
    if watermark is None:
        return doc_pdf

    document = pdfrw.PdfFileReader(fdata=doc_pdf)
    for page in document.pages:
        merger = pdfrw.PageMerge(page)
        merger.add(watermark, prepend=cls._underneath)
        merger.render()

    with io.BytesIO() as buffer:
        pdfrw.PdfWriter(buffer, trailer=document).write()
        return buffer.getvalue()
def fill_pdf():
    """
    Produce one filled copy of the template PDF per record.

    ``transform_data()`` supplies the records; each record is passed through
    ``transform_data_dict`` and is then expected to map the 1-based page
    number (as a string) to a dict of field values. The keys of that dict
    are reduced to their second dot-separated component before being
    matched against the PDF field names. Only text fields are filled.

    For every record the template is read afresh, so values written for one
    record can never leak into the output of another record that lacks data
    for some page or field. ``NeedAppearances`` is set on the AcroForm once
    per output, and the result is written to
    ``OUTPUT_FOLDER/<record key>.pdf``.
    """
    data_dict = transform_data()

    for key, raw_row in data_dict.items():
        data_row = transform_data_dict(raw_row)
        # Re-read the template for each output: the annotations are
        # modified in place, so a shared template would carry the previous
        # record's values into pages or fields this record skips.
        template = pdfrw.PdfFileReader(PDF_TEMPLATE_PATH)

        for page, page_object in enumerate(template.pages, start=1):
            page_data = data_row.get(str(page))
            if page_data is None:
                print(f'Skipping page {page} in file {key}...')
                continue
            page_data = {k.split('.')[1]: v for k, v in page_data.items()}

            # pdfrw yields None for a missing key; guard pages that have
            # no annotations.
            for annotation in page_object[ANNOT_KEY] or ():
                if annotation[SUBTYPE_KEY] != WIDGET_SUBTYPE_KEY:
                    continue
                if not annotation[ANNOT_FIELD_KEY]:
                    continue
                pdf_key = annotation[ANNOT_FIELD_KEY][1:-1]
                if pdf_key not in page_data:
                    continue

                # TODO: a branch for button fields (setting /V and /AS) was
                # disabled in the original; only text fields are handled.
                if annotation[ANNOT_FORM_type] == ANNOT_FORM_text:
                    new_value = page_data[pdf_key]
                    print(f'Updating file {key}:'
                          f' page {page}:'
                          f'field {pdf_key},'
                          f' new value: {new_value}')
                    # NOTE(review): /AP is normally an appearance
                    # dictionary; the value is kept as the original wrote
                    # it - confirm this is intended.
                    annotation.update(
                        pdfrw.PdfDict(V=new_value, AP=new_value))

        # Set once per document rather than per field.
        template.Root.AcroForm.update(
            pdfrw.PdfDict(NeedAppearances=pdfrw.PdfObject('true')))
        output_pdf_path = f'{OUTPUT_FOLDER}{PATH_SEPARATOR}{key}.pdf'
        pdfrw.PdfWriter().write(output_pdf_path, template)
def __init__(self, fname, password):
    """
    Load a PDF file into memory and parse it as the previous revision.

    Stores the raw file bytes in ``self.datau`` and their length in
    ``self.startdata``, reads the decimal number that follows the last
    ``startxref`` keyword into ``self.startprev``, and opens the same bytes
    with ``pdf.PdfFileReader`` as ``self.prev``.

    :param fname: path of the PDF file to read
    :param password: password handed to the reader; an empty string is
        stored as ``None`` in ``self.password``
    """
    self.fname = fname
    self.password = None if password == "" else password
    self.compress = False
    self.objects = []

    with open(fname, "rb") as stream:
        self.datau = stream.read()
    self.startdata = len(self.datau)
    self.annotbutton = False

    marker = b"startxref"
    digits = b"0123456789"

    found = self.datau.rfind(marker)
    assert found != -1

    # Skip whatever separates the keyword from the number ...
    begin = found + len(marker)
    while self.datau[begin] not in digits:
        begin += 1
    # ... then consume the run of decimal digits.
    end = begin
    while self.datau[end] in digits:
        end += 1

    self.startprev = int(self.datau[begin:end].decode(), 10)

    # NOTE(review): the raw ``password`` argument is used here, not
    # ``self.password``, so an empty string still requests decryption -
    # confirm this is intended.
    self.prev = pdf.PdfFileReader(
        fdata=self.datau,
        decrypt=(password is not None),
        password=password,
    )
def get_page_dimensions(self):
    """
    Record the page count and the size of the first page of the PDF.

    Reads ``self.filename`` and sets ``self.num_pages``,
    ``self.page_width`` and ``self.page_height``. The size is taken from
    the first page's media box as the distance between its corners, so it
    stays correct when the box does not start at the origin. The media box
    is looked up through pdfrw's ``inheritable`` accessor because a page may
    inherit it from a parent node instead of carrying it itself.
    """
    document = pdfrw.PdfFileReader(self.filename)
    pages = document.pages
    self.num_pages = len(pages)

    media_box = pages[0].inheritable.MediaBox
    # The box is given as two diagonally opposite corners:
    # [x1, y1, x2, y2].
    x1, y1, x2, y2 = (float(media_box[i]) for i in range(4))
    self.page_width = abs(x2 - x1)
    self.page_height = abs(y2 - y1)
def main(filename):
    """
    Print a plain-text rendering of a billing statement stored in a PDF.

    The statement data is read from the XFA stream of the PDF's AcroForm:
    the stream is parsed as XML and the ``TouchNet/BillingStmt`` element
    under the XFA ``datasets`` node is used as the statement. A header with
    student, term, dates and amount due is printed, followed by one line per
    ``LineItem`` and a closing amount-due line.

    If the file cannot be opened, or the statement data cannot be located
    (including the case where the statement element is simply absent), an
    explanatory message and the traceback are printed and the function
    returns without raising.

    :param filename: path of the statement PDF
    """
    try:
        pdf = pdfrw.PdfFileReader(filename)
    except Exception:
        print('Error: couldn\'t open statement.')
        print(
            'Are you sure the file "{}" exists? Is it a PDF?'.format(filename))
        print()
        print('Technical information:')
        traceback.print_exc()
        return

    try:
        xfa = pdf.Root.AcroForm.XFA.stream
        # if you want to explore the format yourself, I suggest printing
        # the contents of the xfa variable --- it's just a string containing
        # XML, some of which contains statement information and some of which
        # contains JavaScript describing how the data is to be displayed
        tree = ET.fromstring(xfa)
        datasets = tree.find(
            '{http://www.xfa.org/schema/xfa-data/1.0/}datasets')
        statement = datasets.find('*/TouchNet/BillingStmt')
        if statement is None:
            # find() reports "no match" by returning None, not by raising;
            # turn it into an error here so it gets the friendly message
            # below instead of an AttributeError further down.
            raise LookupError(
                'no TouchNet/BillingStmt element in the XFA datasets')
    except Exception:
        print('Error: couldn\'t find statement data in PDF.')
        print('Are you sure this PDF is a statement from the new MITPAY?')
        print()
        print('Technical information:')
        traceback.print_exc()
        return

    student_id = statement.find('Student/StuID').text
    student_name = statement.find('Student/FullName').text
    statement_term = format_term(statement.find('TermCode').text)
    statement_date = statement.find('StmtDt').text
    statement_due = statement.find('DueDt').text
    amount_due = statement.find('AmtDue').text

    print_line()
    print_center('MIT BILLING STATEMENT')
    print_center('UNOFFICIAL. Contact SFS in case of discrepancies.')
    print_center(
        'This report should hopefully include all personalized information')
    print_center('from the PDF except the student\'s address. No guarantees.')
    print_center(
        'When paying by mailing a check, you MUST use the original PDF.')
    print_center('If amount due is negative, you are due a refund.')
    print_line()
    print_center('{} (ID: {})'.format(student_name, format_id(student_id)))
    print_pair('Statement generated on {}'.format(format_date(statement_date)),
               'Due on {}'.format(format_date(statement_due)))
    print_pair('Term: {}'.format(statement_term),
               'Amount due: {}'.format(format_amount(amount_due, False)))
    print_line()

    for line in statement.findall('LineItem'):
        kind = line.find('Type').text
        description = line.find('Desc').text
        amount = line.find('Amt').text  # might be None

        # The transaction date and term are optional on a line item.
        date_node = line.find('TransDt')
        date = date_node.text if date_node is not None else None
        term_node = line.find('TermCode')
        term = term_node.text if term_node is not None else None

        formatted_date = ''
        if (date is not None) and (term is not None):
            formatted_date = ' ({}, {})'.format(format_term(term),
                                                format_date(date))
        elif date is not None:
            formatted_date = ' ({})'.format(format_date(date))
        elif term is not None:
            formatted_date = ' ({})'.format(format_term(term))

        complete_description = '{desc}{date}'.format(desc=description,
                                                     date=formatted_date)
        if amount is None:
            print_center(complete_description)
        else:
            # NB: sometimes Charges have negative amounts
            # e.g. "Initial Housing Average Fee Cr" (freshmen getting their
            # estimated housing fee back) is a charge for some reason.
            invert_sign = (kind in ('Charge', 'FutureChgs')
                           or kind.endswith('Chg'))
            print_pair(complete_description,
                       format_amount(amount, True, invert_sign))

    print_line()
    print_pair('Amount due', format_amount(amount_due, False))