def diff_pdf_pages(pdf1_path, pdf2_path):
    """Return the indices of the pages that differ between two PDFs.

    Both arguments are wrapped in ``io.BytesIO``, so despite their names they
    are expected to hold the raw PDF bytes rather than filesystem paths.
    Every page is re-serialised on its own through a fresh ``PdfFileWriter``
    and the two byte strings are compared.

    :param pdf1_path: bytes of the first PDF; when falsy, every page index of
        the second PDF is returned.
    :param pdf2_path: bytes of the second PDF.
    :returns: list of 0-based page indices. Indices of the first PDF that lie
        past the end of the second PDF are always reported.
    """
    # NOTE(review): pages that exist only in the second PDF are not reported
    # unless the first PDF is empty -- confirm this asymmetry is intended.

    def _single_page_bytes(reader, index):
        # Serialise one page into a standalone in-memory PDF.
        writer = PdfFileWriter()
        writer.addPage(reader.getPage(index))
        buf = io.BytesIO()
        writer.write(buf)
        return buf.getvalue()

    second = PdfFileReader(io.BytesIO(pdf2_path))
    second_count = second.getNumPages()
    if not pdf1_path:
        return list(range(second_count))

    first = PdfFileReader(io.BytesIO(pdf1_path))
    changed = []
    for index in range(first.getNumPages()):
        if index >= second_count:
            changed.append(index)
        elif _single_page_bytes(first, index) != _single_page_bytes(second, index):
            changed.append(index)
    return changed
def write_pdf(self, output):
    """Render this object to PDF and write the result to *output*.

    The first existing RML template among the three candidates is rendered
    with this object and the current site, then converted with ``trml2pdf``.
    When ``self.print_setup.background`` is set, each rendered page is merged
    onto the background page with the same index; rendered pages beyond the
    end of the background are added unchanged.

    :param output: writable binary file-like object.
    :returns: the same *output* object.
    """
    # get plain pdf from rml
    candidates = [
        'leprikon/{}/{}.rml'.format(self.pdf_export, self.subject.subject_type.slug),
        'leprikon/{}/{}.rml'.format(self.pdf_export, self.subject.subject_type.subject_type),
        'leprikon/{}/subject.rml'.format(self.pdf_export),
    ]
    context = {
        'object': self,
        'site': LeprikonSite.objects.get_current(),
    }
    rml_content = select_template(candidates).render(context)
    pdf_content = trml2pdf.parseString(rml_content.encode('utf-8'))

    if not self.print_setup.background:
        # no background configured: write the plain rendering
        output.write(pdf_content)
        return output

    # merge the rendering onto the background, page by page
    template_pdf = PdfFileReader(self.print_setup.background.file)
    registration_pdf = PdfFileReader(BytesIO(pdf_content))
    writer = PdfFileWriter()
    for index in range(registration_pdf.getNumPages()):
        has_background = index < template_pdf.getNumPages()
        if has_background:
            page = template_pdf.getPage(index)
            page.mergePage(registration_pdf.getPage(index))
        else:
            page = registration_pdf.getPage(index)
        writer.addPage(page)
    writer.write(output)
    return output
def page_extract(start, end, SUBSECTION):
    """Split a page range of ``PDF_DIR`` into PNGs and build cards from them.

    Each page in the range is written to its own temporary PDF in
    ``TMP_DIR``, converted with ``gs_pdf_to_png`` and the temporary PDF is
    removed. The files found in ``TMP_DIR`` afterwards are grouped in pairs
    (front, back) and passed to ``make_cards``.

    :param start: first page to extract, 1-based (converted with ``int``).
    :param end: last page to extract, 1-based inclusive.
    :param SUBSECTION: forwarded unchanged to ``make_cards``.
    """
    _, name_ext = os.path.split(PDF_DIR)
    name, ext = os.path.splitext(name_ext)

    # Keep the source open only while pages are being copied out of it.
    with open(PDF_DIR, 'rb') as pdf_stream:
        pdf_in = PdfFileReader(pdf_stream)
        for i in range(int(start) - 1, int(end)):
            output = PdfFileWriter()
            output.addPage(pdf_in.getPage(i))
            pdf_out = '{}{}'.format(
                TMP_DIR, '{}-{}{}'.format(name, str(i).zfill(6), ext))
            with open(pdf_out, 'wb') as output_stream:
                output.write(output_stream)
            gs_pdf_to_png(pdf_out)
            os.remove(pdf_out)

    # os.listdir() returns entries in arbitrary order; sort so the
    # zero-padded page numbers pair fronts and backs deterministically.
    png_list = group(sorted(os.listdir(TMP_DIR)), 2)
    for tup in png_list:
        print(tup)
        card_front = os.path.join(TMP_DIR, tup[0])
        card_back = os.path.join(TMP_DIR, tup[1])
        make_cards(card_front, card_back, SUBSECTION)
def buildPDF(self, data, document_root):
    """Overlay the submitted field values on the ``a125.pdf`` form.

    The values are drawn onto a letter-sized canvas by the per-field
    drawing methods of this object, merged onto the first page of
    ``<document_root>/a125.pdf`` and written to
    ``<document_root>/a125-gen.pdf``. The writer is also stored on
    ``self.form``.

    :param data: JSON text; the ``'fields'`` mapping of its first element
        supplies the values.
    :param document_root: directory containing ``a125.pdf``.
    """
    fields = json.loads(data)[0]['fields']
    content = StringIO.StringIO()
    parser = canvas.Canvas(content, pagesize=letter)

    # (drawing method, key in ``fields``), in the original drawing order.
    drawers = (
        ('employee_name', 'name'),
        ('social_security', 'ssn'),
        ('title', 'title'),
        ('base_salary', 'base_salary'),
        ('period', 'period'),
        ('period_year', 'period_year'),
        ('effective_date', 'effective_date'),
        ('multi_campus', 'multi_campus'),
        ('sponsored_accounts', 'sponsored_accounts'),
        ('cost_sharing', 'cost_sharing'),
        ('university_funds', 'university_funds'),
        ('payments_paid', 'payments_paid'),
        ('comments', 'comments'),
    )
    for method_name, key in drawers:
        getattr(self, method_name)(parser, fields[key])
    parser.save()

    content.seek(0)
    text = PdfFileReader(content)
    form = PdfFileReader(document_root + '/a125.pdf').getPage(0)
    output = PdfFileWriter()
    form.mergePage(text.getPage(0))
    output.addPage(form)
    # Close the handle so the generated file is flushed before returning.
    with open(document_root + '/a125-gen.pdf', 'wb') as output_stream:
        output.write(output_stream)
    self.form = output
def combine_for_print(folder_title):
    """Download the PDFs of a Drive folder and combine them two per sheet.

    The first page of every even-positioned file becomes a new output page;
    the first page of the following file is merged onto it, shifted down by
    5.3 inches. The result is written to a file named after the current
    date/time and the number of downloaded files. Downloaded temporary
    files are removed afterwards, also when an error occurs.

    :param folder_title: title of the Drive folder, passed to
        ``get_pdf_files``.
    """
    import os  # local import: only needed for temp-file cleanup

    drive = get_drive()
    filenames = []
    try:
        # Download all pdf files from GDrive.
        for i, fil in enumerate(get_pdf_files(drive, folder_title), 1):
            print(fil['title'])
            filename = '__temp-{}.pdf'.format(i)
            # Register the name first so a partial download is cleaned up.
            filenames.append(filename)
            fil.GetContentFile(filename)

        if not filenames:
            print('No pdf files were downloaded')
            return

        # Compute output name by using date and number of files.
        output_filename = '{:%Y-%m-%d %H%M} ({}).pdf'.format(
            datetime.datetime.now(), len(filenames))
        print('Combining files into {}'.format(output_filename))

        writer = PdfFileWriter()
        streams = []
        try:
            page = None
            for i, filename in enumerate(filenames):
                # The stream must stay open until writer.write() has run.
                stream = open(filename, 'rb')
                streams.append(stream)
                reader = PdfFileReader(stream, strict=False)
                if (i % 2) == 0:
                    # even position: start a new output page
                    page = reader.getPage(0)
                    writer.addPage(page)
                else:
                    # odd position: stamp onto the page started just before
                    page.mergeTranslatedPage(reader.getPage(0), 0, -5.3*inch)
            with open(output_filename, 'wb') as fp:
                writer.write(fp)
        finally:
            for stream in streams:
                stream.close()
    finally:
        # Delete temp pdf files (only the ones created by this call).
        for filename in filenames:
            if os.path.exists(filename):
                os.remove(filename)
def tearpage(filename, startpage=1):
    """
    Copy filename to a tempfile, write pages startpage..N to filename.

    :param filename: PDF filepath
    :param startpage: page number for the new first page
    """
    # Copy the pdf to a tmp file; it is deleted when the block exits, which
    # now happens only after the output has been written.
    with tempfile.NamedTemporaryFile() as tmp:
        shutil.copy(filename, tmp.name)

        source = open(tmp.name, 'rb')
        try:
            # Read the copied pdf, repairing it once if it cannot be parsed
            try:
                input_file = PdfFileReader(source)
            except PdfReadError:
                source.close()
                _fixPdf(filename, tmp.name)
                source = open(tmp.name, 'rb')
                input_file = PdfFileReader(source)

            # Seek for the number of pages
            num_pages = input_file.getNumPages()

            # Write every page from startpage onwards
            output_file = PdfFileWriter()
            for i in range(startpage, num_pages):
                output_file.addPage(input_file.getPage(i))

            with open(filename, "wb") as output_stream:
                output_file.write(output_stream)
        finally:
            source.close()
def print_by_server(doctype, name, print_format=None, doc=None, no_letterhead=0):
    """Render a document to PDF and send it to the configured CUPS printer.

    Server, port and printer name are read from the "Print Settings"
    document. The PDF is written to a uniquely named file under ``/tmp``,
    printed, and passed to ``cleanup`` afterwards.

    :param doctype: document type passed to ``frappe.get_print``.
    :param name: document name; also used as the print job title.
    :param print_format: optional print format.
    :param doc: optional document object.
    :param no_letterhead: forwarded to ``frappe.get_print``.
    """
    print_settings = frappe.get_doc("Print Settings")
    try:
        import cups
    except ImportError:
        frappe.throw("You need to install pycups to use this feature!")
        return

    # Stays None until the path exists, so the finally block never touches
    # an unbound name when an early step fails.
    pdf_path = None
    try:
        cups.setServer(print_settings.server_ip)
        cups.setPort(print_settings.port)
        conn = cups.Connection()
        output = PdfFileWriter()
        output = frappe.get_print(doctype, name, print_format, doc=doc,
                                  no_letterhead=no_letterhead, as_pdf=True,
                                  output=output)
        pdf_path = os.path.join(
            "/", "tmp", "frappe-pdf-{0}.pdf".format(frappe.generate_hash()))
        # Close the file before printing so CUPS reads the full content.
        with open(pdf_path, "wb") as pdf_file:
            output.write(pdf_file)
        conn.printFile(print_settings.printer_name, pdf_path, name, {})
    except IOError as e:
        # ``e.message`` only exists on Python 2; fall back to str(e).
        message = getattr(e, "message", None) or str(e)
        markers = (
            "ContentNotFoundError",
            "ContentOperationNotPermittedError",
            "UnknownContentError",
            "RemoteHostClosedError",
        )
        # Other IOErrors are deliberately ignored, as before.
        if any(marker in message for marker in markers):
            frappe.throw(_("PDF generation failed"))
    except cups.IPPError:
        frappe.throw(_("Printing failed"))
    finally:
        if pdf_path is not None:
            cleanup(pdf_path, {})
def handle(self, *args, **options):
    """Generate one certificate PDF per child listed in the settings.

    For every certificate type, class and child in
    ``settings.CERT_CHILDREN`` the matching template PDF is loaded, four
    centred captions (child, event, date, church) are drawn at the
    coordinates and fonts given by ``settings.CERT_COORD`` and the merged
    page is saved as ``<CERT_PATH>/<class>/<child>_<year>.pdf``.
    """
    for cert_type, ss_class_children in settings.CERT_CHILDREN.iteritems():
        self.stdout.write('Certificate Type: {}\n'.format(cert_type))
        for ss_class, children in ss_class_children.iteritems():
            self.stdout.write('SS Class: {}\n'.format(ss_class))
            for child in children:
                self.stdout.write('Child: {}\n'.format(child))

                # A fresh reader per child: mergePage() below alters the page.
                paf_path = os.path.join(settings.CERT_TEMPLATE_PATH,
                                        settings.CERT_FILE[cert_type])
                page = PdfFileReader(paf_path).getPage(0)

                overlay_buffer = StringIO.StringIO()
                overlay = canvas.Canvas(overlay_buffer, pagesize=letter)

                # (settings key, caption text), in drawing order.
                captions = (
                    ('child', child),
                    ('event', 'Sunday School Summer Festival {}'.format(
                        datetime.now().strftime('%Y'))),
                    ('date', '{}'.format(datetime.now().strftime('%B %Y'))),
                    ('church', 'St. Mark Coptic Orthodox Church'),
                )
                for field, caption in captions:
                    spec = settings.CERT_COORD[cert_type][field]
                    font_name = spec['font']['name']
                    font_size = spec['font']['size']
                    x = spec['x']
                    y = spec['y']
                    overlay.setFont(font_name, font_size)
                    overlay.drawCentredString(x, y, caption)
                overlay.save()

                pdf_with_custom_text = PdfFileReader(overlay_buffer)
                page.mergePage(pdf_with_custom_text.getPage(0))
                writer = PdfFileWriter()
                writer.addPage(page)

                output_file = '{}_{}.pdf'.format(
                    child, datetime.now().strftime('%Y'))
                output_dir = os.path.join(settings.CERT_PATH, ss_class)
                if not os.path.exists(output_dir):
                    os.makedirs(output_dir)
                with open(os.path.join(output_dir, output_file), 'wb') as f:
                    writer.write(f)
def _merge_pdf(documents):
    '''Merge PDF files into one.

    :param documents: list of path of pdf files
    :returns: path of the merged pdf
    '''
    writer = PdfFileWriter()
    streams = []  # We have to close the streams *after* PdfFileWriter's call to write()
    try:
        for document in documents:
            pdfreport = open(document, 'rb')
            streams.append(pdfreport)
            reader = PdfFileReader(pdfreport, overwriteWarnings=False)
            for page in range(0, reader.getNumPages()):
                writer.addPage(reader.getPage(page))

        # NOTE(review): the '.html' suffix is kept for compatibility although
        # the file holds a PDF.
        merged_file_fd, merged_file_path = tempfile.mkstemp(
            suffix='.html', prefix='report.merged.tmp.')
        # PDF output is binary: open the descriptor in 'wb', not text mode.
        with closing(os.fdopen(merged_file_fd, 'wb')) as merged_file:
            writer.write(merged_file)
    finally:
        for stream in streams:
            try:
                stream.close()
            except Exception:
                # Best-effort cleanup: a failing close must not mask the
                # result or the original error.
                pass

    return merged_file_path
def generate_cards_with_no_background(pdf):
    """Return a new ``PdfFileWriter`` holding only the first page of *pdf*.

    :param pdf: object exposing ``getPage(index)``.
    :returns: the writer; nothing is written to disk here.
    """
    first_page_only = PdfFileWriter()
    first_page_only.addPage(pdf.getPage(0))
    return first_page_only
def add_update_pdf_metadata(filename, update_dictionary):
    """Add or overwrite document-info entries of a PDF, in place.

    All pages are copied into a new writer, the existing document info is
    merged with *update_dictionary* (the latter wins), the result is written
    to a temporary file and then moved over *filename*.

    :param filename: path of the PDF to modify.
    :param update_dictionary: mapping of metadata names (without the leading
        ``/``) to values; no value may be ``None``.
    """
    # This seems to be the only way to modify the existing PDF metadata.
    #
    # pylint: disable=protected-access, no-member
    import os  # local import: needed to wrap the mkstemp descriptor

    full_update_dictionary = {'/' + k: v for k, v in update_dictionary.items()}

    with open(filename, 'rb') as input_file:
        pdf_input = PdfFileReader(input_file)
        pdf_output = PdfFileWriter()

        for page in range(pdf_input.getNumPages()):
            pdf_output.addPage(pdf_input.getPage(page))

        info_dict = pdf_output._info.getObject()

        # A PDF without an info dictionary yields None here.
        info = pdf_input.documentInfo or {}

        full_update_dictionary = dict(chain(info.items(), full_update_dictionary.items()))

        for key, value in full_update_dictionary.items():
            assert value is not None
            info_dict.update({NameObject(key): createStringObject(value)})

        temp_fd, temp_file_name = tempfile.mkstemp(
            prefix="email2pdf_add_update_pdf_metadata", suffix=".pdf")
        # Write through the descriptor mkstemp returned so it gets closed
        # instead of leaking.
        with os.fdopen(temp_fd, 'wb') as file_out:
            pdf_output.write(file_out)

    shutil.move(temp_file_name, filename)
def unit_pjspdf(request, slug, unit_slug):
    """Render a unit page to PDF through PhantomJScloud and return it.

    The unit's URL is posted to the PhantomJScloud API, the returned PDF is
    re-written page by page into an ``application/pdf`` response shown
    inline, and a Slack message is sent.

    :param request: the current HTTP request.
    :param slug: curriculum slug.
    :param unit_slug: unit slug.
    :returns: ``HttpResponse`` holding the PDF.
    """
    output = PdfFileWriter()
    unit = get_object_or_404(Unit, curriculum__slug=slug, slug=unit_slug)

    payload = {
        "url": get_url_for_pdf(request, unit.get_absolute_url(), True),
        "renderType": "pdf",
    }
    endpoint = 'http://PhantomJScloud.com/api/browser/v2/%s/' % settings.PHANTOMJS_KEY
    api_request = Request(endpoint, json.dumps(payload),
                          {'content-type': 'application/json'})
    response = urlopen(api_request)
    results = response.read()

    # diagnostic output
    print('\nresponse status code')
    print(response.code)
    print('\nresponse headers (pay attention to pjsc-* headers)')
    print(response.headers)

    output.appendPagesFromReader(PdfFileReader(StringIO(results)))

    pdfresponse = HttpResponse(content_type='application/pdf')
    output.write(pdfresponse)
    pdfresponse['Content-Disposition'] = 'inline;filename=unit%s.pdf' % unit.number

    slack_message('slack/message.slack', {
        'message': 'created a PDF from %s %s' % (slug, unit_slug),
        'user': request.user,
    })
    return pdfresponse
def imp_exp_pdf(inputfile, outputfile, size, margin, padding):
    """Import a PDF and export it with ten source pages per output sheet.

    Source pages are collected in batches of ten and handed to
    ``output_one_page``; a final partial batch is handled the same way.

    :param inputfile: path of the source PDF.
    :param outputfile: path of the PDF to write.
    :param size: forwarded to ``output_one_page``.
    :param margin: forwarded to ``output_one_page``.
    :param padding: forwarded to ``output_one_page``.
    """
    output = PdfFileWriter()
    # The source must stay open until the output has been written.
    with open(inputfile, 'rb') as source_stream:
        reader = PdfFileReader(source_stream, strict=False)
        total_pages = reader.getNumPages()

        batch = []
        for i in range(total_pages):
            batch.append(reader.getPage(i))
            if len(batch) == 10:
                output_one_page(batch, size, margin, padding, output)
                batch = []
                print("Printed {} of {} [{:.2f}%]".format(
                    i + 1, total_pages, (i + 1) / float(total_pages) * 100))

        if batch:
            # Remaining pages that did not fill a whole sheet.
            output_one_page(batch, size, margin, padding, output)

        print('')
        print('Completed converting.')
        print('Saving...')
        with open(outputfile, "wb") as output_stream:
            output.write(output_stream)
    print('END OF PROGRAM')
def make_tile(page_number, n_tiles, row, column):
    """Crop one tile out of a page of the score and save it as its own PDF.

    The page is divided into an ``n_tiles`` x ``n_tiles`` grid; the crop,
    trim and media boxes are set to the cell at (*row*, *column*), measured
    from the lower-left corner.

    :param page_number: 0-based page index in the source PDF.
    :param n_tiles: number of tiles per side.
    :param row: tile row.
    :param column: tile column.
    :returns: ``None``.
    """
    path = "/pieces/diotima_quartet/arco_quartet.pdf"
    output_path = "/pieces/diotima_quartet/hoban_tiles/hoban%s_%s@%s.pdf" % (page_number,row,column)

    with open(path, "rb") as source_stream:
        source = PdfFileReader(source_stream)
        page = source.getPage(page_number)
        # Measure before any box is changed.
        width = float(page.mediaBox.getWidth())
        height = float(page.mediaBox.getHeight())

        tile_width = width / n_tiles
        tile_height = height / n_tiles
        left = tile_width * column
        bottom = tile_height * row

        # Same order as before: crop box, trim box, then media box.
        for box in (page.cropBox, page.trimBox, page.mediaBox):
            box.lowerLeft = (left, bottom)
            box.upperRight = (left + tile_width, bottom + tile_height)

        # output
        output = PdfFileWriter()
        output.addPage(page)
        with open(output_path, "wb") as output_stream:
            output.write(output_stream)
    return None
def write(self, output):
    """Write every page in ``self.pages`` to *output* as one PDF.

    :param output: writable binary file-like object handed to
        ``PdfFileWriter.write``.
    """
    writer = PdfFileWriter()
    for wrapped_page in self.pages:
        log.debug('Adding page to file for writing')
        writer.addPage(wrapped_page.page)
    writer.write(output)
def send_email(request, arr):
    """Build the Dajax response that opens the flight-log e-mail popup.

    For every id in *arr* an attachment row is added to the popup and a PDF
    containing a single blank page is written to the ``pdf_export``
    directory. The ids are also placed, comma separated, in a hidden input,
    and the sender field is filled from the matching ``UserTemp`` rows.

    :param request: current request; its session supplies the employee
        number.
    :param arr: iterable of id strings.
    :returns: the JSON produced by ``dajax.json()``.
    """
    render = render_to_string(constant.email_popup_page)
    dajax = Dajax()
    dajax.script(constant.append_flight_email_popup % render.replace('\n', ""))
    emails = UserTemp.objects.filter(employee_number=request.session[constant.usernameParam])

    rows = []
    ids = []
    for i in arr:
        rows.append("<div class=\"attack_file\"><input class=\"attack_file\" readonly=\"true\" name=\"attack_file_" + i + "\" value=\"flight_log_" + i + ".pdf\" type=\"text\" /><div class=\"close-attack\">x</div></div>")
        ids.append(i)

        # write a placeholder PDF (one blank page) for this id
        pdf_path = os.path.abspath(os.path.dirname(os.path.dirname(__file__))) + '/../helicopters/static/media/pdf_export/flight_log_' + i + '.pdf'
        from PyPDF2 import PdfFileWriter
        op = PdfFileWriter()
        op.addBlankPage(793, 1122)
        ops = file(pdf_path, "wb")
        op.write(ops)
        ops.close()

    val = ','.join(ids)
    html = ''.join(rows) + '<input class="hiden_attack" name="hiden_attack" val="'+ val + '" type="input" />'
    dajax.script("var e_html = '" + html +"';\
 jQuery('.attack_files').html(e_html);")
    for email in emails:
        dajax.script("jQuery('.email_from').val('" + email.email + "');")
    return dajax.json()
def getPLBURL(journal, doi, count):
    """Download an article PDF via its DOI and save its first page.

    The DOI is resolved through ``dx.doi.org``, the ``pdfurl="..."``
    attribute is extracted from the landing page, the PDF is fetched and its
    first page is written to ``Single_<count>.pdf`` in the working
    directory.

    :param journal: unused.
    :param doi: DOI of the article.
    :param count: number used in the output file name.
    :raises ValueError: if the landing page contains no ``pdfurl``
        attribute.
    """
    import re

    cj = http.cookiejar.CookieJar()  # initialize the cookie jar
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
    opener.addheaders = [
        ('User-Agent', 'Mozilla/5.0 (Windows NT 6.1; Win64; x64)'),
    ]

    response = opener.open('http://dx.doi.org/' + doi)
    try:
        landing_page = response.read()
    finally:
        response.close()

    # need to convert from bytes to string
    match = re.search('pdfurl="(.*?)"', landing_page.strip().decode('utf-8'))
    if match is None:
        raise ValueError('no pdfurl found on the landing page of ' + doi)

    response = opener.open(match.group(1))
    try:
        pdf_bytes = response.read()
    finally:
        response.close()

    reader = PdfFileReader(io.BytesIO(pdf_bytes))
    merged = PdfFileWriter()
    out_name = "Single_" + str(count) + ".pdf"
    merged.addPage(reader.getPage(0))
    with open(out_name, 'wb') as pdf:
        merged.write(pdf)
def post(self):
    """Assemble a PDF from pages of several source PDFs and return it.

    The JSON request body carries ``pdfs_list`` (the source PDFs) and
    ``order``, a mapping from position to a page description with the keys
    ``pdf``, ``id`` (1-based page number) and optional ``rotation``
    (degrees; positive is clockwise). The resulting PDF is sent
    base64-encoded as an attachment.
    """
    def _position(item):
        # JSON object keys are always strings, so a plain sort would put
        # "10" before "2". Sort numeric keys by value; any other key sorts
        # after them, alphabetically.
        key = item[0]
        try:
            return (0, int(key), '')
        except (TypeError, ValueError):
            return (1, 0, str(key))

    json_data = json.loads(self.request.body.decode('utf-8'))
    order = json_data.get('order', None)
    pdfs = json_data.get('pdfs_list', None)
    writer = PdfFileWriter()

    # NOTE(review): the entries of pdfs_list come straight from the request
    # and are opened as given -- confirm they are validated upstream.
    loaded_pdfs = {}
    for pdf in pdfs:
        loaded_pdfs[pdf] = PdfFileReader(pdf)

    for _, page in sorted(order.items(), key=_position):
        # we need a shallow copy here else we get a ref to
        # the same object if there is a copy of the page
        source = copy(loaded_pdfs.get(page.get('pdf')))
        page_number = page.get('id') - 1
        rotation = page.get('rotation', 0)
        pdf_page = source.getPage(page_number)
        method = ('rotateClockwise' if rotation > 0
                  else 'rotateCounterClockwise')
        getattr(pdf_page, method)(abs(rotation))
        writer.addPage(pdf_page)

    _file = BytesIO()
    writer.write(_file)
    _file.seek(0)
    self.set_header('Content-Type', 'application/pdf')
    self.set_header('Content-Disposition',
                    'attachment; filename=pdf.pdf')
    self.write(b64encode(_file.read()))
def generate_document(self, data):
    """Render one data row to a PDF file in ``self.output_dir``.

    Every field class in ``self.fields`` is instantiated with the value at
    the same position in *data* and rendered onto a canvas. If
    ``self.template_file`` is set, the rendered text is merged onto the
    first page of that template. The output name comes from
    ``self.generate_filename(data)``.

    :param data: indexable sequence of column values, one per field.
    :raises IndexError: if *data* has fewer entries than there are fields.
    """
    template_stream = None
    try:
        if self.template_file is not None:
            # Must stay open until the output has been written.
            template_stream = open(self.template_file, 'rb')
            template = PdfFileReader(template_stream)

        packet = StringIO()
        c = canvas.Canvas(packet, pagesize=(self.width, self.height))
        for i, field_cls in enumerate(self.fields):
            # TODO: Catch exception if there is less columns than fields
            field_cls(self, c, data[i]).render()

        # Save canvas
        c.save()
        packet.seek(0)
        text = PdfFileReader(packet)
        output = PdfFileWriter()

        if self.template_file is not None:
            # Merge text with base
            page = template.getPage(0)
            page.mergePage(text.getPage(0))
        else:
            page = text.getPage(0)
        output.addPage(page)

        # Save file
        filename = "%s/%s.pdf" % (self.output_dir, self.generate_filename(data))
        with open(filename, 'wb') as output_stream:
            output.write(output_stream)
    finally:
        if template_stream is not None:
            template_stream.close()
def merge_pdf(destination=None, pdf_files=None):
    """Concatenate *pdf_files* into *destination* and report the outcome.

    A message box reports success; any error is shown in a critical message
    box together with the traceback instead of being raised.

    :param destination: path of the merged PDF to write.
    :param pdf_files: iterable of paths of the PDFs to merge, in order.
    """
    try:
        output = PdfFileWriter()
        streams = []
        try:
            for pdf_file in pdf_files:
                # Inputs must stay open until output.write() has run.
                stream = open(pdf_file, 'rb')
                streams.append(stream)
                for page in PdfFileReader(stream).pages:
                    output.addPage(page)
            # The with-block guarantees the output is flushed and closed.
            with open(destination, 'wb') as output_stream:
                output.write(output_stream)
        finally:
            for stream in streams:
                stream.close()
        QMessageBox.information(main, 'Success!',
                                'PDFs have been merged to ' + destination)
    except Exception:
        QMessageBox.critical(main, 'Error!',
                             'Critical error occured.\n\n%s' % traceback.format_exc())
def _generate(self, pdf, width=210, height=297, img=".gif"):
    """Generate a cover image from the first page of the given pdf.

    The first page is written to a temporary PDF and converted with
    ImageMagick ``convert``. Both temporary files are passed to
    ``self._finish`` afterwards, also when an error occurs.

    :param pdf: path or stream accepted by ``PdfFileReader``.
    :param width: target width passed to ``-resize``.
    :param height: target height passed to ``-resize``.
    :param img: suffix (and thereby format) of the image file.
    :returns: the image bytes, or ``None`` if no image data was produced.
    """
    # Reserve both names atomically instead of using the race-prone mktemp().
    with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as reserved:
        tmp_inp = reserved.name
    with tempfile.NamedTemporaryFile(suffix=img, delete=False) as reserved:
        tmp_img = reserved.name

    try:
        cover = PdfFileReader(pdf).getPage(0)
        out = PdfFileWriter()
        out.addPage(cover)
        with open(tmp_inp, "wb") as sock:
            out.write(sock)

        # Run image magick convert
        # add -flatten option to fix #28943. It's prevent transparence cover
        # image
        # An argument list (no shell) keeps the values from being
        # interpreted by a shell.
        cmd = ["convert", "-flatten", tmp_inp,
               "-resize", "%sx%s" % (width, height), tmp_img]
        process = Popen(cmd, stdin=PIPE, stdout=PIPE, stderr=STDOUT,
                        close_fds=True)
        # communicate() drains the output and reaps the child process.
        res, _unused = process.communicate()
        if res:
            logger.debug(res)

        with open(tmp_img, "rb") as image_file:
            data = image_file.read()
    finally:
        self._finish(tmp_inp, tmp_img)

    if not data:
        return None
    return data
def split_pdf(input_pdf, list_pages, end_page = None):
    """Split a PDF into in-memory parts at the given start pages.

    :param input_pdf: reader exposing ``numPages`` and ``getPage``.
    :param list_pages: start page of every part, ascending.
    :param end_page: end bound of the last part; defaults to the number of
        pages of *input_pdf*.
    :returns: list of ``BytesIO`` objects, one per part. Their position is
        left at the end of the written data.
    """
    # NOTE(review): every part covers range(start, next_start - 1), so the
    # page just before the next start (and the last page of the document) is
    # left out. Kept as-is -- confirm the intended page numbering.
    res = []
    if not end_page:
        end_page = input_pdf.numPages
    last = len(list_pages) - 1
    for i, start in enumerate(list_pages):
        output_i = PdfFileWriter()
        if i == last:
            end = end_page - 1
        else:
            end = list_pages[i + 1] - 1
        for j in range(start, end):
            # Read from the reader that was passed in, not an undefined name.
            output_i.addPage(input_pdf.getPage(j))
        sio = BytesIO()
        output_i.write(sio)
        res.append(sio)
    return res
def scale_to_size(reader, unit_size):
    """Scale every page of *reader* onto blank pages of *unit_size*.

    The scaling factor is the smaller of the x and y ratios between
    *unit_size* and the upper-right corner of the first page's media box.
    The result is currently written to ``aux.pdf`` in the working directory;
    nothing is returned.

    :param reader: PDF reader exposing ``pages``.
    :param unit_size: target (width, height).
    """
    source_size = reader.pages[0].mediaBox.upperRight
    factors = [target / source for target, source in zip(unit_size, source_size)]
    x_factor = factors[0]
    y_factor = factors[1]
    # Use the x-factor when it is the lowest (or equal), else the y-factor.
    factor = x_factor if x_factor <= y_factor else y_factor

    writer = PdfFileWriter()
    for source_page in reader.pages:
        target_page = add_blank_page(writer, unit_size)
        target_page.mergeScaledPage(source_page, factor, expand=True)

    with open("aux.pdf", "wb") as aux_file:
        writer.write(aux_file)
def tearpage(filename, startpage=0, lastpage=0):
    """
    Copy filename to a tempfile, then rewrite filename without its first
    `startpage` and last `lastpage` pages.

    :param filename: PDF filepath
    :param startpage: number of pages to delete from the cover
    :param lastpage: number of pages to delete from the back
    :raises ValueError: if no page would be left
    """
    # Copy the pdf to a tmp file
    with tempfile.NamedTemporaryFile() as tmp:
        shutil.copy(filename, tmp.name)

        source = open(tmp.name, 'rb')
        try:
            # Read the copied pdf, repairing it once if it cannot be parsed
            try:
                input_file = PdfFileReader(source)
            except PdfReadError:
                source.close()
                fixPdf(filename, tmp.name)
                source = open(tmp.name, 'rb')
                input_file = PdfFileReader(source)

            # Seek for the number of pages
            num_pages = input_file.getNumPages()

            if startpage >= num_pages - lastpage:
                raise ValueError('Incorrect number of pages')

            # Write the pages that are kept
            output_file = PdfFileWriter()
            for i in range(startpage, num_pages-lastpage):
                output_file.addPage(input_file.getPage(i))

            with open(filename, "wb") as outputStream:
                output_file.write(outputStream)
        finally:
            source.close()
def concat_pdf(in_files, out_file='concatenated.pdf'):
    """
    Concatenate PDF list (http://stackoverflow.com/a/3444735)

    GhostScript is used when ``gs -v`` runs successfully, otherwise PyPDF2.

    :param in_files: iterable of paths of the PDFs to concatenate, in order
    :param out_file: path of the PDF to write
    :returns: *out_file*
    """
    in_files = list(in_files)

    # GhostScript produces much smaller PDFs - we should use it if we can.
    # check_call() returns 0 on success and raises on failure, so its result
    # cannot be used as a truth value.
    try:
        subprocess.check_call(["gs", "-v"])
        have_gs = True
    except (OSError, subprocess.CalledProcessError):
        have_gs = False

    if have_gs:
        # Argument list instead of a shell string: file names containing
        # spaces or shell metacharacters are passed through unchanged.
        cmd = [
            "gs",
            "-sDEVICE=pdfwrite",
            "-dCompatibilityLevel=1.4",
            "-dPDFSETTINGS=/prepress",
            "-dNOPAUSE",
            "-dQUIET",
            "-dBATCH",
            "-dDetectDuplicateImages",
            "-dCompressFonts=true",
            "-dAutoFilterColorImages=false",
            "-dAutoFilterGrayImages=false",
            "-dColorImageFilter=/FlateEncode",
            "-dGrayImageFilter=/FlateEncode",
            "-dColorConversionStrategy=/LeaveColorUnchanged",
            "-dDownsampleMonoImages=false",
            "-dDownsampleGrayImages=false",
            "-dDownsampleColorImages=false",
            "-sOutputFile={}".format(out_file),
        ] + in_files
        subprocess.check_call(cmd)
    else:
        from PyPDF2 import PdfFileWriter, PdfFileReader

        outpdf = PdfFileWriter()
        streams = []
        try:
            for in_file in in_files:
                # Inputs must stay open until the single write below.
                stream = open(in_file, 'rb')
                streams.append(stream)
                inpdf = PdfFileReader(stream)
                for fpdf in range(inpdf.numPages):
                    outpdf.addPage(inpdf.getPage(fpdf))
            with open(out_file, 'wb') as out_pdffile:
                outpdf.write(out_pdffile)
        finally:
            for stream in streams:
                stream.close()
    return out_file
def pdf_merge(inputs: [str], output: str, delete: bool=False):
    """
    Merge multiple Pdf input files in one output file.

    If an input file is missing, the error is printed and nothing is written
    or deleted.

    :param inputs: input files
    :param output: output file
    :param delete: delete input files after a successful merge if true
    """
    writer = PdfFileWriter()
    if os.path.isfile(output):
        ans = input("The file '%s' already exists. "
                    "Overwrite? Yes/Abort [Y/a]: " % output).lower()
        if ans == "a":
            return

    infiles = []
    try:
        for filename in inputs:
            f = open(filename, 'rb')
            # Track the handle right away so it is closed on any error.
            infiles.append(f)
            reader = PdfFileReader(f)
            for page in reader.pages:
                writer.addPage(page)
        # Open (and thereby truncate) the output only once every input has
        # been read successfully.
        with open(output, "wb") as outputfile:
            writer.write(outputfile)
    except FileNotFoundError as e:
        print(e.strerror + ": " + e.filename)
        # Do not fall through to the deletion of the inputs.
        return
    finally:
        for f in infiles:
            f.close()

    if delete:
        for filename in inputs:
            os.remove(filename)
def print_pdf(request):
    """Return the flight-log PDF, or a zip of several, for the given ids.

    The ``i`` query parameter holds one id or a comma separated list. For
    every id a PDF response is rendered and a PDF containing a single blank
    page is stored in the ``pdf_export`` directory. A single id returns the
    rendered PDF response; several ids return a zip of the stored files.

    :param request: current request.
    :returns: the PDF response or the zip response.
    """
    def _export_path(nu):
        # Location of the stored PDF for one id.
        return os.path.abspath(os.path.dirname(os.path.dirname(__file__))) + '/../helicopters/static/media/pdf_export/flight_log_' + nu + '.pdf'

    i = request.GET["i"]
    arr = i.split(',')
    for nu in arr:
        pdf = render_to_pdf_response("flights/pdf.html", '', 'flight_log_' + nu + '.pdf')
        # save pdf file in server
        op = PdfFileWriter()
        op.addBlankPage(793, 1122)
        ops = file(_export_path(nu), "wb")
        op.write(ops)
        ops.close()

    # zip all file to one
    buffer = StringIO.StringIO()
    zf = zipfile.ZipFile(buffer, mode='w')
    for nu in arr:
        zf.write(_export_path(nu), 'flight_log_' + nu + '.pdf')
    zf.close()
    buffer.seek(0)
    response = HttpResponse(buffer.read())

    if ',' not in i:
        return pdf
    name = i.replace(',','')
    response['Content-Disposition'] = 'attachment; filename=flight_log_' + name + '.zip'
    response['Content-Type'] = 'application/x-zip'
    return response
def split_pages(input_file, page_range, out_file):
    """
    Take a pdf file and copy a range of pages into a new pdf file

    Args:
        input_file: The source PDF file
        page_range: A string containing a range of pages to copy: 1-3,4
        out_file: File name for the destination PDF
    """
    output = PdfFileWriter()

    # https://stackoverflow.com/questions/5704931/parse-string-of-integer-sets-with-intervals-to-list
    page_ranges = (x.split("-") for x in page_range.split(","))
    range_list = [i for r in page_ranges for i in range(int(r[0]), int(r[-1]) + 1)]

    # Both files are closed before the user is told the file was saved.
    with open(input_file, "rb") as source:
        input_pdf = PdfFileReader(source)
        for p in range_list:
            # Need to subtract 1 because pages are 0 indexed
            try:
                output.addPage(input_pdf.getPage(p - 1))
            except IndexError:
                # Alert the user and stop adding pages
                app.infoBox("Info", "Range exceeded number of pages in input.\nFile will still be saved.")
                break
        with open(out_file, "wb") as output_file:
            output.write(output_file)

    if(app.questionBox("File Save", "Output PDF saved. Do you want to quit?")):
        app.stop()
def pdf_copy(input: str, output: str, pages: [int], yes_to_all=False):
    """
    Copy pages from the input file in a new output file.

    :param input: name of the input pdf file
    :param output: name of the output pdf file
    :param pages: list containing the page numbers to copy in the new file;
        ``None`` copies every page
    :param yes_to_all: overwrite an existing output file without asking
    """
    if not os.path.isfile(input):
        print("Error. The file '%s' does not exist." % input)
        return

    if os.path.isfile(output) and not yes_to_all and not overwrite_dlg(output):
        return

    with open(input, "rb") as inputfile:
        reader = PdfFileReader(inputfile)
        writer = PdfFileWriter()

        if pages is None:
            pages = range(len(reader.pages))
        else:
            pages = parse_rangearg(pages, len(reader.pages))

        for pagenr in sorted(pages):
            writer.addPage(reader.getPage(pagenr))

        # Opened only after the page selection succeeded, so a bad range no
        # longer truncates an existing output file.
        with open(output, "wb") as outputfile:
            writer.write(outputfile)
def convert_page(path, note_name, notebook_path, directory, pdf_file, page_number):
    """Convert one Papyrus page (protobuf) to a single-page PDF.

    The page is parsed from *path*, drawn with cairo (strokes, ellipses,
    text and images) into
    ``<directory>/<notebook_path>/<safe note name>/pdf/page<page_number>.pdf``
    and, if the page declares a PDF background, merged onto the referenced
    page of that background document.

    :param path: file holding the serialized ``papyrus_pb2.Page``.
    :param note_name: note title; sanitised with ``titlesafe``/``dirsafe``.
    :param notebook_path: notebook directory, relative to *directory*.
    :param directory: output root directory.
    :param pdf_file: file name of the background PDF, looked up under
        ``base_directory + "data/docs/"``; only used when the page has a
        ``pdf_background``.
    :param page_number: number used in the output file name.
    :returns: path of the generated PDF.
    """
    page = papyrus_pb2.Page()
    # Open and parse papyrus page using protobuf
    page.ParseFromString(open(path, 'rb').read())
    # A page with zero width and height is treated as "infinite": its size
    # is derived from the largest right/bottom bound of its items, plus 1.
    if page.background.width == 0 and page.background.height == 0:
        print("\tInfinite page!")
        max_x = 0
        max_y = 0
        for item in page.layer.item:
            bounds = None
            if item.type == papyrus_pb2.Item.Type.Value('Stroke'):
                bounds = item.stroke.bounds
            elif item.type == papyrus_pb2.Item.Type.Value('Shape'):
                if item.shape.type == 'Ellipse':
                    bounds = item.shape.ellipse.bounds
            elif item.type == papyrus_pb2.Item.Type.Value('Text'):
                bounds = item.text.bounds
            else:
                print(item)
            if bounds is not None:
                if bounds.right > max_x:
                    max_x = bounds.right
                if bounds.bottom > max_y:
                    max_y = bounds.bottom
        page.background.width = max_x + 1
        page.background.height = max_y + 1
    note_name = titlesafe(note_name)
    print("\t%s" % note_name)
    note_path = directory + '/' + notebook_path + '/' + dirsafe(note_name)
    new_note_path = note_path
    # `num` is only used by the disabled de-duplication loop below.
    num = 1
    #while os.path.exists(new_note_path):
    #    new_note_path = note_path + '(' + str(num) + ')'
    #    num += 1
    makedir(note_path)
    note_path = new_note_path
    pdfpath = note_path + '/pdf'
    makedir(pdfpath)
    pdffile = pdfpath + '/page' + str(page_number) + '.pdf'
    print("\tSource: %s\n\tOutput: %s" % (path, pdffile))
    # Create a new pdf surface for drawing
    # NOTE(review): the file is opened in text mode ('w') although PDF data
    # is binary -- confirm this works on the targeted Python version.
    pdf_out = open(pdffile, 'w')
    surface = cairocffi.PDFSurface(pdf_out, cm_to_point(page.background.width), cm_to_point(page.background.height))
    context = cairocffi.Context(surface)
    # Paint the page with the source (0, 0, 0, 0).
    # NOTE(review): an alpha of 0 looks transparent rather than white.
    context.set_source_rgba(0, 0, 0, 0)
    context.paint()
    for item in page.layer.item:
        if item.type == papyrus_pb2.Item.Type.Value('Stroke'):
            context.save()
            # Translate to reference_point (stroke origin)
            context.translate(cm_to_point(item.stroke.reference_point.x), cm_to_point(item.stroke.reference_point.y))
            # Set source color; argb[0] is passed as alpha, argb[1:4] as
            # red, green, blue
            argb = u32_to_4f(item.stroke.color)
            context.set_source_rgba(argb[1], argb[2], argb[3], argb[0])
            # Set line width
            width = cm_to_point(item.stroke.weight)
            # Other parameter
            context.set_line_join(cairocffi.LINE_JOIN_ROUND)
            context.set_line_cap(cairocffi.LINE_CAP_ROUND)
            context.move_to(0, 0)
            # Highlighter strokes are drawn opaque into a group and
            # composited with the stroke's alpha once all segments are done.
            if item.stroke.stroke_type == papyrus_pb2.Stroke.Highlight:
                context.push_group()
                context.set_source_rgba(argb[1], argb[2], argb[3], 1)
                context.fill_preserve()
                context.set_line_cap(cairocffi.LINE_CAP_SQUARE)
            # Each segment is stroked on its own so the line width can
            # follow the per-point pressure.
            for point in item.stroke.point:
                context.line_to(cm_to_point(point.x), cm_to_point(point.y))
                if item.stroke.stroke_type == papyrus_pb2.Stroke.Highlight:
                    context.set_line_width(width)
                    #context.
                elif point.HasField('pressure'):
                    context.set_line_width(width * point.pressure)
                else:
                    context.set_line_width(width)
                context.stroke()
                context.move_to(cm_to_point(point.x), cm_to_point(point.y))
            if item.stroke.stroke_type == papyrus_pb2.Stroke.Highlight:
                context.pop_group_to_source()
                context.paint_with_alpha(argb[0])
            context.restore()
        elif item.type == papyrus_pb2.Item.Type.Value(
                'Shape') and item.shape.ellipse is not None:
            width = item.shape.ellipse.weight * 0.3
            context.save()
            context.new_sub_path()
            context.translate(cm_to_point(item.shape.ellipse.center_x), cm_to_point(item.shape.ellipse.center_y))
            # This width is overridden by set_line_width(width) below.
            context.set_line_width(item.shape.ellipse.weight)
            argb = u32_to_4f(item.shape.ellipse.color)
            context.set_line_width(width)
            context.set_source_rgba(argb[1], argb[2], argb[3], argb[0])
            # Scale a unit circle to the ellipse radii.
            context.scale(cm_to_point(item.shape.ellipse.radius_x), cm_to_point(item.shape.ellipse.radius_y))
            # NOTE(review): sweep_angle is passed as the arc's end angle, not
            # added to start_angle -- confirm.
            context.arc(0, 0, 1, (item.shape.ellipse.start_angle / 360) * 2 * math.pi, (item.shape.ellipse.sweep_angle / 360) * 2 * math.pi)
            context.close_path()
            context.stroke()
            context.restore()
        elif item.type == papyrus_pb2.Item.Type.Value('Text'):
            context.save()
            context.set_font_size(item.text.weight)
            # Color
            argb = u32_to_4f(item.text.color)
            context.set_source_rgba(argb[1], argb[2], argb[3], argb[0])
            context.move_to(cm_to_point(item.text.bounds.left), cm_to_point(item.text.bounds.top))
            # The glyphs are laid out with a sans-serif toy font scaled by
            # the integer text weight, starting at the bounds' left/bottom.
            tw = int(item.text.weight)
            size_m = cairocffi.Matrix(tw, 0, 0, tw, 0, 0)
            scaledFont = cairocffi.ScaledFont(
                cairocffi.ToyFontFace("sans-serif"), size_m)
            glyphs = scaledFont.text_to_glyphs(
                cm_to_point(item.text.bounds.left), cm_to_point(item.text.bounds.bottom), item.text.text, False)
            context.show_glyphs(glyphs)
            context.restore()
        elif item.type == papyrus_pb2.Item.Type.Value('Image'):
            if (DEBUG):
                print("Got an image!")
                print(item.image.image_hash)
            # Crop the stored image and save it as PNG next to the original,
            # so cairo can load it with create_from_png
            im = Image.open(base_directory + "data/imgs/" + item.image.image_hash)
            im = im.crop(
                (item.image.crop_bounds.left, item.image.crop_bounds.top, item.image.crop_bounds.right, item.image.crop_bounds.bottom))
            im.save(
                base_directory + "data/imgs/" + item.image.image_hash + ".png", "PNG")
            im.close()
            matrix = cairocffi.Matrix()
            # Ratio between the target size (converted with cm_to_point) and
            # the cropped size.
            scale_x = cm_to_point(item.image.bounds.right - item.image.bounds.left) / (
                item.image.crop_bounds.right - item.image.crop_bounds.left)
            scale_y = cm_to_point(item.image.bounds.bottom - item.image.bounds.top) / (
                item.image.crop_bounds.bottom - item.image.crop_bounds.top)
            if (DEBUG):
                print("Scale X: %d" % (1 / scale_x))
                print("Scale Y: %d" % (1 / scale_y))
                print("Translate: %d" % cm_to_point(item.image.bounds.left))
            # The pattern matrix is built from the inverse scale and the
            # negated offset.
            matrix.scale(1 / scale_x, 1 / scale_y)
            matrix.translate(-cm_to_point(item.image.bounds.left), -cm_to_point(item.image.bounds.top))
            im_surface = cairocffi.ImageSurface.create_from_png(
                base_directory + "./data/imgs/" + item.image.image_hash + ".png")
            im_surface_pattern = cairocffi.SurfacePattern(im_surface)
            im_surface_pattern.set_filter(cairocffi.FILTER_GOOD)
            im_surface_pattern.set_matrix(matrix)
            context.save()
            context.set_source(im_surface_pattern)
            context.rectangle(
                cm_to_point(item.image.bounds.left), cm_to_point(item.image.bounds.top), cm_to_point(item.image.bounds.right - item.image.bounds.left), cm_to_point(item.image.bounds.bottom - item.image.bounds.top))
            context.fill()
            context.restore()
        else:
            print(item)
            print("Item of type {} not supported".format(
                papyrus_pb2.Item.Type.Name(item.type)))
    surface.flush()
    surface.finish()
    pdf_out.close()
    # Merge the drawing onto the referenced page of the background PDF.
    if page.background.HasField("pdf_background"):
        try:
            output_file = PdfFileWriter()
            # NOTE(review): `file()` is the Python 2 builtin; these handles
            # are never closed explicitly.
            input_file = PdfFileReader(file(pdffile, "rb"))
            pdf_file = PdfFileReader(
                file(base_directory + "data/docs/" + pdf_file, "rb"))
            pdf_page = pdf_file.getPage(
                page.background.pdf_background.page_number)
            input_page = input_file.getPage(0)
            pdf_page.mergePage(input_page)
            output_file.addPage(pdf_page)
            # Write to a temporary name, then rename over the drawing.
            with open(pdffile + ".tmp", "wb") as outputStream:
                output_file.write(outputStream)
            os.rename(pdffile + ".tmp", pdffile)
        except:
            # Any failure is only reported; the drawing without background
            # is kept.
            print(
                "\t%sUnable to merge PDFs - maybe the PDF was malformed? Result was %s%s"
                % (color.RED, sys.exc_info()[0], color.END))
    print("")
    return pdffile
def pdf_insert(dest: str,
               source: str,
               pages: [str] = None,
               index: int = None,
               output: str = None):
    """
    Insert pages from one file into another.

    :param dest: Destination file
    :param source: Source file
    :param pages: list of page numbers to insert (handed to
        ``parse_rangearg``); all pages of ``source`` are used when omitted
    :param index: index in destination file where to insert the pages;
        ``index - 1`` is passed through ``limit`` against the destination
        page count (presumably a clamp -- confirm against ``limit``).
        When omitted the pages are appended at the end.
    :param output: output file; when omitted ``dest`` is overwritten after
        an interactive confirmation
    """
    if output is not None and os.path.isfile(output):
        ans = input("The file '%s' already exists. "
                    "Overwrite? Yes/Abort [Y/a]: " % output).lower()
        if ans not in ['y', '']:
            return

    writer = PdfFileWriter()
    replacement = None

    # Both inputs must stay open until the writer has produced its output,
    # because PyPDF2 reads page contents from the source streams on write.
    with open(dest, 'rb') as destfile, open(source, 'rb') as srcfile:
        # read pages from the destination file
        destreader = PdfFileReader(destfile)
        for page in destreader.pages:
            writer.addPage(page)

        # read pages from the source file
        srcreader = PdfFileReader(srcfile)

        # Only translate the index when one was given; the original
        # computed ``index - 1`` unconditionally and crashed on None.
        if index is not None:
            index = limit(index - 1, 0, len(destreader.pages))

        # if no page numbers are given insert all pages
        if pages is None:
            selected = list(srcreader.pages)
        else:
            selected = [
                srcreader.getPage(pagenr)
                for pagenr in parse_rangearg(pages, len(srcreader.pages))
            ]

        for offset, page in enumerate(selected):
            if index is None:
                writer.addPage(page)
            else:
                writer.insertPage(page, index + offset)

        if output is None:
            # Write into a temporary file first; dest is replaced below,
            # once the input handles have been released.
            ans = input("Overwrite the file '%s'? Yes/Abort [Y/a]: "
                        % dest).lower()
            if ans in ['y', '']:
                with NamedTemporaryFile(delete=False) as tempfile:
                    writer.write(tempfile)
                replacement = tempfile.name
        else:
            with open(output, "wb") as outfile:
                writer.write(outfile)

    if replacement is not None:
        move(replacement, dest)
AUTHOR: Alex Leontiev ([email protected]) ORGANIZATION: VERSION: --- CREATED: 2022-03-11T00:29:18.044931 REVISION: --- ===============================================================================""" import sys from PyPDF2 import PdfFileReader, PdfFileWriter import tqdm # code below adapted from https://realpython.com/pdf-python/ _, pdf_fn, names_fn, out_path = sys.argv with open(names_fn) as f: names = f.readlines() names = [name.strip() for name in names] pdf = PdfFileReader(pdf_fn) assert pdf.getNumPages() == len( names ), f"number of pages in pdf ({pdf.getNumPages()}) and number of names ({len(names)}) are different" for page, name in tqdm.tqdm(list(zip(range(pdf.getNumPages()), names))): pdf_writer = PdfFileWriter() pdf_writer.addPage(pdf.getPage(page)) output = f'{out_path}/{name}.pdf' #print(f"creating {output}") with open(output, 'wb') as output_pdf: pdf_writer.write(output_pdf)
def extract_pages(input_files, pages, output_file, one_file=True):
    """Extract pages from a single or multiple .pdf files and combine them.

    Parameters
    ----------
    input_files : list
        a list of input file names
    pages : list
        page specifications paired with ``input_files`` by position; each
        entry is a string with no blank spaces, e.g. '1,3-5', and is handed
        to ``parse_ranges``
    output_file : str
        an output file location. If the ``one_file=False``, the
        corresponding suffix consisting of the page numbers will be added
    one_file : bool, optional
        a flag to save the outputs into one .pdf file (default: True)

    Returns
    -------
    list
        names of the files that were written

    Raises
    ------
    AssertionError
        if a requested page number exceeds the page count of its file
    """
    from contextlib import ExitStack

    pdfs = {}
    for f, p in zip(input_files, pages):
        pdfs[f] = parse_ranges(p)

    # NOTE: This reversed order sorting may have adverse effect on the resulted
    # file. Need to rely on user's input to properly order the files.
    # pdfs = OrderedDict(sorted(pdfs.items(), reverse=True))
    logger.info(pdfs)

    output_files = []
    out_dir = os.path.dirname(os.path.abspath(output_file))
    msg = 'specified pages range {} is out of range ({})'

    # The input streams have to outlive the final ``write`` call (PyPDF2
    # pulls page data from them lazily), so they are collected in one
    # ExitStack and released together at the end.
    with ExitStack() as open_inputs:
        if one_file:
            output = PdfFileWriter()

        for pdf_name, pdf_pages in tqdm(pdfs.items()):
            print(pdf_name, pdf_pages)
            full_path = Path(pdf_name)
            stream = open_inputs.enter_context(open(full_path, 'rb'))
            inputpdf = PdfFileReader(stream)
            num_pages = inputpdf.numPages
            pages_range = '-'.join([str(x) for x in pdf_pages])

            # Raised explicitly (rather than via ``assert``) so the check
            # survives ``python -O``; the exception type is unchanged.
            if any(page > num_pages for page in pdf_pages):
                raise AssertionError(msg.format(pages_range, num_pages))

            n, e = os.path.splitext(os.path.basename(full_path))
            out_name = os.path.join(out_dir,
                                    '{}_{}{}'.format(n, pages_range, e))
            tqdm.write(' Input file: {} (pages: {} out of total {})'.format(
                full_path, pages_range, num_pages))

            if not one_file:
                tqdm.write(' Output file: {}'.format(out_name))
                output = PdfFileWriter()

            for i in pdf_pages:
                tqdm.write(' Getting page {}...'.format(i))
                # page numbers are 1-based, PyPDF2 indices are 0-based
                output.addPage(inputpdf.getPage(i-1))

            if not one_file:
                with open(out_name, 'wb') as oStream:
                    output_files.append(out_name)
                    output.write(oStream)
            tqdm.write('')

        if one_file:
            out_name = output_file
            tqdm.write('\n\tOutput file: {}'.format(out_name))
            with open(out_name, 'wb') as oStream:
                output_files.append(out_name)
                output.write(oStream)

    return output_files
def save_page(filepath, page_number):
    """Save one page of a PDF next to it as ``page-<page_number>.pdf``.

    After writing the page, its layout is analysed with
    ``get_page_layout`` / ``get_text_objects`` / ``get_rotation``. When a
    rotation is reported (any non-empty string), the extracted file is
    renamed to ``p-<page_number>_rotated.pdf`` and a copy rotated by 90
    degrees in the opposite direction is written back to
    ``page-<page_number>.pdf``.

    Parameters
    ----------
    filepath : str
        path of the source PDF
    page_number : int
        1-based number of the page to extract
    """
    out_dir = os.path.dirname(filepath)
    outpath = os.path.join(out_dir, "page-{}.pdf".format(page_number))

    # Keep the source open until the page has been written out: PyPDF2
    # reads page data lazily from the stream.
    with open(filepath, "rb") as source:
        infile = PdfFileReader(source, strict=False)
        outfile = PdfFileWriter()
        outfile.addPage(infile.getPage(page_number - 1))
        with open(outpath, "wb") as f:
            outfile.write(f)

    fext = os.path.splitext(outpath)[1]
    layout, __ = get_page_layout(outpath)

    # fix rotated PDF
    chars = get_text_objects(layout, ltype="char")
    horizontal_text = get_text_objects(layout, ltype="horizontal_text")
    vertical_text = get_text_objects(layout, ltype="vertical_text")
    rotation = get_rotation(chars, horizontal_text, vertical_text)

    if rotation != "":
        # Build the name from the directory and file name separately; a
        # plain str.replace("page", "p") on the whole path also rewrote
        # directory names containing "page" and broke the rename.
        outpath_new = os.path.join(
            out_dir, "p-{}_rotated{}".format(page_number, fext))
        os.rename(outpath, outpath_new)

        with open(outpath_new, "rb") as rotated_source:
            infile = PdfFileReader(rotated_source, strict=False)
            if infile.isEncrypted:
                infile.decrypt("")
            outfile = PdfFileWriter()
            p = infile.getPage(0)
            # undo the detected rotation
            if rotation == "anticlockwise":
                p.rotateClockwise(90)
            elif rotation == "clockwise":
                p.rotateCounterClockwise(90)
            outfile.addPage(p)
            with open(outpath, "wb") as f:
                outfile.write(f)
def handle(self, *args, **options):
    """Fill the delivery form template once per active person.

    Loads ``source_files/2021_entrega.pdf``, copies its first page into a
    writer with ``/NeedAppearances`` enabled, and for every ``Persona``
    that is active and not flagged ``covid`` fills the form fields and
    writes ``./entregas/<numero_adra>.pdf``.
    """

    def set_need_appearances_writer(writer):
        # See 12.7.2 and 7.7.2 for more information:
        # http://www.adobe.com/content/dam/acom/en/devnet/acrobat/pdfs/PDF32000_2008.pdf
        try:
            catalog = writer._root_object
            # get the AcroForm tree and add "/NeedAppearances attribute
            if "/AcroForm" not in catalog:
                writer._root_object.update({
                    NameObject("/AcroForm"):
                    IndirectObject(len(writer._objects), 0, writer)
                })
            need_appearances = NameObject("/NeedAppearances")
            writer._root_object["/AcroForm"][need_appearances] = \
                BooleanObject(True)
            return writer
        except Exception as e:
            # best effort: report the problem and carry on with the writer
            print('set_need_appearances_writer() catch : ', repr(e))
            return writer

    def calculate_age(age):
        # ``age`` is a date of birth; the boolean subtracts one year when
        # this year's birthday has not happened yet.
        today = date.today()
        return today.year - age.year - (
            (today.month, today.day) < (age.month, age.day))

    infile = os.path.join(os.path.abspath('source_files'), '2021_entrega.pdf')

    # The template stream must stay open while the writer is used, and is
    # now closed once all files have been produced.
    with open(infile, "rb") as inputStream:
        pdf_reader = PdfFileReader(inputStream, strict=False)
        if "/AcroForm" in pdf_reader.trailer["/Root"]:
            pdf_reader.trailer["/Root"]["/AcroForm"].update(
                {NameObject("/NeedAppearances"): BooleanObject(True)})

        pdf_writer = PdfFileWriter()
        set_need_appearances_writer(pdf_writer)
        if "/AcroForm" in pdf_writer._root_object:
            pdf_writer._root_object["/AcroForm"].update(
                {NameObject("/NeedAppearances"): BooleanObject(True)})

        personas = Persona.objects.filter(active=True).exclude(covid=True)

        pdf_writer.addPage(pdf_reader.getPage(0))

        for persona in personas:
            if not persona.active:
                continue

            # Split the relatives into older than 3 years and the rest.
            mayores = 0
            menores = 0
            for f in persona.hijo.all():
                if calculate_age(f.fecha_nacimiento) > 3:
                    mayores += 1
                else:
                    menores += 1
            print(mayores)
            print(menores)

            field_dictionary = {
                "NombreOAR": "ADRA TORREJON",
                "DireccioOAR": "C/ Primavera 15",
                "Nombre y apellidos del representante de la unidad familiar":
                f"{persona.nombre_apellido}",
                "DNINIEPasaporte 1": f"{persona.dni}",
                "Teléfono": f"{persona.telefono}",
                "Domicilio": f"{persona.domicilio}",
                "Localidad": f"{persona.ciudad}",
                "CP": "28850",
                # relatives plus the representative
                "TOTAL MIEMBROS UNIDAD FAMILIAR":
                f"{mayores + menores + 1}",
                "Niños 02 ambos inclusive": f"{menores}",
                "numarAdra": f"{persona.numero_adra}",
            }

            # The same page object is refilled for every person and the
            # writer is dumped to a per-person file.
            pdf_writer.updatePageFormFieldValues(
                pdf_writer.getPage(0), field_dictionary)

            with open(f"./entregas/{persona.numero_adra}.pdf",
                      "wb") as out_file:
                pdf_writer.write(out_file)
def _post_pdf(self, save_in_attachment, pdf_content=None, res_ids=None):
    '''Merge the existing attachments by adding one by one the content of the attachments
    and then, we add the pdf_content if exists. Create the attachments for each record individually
    if required.

    :param save_in_attachment: The retrieved attachments as map record.id -> attachment_id.
    :param pdf_content: The pdf content newly generated by wkhtmltopdf.
    :param res_ids: the ids of record to allow postprocessing.
    :return: The pdf content of the merged pdf.
    '''

    def close_streams(streams):
        # Closing is best effort: a stream that is already closed or
        # broken must not prevent the others from being released.
        for stream in streams:
            try:
                stream.close()
            except Exception:
                pass

    # Check special case having only one record with existing attachment.
    # base64.decodebytes replaces decodestring, which was removed in
    # Python 3.9.
    if len(save_in_attachment) == 1 and not pdf_content:
        return base64.decodebytes(
            list(save_in_attachment.values())[0].datas)

    # Treat a missing id list as "no records" instead of failing while
    # iterating over None.
    res_ids = res_ids or []

    # Create a list of streams representing all sub-reports part of the final result
    # in order to append the existing attachments and the potentially modified sub-reports
    # by the postprocess_pdf_report calls.
    streams = []

    # In wkhtmltopdf has been called, we need to split the pdf in order to call the postprocess method.
    if pdf_content:
        pdf_content_stream = io.BytesIO(pdf_content)
        # Build a record_map mapping id -> record
        record_map = {
            r.id: r
            for r in self.env[self.model].browse(
                [res_id for res_id in res_ids if res_id])
        }

        # If no value in attachment or no record specified, only append the whole pdf.
        if not record_map or not self.attachment:
            streams.append(pdf_content_stream)
        else:
            if len(res_ids) == 1:
                # Only one record, so postprocess directly and append the whole pdf.
                if res_ids[0] in record_map and not res_ids[
                        0] in save_in_attachment:
                    self.postprocess_pdf_report(record_map[res_ids[0]],
                                                pdf_content_stream)
                streams.append(pdf_content_stream)
            else:
                # In case of multiple docs, we need to split the pdf according the records.
                # To do so, we split the pdf based on outlines computed by wkhtmltopdf.
                # An outline is a <h?> html tag found on the document. To retrieve this table,
                # we look on the pdf structure using pypdf to compute the outlines_pages that is
                # an array like [0, 3, 5] that means a new document start at page 0, 3 and 5.
                reader = PdfFileReader(pdf_content_stream)
                if reader.trailer['/Root'].get('/Dests'):
                    outlines_pages = sorted([
                        outline.getObject()[0] for outline in
                        reader.trailer['/Root']['/Dests'].values()
                    ])
                    assert len(outlines_pages) == len(res_ids)
                    for i, num in enumerate(outlines_pages):
                        # a document ends where the next one starts, the
                        # last one at the end of the pdf
                        to = outlines_pages[i + 1] if i + 1 < len(
                            outlines_pages) else reader.numPages
                        attachment_writer = PdfFileWriter()
                        for j in range(num, to):
                            attachment_writer.addPage(reader.getPage(j))
                        stream = io.BytesIO()
                        attachment_writer.write(stream)
                        if res_ids[i] and res_ids[
                                i] not in save_in_attachment:
                            self.postprocess_pdf_report(
                                record_map[res_ids[i]], stream)
                        streams.append(stream)
                    close_streams([pdf_content_stream])
                else:
                    # If no outlines available, do not save each record
                    streams.append(pdf_content_stream)

    # If attachment_use is checked, the records already having an existing attachment
    # are not been rendered by wkhtmltopdf. So, create a new stream for each of them.
    if self.attachment_use:
        for attachment_id in save_in_attachment.values():
            content = base64.decodebytes(attachment_id.datas)
            streams.append(io.BytesIO(content))

    # Build the final pdf.
    writer = PdfFileWriter()
    for stream in streams:
        reader = PdfFileReader(stream)
        writer.appendPagesFromReader(reader)
    result_stream = io.BytesIO()
    streams.append(result_stream)
    writer.write(result_stream)
    result = result_stream.getvalue()

    # We have to close the streams after PdfFileWriter's call to write()
    close_streams(streams)
    return result
def pdf_add(dest: str, source: str, pages: [str], output: str):
    """
    Add pages from a source pdf file to an output file.
    If the output file does not exist a new file will be created.

    :param source: source pdf file
    :param dest: destination pdf file
    :param pages: list of page numbers or range expressions; all pages of
        ``source`` are added when this is None
    :param output: output pdf file; when None, ``dest`` is replaced after
        confirmation through ``overwrite_dlg``
    """
    if output is not None and os.path.isfile(output):
        if not overwrite_dlg(output):
            return

    writer = PdfFileWriter()
    replacement = None

    # The inputs stay open until the writer has produced its output and are
    # closed even when reading or parsing the page list fails.
    with open(dest, 'rb') as destfile, open(source, 'rb') as srcfile:
        # read pages from destination file
        destreader = PdfFileReader(destfile)
        for page in destreader.pages:
            writer.addPage(page)

        # read pages from source file
        srcreader = PdfFileReader(srcfile)

        # if no page numbers are given add all pages from source
        if pages is None:
            for page in srcreader.pages:
                writer.addPage(page)
        else:
            for pagenr in parse_rangearg(pages, len(srcreader.pages)):
                writer.addPage(srcreader.getPage(pagenr))

        if output is None:
            # Write into Temporary File first and then overwrite dest file
            if overwrite_dlg(dest):
                with NamedTemporaryFile(delete=False) as tempfile:
                    writer.write(tempfile)
                replacement = tempfile.name
        else:
            with open(output, "wb") as outfile:
                writer.write(outfile)

    # dest can only be replaced once its handle has been released.
    if replacement is not None:
        os.remove(dest)
        move(replacement, dest)
from urllib.request import Request, urlopen
from PyPDF2 import PdfFileWriter, PdfFileReader
from pushbullet.pushbullet import PushBullet
# BytesIO is needed on every code path, so it is imported unconditionally
# rather than only inside the ImportError fallback below.
from io import BytesIO
try:
    from StringIO import StringIO
except ImportError:
    from io import StringIO

url = "http://www.simsburybank.com/download-todays-rates/"
writer = PdfFileWriter()

# Download the rate sheet; the custom User-Agent is sent with the request.
with urlopen(Request(url, headers={'User-Agent': 'Mozilla/5.0'})) as response:
    remoteFile = response.read()
memoryFile = BytesIO(remoteFile)
pdfFile = PdfFileReader(memoryFile)
pageObj = pdfFile.getPage(0)

# Extract the page text once and pick the two lines of interest.
# NOTE(review): indices 96 and 149 depend on the current layout of the
# bank's PDF -- confirm they still point at the wanted rates.
page_lines = pageObj.extractText().splitlines()
rate96 = page_lines[96]
rate149 = page_lines[149]
rate_list = [rate96, rate149]

# Keep only the entries shorter than seven characters.
rate_list_temp = [rate for rate in rate_list if len(rate) < 7]
def pdf_split(input: str,
              output: str,
              stepsize: int = 1,
              sequence: [int] = None):
    """
    Split the input file in multiple output files

    Output files are named ``<output>_<n>.pdf`` with ``n`` starting at 1.
    When the pages run out in the middle of a ``sequence`` entry, the file
    being built is written with the pages collected so far and the
    function returns; pages left over after the sequence are not written.

    :param input: name of the input file
    :param output: name of the output files; defaults to ``input`` without
        its extension
    :param stepsize: how many pages per file, only if sequence is None
    :param sequence: list with number of pages per file
    """
    output = output or os.path.splitext(input)[0]

    if not os.path.isfile(input):
        print("Error. The file '%s' does not exist." % input)
        return

    def write_part(writer, number):
        # One output file per part, closed even if writing fails.
        with open(output + "_%i.pdf" % number, "wb") as outputfile:
            writer.write(outputfile)

    with open(input, "rb") as inputfile:
        reader = PdfFileReader(inputfile)

        if sequence is None:
            pagenr = 0
            writer = None
            for i, page in enumerate(reader.pages):
                if not i % stepsize:
                    # first page of a new part
                    pagenr += 1
                    writer = PdfFileWriter()
                writer.addPage(page)
                if not (i + 1) % stepsize:
                    write_part(writer, pagenr)
                    writer = None
            # A trailing, incomplete part still has to be written. The
            # original checked ``outputfile.closed`` here and crashed with
            # AttributeError when the input had no pages at all.
            if writer is not None:
                write_part(writer, pagenr)
        else:
            iter_pages = iter(reader.pages)
            for filenr, pagecount in enumerate(map(int, sequence)):
                writer = PdfFileWriter()
                exhausted = False
                for _ in range(pagecount):
                    try:
                        writer.addPage(next(iter_pages))
                    except StopIteration:
                        exhausted = True
                        break
                write_part(writer, filenr + 1)
                if exhausted:
                    return
from PyPDF2 import PdfFileReader, PdfFileWriter

# Concatenate several PDFs into one encrypted output file.
write_obj = PdfFileWriter()
pdf_list = [
    "E:\\Demo3\\1. Values & Ethics In Profession.pdf",
    "E:\\Demo3\\EM Theory SKB Sir Updated.pdf"
]

# Append every page of every input, in list order.
for i in pdf_list:
    read_obj = PdfFileReader(i)
    pages = read_obj.getNumPages()
    for p in range(pages):
        pd = read_obj.getPage(p)
        write_obj.addPage(pd)

# NOTE(review): the user and owner passwords are hard-coded in the source;
# consider reading them from the environment or a prompt instead.
write_obj.encrypt('Subha123', 'Misti123', True)

# The context manager guarantees the output is flushed and closed.
with open("E:\\Demo3\\Concat_1st.pdf", 'wb') as pdf_concat:
    write_obj.write(pdf_concat)
# Ask for the data the program needs.
ficheiro_frente = input(
    'Qual nome ficheiros para colocar o fundo? \nDigite sem a extenção :')
pg_fundo = input('Qual nnome do ficheiro de fundo? \nDigite sem a extenção :')

# 'pdf_fundo' is the PDF that provides the watermark/background.
pdf_fundo = PdfFileReader(pg_fundo + '.pdf')

# Only the first page (index 0) of the background PDF is used.
watermark_page = pdf_fundo.getPage(0)

# 'pdf_frente' is the PDF that receives the watermark.
pdf_frente = PdfFileReader(ficheiro_frente + '.pdf')
pdf_writer = PdfFileWriter()

# Progress counter and the number of pages to process.
cont = 0
total = pdf_frente.getNumPages()

# Merge the background into every page of the front document.
for numero in range(pdf_frente.getNumPages()):
    page = pdf_frente.getPage(numero)
    page.mergePage(watermark_page)
    pdf_writer.addPage(page)
    cont = cont + 1
    # Show how many pages out of the total have been processed so far.
    print(str(cont) + '- página de um total de :' + str(total))

# Open and write the output file.
def proccess_cp(file, form):
    """Stamp the form data onto every page of an uploaded PDF.

    The values in ``form`` are drawn with reportlab onto an in-memory A4
    overlay at fixed coordinates, and that overlay is merged onto each
    page of ``file``.

    :param file: the existing PDF, in any form ``PdfFileReader`` accepts
    :param form: mapping with the form values; ``ownership_line`` and
        ``destination_load`` are optional, every other key read below is
        required. ``load_date`` must be formatted as ``YYYY-MM-DD``.
    :return: the merged PDF as bytes
    :raises KeyError: if a required form key is missing
    :raises ValueError: if ``load_date`` does not match ``%Y-%m-%d``
    """
    # CONST
    species_dict = {
        '0000': '------',
        'ALGO': 'Algodón',
        'AVEN': 'Avena',
        'CART': 'Cártamo',
        'CEBA': 'Cebada',
        'CECE': 'Cebada Cervecera',
        'COLZ': 'Colza',
        'COL0': 'Colza Doble 00',
        'CUAR': 'Cuarta de Cebada',
        'GIRA': 'Girasol',
        'LINO': 'Lino',
        'MAIZ': 'Maíz',
        'MAMA': 'Maíz Mav',
        'MAPI': 'Maíz Pisingallo',
        'MANI': 'Maní',
        'SOJA': 'Soja',
        'SORG': 'Sorgo',
        'TRIG': 'Trigo',
        'TRIC': 'Trigo Candeal',
        'TRIP': 'Trigo Pan',
    }
    # Harvest seasons for the last three years plus the current one,
    # keyed 'YYZZ' and displayed as 'YY/ZZ'.
    harvest_dict = {'0000': '------'}
    date = datetime.datetime.now().year
    for year in range(date-3, date+1):
        harvest_dict[str(year)[-2:] + str(year+1)[-2:]] = str(year)[-2:] + '/' + str(year+1)[-2:]

    # Init InMemory PDF file & canvas
    packet = io.BytesIO()
    can = canvas.Canvas(packet, pagesize=A4)

    # Init fields through form data
    # When an ownership line is present everything below it moves down.
    ownership_height = 20 if form.get('ownership_line', None) else 0
    destination_load = form.get('destination_load', None)
    # Unknown codes fall back to the same placeholder the '0000' entries
    # use; previously they produced None and crashed on ``.upper()``.
    placeholder = species_dict['0000']
    species = species_dict.get(form['species'], placeholder)
    harvest = harvest_dict.get(form['harvest'], placeholder)
    load_date = datetime.datetime.strptime(form['load_date'], "%Y-%m-%d").date()
    fcarga_year = str(load_date.year)
    fcarga_month = '%02d' % load_date.month
    fcarga_day = '%02d' % load_date.day

    # The text object is created before any setFont call, as before.
    observations = can.beginText()
    observations.setTextOrigin(410, 430 - ownership_height)
    observations.textLines(form['observations'])

    def draw(x, y, text):
        # Draw below the (optional) ownership line.
        can.drawString(x, y - ownership_height, text)

    # Write form fields to canvas
    can.setFont('Helvetica', 12)
    can.drawString(235, 747, form['ctg'])
    can.drawString(320, 747, form['renspa'])

    can.setFont('Helvetica', 10)
    can.drawString(506, 761, fcarga_day)
    can.drawString(524, 761, fcarga_month)
    can.drawString(541, 761, fcarga_year)

    # (form key, y of the name column, y of the CUIT column)
    parties = (
        ('intermediary', 683, 684),
        ('sender', 663, 664),
        ('broker', 643, 644),
        ('mat', 623, 624),
        ('broker_seller', 603, 604),
        ('representative', 583, 584),
        ('addressee', 563, 564),
        ('destination', 543, 544),
        ('freight_broker', 523, 524),
        ('carrier', 504, 505),
        ('driver', 484, 486),
    )
    for key, name_y, _ in parties:
        draw(183, name_y, form[key])
    for key, _, cuit_y in parties:
        draw(471, cuit_y, form[key + '_cuit'])

    draw(370, 460, harvest)
    draw(130, 460, species.upper())
    draw(246, 460, form['species_type'])
    draw(500, 460, form['contract'])

    if destination_load:
        draw(133, 430, 'X')
    draw(125, 410, form['estimated_kg'])

    # Tick the quality box; anything other than the two known values goes
    # to the third box.
    quality_y = {'DECLARACION': 445, 'CONFORME': 427}.get(form['quality'], 411)
    draw(257, quality_y, 'X')

    draw(353, 442, form['gross_kg'])
    draw(353, 425, form['tare_kg'])
    draw(353, 409, form['net_kg'])
    can.drawText(observations)

    for x, y, key in (
            (423, 395, 'stablishment'),
            (423, 381, 'city'),
            (423, 367, 'state'),
            (110, 375, 'address'),
            (360, 349, 'destination_city'),
            (360, 332, 'destination_state'),
            (80, 332, 'destination_address'),
            (345, 311, 'freight_payer'),
            (95, 294, 'truck'),
            (95, 277, 'trailer'),
            (95, 260, 'km'),
            (242, 277, 'ref_rate'),
            (242, 260, 'rate'),
    ):
        draw(x, y, form[key])

    can.setFont('Helvetica', 8)
    draw(463, 18, form['fumigant_dni'])
    draw(293, 18, form['fumigant_observation'])
    can.save()

    # Move to the beginning of the BytesIO buffer
    packet.seek(0)
    new_pdf = PdfFileReader(packet)
    # Get the canvas content
    new_pdf_page = new_pdf.getPage(0)

    existing_pdf = PdfFileReader(file)
    output = PdfFileWriter()
    for numpage in range(existing_pdf.getNumPages()):
        page = existing_pdf.getPage(numpage)
        # Merge uploaded PDF page with canvas content
        page.mergePage(new_pdf_page)
        page.compressContentStreams()
        # Save pages to new PDF
        output.addPage(page)

    # Write the final PDF to its own buffer. Writing into ``packet`` put
    # the result on top of the overlay the reader is still using and
    # returned leftover overlay bytes along with it.
    result = io.BytesIO()
    output.write(result)
    # Return buffer content
    return result.getvalue()
# Otherwise, the printing order is intended for automatic double-sided printing:
# Outward outside
# Inward outside
# Outward inside
# Inward inside
import sys
from PyPDF2 import PdfFileWriter, PdfFileReader, PdfFileMerger

# Command line: <input pdf> <output pdf> <mode>
inputOneFileName = sys.argv[1]
outputOneFileName = sys.argv[2]

# A parenthesised single argument prints the same under Python 2.
print("input 1: " + inputOneFileName)
print("output 1: " + outputOneFileName)

inputReaderOne = PdfFileReader(open(inputOneFileName, "rb"))
outputWriterOne = PdfFileWriter()

# Slots 3..6 are filled according to the printing mode; slot 2 always
# refers to page 0.
pageIndex = list(range(9))
pageIndex[2] = 0
if sys.argv[3] == "manual":
    pageIndex[3:7] = [5, 2, 1, 6]
else:
    pageIndex[3:7] = [1, 6, 5, 2]
def creerPDF(chemin, page):
    """Write ``page`` as a single-page PDF file.

    :param chemin: path of the file to create (opened in ``"wb"`` mode)
    :param page: page object added to a fresh ``PdfFileWriter``
    """
    output = PdfFileWriter()
    output.addPage(page)
    # The context manager closes the file even when writing fails.
    with open(chemin, "wb") as outputStream:
        output.write(outputStream)
import gc title_list = [ "0-16-1577-1577-Sherry-A_Treatise_of_the_Figures_of_Grammar_and_Rhetorike.pdf", "1-16-1582-1582-Mulcaster-The_First_Part_of_the_Elementarie.pdf", "2-16-1586-1586-Bullokar-Brief_Grammar_of_English.pdf", "4-16-1596-1596-Coote-The_English_Schoole_Maister.pdf" ] for title in title_list: file_name = os.path.join("transcription_tool", title) title = file_name.split("-")[-1].split(".")[0] inputpdf = PdfFileReader(open(file_name, "rb")) for i in range(inputpdf.numPages): print(i) output = PdfFileWriter() output.addPage(inputpdf.getPage(i)) folder_name = "{}-pages".format(title) jpeg_folder_name = "{}-pictures".format(title) if not os.path.exists(jpeg_folder_name): os.mkdir(jpeg_folder_name) if not os.path.exists(folder_name): os.mkdir(folder_name) file_name = os.path.join(folder_name, "{}.pdf".format(i)) jpeg_file_name = os.path.join(jpeg_folder_name, "{}.jpg".format(i)) if not os.path.exists(jpeg_file_name): if not os.path.exists(file_name): with open(file_name, "wb") as outputStream: output.write(outputStream) images = convert_from_path(file_name) for image in images:
translatePageDown = (float(pageHeight) / 72) * 25.4 * sqrt(2) existingPdfPage.mergeRotatedTranslatedPage( new_pdf.getPage(0), rotation=90, tx=translatePageDown, ty=translatePageDown) output.addPage(existingPdfPage) # outputStream = file(filepath, "wb") # output.write(outputStream) # outputStream.close() #GLOBAL VARIABLE output = PdfFileWriter() INPUT_FILE_PATH = "docs/doc3.pdf" OUTPUT_FILE_PATH = "output/d56.pdf" sizes = { "None": (0, 0), "A": (8.5, 11), "B": (11, 17), "C": (17, 22), "D": (22, 34), "E": (34, 44), "F": (28, 40) } key = "A" scaledPageMax = sizes[key][1] scaledPageMin = sizes[key][0]
#!/bin/python3
from PyPDF2 import PdfFileWriter, PdfFileReader
import io
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import A4

# Stamp "ADDTEXT<page index>" onto every page of the input PDF.
pdf = PdfFileReader('path/to/file')
pdf_writer = PdfFileWriter()

page_total = pdf.getNumPages()
for page in range(page_total):
    # Render the label for this page into an in-memory one-page PDF.
    packet = io.BytesIO()
    can = canvas.Canvas(packet, pagesize=A4)
    can.drawString(525, 30, "ADDTEXT" + str(page))
    can.save()
    packet.seek(0)

    # Overlay the label on the original page and queue the result.
    watermark = PdfFileReader(packet)
    watermark_page = watermark.getPage(0)
    pdf_page = pdf.getPage(page)
    pdf_page.mergePage(watermark_page)
    pdf_writer.addPage(pdf_page)

with open('out.pdf', 'wb') as fh:
    pdf_writer.write(fh)
can.setFillColor(white) can.setFont("AdonisC", 10) can.drawString(30 + 15 + 30, y - 5, head) # надпись в шапке can.setFillColor(black) text.wrapOn(can, 560, textHeight) text.drawOn(can, 30 + 5, y - textHeight - 10) # text = can.beginText() #текст # text.setTextOrigin(30 + 5, y - 5 - 10 - 5) # text.setFont("AdonisC", 10) # text.textLines(lines) # can.drawText(text) output = PdfFileWriter() packet = io.BytesIO() can = canvas.Canvas(packet, pagesize=letter) #ruler(can) text = '''I am drawing text atop a base image via PIL. One of the requirements is for it to overflow to the next line(s) if the combined width of all characters exceeds the width of the base image. Currently I'm using textwrap.wrap(text, width=16) to accomplish this. Here width defines the number of characters to accommodate in one line. Now the text can be anything since it's user generated. So the problem is that hard-coding width won't take into account width variability due to font type, font size and character selection. What do I mean? Well imagine I'm using DejaVuSans.ttf, size 14. A W is 14 in length, whereas an 'i' is 4. For a base image of width 400, up to 100 i characters can be accommodated in a single line. But only 29 W characters. I need to formulate a smarter way of wrapping to the next line, one where the string is broken when the sum of character-widths exceeds the base image width. Can someone help me formulate this? An illustrative example would be great!''' drawRamka(can, 283, text)
def whitepaper_access(request, ratelimited=False):
    """Serve a personalised, watermarked copy of the whitepaper.

    A GET (or a POST without ``submit``) renders the access-code form.
    A submitted form is checked for rate limiting, a known access code
    with remaining uses, and a valid e-mail address; each failure renders
    the form again with a message. On success a ``WhitepaperAccess`` row
    is created, a notification mail is sent, and up to the first 50 pages
    of ``assets/other/wp.pdf`` are stamped with a footer line (all pages)
    and a large diagonal ``WP<id>`` mark (all pages but the first).

    :param request: the incoming HTTP request
    :param ratelimited: whether the caller has flagged this request as
        rate limited
    :return: a ``TemplateResponse`` for the form, or an ``HttpResponse``
        carrying the generated PDF as an attachment
    """
    context = {
        'active': 'whitepaper',
        'title': _('Whitepaper'),
        'minihero': _('Whitepaper'),
        'suppress_logo': True,
    }
    if not request.POST.get('submit', False):
        return TemplateResponse(request, 'whitepaper_accesscode.html', context)

    if ratelimited:
        context['msg'] = _(
            "You're ratelimited. Please contact [email protected]")
        return TemplateResponse(request, 'whitepaper_accesscode.html', context)

    context['accesskey'] = request.POST.get('accesskey')
    context['email'] = request.POST.get('email')
    access_codes = AccessCodes.objects.filter(
        invitecode=request.POST.get('accesskey'))
    valid_access_code = access_codes.exists()
    if not valid_access_code:
        context['msg'] = _(
            "Invalid Access Code. Please contact [email protected]")
        return TemplateResponse(request, 'whitepaper_accesscode.html', context)

    ac = access_codes.first()
    if ac.uses >= ac.maxuses:
        context['msg'] = _(
            "You have exceeded your maximum number of uses for this access code. Please contact [email protected]"
        )
        return TemplateResponse(request, 'whitepaper_accesscode.html', context)

    # Any failure of the validator counts as an invalid address.
    valid_email = True
    try:
        validate_email(request.POST.get('email', False))
    except Exception:
        valid_email = False

    if not request.POST.get('email', False) or not valid_email:
        context['msg'] = _("Invalid Email. Please contact [email protected]")
        return TemplateResponse(request, 'whitepaper_accesscode.html', context)

    ip = get_ip(request)
    wa = WhitepaperAccess.objects.create(
        invitecode=request.POST.get('accesskey', False),
        email=request.POST.get('email', False),
        ip=ip,
    )

    send_mail(settings.CONTACT_EMAIL, settings.CONTACT_EMAIL,
              _("New Whitepaper Generated"), str(wa))

    # bottom watermark
    packet1 = BytesIO()
    can = canvas.Canvas(packet1, pagesize=letter)
    grey = Color(22 / 255, 6 / 255, 62 / 255, alpha=0.3)
    can.setFillColor(grey)
    can.setFontSize(8)
    lim = 30
    email__etc = wa.email if len(wa.email) < lim else wa.email[0:lim] + "..."
    msg = gettext(
        "Generated for access code {} by email {} at {} via ip: {}. https://gitcoin.co/whitepaper"
    ).format(wa.invitecode, email__etc,
             wa.created_on.strftime("%Y-%m-%d %H:%M"), wa.ip)
    # Centre the line using an estimated width per character.
    charlength = 3.5
    width = len(msg) * charlength
    left = (600 - width) / 2
    can.drawString(left, 7, msg)
    can.save()

    # middle watermark
    packet2 = BytesIO()
    can = canvas.Canvas(packet2, pagesize=letter)
    grey = Color(22 / 255, 6 / 255, 62 / 255, alpha=0.02)
    can.setFillColor(grey)
    can.setFontSize(100)
    msg = "WP{}".format(str(wa.pk).zfill(5))
    can.rotate(45)
    can.drawString(320, 50, msg)
    can.save()

    # wrap the two in-memory watermark PDFs in readers
    path_to_file = 'assets/other/wp.pdf'
    new_pdf1 = PdfFileReader(packet1)
    new_pdf2 = PdfFileReader(packet2)

    outputfile = "output/whitepaper_{}.pdf".format(wa.pk)

    # read your existing PDF; it has to stay open until the output has
    # been written and is closed afterwards
    with open(path_to_file, "rb") as source:
        existing_pdf = PdfFileReader(source)
        output = PdfFileWriter()

        # add the "watermark" (which is the new pdf) on the existing page
        try:
            # Stop at the real page count (capped at 50 as before)
            # instead of running into an exception on every request.
            page_count = min(existing_pdf.getNumPages(), 50)
            for i in range(page_count):
                page = existing_pdf.getPage(i)
                page.mergePage(new_pdf1.getPage(0))
                if i != 0:
                    page.mergePage(new_pdf2.getPage(0))
                output.addPage(page)
        except Exception as e:
            # best effort: deliver whatever pages were merged
            print(e)

        # finally, write "output" to a real file
        with open(outputfile, "wb") as outputStream:
            output.write(outputStream)

    filename = outputfile
    wrapper = FileWrapper(open(filename, 'rb'))
    response = HttpResponse(wrapper, content_type='application/pdf')
    response[
        'Content-Disposition'] = 'attachment; filename="GitcoinWhitepaper.pdf"'
    response['Content-Length'] = os.path.getsize(filename)
    return response
def _vat_as_number(vat):
    """Convert a VAT cell value to the numeric value bound in the SQL below.

    The statements used to embed ``str(vat)`` unquoted, i.e. as a numeric
    literal; binding a number keeps that meaning while using parameters.
    """
    text = str(vat).strip()
    try:
        return int(text)
    except ValueError:
        return float(text)


def main_program(pdf_file_path, csv_file_path, email_subj, email_body):
    """Split an invoice PDF per page, match pages to VAT numbers, e-mail them.

    Steps, in order:
      1. Write every page of ``pdf_file_path`` to its own PDF under
         ``C:\\temp_pdf_page_by_page`` and convert each to JPEG (``pdf2jpeg``).
      2. OCR every JPEG and split the text on spaces.
      3. Read column ``S`` of sheet ``Worksheet`` and look for each value
         (skipping the first collected one) inside the OCR words.
      4. Read the e-mail for each match from column index 10 of the same row.
      5. Store the matches in ``program_invoices``, then send one mail per
         VAT number, merging the pages first when a VAT has several rows.
      6. If the module-level ``files_not_send`` is non-empty, write a report
         and copy the unsent PDFs to ``C:\\INVOICES_NOT_SEND``.
      7. Remove the temporary folder.

    Args:
        pdf_file_path: Path of the multi-page invoice PDF.
        csv_file_path: Path of the spreadsheet (converted first when
            ``check_for_xls_file`` says it is an xls file).
        email_subj: Subject used for every e-mail.
        email_body: Body used for every e-mail.
    """
    try:
        os.mkdir("C:\\temp_pdf_page_by_page")
    except OSError:
        pass

    # One single-page PDF per source page; the source handle is closed as
    # soon as the split is done.
    with open(pdf_file_path, "rb") as source_pdf:
        inputpdf = PdfFileReader(source_pdf)
        pdf_pages_cnt = inputpdf.numPages
        for i in range(pdf_pages_cnt):
            output = PdfFileWriter()
            output.addPage(inputpdf.getPage(i))
            with open("C:\\temp_pdf_page_by_page\\document-page%s.pdf" % i,
                      "wb") as outputStream:
                output.write(outputStream)

    for i in range(pdf_pages_cnt):
        temp = "C:\\temp_pdf_page_by_page\\document-page%s.pdf" % i
        temp1 = "C:\\temp_pdf_page_by_page\\document-page%s.jpg" % i
        pdf2jpeg(temp, temp1)

    # txt[page] is the list of space-separated OCR words of that page.
    txt = []
    for i in range(pdf_pages_cnt):
        text = tess.image_to_string(
            r"C:\\temp_pdf_page_by_page\\document-page%s.jpg" % i)
        txt.append(text.split(" "))

    # Check if file is xls
    if check_for_xls_file(csv_file_path):
        fname = convert_xls_to_xlsx(csv_file_path)
    else:
        fname = csv_file_path
    wb = openpyxl.load_workbook(fname)
    ws = wb["Worksheet"]

    # Column S holds the VAT numbers; remember the 1-based row of each.
    mylist = []
    raw_position_in_excel = []
    for row_number, cell in enumerate(ws['S'], start=1):
        if str(cell.value) != "None":
            print(cell.value)
            mylist.append(cell.value)
            raw_position_in_excel.append(row_number)

    cnt = 0
    final = []
    positions = []
    position_in_excel = []
    # Index 0 is deliberately skipped, exactly as before.
    for i in range(1, len(mylist)):
        needle = str(mylist[i])
        for j, words in enumerate(txt):
            for word in words:
                if needle in word:
                    final.append(mylist[i])
                    positions.append(j + 1)
                    position_in_excel.append(raw_position_in_excel[i])
                    cnt = cnt + 1

    sheet = xlrd.open_workbook(csv_file_path).sheet_by_name("Worksheet")
    # Column index 10 holds the e-mail addresses.
    emails = [sheet.cell_value(row - 1, 10) for row in position_in_excel]
    print(cnt)

    email_subject_final = email_subj
    email_body_final = email_body

    # Drop every match that has an empty e-mail.
    deleteEmptyItems(final, positions, emails)

    # Make the DB connection
    conn = pyodbc.connect(r'Driver={Microsoft Access Driver (*.mdb, '
                          r'*.accdb)};DBQ=' + os.getcwd() + '/program.accdb;')
    try:
        cursor = conn.cursor()

        # INSERT the VAT number, email address and page position.  Values are
        # bound as parameters so spreadsheet content cannot alter the SQL.
        for vat, email, position in zip(final, emails, positions):
            cursor.execute(
                "INSERT INTO program_invoices (VAT, email, position) "
                "VALUES (?, ?, ?)",
                (_vat_as_number(vat), str(email), position))

        duplicated_counter = 0
        for vat in final:
            vat_value = _vat_as_number(vat)
            # All e-mails and page positions stored for this VAT number.
            cursor.execute(
                "SELECT program_invoices.VAT, program_invoices.email, program_invoices.position "
                "FROM program_invoices "
                "WHERE VAT=?", (vat_value,))
            dataBaseData = cursor.fetchall()

            if len(dataBaseData) == 1:
                # Single page: send it and drop the row.
                send_email(
                    dataBaseData[0].email, email_subject_final,
                    email_body_final,
                    'C:/temp_pdf_page_by_page/document-page%s.pdf' %
                    str(int(dataBaseData[0].position) - 1))
                cursor.execute("DELETE * "
                               "FROM program_invoices "
                               "WHERE VAT=?", (vat_value,))
            elif len(dataBaseData) > 1:
                # Several pages: merge them, send the merged file, then drop
                # the rows.  Later occurrences of this VAT find no rows.
                pdfsToMerge = [
                    'C:/temp_pdf_page_by_page/document-page%s.pdf' %
                    str(int(pdf.position) - 1) for pdf in dataBaseData
                ]
                mergePDFs(
                    pdfsToMerge,
                    'C:/temp_pdf_page_by_page/duplicated-item%s.pdf' %
                    str(duplicated_counter))
                send_email(
                    dataBaseData[0].email, email_subject_final,
                    email_body_final,
                    'C:/temp_pdf_page_by_page/duplicated-item%s.pdf' %
                    str(duplicated_counter))
                # The table name used to carry a stray trailing dot here,
                # which made this statement invalid.
                cursor.execute("DELETE * "
                               "FROM program_invoices "
                               "WHERE VAT=?", (vat_value,))
                duplicated_counter = duplicated_counter + 1
    finally:
        conn.close()

    if len(files_not_send) != 0:
        # Best effort: a failure here must not prevent the cleanup below,
        # but it is reported instead of being silently dropped.
        try:
            try:
                os.mkdir("C:\\INVOICES_NOT_SEND")
            except OSError:
                pass  # typically: the folder is left over from an earlier run
            with open("C:\\INVOICES_NOT_SEND\\Email που δεν στάλθηκαν.txt",
                      "w") as output_error_file:
                output_error_file.write(
                    "Δεν κατάφερα να στείλω τα παρακάτω τιμολόγια.\n")
                for error_file in files_not_send:
                    output_error_file.write(error_file + '\n')
                output_error_file.write(
                    "Στον φάκελο θα βρείτε και τα αρχεία που δεν κατάφερα να στείλω.\n"
                )
            for counter, error_file in enumerate(files_not_send, start=1):
                destination = "C:/INVOICES_NOT_SEND/document-page%s.pdf" % counter
                copyfile(error_file, destination)
        except Exception as err:
            print("Could not save the unsent invoices: %s" % err)

    shutil.rmtree("C:\\temp_pdf_page_by_page")
# Export one note: convert each of its pages, then merge the per-page PDFs
# into a single file named after the note's title.
# NOTE(review): `i`, `j`, `result`, `directory` and `base_directory` come from
# enclosing code that is not visible here -- presumably `i` is a notebook row
# and `j` a note row; confirm against the surrounding loops.
if result is not None:
    print("\tThis note has this associated document: %s" % (result))
    pdfFile = result[0]
pages = getPages(j[0])
count = 1  # 1-based page counter used for progress output and convert_page
files = []
for k in pages:
    print("\tProcessing page %d/%d of %s" % (count, len(pages), j[1]))
    files.append(
        convert_page(base_directory + 'data/pages/' + k[0] + '.page', j[1], dirsafe(i[2]), directory, pdfFile, count))
    count += 1
# Merge pages
output_file = PdfFileWriter()
for k in files:
    # Only the first page of every converted file is taken.
    # NOTE(review): `file(...)` is the Python 2 builtin and the handle is
    # never closed explicitly.
    input_file = PdfFileReader(file(k, "rb")).getPage(0)
    output_file.addPage(input_file)
final_pdf = directory + "/" + dirsafe(i[2]) + "/" + dirsafe(titlesafe(j[1])) + ".pdf"
with open(final_pdf, "wb") as outputStream:
    output_file.write(outputStream)
# Remove the folder that shares the merged PDF's name (without ".pdf");
# any failure to remove it is ignored.
try:
    shutil.rmtree(directory + "/" + dirsafe(i[2]) + "/" + dirsafe(titlesafe(j[1])))
except:
    ""
# j[3] is divided by 1000 -- presumably a millisecond timestamp; verify.
unix_ts = int(j[3] / 1000)
def make_pdf_writer() -> PdfFileWriter:
    """
    Build a fresh, empty PyPDF2 writer and hand it back to the caller.
    """
    writer = PdfFileWriter()
    return writer
import os
from PyPDF2 import PdfFileReader, PdfFileWriter

# Merge every PDF found under ./photo/<index> into ./res/res<index>/res<index>.pdf.
# NOTE(review): the original comment said there are 4 folders in total, but
# range(1, 4) only visits folders 1, 2 and 3 -- confirm the intended range.
for index in range(1, 4):
    pdfFiles = []
    for root, dirs, files in os.walk('./photo/%s' % (index)):
        for file in files:
            if file.endswith('.pdf'):
                pdfFiles.append(os.path.join(root, file))
    pdfFiles.sort()

    OutputContainer = PdfFileWriter()
    # PyPDF2 reads page data lazily, so the inputs must stay open until the
    # merged file has been written; they are all closed afterwards.
    open_inputs = []
    try:
        for file in pdfFiles:
            handle = open(file, 'rb')
            open_inputs.append(handle)
            inputPdf = PdfFileReader(handle)
            for i in range(inputPdf.getNumPages()):
                OutputContainer.addPage(inputPdf.getPage(i))

        # Also creates ./res when missing and tolerates a re-run.
        os.makedirs('./res/res%s' % (index), exist_ok=True)
        # The output is named after the folder's index as well.
        with open('./res/res%s/res%s.pdf' % (index, index), 'wb') as merged:
            OutputContainer.write(merged)
    finally:
        for handle in open_inputs:
            handle.close()
def _pesel_birth_date(pesel):
    """Return the birth date encoded in a PESEL number as ``'YYYY.MM.DD'``.

    PESEL stores the century in the month digits: 01-12 means 19xx, and the
    month is shifted by 20, 40, 60 or 80 for 20xx, 21xx, 22xx or 18xx.
    Month digits that do not decode fall back to the previous behaviour of
    assuming 19xx and copying the digits unchanged.
    """
    year, month, day = pesel[0:2], pesel[2:4], pesel[4:6]
    century = '19'
    if month.isdigit():
        shift, real_month = divmod(int(month), 20)
        centuries = {0: '19', 1: '20', 2: '21', 3: '22', 4: '18'}
        if shift in centuries and 1 <= real_month <= 12:
            century = centuries[shift]
            month = '%02d' % real_month
    return century + year + '.' + month + '.' + day


def karta_zgon_gen(pacjent):
    """Fill in the two-page ``karta.pdf`` form for ``pacjent`` and save it.

    The patient's data and the data of ``pacjent.zgon`` are drawn on a
    two-page overlay, which is merged onto pages 0 and 1 of
    ``app/static/karta.pdf``.  The result is written to
    ``app/static/karty/<imie>_<nazwisko>_kz.pdf`` with non-ASCII characters
    removed from both name parts.

    Args:
        pacjent: Object providing ``imie``, ``nazwisko``, ``nazwisko_rodowe``,
            ``nr_dowodu``, ``pesel``, ``plec``, ``miejsce_urodzenia`` and
            ``zgon`` (with ``data``, ``godzina``, ``wtorna``, ``wyjsciowa``,
            ``bezposrednia`` and ``user``).
    """

    def removeNonAscii(s):
        return "".join(i for i in s if ord(i) < 128)

    imie = pacjent.imie
    nazwisko = pacjent.nazwisko
    nazwisko_rodowe = pacjent.nazwisko_rodowe
    nr_dowodu = pacjent.nr_dowodu
    pesel = pacjent.pesel
    plec = pacjent.plec
    miejsce_urodzenia = pacjent.miejsce_urodzenia
    data_zgonu = str(pacjent.zgon.data)
    godzina_zgonu = str(pacjent.zgon.godzina)
    # The century is decoded from the PESEL instead of being fixed to 19xx.
    data_urodzenia = _pesel_birth_date(pacjent.pesel)
    przyczyna_wtorna = pacjent.zgon.wtorna
    przyczyna_wyjsciowa = pacjent.zgon.wyjsciowa
    przyczyna_bezposrednia = pacjent.zgon.bezposrednia
    stwierdzajacy_zgon = pacjent.zgon.user

    pdfmetrics.registerFont(
        TTFont('regular', settings.BASE_DIR + '/app/static/regular.ttf'))
    buffer = BytesIO()
    p = canvas.Canvas(buffer, pagesize=A4)

    # ---- overlay for page 1 ----
    p.setFont('regular', 10)
    p.drawString(170, 660, "{}".format(nazwisko))
    p.drawString(170, 632, "{}".format(nazwisko_rodowe))
    p.drawString(100, 595, "{}".format(imie))
    p.drawString(430, 595, "{}".format(pesel))
    p.drawString(200, 562, "{}".format(nr_dowodu))
    # Dates are sliced as YYYY / MM / DD; the form places the day before the
    # month, hence the x coordinates 227, 353, 303.
    p.drawString(227, 540, "{}".format(data_zgonu[0:4]))
    p.drawString(353, 540, "{}".format(data_zgonu[5:7]))
    p.drawString(303, 540, "{}".format(data_zgonu[8:10]))
    p.drawString(420, 540, "{}".format(godzina_zgonu[0:2]))
    p.drawString(467, 540, "{}".format(godzina_zgonu[3:5]))
    p.drawString(227, 490, "{}".format(data_urodzenia[0:4]))
    p.drawString(353, 490, "{}".format(data_urodzenia[5:7]))
    p.drawString(303, 490, "{}".format(data_urodzenia[8:10]))

    # Draw a rectangle around one of two options depending on ``plec``.
    # Coordinates are passed as numbers rather than strings.
    if plec == 'M':
        top, bottom = 433, 423
    else:
        top, bottom = 423, 413
    p.line(120, top, 180, top)
    p.line(120, bottom, 180, bottom)
    p.line(120, top, 120, bottom)
    p.line(180, bottom, 180, top)

    p.drawString(227, 292, "{}".format(miejsce_urodzenia))
    p.drawString(90, 185, "{}".format(przyczyna_bezposrednia))
    p.showPage()

    # ---- overlay for page 2 ----
    p.setFont('regular', 10)
    p.drawString(70, 768, "{}".format(przyczyna_wtorna))
    p.drawString(70, 712, "{}".format(przyczyna_wyjsciowa))
    p.drawString(
        70, 552, "{}".format('lek. ' + stwierdzajacy_zgon.first_name + ' ' +
                             stwierdzajacy_zgon.last_name))
    p.drawString(120, 418, "{}".format(data_zgonu[0:4]))
    p.drawString(243, 418, "{}".format(data_zgonu[5:7]))
    p.drawString(193, 418, "{}".format(data_zgonu[8:10]))
    p.showPage()
    p.save()

    buffer.seek(0)
    new_pdf = PdfFileReader(buffer)
    output_path = (settings.BASE_DIR + "/app/static/karty/" +
                   removeNonAscii(pacjent.imie) + "_" +
                   removeNonAscii(pacjent.nazwisko) + "_kz" + ".pdf")

    # The template must remain open until the writer has finished, so the
    # output is written inside the same ``with`` block.
    with open(settings.BASE_DIR + "/app/static/karta.pdf", "rb") as template:
        existing_pdf = PdfFileReader(template)
        page = existing_pdf.getPage(0)
        page.mergePage(new_pdf.getPage(0))
        page_two = existing_pdf.getPage(1)
        page_two.mergePage(new_pdf.getPage(1))
        output = PdfFileWriter()
        output.addPage(page)
        output.addPage(page_two)
        with open(output_path, "wb") as output_stream:
            output.write(output_stream)
def main(args):
    """Scan through PDF and split PDF and images.

    A new output PDF is started at every page on which a QR code is found.
    For every split a cover PDF, a cover JPEG and one JPEG per page are
    written, and ``decoded.json`` in ``split_path`` is updated through
    ``logger.write_to_json``.

    Args:
        args: Sequence of
            ``[0]`` name of the PDF to split (opened after changing into
            ``split_path``), ``[1]`` directory to work in, ``[2]`` prefix and
            ``[3]`` suffix stripped from decoded QR values, ``[4]`` log file
            path, ``[5]`` flag: when true the id is taken from
            ``scanner.getDigits`` instead of the QR value.

    Any exception is caught, printed and written to the log file.
    """
    filename = args[0]
    split_path = args[1]
    qr_prefix = args[2]
    qr_suffix = args[3]
    log_file_path = args[4]
    use_ocr = args[5]

    buff = "Process " + str(os.getpid()) + ": "

    try:
        os.chdir(split_path)
        pdfPages = PdfFileReader(filename)
        pdf_writer = PdfFileWriter()
        i = id_index = 0
        page_count = 1
        prev_file = data = "BLANK"
        output = {"filename": filename, "is_qr": True, "use_ocr": use_ocr}
        json_file = os.path.join(split_path, "decoded.json")

        # Read the PDF once, through a handle that is closed again, instead
        # of re-reading the whole file for every page.
        with open(filename, 'rb') as pdf_file:
            pdf_bytes = pdf_file.read()

        for page_number in range(pdfPages.numPages):
            # convert pdf to series of images for scanning; only this one
            # page is rasterised
            page = convert_from_bytes(pdf_bytes,
                                      first_page=page_number + 1,
                                      last_page=page_number + 1)[0]

            # increase contrast of image for better QR decoding
            cv_img = numpy.array(page)
            img_grey = cv2.cvtColor(cv_img, cv2.COLOR_BGR2GRAY)
            _, thresh = cv2.threshold(img_grey, 0, 255,
                                      cv2.THRESH_BINARY + cv2.THRESH_OTSU)

            # decode img - only look for QR codes
            val = pyzbar.decode(thresh, symbols=[ZBarSymbol.QRCODE])

            if val != []:
                # found a new qr code, split here
                # convert byte literal to string
                data = val[0][0].decode("utf-8")

                if not use_ocr:
                    buff += "Found a QR code with value \'" + data + "\' on"
                    buff += " page " + str(page_number) + ", "

                if data == "none":  # blank exam with 'none' qr code
                    data = "BLANK EXAM"
                else:
                    pre = data[0:len(qr_prefix)]
                    suf = data[(len(data) - len(qr_suffix)):len(data)]

                    if qr_prefix != '' and pre == qr_prefix:
                        data = data[len(qr_prefix):]
                    if qr_suffix != '' and suf == qr_suffix:
                        data = data[:-len(qr_suffix)]

                # since QR splitting doesn't know the max page assume length of 3
                prepended_index = str(i).zfill(3)

                cover_filename = '{}_{}_cover.pdf'.format(
                    filename[:-4], prepended_index)
                output_filename = '{}_{}.pdf'.format(filename[:-4],
                                                     prepended_index)
                output[output_filename] = {}

                # if we're looking for a student's ID, use that as the value instead
                if use_ocr:
                    data, confidences = scanner.getDigits(thresh, val)
                    buff += "Found student ID number of \'" + data + "\' on"
                    buff += " page " + str(page_number) + ", "
                    buff += "Confidences: " + str(confidences) + " "
                    output[output_filename]["confidences"] = str(confidences)

                output[output_filename]['id'] = data

                # save pdf
                if i != 0 and prev_file != '':
                    output[prev_file]['page_count'] = page_count
                    # update json file
                    logger.write_to_json(json_file, output)
                    with open(prev_file, 'wb') as out:
                        pdf_writer.write(out)

                if id_index == 1:
                    # correct first pdf's page count and print file
                    output[prev_file]['page_count'] = page_count
                    with open(prev_file, 'wb') as out:
                        pdf_writer.write(out)

                # start a new pdf and grab the cover
                cover_writer = PdfFileWriter()
                pdf_writer = PdfFileWriter()
                cover_writer.addPage(pdfPages.getPage(i))
                pdf_writer.addPage(pdfPages.getPage(i))

                # save cover
                with open(cover_filename, 'wb') as out:
                    cover_writer.write(out)

                # save cover image
                page.save('{}.jpg'.format(cover_filename[:-4]),
                          "JPEG", quality=100)

                id_index += 1
                page_count = 1
                prev_file = output_filename

                # save page as image, start indexing at 1
                page.save(prev_file[:-4] + '_' +
                          str(page_count).zfill(3) + '.jpg',
                          "JPEG", quality=100)
            else:
                # the first pdf page doesn't have a qr code
                if i == 0:
                    prepended_index = str(i).zfill(3)
                    output_filename = '{}_{}.pdf'.format(
                        filename[:-4], prepended_index)
                    cover_filename = '{}_{}_cover.pdf'.format(
                        filename[:-4], prepended_index)
                    output[output_filename] = {}
                    # set the value as blank so a human can check what happened
                    output[output_filename]['id'] = "BLANK"
                    prev_file = output_filename
                    id_index += 1
                    cover_writer = PdfFileWriter()
                    # save cover
                    cover_writer.addPage(pdfPages.getPage(i))
                    with open(cover_filename, 'wb') as out:
                        cover_writer.write(out)

                    # save cover image
                    page.save('{}.jpg'.format(cover_filename[:-4]),
                              "JPEG", quality=100)

                # add pages to current split_pdf
                page_count += 1
                pdf_writer.addPage(pdfPages.getPage(i))
                # save page as image, start indexing at 1
                page.save(prev_file[:-4] + '_' +
                          str(page_count).zfill(3) + '.jpg',
                          "JPEG", quality=100)

            i += 1

        buff += "Finished splitting into {} files\n".format(id_index)

        # save whatever is left
        output[prev_file]['id'] = data
        output[prev_file]['page_count'] = page_count
        if use_ocr:
            # NOTE(review): `confidences` is only bound once a QR code has
            # been seen; with use_ocr set and no QR code at all this raises
            # and is reported by the handler below.
            output[prev_file]['confidences'] = str(confidences)

        logger.write_to_json(json_file, output)

        with open(prev_file, 'wb') as out:
            pdf_writer.write(out)

        # write the buffer to the log file, so everything is on one line
        logger.write_to_log(log_file_path, buff)
    except Exception:
        msg = "Failed when splitting pdf " + filename
        print(msg)
        traceback.print_exc()
        # print everything in the buffer just in case it didn't write
        logger.write_to_log(log_file_path, buff)
        logger.write_to_log(log_file_path, msg + "\n" + traceback.format_exc())
def main():
    """Split every PDF in ``DIRECTORY`` into one PDF per public comment.

    For each PDF the summary pages are scanned word block by word block: a
    block matching ``PUBLIC_COMMENT_EXPLANATION`` starts a new comment entry,
    following blocks are appended to its summary, and a block matching
    ``PUBLIC_COMMENT_NUM`` closes the entry and fixes its output paths.  The
    pages listed for each entry are then written to
    ``each_public_comment_pdf/<comment number>.pdf``.  Failures for one PDF
    are logged with ``log_traceback`` and do not stop the run.  The combined
    metadata of all PDFs is written to ``all_comment_metadata.json``.
    """
    pdf_file_list = get_read_file_list(DIRECTORY)
    print(pdf_file_list)
    all_comment_data_set = {}

    for out_dir in ('each_public_comment_pdf', 'each_public_comment',
                    'pdf_text_box'):
        os.makedirs(out_dir, exist_ok=True)

    for pdf_file in pdf_file_list:
        # Reset per file so the text-box dump below never sees the previous
        # file's data (or an unbound name) after an early failure.
        page_data = None
        try:
            date = get_military_date(pdf_file)
            abs_pdf_path = f'{os.getcwd()}/{DIRECTORY}/{pdf_file}'
            data = tdfp.convert_pdf_to_xml(abs_pdf_path)
            page_data = process.get_word_block(data)
            public_comment_summary_page_list = get_public_comment_summary_page(page_data)
            communication_number = get_communication_num(page_data)

            comment_data_set = {}
            index = ''
            for page in public_comment_summary_page_list:
                scraping_start = False
                for word in page_data[page]['word_list']:
                    if PUBLIC_COMMENT_EXPLANATION.search(word['word']):
                        # Start of a new comment entry.
                        index = get_index(word)
                        comment_data_set[index] = {
                            'comment_number': None,
                            'summary': word['word'],
                            # Raw f-string: the backslashes belong to the
                            # pattern, not to Python string escapes.
                            'page_list': get_page_list(
                                page_data,
                                rf'{communication_number}\.{index}\.[a-z]'),
                            'public_comment_path': '',
                            'public_comment_path_pdf': '',
                            'date': date,
                        }
                        scraping_start = True
                    elif PUBLIC_COMMENT_NUM.search(word['word']):
                        # The comment number closes the current entry.
                        entry = comment_data_set[index]
                        assert not entry['comment_number']
                        entry['comment_number'] = word['word']
                        entry['summary'] = entry['summary'].strip()
                        entry['public_comment_path_pdf'] = f'each_public_comment_pdf/{entry["comment_number"].strip()}.pdf'
                        entry['public_comment_path'] = f'each_public_comment/{entry["comment_number"].strip()}.txt'
                        index = ''
                        scraping_start = False
                    elif scraping_start:
                        comment_data_set[index]['summary'] += f" {word['word']}"

            with open(abs_pdf_path, 'rb') as infile:
                # One reader serves every comment of this PDF.
                reader = PdfFileReader(infile)
                for comment_data in comment_data_set.values():
                    writer = PdfFileWriter()
                    for page in comment_data['page_list']:
                        writer.addPage(reader.getPage(page))
                    with open(comment_data['public_comment_path_pdf'], 'wb') as outfile:
                        writer.write(outfile)

            comment_data_set = transform_comment_data_set(comment_data_set)
            all_comment_data_set.update(comment_data_set)
        except Exception as ex:
            _, _, ex_traceback = sys.exc_info()
            log_traceback(ex, ex_traceback, pdf_file)

        if page_data is not None:
            save_text_box_as_txt(page_data, pdf_file)

    with open('all_comment_metadata.json', 'w') as write:
        write.write(json.dumps(all_comment_data_set))
def main(args):
    """Scan through PDF and split PDF and images.

    A new output PDF is started at every page on which a QR code is found.
    For every split a cover PDF and a cover JPEG are written, and
    ``decoded.json`` in ``split_path`` is updated through
    ``logger.write_to_json``.

    Args:
        args: Sequence of
            ``[0]`` name of the PDF to split (opened after changing into
            ``split_path``), ``[1]`` directory to work in, ``[2]`` prefix and
            ``[3]`` suffix stripped from decoded QR values, ``[4]`` log file
            path.

    Any exception is caught, printed and written to the log file.
    """
    filename = args[0]
    split_path = args[1]
    qr_prefix = args[2]
    qr_suffix = args[3]
    log_file_path = args[4]

    buff = "Process " + str(os.getpid()) + ": "

    try:
        os.chdir(split_path)
        pdfPages = PdfFileReader(filename)
        pdf_writer = PdfFileWriter()
        i = cover_index = id_index = 0
        page_count = 1
        prev_file = data = "BLANK"
        output = {"filename": filename, "is_qr": True}
        json_file = os.path.join(split_path, "decoded.json")

        # Read the PDF once, through a handle that is closed again, instead
        # of re-reading the whole file for every page.
        with open(filename, 'rb') as pdf_file:
            pdf_bytes = pdf_file.read()

        for page_number in range(pdfPages.numPages):
            # convert pdf to series of images for scanning; only this one
            # page is rasterised
            page = convert_from_bytes(pdf_bytes,
                                      first_page=page_number + 1,
                                      last_page=page_number + 1)[0]

            # increase contrast of image for better QR decoding: pixels whose
            # channels are all <= 200 become black, everything else white
            cv_img = numpy.array(page)
            mask = cv2.inRange(cv_img, (0, 0, 0), (200, 200, 200))
            inverted = 255 - cv2.cvtColor(mask, cv2.COLOR_GRAY2BGR)

            # decode img - only look for QR codes
            val = pyzbar.decode(inverted, symbols=[ZBarSymbol.QRCODE])

            if val != []:
                # found a new qr code, split here
                # convert byte literal to string
                data = val[0][0].decode("utf-8")
                buff += "Found a QR code with value \'" + data + "\' on"
                buff += " page " + str(page_number) + ", "

                if data == "none":  # blank exam with 'none' qr code
                    data = "BLANK EXAM"
                else:
                    pre = data[0:len(qr_prefix)]
                    suf = data[(len(data) - len(qr_suffix)):len(data)]

                    if qr_prefix != '' and pre == qr_prefix:
                        data = data[len(qr_prefix):]
                    if qr_suffix != '' and suf == qr_suffix:
                        data = data[:-len(qr_suffix)]

                cover_index = i
                cover_filename = '{}_{}_cover.pdf'.format(filename[:-4], i)
                output_filename = '{}_{}.pdf'.format(filename[:-4],
                                                     cover_index)
                output[output_filename] = {}
                output[output_filename]['id'] = data

                # save pdf
                if i != 0 and prev_file != '':
                    output[prev_file]['page_count'] = page_count
                    # update json file
                    logger.write_to_json(json_file, output)
                    with open(prev_file, 'wb') as out:
                        pdf_writer.write(out)
                    page.save('{}_{}.jpg'.format(prev_file[:-4], i),
                              "JPEG", quality=100)

                if id_index == 1:
                    # correct first pdf's page count and print file
                    output[prev_file]['page_count'] = page_count
                    with open(prev_file, 'wb') as out:
                        pdf_writer.write(out)
                    page.save('{}_{}.jpg'.format(prev_file[:-4], i),
                              "JPEG", quality=100)

                # start a new pdf and grab the cover
                cover_writer = PdfFileWriter()
                pdf_writer = PdfFileWriter()
                cover_writer.addPage(pdfPages.getPage(i))
                pdf_writer.addPage(pdfPages.getPage(i))

                # save cover
                with open(cover_filename, 'wb') as out:
                    cover_writer.write(out)

                # save cover image
                page.save('{}.jpg'.format(cover_filename[:-4]),
                          "JPEG", quality=100)

                id_index += 1
                page_count = 1
                prev_file = output_filename
            else:
                # the first pdf page doesn't have a qr code
                if i == 0:
                    output_filename = '{}_{}.pdf'.format(filename[:-4], i)
                    cover_filename = '{}_{}_cover.pdf'.format(
                        filename[:-4], i)
                    output[output_filename] = {}
                    # set the value as blank so a human can check what happened
                    output[output_filename]['id'] = "BLANK"
                    prev_file = output_filename
                    id_index += 1
                    cover_writer = PdfFileWriter()
                    # save cover
                    cover_writer.addPage(pdfPages.getPage(i))
                    with open(cover_filename, 'wb') as out:
                        cover_writer.write(out)

                    # save cover image
                    page.save('{}.jpg'.format(cover_filename[:-4]),
                              "JPEG", quality=100)

                # add pages to current split_pdf
                page_count += 1
                pdf_writer.addPage(pdfPages.getPage(i))

            i += 1

        buff += "Finished splitting into {} files\n".format(id_index)

        # save whatever is left
        output_filename = '{}_{}.pdf'.format(filename[:-4], cover_index)
        output[output_filename]['id'] = data
        output[output_filename]['page_count'] = page_count

        logger.write_to_json(json_file, output)

        with open(output_filename, 'wb') as out:
            pdf_writer.write(out)

        # write the buffer to the log file, so everything is on one line
        logger.write_to_log(log_file_path, buff)
    except Exception:
        msg = "Failed when splitting pdf " + filename
        print(msg)
        traceback.print_exc()
        # print everything in the buffer just in case it didn't write
        logger.write_to_log(log_file_path, buff)
        logger.write_to_log(log_file_path, msg + "\n" + traceback.format_exc())
def generatePairings(promotional_id, color):
    """Build the pairings packet PDF for one belt colour of a promotional.

    Applications of the given colour are listed in ``number`` order on copies
    of page 0 of ``Pairings_packet_template.pdf``.  A new page is started
    after 15 rows or whenever the rank changes.  Each row shows the applicant,
    the partner looked up through ``sideA_id``/``sideB_id`` (or ``SUB`` when
    there is none) and the applicant's number.

    Args:
        promotional_id: Id of the promotional whose applicants are listed.
        color: Belt colour to filter on; also used in titles and file name.

    Returns:
        A response whose body is the PDF, sent as the attachment
        ``"<color> pairings.pdf"``.  With no matching applications the PDF
        holds a single template page without rows.
    """
    # all applicants of the selected color belonging to the selected
    # promotional, ordered by number
    applications = session.query(Application).filter_by(promotional_id=promotional_id, color=color).order_by(Application.number).all()

    output = PdfFileWriter()

    promotional = session.query(Promotional).filter_by(id=promotional_id).one()
    date = promotional.date.strftime("%B %d, %Y")

    # Read the template once; every page gets its own reader over these
    # bytes because mergePage modifies the page it is called on.
    with open("Pairings_packet_template.pdf", "rb") as templateFile:
        templateBytes = templateFile.read()

    def startPage():
        """Return a fresh (buffer, canvas, template page) triple."""
        infoBuffer = StringIO.StringIO()
        c = canvas.Canvas(infoBuffer)
        cover = PdfFileReader(StringIO.StringIO(templateBytes))
        return infoBuffer, c, cover.getPage(0)

    def finishPage(infoBuffer, c, coverPage):
        """Merge the drawn canvas onto the template page and append it."""
        c.showPage()
        c.save()
        infoBuffer.seek(0)
        info = PdfFileReader(infoBuffer)
        coverPage.mergePage(info.getPage(0))
        output.addPage(coverPage)
        infoBuffer.close()

    def generatePairingsPage():
        infoBuffer, c, coverPage = startPage()
        # An empty result no longer raises IndexError.
        previousRank = applications[0].rank if applications else None
        offset = 38  # vertical distance between two rows
        coverCounter = 0  # rows already drawn on the current page
        for app in applications:
            if coverCounter == 15 or app.rank != previousRank:
                finishPage(infoBuffer, c, coverPage)
                infoBuffer, c, coverPage = startPage()
                coverCounter = 0
            if coverCounter == 0:
                c.setFont('Helvetica', 24)
                c.drawCentredString(300, 735, date)
                # Page title
                c.drawCentredString(300, 675, app.rankInfo + " " + color.title() + " Belt Pairings")
                # table header
                c.setFont('Helvetica-Bold', 18)
                c.drawString(80, 624, app.rankInfo + " " + color.title() + " Belt")
            c.setFont('Helvetica', 18)
            rowY = 590 - coverCounter*offset
            # put "A) " or "B) " depending on whether the application
            # is on side A or B. Same for the pairing partner.
            if app.sideA_id:
                c.drawString(80, rowY, "A) " + app.fullName)
                sideB = session.query(Application).filter_by(promotional_id=promotional_id, id=app.sideA_id).one()
                c.drawString(340, rowY, "B) " + sideB.fullName)
            elif app.sideB_id:
                c.drawString(80, rowY, "B) " + app.fullName)
                sideA = session.query(Application).filter_by(promotional_id=promotional_id, id=app.sideB_id).one()
                c.drawString(340, rowY, "A) " + sideA.fullName)
            else:
                c.drawString(80, rowY, "A) " + app.fullName)
                c.drawString(340, rowY, "SUB")
            # number listing
            c.drawCentredString(310, rowY, str(app.number))
            coverCounter += 1
            previousRank = app.rank
        finishPage(infoBuffer, c, coverPage)

    generatePairingsPage()

    outputStream = StringIO.StringIO()
    output.write(outputStream)
    pdfOut = outputStream.getvalue()
    outputStream.close()

    fileName = color + " pairings.pdf"
    response = make_response(pdfOut)
    # The name contains a space, so it has to be sent as a quoted string.
    response.headers['Content-Disposition'] = 'attachment; filename="' + fileName + '"'
    response.mimetype = 'application/pdf'
    return response
#Match Attribute Extraction txt with associated PDF file MatchingPDF=filter(lambda x: AttFiletemp in x, FilesPDF) # Open Attribute Extraction txt AttFilePath=os.path.join(hyperlink,AttFile) AttFilePathtemp=open(AttFilePath,'r') Attributes=[] for row in AttFilePathtemp: Attributes.append(row.strip().split(',')) # Open PDF file. Because we can't rewrite Metadata per se. # We have to make a copy and rename the modified copy to overwrite original file filename=os.path.join(hyperlink,MatchingPDF[0]) fin=file(filename,'rb') # Open File pdf_in=PdfFileReader(fin) # use PyPDF2 file reader info=pdf_in.getDocumentInfo() # Grab document info from original pdf info=dict(info) # convert to python dictionary writer=PdfFileWriter() # Write new pdf file for page in range(pdf_in.getNumPages()): # Get all the pages in the pdf writer.addPage(pdf_in.getPage(page)) MetadataTitles=['/Subject','/Title','/Keywords'] # Metadata titles found in PDF # Run through list of Metadata tags to replace all with block attributes # Subject AttributesTitle=Attributes[0][0] AttributesTitle=AttributesTitle.replace("'",'') # Eliminate extra quotes info[MetadataTitles[0]]=AttributesTitle # Add new Metadata # Title AttributesTitle=Attributes[0][1]+' '+Attributes[0][2]+' '+Attributes[0][3] AttributesTitle=AttributesTitle.replace("'",'') # Eliminate extra quotes info[MetadataTitles[1]]=AttributesTitle # Add new Metadata # Additional Info. AttributesTitle=Attributes[0][4]+';'+Attributes[0][5]
def fill(self, fname, pagesize, events, topspace, bottomspace, margins):
    """Render the events on a half-width page and write it 2-up to ``fname``.

    One side is built with reportlab into a temporary file: an RSVP line in a
    0.5 inch frame at the bottom and the rendered events in a 3.3 inch frame
    above it.  The first page of that file is then merged with a copy of
    itself shifted right by the page width, and the result is written to
    ``fname.name``.

    Args:
        fname: Object whose ``name`` attribute is the output path.
        pagesize: ``(width, height)`` of the full sheet.
        events: Iterable of items wrapped in ``Event`` and rendered in order.
        topspace: Top margin of the document.
        bottomspace: Bottom margin of the document.
        margins: Left and right margin of the document.
    """
    # Only the path is needed; close the handle right away so reportlab can
    # open the file itself on every platform.
    tf = tempfile.NamedTemporaryFile(delete=False)
    tf.close()
    try:
        pagesize = (pagesize[0] / 2 - 6, pagesize[1])
        doc = BaseDocTemplate(tf.name, pagesize=pagesize,
                              leftMargin=margins, bottomMargin=bottomspace,
                              rightMargin=margins, topMargin=topspace)
        column = Frame(doc.leftMargin+6, doc.bottomMargin+0.5*inch,
                       doc.width-6, 3.3*inch)
        rsvp = Frame(doc.leftMargin+6, doc.bottomMargin,
                     doc.width-6, 0.5*inch)
        doc.addPageTemplates(PageTemplate(frames=[rsvp, column]))

        # render one side
        story = []
        story.append(Paragraph("Please RSVP at map.berniesanders.com",
                               styles["default"]))
        story.append(FrameBreak())
        for e in events:
            story.append(Event(e).render())
        doc.build(story)

        # now duplicate for 2-up; the rendered file stays open until the
        # output has been written and is closed before it is removed
        with open(tf.name, "rb") as rendered:
            src = PdfFileReader(rendered)
            out = PdfFileWriter()
            lhs = src.getPage(0)
            lhs.mergeTranslatedPage(lhs, lhs.mediaBox.getUpperRight_x(), 0,
                                    True)
            out.addPage(lhs)
            with open(fname.name, "wb") as outfile:
                out.write(outfile)
    finally:
        # Remove the temporary file even when rendering or merging fails.
        os.remove(tf.name)