def getannots(pdfannots, pageno, fh):
    """Collect one page's annotations and export Ink/Square ones as PNG crops.

    Args:
        pdfannots: iterable of annotation dictionaries (anything with
            ``.get``) belonging to the page.
        pageno: zero-based page index; used to fetch the page from ``fh`` and
            stored on every ``Annotation``.
        fh: file-like object containing the PDF, passed to ``PdfFileReader``.

    Returns:
        A list with one ``Annotation`` per accepted annotation.

    Side effects:
        Increments the module-level ``index`` once per accepted annotation,
        rewrites the crop/trim/media boxes of the page object, and writes
        ``./images/<index>.png`` for every Ink or Square annotation.
    """
    global index

    # RGB triples (as stored under the 'C' key) and the names they map to.
    known_colours = (
        ([1.0, 0.90196, 0.0], "yellow"),
        ([0.26667, 0.78431, 0.96078], "blue"),
        ([0.92549, 0.0, 0.54902], "pink"),
        ([0.90196, 0.10588, 0.10588], "red"),
    )

    def getcolour(colour):
        """Return the name of a known highlight colour, or "none"."""
        for rgb, colour_name in known_colours:
            if colour == rgb:
                return colour_name
        return "none"

    annots = []
    input1 = PdfFileReader(fh)
    targetPage = input1.getPage(pageno)
    newpath = "./images/"
    box_names = ("cropBox", "trimBox", "mediaBox")

    for pa in pdfannots:
        subtype = pa.get('Subtype')
        # An annotation without a Subtype used to fall through and crash on
        # ``subtype.name``; it cannot be classified, so it is skipped.
        if subtype is None or subtype.name not in ANNOT_SUBTYPES:
            continue
        print(subtype)

        if subtype.name in ("Ink", "Square"):
            print("yes")
            print(type(pa.get('Rect')))
            coord = pa.get('Rect')
            lower_left = (coord[0], coord[1])
            upper_right = (coord[2], coord[3])
            # Shrink every page box to the annotation rectangle.
            for box_name in box_names:
                getattr(targetPage, box_name).lowerLeft = lower_left
            for box_name in box_names:
                getattr(targetPage, box_name).upperRight = upper_right

            # A fresh writer per crop: a shared writer would accumulate one
            # extra page for every annotation exported so far.
            crop_writer = PdfFileWriter()
            crop_writer.addPage(targetPage)
            pdf_bytes = io.BytesIO()
            crop_writer.write(pdf_bytes)
            pdf_bytes.seek(0)

            img = Image(file=pdf_bytes, resolution=300)
            img.convert("png")
            if not os.path.exists(newpath):
                os.makedirs(newpath)
            img.save(filename=newpath + str(index) + ".png")

        colour = pa.get('C')
        contents = pa.get('Contents')
        if contents is not None:
            contents = str(contents, 'iso8859-15')
            # Normalise every line ending to "\n".
            contents = contents.replace('\r\n', '\n').replace('\r', '\n')

        annots.append(Annotation(index, pageno, subtype.name.lower(),
                                 pa.get('QuadPoints'), pa.get('Rect'),
                                 contents, getcolour(colour)))
        index += 1

    return annots
def joinpdf(folder=TMPFOLDER,startpage=INDEX,outputname='freecad.pdf'):
    """Create one PDF from several others, ordered by the start page's links.

    The page order comes from the first ``<ul>...</ul>`` span found in
    ``<folder>/<startpage>.html``: every ``href`` in it names a page, and the
    start page itself is inserted as the second entry.  For each page accepted
    by ``exists`` the file ``<folder>/<name>.pdf`` is appended; a file that
    cannot be opened or parsed is reported and skipped.

    Args:
        folder: directory holding the HTML start page and the per-page PDFs.
        startpage: name of the start page, without the ".html" extension.
        outputname: file name of the merged PDF, written under OUTPUTPATH.

    Raises:
        IndexError: if the start page contains no ``<ul>...</ul>`` span.
    """
    if VERBOSE:
        print ("Building table of contents...")
    with open(folder+os.sep+startpage+'.html') as f:
        html = f.read()
    html = html.replace("\n"," ")
    html = html.replace("> <","><")
    html = re.findall("<ul.*/ul>",html)[0]
    pages = re.findall('href="(.*?)"',html)
    pages.insert(1,startpage+".html")

    result = PdfFileWriter()
    # Pages are read lazily when the writer serialises them, so every input
    # must stay open until after result.write().
    opened = []
    try:
        for p in pages:
            # p[:-5] strips the trailing ".html".
            if exists(p[:-5]):
                if VERBOSE:
                    print ('Appending',p)
                try:
                    handle = open(folder+os.sep+p[:-5]+'.pdf','rb')
                    opened.append(handle)
                    inputfile = PdfFileReader(handle)
                except Exception:
                    print ('Unable to append',p)
                else:
                    for i in range(inputfile.getNumPages()):
                        result.addPage(inputfile.getPage(i))
        with open(OUTPUTPATH + os.sep + outputname,'wb') as outputfile:
            result.write(outputfile)
    finally:
        for handle in opened:
            handle.close()
    if VERBOSE:
        print ('Successfully created',OUTPUTPATH,os.sep,outputname)
def split_chapters(*t_args):
    """Split a large PDF into one file per chapter, driven by a JSON config.

    The first positional argument is a sequence whose first item is the path
    of a JSON file providing these keys: "source", "first_chapter_index",
    "chapters_ends", "chapter_fmt", "outputdir", "chapter_prefix" and
    "firstpage".  Chapter ``i`` holds the pages from the previous chapter's
    end (or "firstpage" for the first chapter) up to, but not including,
    ``chapters_ends[i]``.

    Prints a usage line and returns None when no config file is given,
    including when the function is called with no arguments at all.
    """
    args = t_args[0] if len(t_args) > 0 else ()
    if len(args) < 1:
        print("usage: utils_pdf split_chapters configfile")
        return
    from pyPdf import PdfFileWriter, PdfFileReader

    with open(args[0]) as f:
        P = json.loads(f.read())
    i0 = P["first_chapter_index"]
    ends = P["chapters_ends"]

    # The source stays open for the whole loop because pages are read lazily.
    with open(P["source"], "rb") as source:
        reader = PdfFileReader(source)
        for i, chapter_end in enumerate(ends):
            ch_num = i0 + i
            fmt = P["chapter_fmt"] % (ch_num, )
            output = PdfFileWriter()
            if not os.path.exists(P["outputdir"]):
                os.mkdir(P["outputdir"])
            fn_out = "%s/%s%s" % (P["outputdir"], P["chapter_prefix"], fmt)
            j0 = P["firstpage"] if i == 0 else ends[i - 1]
            for j in range(j0, chapter_end):
                output.addPage(reader.getPage(j))
            with open(fn_out, "wb") as outputStream:
                output.write(outputStream)
            print("wrote %s" % (fn_out,))
def split_file(f, filename):
    """Split a PDF into sub-files of at most 10 pages and queue them in order.

    Args:
        f: an already opened PDF reader (must provide ``getNumPages`` and
            ``getPage``).
        filename: path of the PDF behind ``f``; its last four characters are
            dropped to build the chunk names ``<stem>_<first page>.pdf``.

    Side effects:
        Appends every chunk name to the module-level ``file_queue`` and
        removes ``filename`` once all chunks are written.
    """
    global file_queue
    total_pages = f.getNumPages()
    log('Splitting file ' + filename + " with " + str(total_pages) + " pages.")

    for first_page in range(0, total_pages, 10):
        fname = filename[:-4] + '_' + str(first_page) + '.pdf'
        output = PdfFileWriter()
        # The last chunk may hold fewer than 10 pages.
        for i in range(first_page, min(first_page + 10, total_pages)):
            output.addPage(f.getPage(i))
        # The context manager closes the chunk even if writing fails.
        with open(fname, 'wb') as fout:
            output.write(fout)
        file_queue.append(fname)

    # Delete the original now that it is in pieces.
    os.remove(filename)
def cat(infilenames, outputfilename, verbose):
    """Concatenate all pages of several PDFs into a new file.

    Args:
        infilenames: paths of the input PDFs, in output order.
        outputfilename: path of the PDF to create; must not exist yet.
        verbose: when true, print the number of pages written.

    Exits the process with status 2 (after calling ``halp``) when an input is
    missing, the output already exists, or an input cannot be opened/parsed.
    """
    for infilename in infilenames:
        print(infilename)
        if not os.path.exists(infilename):
            halp()
            print ("error: "+infilename+" does not exist... exiting nao")
            sys.exit(2)  # missing input file
    if os.path.exists(outputfilename):
        halp()
        print ("error: "+outputfilename+" does already exist... exiting nao")
        sys.exit(2)  # refuse to overwrite the output

    # Inputs are read lazily while the writer serialises, so they are closed
    # only after output.write() has finished.
    handles = []
    try:
        inputs = []
        try:
            for name in infilenames:
                handle = open(name, "rb")
                handles.append(handle)
                inputs.append(PdfFileReader(handle))
        except Exception:
            halp()
            sys.exit(2)  # input could not be opened or is not a PDF

        i = 0
        output = PdfFileWriter()
        for pdf in inputs:
            for pagenr in range(pdf.getNumPages()):
                output.addPage(pdf.getPage(pagenr))
                i = i + 1
        with open(outputfilename, "wb") as outputStream:
            output.write(outputStream)
    finally:
        for handle in handles:
            handle.close()

    if verbose:
        print (str(i)+" pages processed")
def concatenate_pdf(self,book_title):
    """Merge the chapter files ``<book_title>1.pdf``, ``<book_title>2.pdf``, ...
    found in the current directory into ``<book_title>.pdf``, then delete them.

    Chapters are counted from 1 up to the first missing number.  Every counted
    chapter is merged before it is deleted.  The previous version merged
    ``range(1, n)`` but deleted ``range(1, n + 1)``, so the last chapter was
    removed without ever being written to the book.

    Args:
        book_title: common file-name prefix of the chapters and the result.
    """
    fileList = set(os.listdir(os.getcwd()))
    num_chapters = 0
    while book_title + str(num_chapters + 1) + ".pdf" in fileList:
        num_chapters += 1
    print("numero capitulos" + str(num_chapters))
    print("Uniendo pfs...")

    chapter_names = [book_title + str(i) + ".pdf"
                     for i in range(1, num_chapters + 1)]
    output = PdfFileWriter()
    handles = []
    try:
        for chapter_name in chapter_names:
            f = open(chapter_name, "rb")
            handles.append(f)
            # One reader per chapter instead of re-parsing the file per page.
            reader = PdfFileReader(f)
            for a in range(reader.getNumPages()):
                output.addPage(reader.getPage(a))
        with open(book_title + ".pdf", "wb") as outputStream:
            output.write(outputStream)
    finally:
        # Close before deleting; open files cannot be removed on every OS.
        for f in handles:
            f.close()
    print("Union finalizada")

    for i, chapter_name in enumerate(chapter_names, 1):
        print("borrando... capitulo: " + str(i))
        os.remove(chapter_name)
def showpdf(request):
    """Stamp the signature image on the first page of an extracted PDF.

    Query parameters:
        f: name of a file inside ``MEDIA_ROOT/pdffiles/extracted``.  Only the
            base name is used, so the value cannot point outside that folder.
        o: ``"l"`` draws the signature at x=529; any other value, or a missing
            parameter, draws it at x=70.

    Returns:
        An ``HttpResponse`` carrying the single stamped page as an attachment,
        or None when ``f`` is not supplied.
    """
    sign = os.path.join(settings.MEDIA_ROOT, "signature.png")
    mimetypes.init()
    response = None
    if 'f' in request.GET:
        # Untrusted input: strip any directory part to block path traversal.
        filename = os.path.basename(request.GET['f'])
        pdf_path = os.path.join(settings.MEDIA_ROOT, 'pdffiles', 'extracted',
                                '%s' % filename)
        with open(pdf_path, "rb") as fr:
            imgTemp = StringIO()
            imgDoc = canvas.Canvas(imgTemp)
            sign_x = 529 if request.GET.get('o') == 'l' else 70
            imgDoc.drawImage(sign, sign_x, 40, 290/2, 154/2)
            imgDoc.save()

            overlay = PdfFileReader(StringIO(imgTemp.getvalue())).getPage(0)
            page = PdfFileReader(fr).getPage(0)
            page.mergePage(overlay)
            pdf_out = PdfFileWriter()
            pdf_out.addPage(page)

            response = HttpResponse(mimetype='application/pdf')
            response['Content-Disposition'] = 'attachment; filename=%s' % filename
            # Written while the source is still open: page data is read lazily.
            pdf_out.write(response)
    return response
def applica_firma(file_firma, pdf_file):
    """Stamp a signature image, a rule and a caption on a PDF, in place.

    The overlay is drawn with ReportLab and merged onto the first page of
    ``pdf_file``; the file is then replaced by a PDF holding only that page.

    Args:
        file_firma: path of the signature image.
        pdf_file: path of the PDF to sign; overwritten with the result.
    """
    # Draw the overlay (image, line, caption) into an in-memory PDF.
    imgTemp = StringIO()
    imgDoc = canvas.Canvas(imgTemp)
    buff = 50
    # Image at (200, 190 - buff), 200 wide and 75 high.
    imgDoc.drawImage(file_firma, 200, 190 - buff, 200, 75)
    p = imgDoc.beginPath()
    p.moveTo(200, 210 - buff)
    p.lineTo(400, 210 - buff)
    imgDoc.drawPath(p, stroke=1, fill=1)
    imgDoc.setFont("Helvetica", 8)
    imgDoc.drawString(260, 195 - buff, "(Firma del Richiedente)")
    imgDoc.save()

    # Merge into memory first.  Opening pdf_file for writing while it is
    # still being read would truncate data the writer has not fetched yet.
    merged = StringIO()
    with open(pdf_file, "rb") as source:
        page = PdfFileReader(source).getPage(0)
        overlay = PdfFileReader(StringIO(imgTemp.getvalue())).getPage(0)
        page.mergePage(overlay)
        output = PdfFileWriter()
        output.addPage(page)
        output.write(merged)

    # Binary mode: a PDF is not text.
    with open(pdf_file, "wb") as target:
        target.write(merged.getvalue())
def add_omr_marks(self, pdf_data, is_latest_document):
    """Return ``pdf_data`` with an OMR layer merged onto every even-indexed page.

    Args:
        pdf_data: the PDF document as a byte string.
        is_latest_document: forwarded to the mark computation; only the page
            whose index equals ``total_pages // 2`` is flagged as latest.

    Returns:
        The resulting PDF as a byte string.
    """
    # Documentation
    # http://meteorite.unm.edu/site_media/pdf/reportlab-userguide.pdf
    # https://pythonhosted.org/PyPDF2/PdfFileReader.html
    # https://stackoverflow.com/a/17538003
    # https://gist.github.com/kzim44/5023021
    # https://www.blog.pythonlibrary.org/2013/07/16/
    #   pypdf-how-to-write-a-pdf-to-memory/
    self.ensure_one()

    pdf_buffer = StringIO.StringIO()
    pdf_buffer.write(pdf_data)
    existing_pdf = PdfFileReader(pdf_buffer)
    output = PdfFileWriter()
    total_pages = existing_pdf.getNumPages()

    # NOTE(review): for 2, 3 or 6 pages this index is odd, so no page is ever
    # flagged as latest.  Confirm whether the last even index was intended.
    latest_omr_page = total_pages // 2

    for page_number in range(total_pages):
        page = existing_pdf.getPage(page_number)
        # Only even-indexed pages (recto) receive marks.  Compare by value:
        # ``is 0`` only worked because CPython caches small integers.
        if page_number % 2 == 0:
            is_latest_page = is_latest_document and \
                page_number == latest_omr_page
            marks = self._compute_marks(is_latest_page)
            omr_layer = self._build_omr_layer(marks)
            page.mergePage(omr_layer)
        output.addPage(page)

    out_buffer = StringIO.StringIO()
    output.write(out_buffer)
    return out_buffer.getvalue()
def delete(filesandranges, outputfilename, verbose):
    """Copy the given PDFs to a new file, leaving out the listed pages.

    Args:
        filesandranges: sequence of dicts with the keys ``"name"`` (input
            path) and ``"pages"`` (1-based page numbers to drop).
        outputfilename: path of the PDF to create; must not exist yet.
        verbose: accepted for interface compatibility; not used.

    Exits the process with status 2 (after calling ``halp``) when an input is
    missing, the output already exists, or an input cannot be read.
    """
    for entry in filesandranges:
        if not os.path.exists(entry['name']):
            halp()
            print("error: " + entry['name'] + " does not exist... exiting nao")
            sys.exit(2)  # missing input file
    if os.path.exists(outputfilename):
        halp()
        # Report the output file; the old message named the last input.
        print("error: " + outputfilename + " does already exist... exiting nao")
        sys.exit(2)  # refuse to overwrite the output

    output = PdfFileWriter()
    # Inputs must stay open until the writer has serialised their pages.
    handles = []
    try:
        try:
            for pdf in filesandranges:
                print(pdf["name"])
                handle = open(pdf["name"], "rb")
                handles.append(handle)
                fiel = PdfFileReader(handle)
                for pagenr in range(1, fiel.getNumPages() + 1):
                    if pagenr not in pdf["pages"]:
                        output.addPage(fiel.getPage(pagenr - 1))
        except Exception:
            halp()
            sys.exit(2)  # input could not be opened or is not a PDF

        if not os.path.exists(outputfilename):
            with open(outputfilename, "wb") as outputStream:
                output.write(outputStream)
        else:
            print("file exists, discontinuing operation")
    finally:
        for handle in handles:
            handle.close()
def cat(infilenames, outputfilename, verbose):
    """Concatenate all pages of several PDFs into a new file.

    Args:
        infilenames: paths of the input PDFs, in output order.
        outputfilename: path of the PDF to create; must not exist yet.
        verbose: when true, print the number of pages written.

    Exits the process with status 2 (after calling ``halp``) when an input is
    missing, the output already exists, or an input cannot be opened/parsed.
    """
    for infilename in infilenames:
        print(infilename)
        if not os.path.exists(infilename):
            halp()
            print("error: " + infilename + " does not exist... exiting nao")
            sys.exit(2)  # missing input file
    if os.path.exists(outputfilename):
        halp()
        print("error: " + outputfilename + " does already exist... exiting nao")
        sys.exit(2)  # refuse to overwrite the output

    # Inputs are read lazily while the writer serialises, so they are closed
    # only after output.write() has finished.
    handles = []
    try:
        inputs = []
        try:
            for name in infilenames:
                handle = open(name, "rb")
                handles.append(handle)
                inputs.append(PdfFileReader(handle))
        except Exception:
            halp()
            sys.exit(2)  # input could not be opened or is not a PDF

        i = 0
        output = PdfFileWriter()
        for pdf in inputs:
            for pagenr in range(pdf.getNumPages()):
                output.addPage(pdf.getPage(pagenr))
                i = i + 1
        with open(outputfilename, "wb") as outputStream:
            output.write(outputStream)
    finally:
        for handle in handles:
            handle.close()

    if verbose:
        print(str(i) + " pages processed")
def split(files, verbose):
    """Write every page of every given PDF to its own one-page file.

    Page ``k`` of ``<name><ext>`` is written to ``<name>p<k><ext>``, with ``k``
    zero-padded to the number of digits in the document's page count.

    Args:
        files: list of input PDF paths.
        verbose: when true, print a summary of pages and files processed.

    Exits the process with status 2 (after calling ``halp``) when an input is
    missing or cannot be opened/parsed.
    """
    for infilename in files:
        if not os.path.exists(infilename):
            halp()
            print("error: " + infilename + " does not exist... exiting nao")
            sys.exit(2)  # missing input file

    handles = []
    i = 0
    j = 0
    try:
        inputs = []
        try:
            for path in files:
                handle = open(path, "rb")
                handles.append(handle)
                inputs.append(PdfFileReader(handle))
        except Exception:
            halp()
            print("there has been an error of unfortunate proportions")
            sys.exit(2)  # input could not be opened or is not a PDF

        for path, pdf in zip(files, inputs):
            num_pages = pdf.getNumPages()
            # Pad to the digit count of the page total.  ceil(log10(n)) was
            # one digit short whenever n is an exact power of ten.
            page_fmt = "%0" + str(len(str(num_pages))) + "d"
            (name, ext) = os.path.splitext(path)
            for pagenr in range(num_pages):
                output = PdfFileWriter()
                output.addPage(pdf.getPage(pagenr))
                my_str = page_fmt % (pagenr + 1)
                print(name + "p" + my_str + ext)
                with open(name + "p" + my_str + ext, "wb") as outputStream:
                    output.write(outputStream)
                j = j + 1
            i = i + 1
    finally:
        for handle in handles:
            handle.close()

    if verbose:
        print(str(j) + " pages in " + str(i) + " files processed")
def run(self):
    """Build the report and merge its first page onto the template.

    The story is rendered to ``test.pdf``.  Its first page is then merged
    onto the first page of ``report_template.pdf``, the remaining report
    pages are appended unchanged, and the result is written to
    ``final_report.pdf``.
    """
    self.doc = SimpleDocTemplate("test.pdf")
    self.story = [Spacer(1, 1*inch)]
    self.createLineItems()
    self.doc.build(self.story, onFirstPage=self.first_page,
                   onLaterPages=self.later_page)
    print("finished!")

    # Both inputs stay open until the output is written: pages are read
    # lazily by the writer.
    with open("test.pdf", "rb") as f:
        with open("report_template.pdf", "rb") as template:
            print("merginig")
            new_pdf = PdfFileReader(f)
            existing_pdf = PdfFileReader(template)
            output = PdfFileWriter()

            first_page = existing_pdf.getPage(0)
            first_page.mergePage(new_pdf.getPage(0))
            output.addPage(first_page)
            # Remaining report pages have no template counterpart.
            for page_number in range(1, new_pdf.getNumPages()):
                output.addPage(new_pdf.getPage(page_number))

            with open("final_report.pdf", "wb") as outputStream:
                output.write(outputStream)
def uploadFile(self):
    '''Store the PDF at self.path, or a page selection of it, as self.file.

    self.pages is first reduced to digits, commas and hyphens.  If anything
    remains, the selected pages are copied into a temporary PDF which is
    registered as a Resource and then deleted from disk.  Otherwise the file
    at self.path is registered directly, provided it exists.

    NOTE(review): the nesting below was reconstructed from a flattened
    source; confirm against the original file.
    '''
    filePath = self.path
    log.debug(u"uploadFile " + unicode(filePath))
    # Only logged: execution continues even without a parent node/package.
    if not self.parentNode or not self.parentNode.package:
        log.error('something is wrong with the file')
    # Remove every character that is not a digit, a comma or a hyphen.
    self.pages = sub('[^\d,-]', '', self.pages)
    if self.pages != "":
        # NOTE(review): this input handle is never closed explicitly.
        input = PdfFileReader(file(filePath, "rb"))
        lastPage = input.getNumPages() - 1 # index of the last page
        toimport = PdfIdevice.__parseImportPages(self.pages, lastPage)
        log.debug("Parsed pages: " + str(toimport))
        output = PdfFileWriter()
        for page in toimport:
            output.addPage(input.getPage(page))
        log.debug("Found pages to import %s" % toimport)
        # NOTE(review): os.tmpnam() is race-prone and Python 2 only.
        tmp = os.tmpnam() + ".pdf"
        log.debug('Tempfile is %s' % tmp)
        outputStream = file(tmp, "wb")
        output.write(outputStream)
        outputStream.close()
        resourceFile = Path(tmp)
        self.file = Resource(self, resourceFile)
        log.debug("Uploaded %s, pages: %s" % (tmp, toimport))
        # The temp file is removed right after registration, so the isfile()
        # check below is false on this path and self.file is left as set here.
        os.remove(tmp)
        filePath = tmp
    resourceFile = Path(filePath)
    if resourceFile.isfile():
        self.file = Resource(self, resourceFile)
        log.debug(u"uploaded " + self.path)
def into_half(src, dst):
    """Cut every page of a PDF into a left and a right half.

    Each source page yields two output pages, left half first.  The split
    position is half the x coordinate of the media box's upper-right corner.

    Args:
        src: path of the input PDF.
        dst: path of the output PDF (created or overwritten).
    """
    # NOTE(review): halving upperRight.x assumes the media box starts at
    # x = 0 -- confirm for inputs with a shifted origin.
    with open(src, 'rb') as _src:
        with open(dst, 'wb') as _dst:
            reader = PdfFileReader(_src)
            output = PdfFileWriter()
            for i in range(reader.getNumPages()):
                p = reader.getPage(i)
                # Shallow page copy with its own media box, so the two
                # halves can be cropped independently.
                q = copy.copy(p)
                q.mediaBox = copy.copy(p.mediaBox)
                (w, h) = p.mediaBox.upperRight
                print("%s %s" % (w, h))
                p.mediaBox.upperRight = (w/2, h)   # keep the left half
                q.mediaBox.upperLeft = (w/2, h)    # keep the right half
                output.addPage(p)
                output.addPage(q)
            output.write(_dst)
def concatenate_pdfs(output_fn, input_fns):
    """Write a PDF made of the first page of each input file.

    Args:
        output_fn: path of the PDF to create (overwritten if present).
        input_fns: iterable of input PDF paths; only page 0 of each is used.
    """
    from pyPdf import PdfFileWriter, PdfFileReader

    outfile = PdfFileWriter()
    # Inputs stay open until the writer has serialised their pages.
    handles = []
    try:
        for fn in input_fns:
            handle = open(fn, 'rb')
            handles.append(handle)
            outfile.addPage(PdfFileReader(handle).getPage(0))
        # Closing explicitly guarantees the output is flushed to disk.
        with open(output_fn, "wb") as out_stream:
            outfile.write(out_stream)
    finally:
        for handle in handles:
            handle.close()
def renderToPdf(envLL, filename, sizex, sizey):
    """Render every Mapnik layer for a bounding box and stack them in one PDF.

    Each entry of MAPNIK_LAYERS is rendered from ``<layer>.xml`` into
    ``<basename>_<layer>.pdf``.  The first layer's page becomes the base page
    and every later layer is merged on top of it.

    Args:
        envLL: bounding box, converted with ``LLToMerc`` before zooming.
        filename: path of the merged PDF; also the stem of the layer files.
        sizex: page/map width passed to cairo and mapnik.
        sizey: page/map height passed to cairo and mapnik.

    Raises:
        ValueError: if MAPNIK_LAYERS is empty (previously an AttributeError
            on ``None``).
    """
    basefilename = os.path.splitext(filename)[0]
    mergedpdf = None
    page = None
    # Layer PDFs are read lazily, so they stay open until the final write.
    handles = []
    try:
        for mapname in MAPNIK_LAYERS:
            print('Rendering %s' % (mapname,))

            # Render the layer PDF.
            localfilename = basefilename + '_' + mapname + '.pdf'
            surface = cairo.PDFSurface(localfilename, sizex, sizey)
            envMerc = LLToMerc(envLL)
            layer_map = mapnik.Map(sizex, sizey)
            mapnik.load_map(layer_map, mapname + ".xml")
            layer_map.zoom_to_box(envMerc)
            mapnik.render(layer_map, surface)
            surface.finish()

            # Merge with the master page.
            handle = open(localfilename, "rb")
            handles.append(handle)
            layer_page = PdfFileReader(handle).getPage(0)
            if mergedpdf is None:
                mergedpdf = PdfFileWriter()
                page = layer_page
                mergedpdf.addPage(page)
            else:
                page.mergePage(layer_page)

        if mergedpdf is None:
            raise ValueError("MAPNIK_LAYERS is empty; nothing to render")
        with open(filename, 'wb') as output:
            mergedpdf.write(output)
    finally:
        for handle in handles:
            handle.close()
def split(file_name, output_name="out.pdf"):
    """Cut every page of a PDF into a lower and an upper half.

    Each source page yields two output pages: first the crop covering the
    lower half (y from 0 to half the media-box height), then the upper half.

    Args:
        file_name: path of the input PDF.
        output_name: path of the result; defaults to the historical "out.pdf".
    """
    with open(file_name, "rb") as source:
        input1 = PdfFileReader(source)
        output = PdfFileWriter()
        numPages = input1.getNumPages()
        print("document has %s pages." % numPages)
        for i in range(numPages):
            page1 = input1.getPage(i)
            page2 = copy.copy(page1)
            # Upper-right corner of the media box: x is the page width and
            # y the page height.  The old code had the two names swapped.
            top = page1.mediaBox.getUpperRight_y()
            right = page1.mediaBox.getUpperRight_x()
            page1.cropBox.lowerLeft = (0, 0)
            page1.cropBox.upperRight = (right, top/2)
            page2.cropBox.lowerLeft = (0, top/2)
            page2.cropBox.upperRight = (right, top)
            output.addPage(page1)
            output.addPage(page2)
        # Written while the source is still open: page data is read lazily.
        with open(output_name, "wb") as outputStream:
            output.write(outputStream)
    print('Finished')
def add_guides(self):
    """Copy ``sig.pdf`` to ``sigs.pdf`` with guide marks merged on page one.

    The guide layer is produced by ``create_pdf``: long-arm guides on the
    landscape A4 size when ``self.args.longarm`` is set, otherwise short-arm
    guides on the A5 or landscape A4 size depending on ``self.args.a5``.
    """
    with open('sig.pdf', 'rb') as source:
        pdf_in = PdfFileReader(source)
        pdf_out = PdfFileWriter()
        for i in range(pdf_in.getNumPages()):
            page = pdf_in.getPage(i)
            if i == 0:
                # Only the first page receives the guides.
                guides = StringIO()
                if self.args.longarm:
                    create_pdf(
                        guides, a4lwidth_pt, a4lheight_pt, generate_longarm())
                else:
                    if self.args.a5:
                        w, h = a5width_pt, a5height_pt
                    else:
                        w, h = a4lwidth_pt, a4lheight_pt
                    create_pdf(guides, w, h, generate_shortarm(
                        self.args.a5, bool(self.args.signature)))
                pdf_guides = PdfFileReader(guides)
                page.mergePage(pdf_guides.getPage(0))
            pdf_out.addPage(page)
        # Closing explicitly guarantees the output is flushed; it is written
        # while the source is still open because pages are read lazily.
        with open('sigs.pdf', 'wb') as target:
            pdf_out.write(target)
def joinpdf(folder=TMPFOLDER, startpage=INDEX, outputname='freecad.pdf'):
    """Create one PDF from several others, ordered by the start page's links.

    The page order comes from the first ``<ul>...</ul>`` span found in
    ``<folder>/<startpage>.html``: every ``href`` in it names a page, and the
    start page itself is inserted as the second entry.  For each page accepted
    by ``exists`` the file ``<folder>/<name>.pdf`` is appended; a file that
    cannot be opened or parsed is reported and skipped.

    Args:
        folder: directory holding the HTML start page and the per-page PDFs.
        startpage: name of the start page, without the ".html" extension.
        outputname: file name of the merged PDF, written under OUTPUTPATH.

    Raises:
        IndexError: if the start page contains no ``<ul>...</ul>`` span.
    """
    if VERBOSE:
        print("Building table of contents...")
    with open(folder + os.sep + startpage + '.html') as f:
        html = f.read()
    html = html.replace("\n", " ")
    html = html.replace("> <", "><")
    html = re.findall("<ul.*/ul>", html)[0]
    pages = re.findall('href="(.*?)"', html)
    pages.insert(1, startpage + ".html")

    result = PdfFileWriter()
    # Pages are read lazily when the writer serialises them, so every input
    # must stay open until after result.write().
    opened = []
    try:
        for p in pages:
            # p[:-5] strips the trailing ".html".
            if exists(p[:-5]):
                if VERBOSE:
                    print('Appending', p)
                try:
                    handle = open(folder + os.sep + p[:-5] + '.pdf', 'rb')
                    opened.append(handle)
                    inputfile = PdfFileReader(handle)
                except Exception:
                    print('Unable to append', p)
                else:
                    for i in range(inputfile.getNumPages()):
                        result.addPage(inputfile.getPage(i))
        with open(OUTPUTPATH + os.sep + outputname, 'wb') as outputfile:
            result.write(outputfile)
    finally:
        for handle in opened:
            handle.close()
    if VERBOSE:
        print('Successfully created', OUTPUTPATH, os.sep, outputname)
def pdf(coursesid,examsid):
    """Create a (currently placeholder) PDF of this exam and return it.

    A one-page overlay containing the text "Hello" is merged onto the first
    page of the template and sent back as an ``application/pdf`` response.

    Args:
        coursesid: course identifier from the route; not used yet.
        examsid: exam identifier from the route; not used yet.
    """
    # TODO: generate the actual exam; this is just a proof of concept.
    from reportlab.pdfgen import canvas
    from reportlab.lib.pagesizes import letter
    from pyPdf import PdfFileWriter, PdfFileReader
    from io import BytesIO

    output = BytesIO()
    p = canvas.Canvas(output, pagesize=letter)
    p.drawString(100, 100, 'Hello')
    p.save()
    output.seek(0)
    new_pdf = PdfFileReader(output)

    merged = BytesIO()
    # NOTE(review): hard-coded absolute template path.
    with open('/home/treece/src/web/bubbleck/res/Template.pdf', 'rb') as template:
        existing_pdf = PdfFileReader(template)
        out = PdfFileWriter()
        page = existing_pdf.getPage(0)
        page.mergePage(new_pdf.getPage(0))
        out.addPage(page)
        out.write(merged)

    # write() returns None; the PDF bytes live in the buffer.
    response = make_response(merged.getvalue())
    # The old value had no disposition type and an unbalanced quote.
    response.headers['Content-Disposition'] = 'inline; filename="sakulaci.pdf"'
    response.mimetype = 'application/pdf'
    return response
def create_source_pdf(self, cr, uid, ids, data, report_xml, context=None):
    """Render the report, reusing or saving per-record attachments.

    Without an attachment expression on ``report_xml`` the report is rendered
    in one go by ``create_single_pdf``.  With one, each record is rendered on
    its own (or loaded from a stored attachment when ``attachment_use``
    allows it) and, if the first result is a PDF, all results are
    concatenated.

    Returns:
        ``(content, format)``, whatever ``create_single_pdf`` returns, or
        False as soon as rendering one record yields a falsy result.
    """
    if not context:
        context = {}
    pool = pooler.get_pool(cr.dbname)
    attach = report_xml.attachment
    if attach:
        objs = self.getObjects(cr, uid, ids, context)
        results = []
        for obj in objs:
            # SECURITY NOTE(review): ``attach`` is evaluated as Python code;
            # it must only ever come from trusted report definitions.
            aname = eval(attach, {'object': obj, 'time': time})
            result = False
            if report_xml.attachment_use and aname and context.get(
                    'attachment_use', True):
                # Look for a previously stored PDF for this record.
                aids = pool.get('ir.attachment').search(
                    cr, uid, [('datas_fname', '=', aname + '.pdf'),
                              ('res_model', '=', self.table),
                              ('res_id', '=', obj.id)])
                if aids:
                    brow_rec = pool.get('ir.attachment').browse(
                        cr, uid, aids[0])
                    # An attachment without data contributes nothing.
                    if not brow_rec.datas:
                        continue
                    d = base64.decodestring(brow_rec.datas)
                    results.append((d, 'pdf'))
                    continue
            result = self.create_single_pdf(cr, uid, [obj.id], data,
                                            report_xml, context)
            if not result:
                return False
            if aname:
                # Best effort: failing to save the attachment is logged and
                # does not abort the report.
                try:
                    name = aname + '.' + result[1]
                    pool.get('ir.attachment').create(
                        cr, uid, {
                            'name': aname,
                            'datas': base64.encodestring(result[0]),
                            'datas_fname': name,
                            'res_model': self.table,
                            'res_id': obj.id,
                        }, context=context)
                except Exception:
                    #TODO: should probably raise a proper osv_except instead, shouldn't we? see LP bug #325632
                    logging.getLogger('report').error(
                        'Could not create saved report attachment',
                        exc_info=True)
            results.append(result)
        if results:
            if results[0][1] == 'pdf':
                # Concatenate the per-record PDFs into a single document.
                from pyPdf import PdfFileWriter, PdfFileReader
                output = PdfFileWriter()
                for r in results:
                    reader = PdfFileReader(cStringIO.StringIO(r[0]))
                    for page in range(reader.getNumPages()):
                        output.addPage(reader.getPage(page))
                s = cStringIO.StringIO()
                output.write(s)
                return s.getvalue(), results[0][1]
    # No attachment expression, no results, or a non-PDF first result.
    return self.create_single_pdf(cr, uid, ids, data, report_xml, context)
def select(filesandranges, outputfilename, verbose):
    """Copy only the listed pages of the given PDFs into a new file.

    Args:
        filesandranges: sequence of dicts with the keys ``"name"`` (input
            path) and ``"pages"`` (1-based page numbers to keep, in order).
        outputfilename: path of the PDF to create; must not exist yet.
        verbose: when true, echo the request before processing.

    Exits the process (after calling ``halp``) with status 2 when an input is
    missing, the output already exists or an input cannot be read, and with
    status 3 when a requested page number is outside the document.
    """
    if verbose:
        print (str(filesandranges)+"\noutput: "+str(outputfilename))
    for entry in filesandranges:
        if not os.path.exists(entry['name']):
            halp()
            print ("error: "+entry['name']+" does not exist... exiting nao")
            sys.exit(2)  # missing input file
    if os.path.exists(outputfilename):
        halp()
        # Report the output file; the old message named the last input.
        print ("error: "+outputfilename+" does already exist... exiting nao")
        sys.exit(2)  # refuse to overwrite the output

    output = PdfFileWriter()
    # Inputs must stay open until the writer has serialised their pages.
    handles = []
    try:
        try:
            for pdf in filesandranges:
                handle = open(pdf["name"], "rb")
                handles.append(handle)
                fiel = PdfFileReader(handle)
                num_pages = fiel.getNumPages()
                for pagenr in pdf["pages"]:
                    if 1 <= pagenr <= num_pages:
                        output.addPage(fiel.getPage(pagenr-1))
                    else:
                        print("one or more pages are not in the chosen PDF")
                        halp()
                        sys.exit(3)  # wrong pages or ranges
        # ``except Exception`` lets the SystemExit(3) above through; a bare
        # ``except`` caught it and turned it into exit status 2.
        except Exception:
            halp()
            sys.exit(2)  # input could not be opened or is not a PDF

        if not os.path.exists(outputfilename):
            with open(outputfilename, "wb") as outputStream:
                output.write(outputStream)
        else:
            print ("file exists, discontinuing operation")
    finally:
        for handle in handles:
            handle.close()
def editPDF(filename):
    """Copy a PDF, adding /Tags and /Keywords entries to its info dictionary.

    Both entries are set to ``tag_dict[filename]``.  The copy is written to
    ``<directory>/<filename minus its last four characters>_updated.pdf``.

    Args:
        filename: path of the input PDF; also the lookup key in ``tag_dict``.

    Raises:
        KeyError: if ``filename`` has no entry in ``tag_dict``.
    """
    INPUT = filename
    OUTPUT = filename[:-4] + '_updated.pdf'

    output = PdfFileWriter()
    infoDict = output._info.getObject()
    # Metadata entries to add; extend this mapping for further tags.
    infoDict.update({
        NameObject('/Tags'): createStringObject(tag_dict[filename]),
        NameObject('/Keywords'): createStringObject(tag_dict[filename])
    })

    # The input stays open until the copy is written: pages are read lazily.
    with open(INPUT, 'rb') as fin:
        pdf_in = PdfFileReader(fin)
        for page in range(pdf_in.getNumPages()):
            output.addPage(pdf_in.getPage(page))
        with open(os.path.join(directory, OUTPUT), 'wb') as outputStream:
            output.write(outputStream)
def __call__(self, data, attachments=[], pages=None):
    """Fill the form with ``data`` and return the assembled PDF writer.

    Every field whose context has a "template" is rendered with the arguments
    from ``template_args(data)``; a field whose rendering raises is logged and
    left out.  The rendered values go through ``exec_pdftk``, the configured
    watermarks are merged onto their pages, the selected pages are copied to
    a new writer, and each attachment is merged onto a fresh blank page.

    Args:
        data: source data passed to ``template_args``.
        attachments: objects whose ``pdf()`` result is merged onto a blank
            page appended after the selected pages.
        pages: page indices to output; a falsy value selects all pages.

    Returns:
        The ``PdfFileWriter`` holding the result (not yet serialised).
    """
    self.rendered = {}
    for name, field_ctx in self.fields.items():
        if "template" in field_ctx:
            self.context = field_ctx
            render_args = self.template_args(data)
            tpl = self.context["template"]
            try:
                text = tpl.render(**render_args)
            except Exception as err:
                logger.error("%s: %s %s", name, tpl, err)
                continue
            # A filter may already have stored this field while rendering.
            if name not in self.rendered:
                self.rendered[name] = text

    filled = PdfFileReader(self.exec_pdftk(self.rendered))
    for pagenumber, watermark in self.watermarks:
        filled.getPage(pagenumber).mergePage(watermark)

    output = PdfFileWriter()
    if not pages:
        pages = xrange(filled.getNumPages())
    for page_index in pages:
        output.addPage(filled.getPage(page_index))

    for attachment in attachments:
        blank = output.addBlankPage()
        blank.mergePage(attachment.pdf())
    return output
def addPdfOverlay(self, pdf_doc, overlay_doc, output_doc, repeatOverlay=False):
    '''
    Merge an overlay PDF onto a PDF document, page by page.

    pdf_doc: (string) Path to PDF document.
    overlay_doc: (string) Path to PDF overlay document to overlay pdf_doc.
    output_doc: (string) Path of the merged PDF to write.
    repeatOverlay: (boolean) If set to True, page 1 of the overlay
                   document is repeated for each page of the pdf_doc.
                   (default: False)

    Without repeatOverlay, pages of pdf_doc that have no matching overlay
    page are copied unchanged (this used to raise IndexError).
    '''
    # Both inputs stay open until the output is written (pages are read
    # lazily) and are closed even when merging fails.
    with open(pdf_doc, "rb") as pdf_stream:
        with open(overlay_doc, "rb") as overlay_stream:
            pdf = PdfFileReader(pdf_stream)
            pdf_overlay = PdfFileReader(overlay_stream)
            page_cnt = pdf.numPages
            overlay_cnt = pdf_overlay.numPages

            outputWriter = PdfFileWriter()
            for n in range(page_cnt):
                pg = pdf.getPage(n)
                if repeatOverlay:
                    pg.mergePage(pdf_overlay.getPage(0))
                elif n < overlay_cnt:
                    pg.mergePage(pdf_overlay.getPage(n))
                outputWriter.addPage(pg)

            with open(output_doc, "wb") as outputStream:
                outputWriter.write(outputStream)
def parse_file(pdfFile,nameFile):
    """Split a PDF at every page that mentions a known name and mail each part.

    Pages are scanned in order.  A page whose text contains one of the names
    (case-insensitive) starts a new document; following pages are added to it
    until the next match.  Each finished document is handed to ``send_email``
    together with the name that started it.  Pages before the first match
    are dropped.

    Args:
        pdfFile: path of the PDF to split.
        nameFile: path passed to ``get_names``, which supplies the name map.
    """
    with open(pdfFile, "rb") as pdf_stream:
        pdfReader = PdfFileReader(pdf_stream)
        # Read the names and e-mail addresses.
        names = get_names(nameFile)
        smtp = smtplib.SMTP('localhost')
        try:
            pdfWriter = None  # no document started yet
            prevName = ""
            for i in range(pdfReader.getNumPages()):
                page = pdfReader.getPage(i)
                pageStr = page.extractText().lower()
                for name in names.keys():
                    if pageStr.find(name.lower()) != -1:
                        if pdfWriter is not None:
                            # Send the document collected so far.
                            send_email(smtp, pdfWriter, prevName, names)
                        pdfWriter = PdfFileWriter()
                        prevName = name
                        break
                if pdfWriter is not None:
                    pdfWriter.addPage(page)
            # Send the last document.
            if pdfWriter is not None:
                send_email(smtp, pdfWriter, prevName, names)
        finally:
            smtp.quit()
def appendDocuments(self, pdf_docs, output_doc):
    '''
    Append PDF documents together.

    pdf_docs: (list) List of PDF document paths.
    output_doc: (string) Path to the outputed PDF document.

    Returns True on success and False when any step fails.
    '''
    # Streams are kept open until the output is written (pages are read
    # lazily) and are closed on both the success and the failure path.
    streams = []
    try:
        outputWriter = PdfFileWriter()
        for doc in pdf_docs:
            stream = open(doc, "rb")
            streams.append(stream)
            for pg in PdfFileReader(stream).pages:
                outputWriter.addPage(pg)
        with open(output_doc, "wb") as outputStream:
            outputWriter.write(outputStream)
        return True
    except Exception:
        return False
    finally:
        for stream in streams:
            stream.close()
def rewrite(self, context, font={'name': 'Times-Roman', 'size': 11}):
    """Draw text onto the first page of ``self.path`` and save the result.

    Only the first page of the source is written to ``self.destination``.

    Args:
        context: iterable of dicts with the keys ``'x'``, ``'y'`` and
            ``'value'``; each value is drawn at the given position.
        font: dict with the keys ``'name'`` and ``'size'``; read only.

    Returns:
        True once the file has been written.
    """
    # Create the text overlay as an in-memory PDF with ReportLab.
    packet = StringIO.StringIO()
    can = canvas.Canvas(packet, pagesize=letter)
    can.setFont(font['name'], font['size'])
    for i in context:
        can.drawString(i['x'], i['y'], i['value'])
    can.save()
    # Rewind so the reader starts at the beginning of the buffer.
    packet.seek(0)
    new_pdf = PdfFileReader(packet)

    # The source stays open until the output is written: pages are read
    # lazily by the writer.
    with open(self.path, "rb") as source:
        existing_pdf = PdfFileReader(source)
        output = PdfFileWriter()
        page = existing_pdf.getPage(0)
        page.mergePage(new_pdf.getPage(0))
        output.addPage(page)
        with open(self.destination, "wb") as outputStream:
            output.write(outputStream)
    return True
def replicatePage(self, pdf_doc, count=1, pageNumber=1):
    '''
    Replicate a page in a document, appends replicated page
    to the end of the document.

    pdf_doc: (string) Path to PDF document.
    count: (integer) Number of times to replicate page. (default 1)
    pageNumber: (integer) Page number to replicate. (default 1)

    The result is written to a temporary file next to pdf_doc and then
    moved over pdf_doc.
    '''
    pdf_dir = os.path.dirname(pdf_doc)
    unique_filename = self.__uniqueName()
    temp_file = os.path.join(pdf_dir, unique_filename+".pdf")

    with open(pdf_doc, "rb") as source:
        pdf_reader = PdfFileReader(source)
        # Was ``pdf.getPage(...)``: ``pdf`` is undefined in this method.
        page = pdf_reader.getPage(pageNumber-1)

        outputWriter = PdfFileWriter()
        # Copy the original pages to the new document.
        for pg in pdf_reader.pages:
            outputWriter.addPage(pg)
        # Add the replicated pages.
        for n in range(count):
            outputWriter.addPage(page)

        # Written while the source is still open: page data is read lazily.
        with open(temp_file, "wb") as outputStream:
            outputWriter.write(outputStream)

    shutil.move(temp_file, pdf_doc)
def create_source_pdf(self, cr, uid, ids, data, report_xml, context=None):
    """Render the report per record, saving attachments, with a reprint switch.

    For every record the report is loaded from a stored attachment (when
    ``attachment_use`` allows it) or rendered and stored.  If the first result
    is a PDF, the outcome depends on ``context['allow']``: when falsy the
    whole report is rendered again in one go, otherwise the per-record PDFs
    are concatenated.

    NOTE(review): the nesting was reconstructed from a flattened source.  As
    written, the function returns None when ``report_xml.attachment`` is
    falsy, when there are no results, or when the first result is not a PDF
    -- confirm this is intended.
    """
    # NOTE(review): ``flag`` is assigned but never read in this function.
    flag=False
    if not context:
        context={}
    pool = pooler.get_pool(cr.dbname)
    attach = report_xml.attachment
    # Check in the new model whether this report may be reprinted;
    # "allow to reprint" should take precedence over ``attach``.
    if attach:
        objs = self.getObjects(cr, uid, ids, context)
        results = []
        for obj in objs:
            # SECURITY NOTE(review): ``attach`` is evaluated as Python code;
            # it must only ever come from trusted report definitions.
            aname = eval(attach, {'object':obj, 'time':time})
            result = False
            if report_xml.attachment_use and aname and context.get('attachment_use', True):
                # Look for a previously stored PDF for this record.
                aids = pool.get('ir.attachment').search(cr, uid, [('datas_fname','=',aname+'.pdf'),('res_model','=',self.table),('res_id','=',obj.id)])
                if aids:
                    brow_rec = pool.get('ir.attachment').browse(cr, uid, aids[0])
                    # An attachment without data contributes nothing.
                    if not brow_rec.datas:
                        continue
                    d = base64.decodestring(brow_rec.datas)
                    results.append((d,'pdf'))
                    continue
            result = self.create_single_pdf(cr, uid, [obj.id], data, report_xml, context)
            if not result:
                return False
            try:
                if aname:
                    flag=True # only reached the first time, while no attachment exists yet
                    name = aname+'.'+result[1]
                    pool.get('ir.attachment').create(cr, uid, {
                        'name': aname,
                        'datas': base64.encodestring(result[0]),
                        'datas_fname': name,
                        'res_model': self.table,
                        'res_id': obj.id,
                        }, context=context
                    )
                    # Committed immediately, independently of the caller's
                    # transaction handling.
                    cr.commit()
            except Exception,e:
                # Best effort: the failure is logged and rendering goes on.
                import traceback, sys
                # NOTE(review): ``tb_s`` is built but never used.
                tb_s = reduce(lambda x, y: x+y, traceback.format_exception(sys.exc_type, sys.exc_value, sys.exc_traceback))
                netsvc.Logger().notifyChannel('report', netsvc.LOG_ERROR,str(e))
            results.append(result)
        if results:
            if results[0][1]=='pdf':
                if not context.get('allow',False):
                    # Reprint not allowed: render everything in one go.
                    return self.create_single_pdf(cr, uid, ids, data, report_xml, context)
                else:
                    # Concatenate the per-record PDFs into a single document.
                    from pyPdf import PdfFileWriter, PdfFileReader
                    output = PdfFileWriter()
                    for r in results:
                        reader = PdfFileReader(cStringIO.StringIO(r[0]))
                        for page in range(reader.getNumPages()):
                            output.addPage(reader.getPage(page))
                    s = cStringIO.StringIO()
                    output.write(s)
                    return s.getvalue(), results[0][1]
def showpdf(request):
    """Stamp the signature image on the first page of an extracted PDF.

    Query parameters:
        f: name of a file inside ``MEDIA_ROOT/pdffiles/extracted``.  Only the
            base name is used, so the value cannot point outside that folder.
        o: ``"l"`` draws the signature at x=529; any other value, or a missing
            parameter, draws it at x=70.

    Returns:
        An ``HttpResponse`` carrying the single stamped page as an attachment,
        or None when ``f`` is not supplied.
    """
    sign = os.path.join(settings.MEDIA_ROOT, "signature.png")
    mimetypes.init()
    response = None
    if 'f' in request.GET:
        # Untrusted input: strip any directory part to block path traversal.
        filename = os.path.basename(request.GET['f'])
        pdf_path = os.path.join(settings.MEDIA_ROOT, 'pdffiles', 'extracted',
                                '%s' % filename)
        with open(pdf_path, "rb") as fr:
            imgTemp = StringIO()
            imgDoc = canvas.Canvas(imgTemp)
            sign_x = 529 if request.GET.get('o') == 'l' else 70
            imgDoc.drawImage(sign, sign_x, 40, 290 / 2, 154 / 2)
            imgDoc.save()

            overlay = PdfFileReader(StringIO(imgTemp.getvalue())).getPage(0)
            page = PdfFileReader(fr).getPage(0)
            page.mergePage(overlay)
            pdf_out = PdfFileWriter()
            pdf_out.addPage(page)

            response = HttpResponse(mimetype='application/pdf')
            response['Content-Disposition'] = 'attachment; filename=%s' % filename
            # Written while the source is still open: page data is read lazily.
            pdf_out.write(response)
    return response
def join_pages(composites):
    """Build the searchable PDF for a set of composites and report its path.

    The composites are grouped into pages by ``collect_pages``.  Each page is
    painted by ``paint_original_segments`` into a one-page PDF, and the first
    pages of those files are concatenated into a temporary file under
    ``./static/images/``.  A JSON object ``{"searchable": <path>}`` is then
    written to stdout.

    The LaTeX "transcribed" output that used to be produced alongside is
    disabled and not generated here.
    """
    page_fnames = []
    for page_num, collection in enumerate(collect_pages(composites)):
        fnames, transcriptions = [], []
        for r in collection:
            fnames.append(r['location'])
            transcriptions.append(r['transcription'])
        page_fnames.append(
            paint_original_segments(fnames, transcriptions, page_num))

    # Searchable PDF: one page per painted file.
    pdf_writer = PdfFileWriter()
    pdf_pages = []
    try:
        for page_fname in page_fnames:
            handle = open(page_fname, 'rb')
            pdf_pages.append(handle)
            pdf_writer.addPage(PdfFileReader(handle).getPage(0))
        searchable_pdf = NamedTemporaryFile(prefix='searchable_',
                                            suffix='.pdf',
                                            dir=path.abspath('./static/images/'),
                                            delete=False)
        try:
            pdf_writer.write(searchable_pdf)
        finally:
            searchable_pdf.close()
    finally:
        # An explicit loop: ``map`` is lazy on Python 3 and would never call
        # close().
        for handle in pdf_pages:
            handle.close()

    json.dump({
        'searchable': searchable_pdf.name
    }, sys.stdout)
def receipts_view(request):
    """Return one PDF containing the first page of every charge receipt.

    Up to 100 charges of the customer are fetched from Stripe.  A receipt PDF
    is obtained for each through ``get_or_create_receipt_pdf``, the first
    pages are concatenated into ``<MEDIA_ROOT>employer/receipts/Umeqo
    <employer> Charges.pdf``, and that file is returned inline.
    """
    customer = request.META.get('customer', None)
    employer = request.user.recruiter.employer
    charges = stripe.Charge.all(count=100, customer = customer.id).data
    pdf_name = "Umeqo %s Charges.pdf" % (employer)
    path = "%semployer/receipts/" % (s.MEDIA_ROOT)
    pdf_path = "%s%s" % (path, pdf_name)

    output = PdfFileWriter()
    # Every receipt stays open until the merged file is written (pages are
    # read lazily); previously only the last one was ever closed.
    receipt_files = []
    try:
        for charge in charges:
            try:
                invoice = stripe.Invoice.retrieve(charge.invoice)
            except InvalidRequestError:
                # Without this reset a failed lookup reused the previous
                # charge's invoice, or raised NameError on the first charge.
                # NOTE(review): assumes get_or_create_receipt_pdf accepts
                # invoice=None -- confirm.
                invoice = None
            receipt_path = get_or_create_receipt_pdf(charge, invoice, employer.name)
            receipt_file = open(receipt_path, "rb")
            receipt_files.append(receipt_file)
            output.addPage(PdfFileReader(receipt_file).getPage(0))

        if not os.path.exists(path):
            os.makedirs(path)
        with open(pdf_path, "wb") as outputStream:
            output.write(outputStream)
    finally:
        for receipt_file in receipt_files:
            receipt_file.close()

    mimetype = "application/pdf"
    with open(pdf_path, "rb") as merged:
        response = HttpResponse(merged.read(), mimetype=mimetype)
    response["Content-Disposition"] = 'inline; filename="%s"' % pdf_name
    return response
def setMetadata(self, metadata):
    """Return the document's PDF bytes with new metadata.

    Keyword arguments:
    metadata -- expected an dictionary with metadata.  A "ModificationDate"
        entry is stored as "ModDate"; a list under "Keywords" is joined with
        spaces.  The caller's dictionary is not modified.
    """
    # TODO: date as "D:20090401124817-04'00'" ASN.1 for ModDate and CreationDate
    metadata = dict(metadata)  # work on a copy; pop() below must not leak out
    modification_date = metadata.pop("ModificationDate", None)
    if modification_date:
        metadata['ModDate'] = modification_date
    if isinstance(metadata.get('Keywords', None), list):
        # Was ``list.join(' ')``, which raises AttributeError.
        metadata['Keywords'] = ' '.join(metadata['Keywords'])

    args = {}
    for key, value in metadata.items():
        # NOTE(review): capitalize() lowercases the rest of the key, so
        # "ModDate" is written as "/Moddate" -- confirm this is intended.
        args[NameObject('/' + key.capitalize())] = createStringObject(value)

    output_pdf = PdfFileWriter()
    output_pdf._info.getObject().update(args)

    output_stream = io.BytesIO()
    # The source stays open until the copy is serialised: pages are read
    # lazily by the writer.
    with open(self.document.getUrl(), "rb") as source:
        input_pdf = PdfFileReader(source)
        for page_num in range(input_pdf.getNumPages()):
            output_pdf.addPage(input_pdf.getPage(page_num))
        output_pdf.write(output_stream)
    return output_stream.getvalue()
def split_file(f, filename):
    """Split a PDF into sub-files of at most 10 pages and queue them in order.

    Args:
        f: an already opened PDF reader (must provide ``getNumPages`` and
            ``getPage``).
        filename: path of the PDF behind ``f``; its last four characters are
            dropped to build the chunk names ``<stem>_<first page>.pdf``.

    Side effects:
        Appends every chunk name to the module-level ``file_queue`` and
        removes ``filename`` once all chunks are written.
    """
    global file_queue
    total_pages = f.getNumPages()
    log('Splitting file ' + filename + " with " + str(total_pages) + " pages.")

    for first_page in range(0, total_pages, 10):
        fname = filename[:-4] + '_' + str(first_page) + '.pdf'
        output = PdfFileWriter()
        # The last chunk may hold fewer than 10 pages.
        for i in range(first_page, min(first_page + 10, total_pages)):
            output.addPage(f.getPage(i))
        # The context manager closes the chunk even if writing fails.
        with open(fname, 'wb') as fout:
            output.write(fout)
        file_queue.append(fname)

    # Delete the original now that it is in pieces.
    os.remove(filename)
def generate(donor):
    """Build the two-page PDF for *donor* under ``output/`` unless it exists.

    Both SVG templates (``page1_svg`` and ``page2_svg``) are copied with
    "/France/" replaced by the URL-escaped donor name, updated and exported
    to PDF through inkscape, and the first page of each export is combined
    into ``output/<donor-slug>.pdf``. Returns None; nothing is done when the
    combined file is already present.
    """
    os.system('mkdir -p output')
    donor_url = donor.replace(' ', '%20')
    slug = donor.replace(' ', '-').lower()
    page1 = 'output/%s1' % (slug)
    page2 = 'output/%s2' % (slug)
    combined = 'output/%s.pdf' % (slug)
    if os.path.exists(combined):
        return

    # NOTE(review): the donor name is interpolated into shell command lines;
    # if it can come from untrusted input these calls need escaping or a
    # subprocess argument list.
    os.system('cp "%s" "%s.svg"' % (page1_svg, page1))
    os.system('sed "s|/France/|/%s/|" "%s" > "%s.svg"' % (donor_url, page1_svg, page1))
    os.system('inkscape --file="%s.svg" --verb=za.co.widgetlabs.update --verb=FileSave --verb=FileQuit 2> /dev/null' % (page1))
    os.system('inkscape --file="%s.svg" --export-pdf="%s.pdf" 2> /dev/null' % (page1, page1))

    os.system('cp "%s" "%s.svg"' % (page2_svg, page2))
    os.system('sed "s|/France/|/%s/|" "%s" > "%s.svg"' % (donor_url, page2_svg, page2))
    os.system('inkscape --file="%s.svg" --verb=za.co.widgetlabs.update --verb=FileSave --verb=FileQuit 2> /dev/null' % (page2))
    os.system('inkscape --file="%s.svg" --export-pdf="%s.pdf" ' % (page2, page2))

    # Merge pages; both exports stay open until the result is written.
    with open('%s.pdf' % (page1), 'rb') as first_pdf, \
            open('%s.pdf' % (page2), 'rb') as second_pdf:
        input1 = PdfFileReader(first_pdf)
        input2 = PdfFileReader(second_pdf)
        output = PdfFileWriter()
        output.addPage(input1.getPage(0))
        output.addPage(input2.getPage(0))
        with open(combined, 'wb') as outputStream:
            output.write(outputStream)
    sleep(2)
def build_output_pdf(self, output_stream):
    """Overlay the modification PDF on the original and write the result.

    Pages present in both documents are merged index by index; pages only
    in the original are copied unchanged; pages only in the modification
    document are appended at the end. The outcome is written to
    *output_stream*.
    """
    # Rewind the modification buffer and load it as the overlay source.
    self._modification_stream.seek(0)
    overlay_pdf = PdfFileReader(self._modification_stream)

    # Load the original document, which is the merge target.
    self._original_pdf_stream.seek(0)
    base_pdf = self._get_pdf_reader(self._original_pdf_stream)

    merged_pdf = PdfFileWriter()

    # Walk the original pages, overlaying the matching page when one exists.
    for index in range(base_pdf.numPages):
        base_page = base_pdf.getPage(index)
        if index < overlay_pdf.numPages:
            base_page.mergePage(overlay_pdf.getPage(index))
        merged_pdf.addPage(base_page)

    # Any overlay pages beyond the end of the original are appended as is.
    for index in range(base_pdf.numPages, overlay_pdf.numPages):
        merged_pdf.addPage(overlay_pdf.getPage(index))

    merged_pdf.write(output_stream)
def output(self):
    """Export the pages selected in the line edit to a user-chosen PDF.

    A save dialog asks for the destination. The page specification from
    ``self.pages_line_edit`` is parsed against the page count of
    ``self.in_filename`` and the resulting pages are written to the chosen
    file. Nothing is written when no destination is selected.
    """
    # get the output filename using the file dialog
    (out_filename, _selected_filter) = \
        QFileDialog.getSaveFileName(parent = self,
                                    caption = self.tr(u'Export'),
                                    dir = '',
                                    filter = self.tr('pdf (*.pdf)'))
    if not out_filename:
        # An empty name means no file was chosen (e.g. dialog cancelled).
        return

    pages_string = self.pages_line_edit.text()
    with open(self.in_filename, 'rb') as in_file:
        in_reader = PdfFileReader(in_file)
        # Get the indices of pages to extract. This happens before the output
        # file is opened, so a bad page spec does not leave an empty file.
        pages = pages_parser(in_reader.getNumPages()).parse(pages_string)
        out_writer = PdfFileWriter()
        for page_index in pages:
            out_writer.addPage(in_reader.getPage(page_index))
        with open(out_filename, 'wb') as out_file:
            out_writer.write(out_file)
def renderToPdf(envLL, filename, sizex, sizey):
    """Render every layer in MAPNIK_LAYERS and overlay them into one PDF.

    envLL -- envelope passed through LLToMerc() before zooming
    filename -- path of the merged PDF; each layer is also written to
        "<filename without extension>_<mapname>.pdf"
    sizex, sizey -- size handed to the cairo surface and the mapnik map

    The first layer provides the base page; every later layer is merged
    onto that page.
    """
    basefilename = os.path.splitext(filename)[0]
    envMerc = LLToMerc(envLL)  # same for every layer, so computed once
    mergedpdf = None
    page = None
    layer_files = []  # kept open until the merged PDF has been written
    try:
        for mapname in MAPNIK_LAYERS:
            print('Rendering %s' % mapname)

            # Render layer PDF.
            localfilename = basefilename + '_' + mapname + '.pdf'
            surface = cairo.PDFSurface(localfilename, sizex, sizey)
            layer_map = mapnik.Map(sizex, sizey)
            mapnik.load_map(layer_map, mapname + ".xml")
            layer_map.zoom_to_box(envMerc)
            mapnik.render(layer_map, surface)
            surface.finish()

            # Merge with master.
            layer_file = open(localfilename, "rb")
            layer_files.append(layer_file)
            layer_page = PdfFileReader(layer_file).getPage(0)
            if mergedpdf is None:
                mergedpdf = PdfFileWriter()
                page = layer_page
                mergedpdf.addPage(page)
            else:
                page.mergePage(layer_page)

        with open(filename, 'wb') as output:
            mergedpdf.write(output)
    finally:
        for layer_file in layer_files:
            layer_file.close()
def watermark(self, pdfStr, watermarkFile, spec):
    """Combine the first page of a watermark PDF with pages of a document.

    pdfStr -- the generated document, passed directly to PdfFileReader
    watermarkFile -- path of the PDF whose first page is the watermark
    spec -- Mark.FIRST_PAGE marks only the first page, Mark.ALL_PAGES marks
        every page; with any other value pages are copied unchanged

    Returns ``self.outputFile`` after writing to it when that attribute is
    set, otherwise the resulting PDF content as a string.
    """
    with open(watermarkFile, "rb") as watermarkStream:
        inputWatermark = PdfFileReader(watermarkStream)
        generatedPdf = PdfFileReader(pdfStr)
        outputPdf = PdfFileWriter()

        watermarkPage = inputWatermark.getPage(0)
        firstPage = True  # cleared once a page has been watermarked
        for page in generatedPdf.pages:
            if (spec == Mark.FIRST_PAGE and firstPage) or spec == Mark.ALL_PAGES:
                # Merge onto a (shallow) copy of the watermark page; merging
                # into watermarkPage itself would pile up every document page
                # on the same object.
                p = copy.copy(watermarkPage)
                p.mergePage(page)
                outputPdf.addPage(p)
                firstPage = False
            else:
                outputPdf.addPage(page)

        # Write while the watermark file is still open.
        if self.outputFile:
            with open(self.outputFile, "wb") as outputStream:
                outputPdf.write(outputStream)
            return self.outputFile

        stringIO = StringIO.StringIO()
        outputPdf.write(stringIO)
        return stringIO.getvalue()
def scalePDF(inputFile, pageNumber, zoomFactor):
    """Write one page of a PDF, scaled by zoomFactor, to its own file.

    inputFile -- path of the source PDF; must contain a '/' and a '.'
    pageNumber -- 1-based number of the page to extract
    zoomFactor -- factor handed to the page's scaleBy()

    The page is saved as "<dir>/<stem>/<stem>_<pageNumber>.pdf", creating
    the "<dir>/<stem>" directory when needed. If scaling raises, a notice
    is printed and the unscaled page is written instead. Returns the path
    of the written file.
    """
    page_index = pageNumber - 1  # reader pages are 0-based

    # Everything goes into a directory named after the input file.
    dot = inputFile.rindex('.')
    outputDirectory = inputFile[:dot]
    if not os.path.exists(outputDirectory):
        os.makedirs(outputDirectory)
    slash = inputFile.rindex('/')
    stem = inputFile[slash:dot]  # keeps the leading '/'
    outputFile = inputFile[:slash] + stem + stem + '_' + str(pageNumber) + '.pdf'

    output = PdfFileWriter()
    with open(inputFile, "rb") as source:
        input1 = PdfFileReader(source)
        page = input1.getPage(page_index)
        # Scaling failed on a certain page for reasons never identified, so a
        # failure is reported and the page is kept unscaled.
        try:
            page.scaleBy(zoomFactor)
        except Exception:
            print("---PAGE WAS NOT SCALED: " + str(pageNumber))
        output.addPage(page)
        with open(outputFile, "wb") as outputStream:
            output.write(outputStream)
    return outputFile
def main():
    """Concatenate the PDF files named on the command line.

    With no file arguments the usage text is printed; with exactly one the
    script refuses to run. Otherwise all pages are appended in argument
    order to a new file named after the local time,
    "YYYY-MM-DD_HH-MM-SS.pdf", in the current directory.
    """
    # Parse command line
    pdf_files = sys.argv[1:]
    if not pdf_files:
        print(__usage__)
        sys.exit()

    # Make sure there is more than one pdf file
    if len(pdf_files) == 1:
        print("In the spirit of gnu tar, this script cowardly refuses to")
        print("combine one pdf file!")
        sys.exit()

    # Create unique name for output file
    output_file = time.strftime("%Y-%m-%d_%H-%M-%S.pdf", time.localtime())

    # Combine pdf files in order; sources stay open until the write is done.
    output = PdfFileWriter()
    sources = []
    try:
        for pdf in pdf_files:
            source = open(pdf, "rb")
            sources.append(source)
            reader = PdfFileReader(source)
            for i in range(reader.getNumPages()):
                output.addPage(reader.getPage(i))

        # Write final pdf
        with open(output_file, "wb") as stream:
            output.write(stream)
    finally:
        for source in sources:
            source.close()
def pdf_watermark_fast_first_page(self, pathname, Wm_f, wt1='', **kwargs):
    """Write the first page of a PDF, stamped with a text line, to Wm_f.

    pathname -- path of the existing PDF
    Wm_f -- path of the file to write
    wt1 -- accepted but not used by this method
    url_wtm (keyword) -- text drawn at position (10, 100); when it is
        missing an empty string is drawn instead of failing later on an
        unbound name

    Only the first page of the source ends up in the output. Returns Wm_f.
    """
    from pyPdf import PdfFileWriter, PdfFileReader
    import StringIO
    from reportlab.pdfgen import canvas
    from reportlab.lib.pagesizes import letter

    url_watermark = kwargs.get('url_wtm', '')

    # create a new PDF with Reportlab
    packet = StringIO.StringIO()
    can = canvas.Canvas(packet, pagesize=letter)
    can.drawString(10, 100, url_watermark)
    can.save()

    # move to the beginning of the StringIO buffer
    packet.seek(0)
    new_pdf = PdfFileReader(packet)

    # read your existing PDF; it stays open until the output is written
    with open(pathname, "rb") as existing_file:
        existing_pdf = PdfFileReader(existing_file)
        output = PdfFileWriter()
        # add the "watermark" (which is the new pdf) on the existing page
        page = existing_pdf.getPage(0)
        page.mergePage(new_pdf.getPage(0))
        output.addPage(page)
        # finally, write "output" to a real file
        with open(Wm_f, "wb") as outputStream:
            output.write(outputStream)
    return Wm_f
def split(files, verbose):
    """Write every page of each PDF in *files* to its own one-page PDF.

    files -- list of PDF paths; the help text is shown and the process
        exits with status 2 when one is missing or cannot be read
    verbose -- when true, print a summary line at the end

    Page N of "name.ext" is written to "namepN.ext", with N zero-padded to
    the number of digits of the document's page count.
    """
    for infilename in files:
        if not os.path.exists(infilename):
            halp()
            print ("error: "+infilename+" does not exist... exiting nao")
            sys.exit(2)

    handles = []  # closed once all pages have been written
    try:
        inputs = []
        try:
            for path in files:
                handle = open(path, "rb")
                handles.append(handle)
                inputs.append(PdfFileReader(handle))
        except Exception:
            halp()
            print ("there has been an error of unfortunate proportions")
            sys.exit(2)

        pages_done = 0
        for path, pdf in zip(files, inputs):
            (name, ext) = splitext(path)
            page_count = pdf.getNumPages()
            # Digits of the largest page number; ceil(log10(n)) is one short
            # whenever n is an exact power of ten.
            width = len(str(page_count))
            for pagenr in range(page_count):
                output = PdfFileWriter()
                output.addPage(pdf.getPage(pagenr))
                target = name + "p" + str(pagenr + 1).zfill(width) + ext
                print (target)
                with open(target, "wb") as outputStream:
                    output.write(outputStream)
                pages_done += 1

        if verbose:
            print (str(pages_done)+" pages in "+str(len(inputs))+" files processed")
    finally:
        for handle in handles:
            handle.close()
def write_pdf(pdf, part_count_ye, part_count_ye_end, fen, output_file):
    """Write a page range of *pdf* to "<output_file>_<fen + 1>.pdf".

    pdf -- reader providing getPage()
    part_count_ye -- index of the first page to copy
    part_count_ye_end -- index one past the last page to copy
    fen -- zero-based part number; the file name uses fen + 1
    output_file -- path prefix of the file to write
    """
    out = PdfFileWriter()
    for pp in range(part_count_ye, part_count_ye_end):
        out.addPage(pdf.getPage(pp))
    # The context manager closes the file even if writing fails.
    with open(output_file + '_' + str(fen + 1) + '.pdf', 'wb') as ous:
        out.write(ous)
def delete(filesandranges, outputfilename, verbose):
    """Copy the given PDFs into one file, leaving out the listed pages.

    filesandranges -- list of dicts with "name" (path of a PDF) and "pages"
        (1-based page numbers to drop from that PDF)
    outputfilename -- path of the PDF to create; must not exist yet
    verbose -- accepted but not used here

    Shows the help text and exits with status 2 when an input is missing,
    the output already exists, or an input cannot be processed.
    """
    for pdf in filesandranges:
        if not os.path.exists(pdf['name']):
            halp()
            print ("error: "+pdf['name']+" does not exist... exiting nao")
            sys.exit(2)
    if os.path.exists(outputfilename):
        halp()
        # Report the output file; the input name was printed here before.
        print ("error: "+outputfilename+" does already exist... exiting nao")
        sys.exit(2)

    output = PdfFileWriter()
    handles = []  # inputs stay open until the output has been written
    try:
        try:
            for pdf in filesandranges:
                print (pdf["name"])
                handle = open(pdf["name"], "rb")
                handles.append(handle)
                fiel = PdfFileReader(handle)
                for pagenr in range(1, fiel.getNumPages()+1):
                    if pagenr not in pdf["pages"]:
                        output.addPage(fiel.getPage(pagenr-1))
        except Exception:
            halp()
            sys.exit(2)

        if not os.path.exists(outputfilename):
            with open(outputfilename, "wb") as outputStream:
                output.write(outputStream)
        else:
            print ("file exists, discontinuing operation")
    finally:
        for handle in handles:
            handle.close()
def save_ready_template(request, id):
    """Stamp a person's name on both template pages and render the page.

    The FIO record with the given id supplies name and surname. The text is
    drawn at (284, 579) for the first page and (260, 494) for the second,
    merged onto pages 0 and 1 of the template PDF, and the result is written
    to the fixed "readytemplate.pdf" path. Returns the rendered
    'template_page.html' response.
    """
    person_print = FIO.objects.get(id=id)
    full_name = "{} {}".format(person_print.name, person_print.surname)

    # create a new PDF with Reportlab: one overlay page per template page
    packet = StringIO.StringIO()
    can = canvas.Canvas(packet, pagesize=letter)
    can.drawString(284, 579, full_name)
    can.showPage()
    can.drawString(260, 494, full_name)
    can.showPage()
    can.save()

    # move to the beginning of the StringIO buffer
    packet.seek(0)
    new_pdf = PdfFileReader(packet)

    # read your existing PDF; it stays open until the output is written
    with open("/Users/danilakimov/Desktop/template1.pdf", "rb") as template_file:
        existing_pdf = PdfFileReader(template_file)
        output = PdfFileWriter()
        # add the "watermark" (which is the new pdf) on the existing pages
        for page_index in (0, 1):
            page = existing_pdf.getPage(page_index)
            page.mergePage(new_pdf.getPage(page_index))
            output.addPage(page)
        # finally, write "output" to a real file
        with open("/Users/danilakimov/Desktop/readytemplate.pdf", "wb") as outputStream:
            output.write(outputStream)

    return render(request, 'template_page.html', {'person_template': person_print})
def split_pset():
    """Split a problem-set answer PDF into one PDF per question.

    Reads ``options.pset`` and ``options.probs``. The source file is
    "pset<pset>/latex/pset<pset>_answers.pdf". ``options.probs`` is a
    comma-separated list with one page specification per question, which
    get_pages() turns into 1-based page numbers. Question k is written to
    "pset<pset>/latex/pset<pset>-<k>_answer.pdf".
    """
    if (not options.pset or not options.probs):
        print_err_and_die("You must enter both arguements! run with -h for help")
    path = "pset%s/latex/"%options.pset
    filename = "%spset%s_answers.pdf"%(path, options.pset)
    answers_file = None
    try:
        answers_file = open(filename, "rb")
        inp = PdfFileReader(answers_file)
    except IOError:
        if answers_file is not None:
            answers_file.close()
        print_err_and_die("Error! File, %s was not found." % filename)

    try:
        ##loop over user input and break up pdf
        probs = options.probs.split(",")
        for questionNum, prob in enumerate(probs, 1):
            print("Processing question " + str(questionNum))
            prob = prob.strip() #kill whitespace
            out = PdfFileWriter()
            pages = get_pages(prob, inp.getNumPages())
            for page in pages:
                print("page num " + str(page))
                out.addPage(inp.getPage(int(page)-1))
            with open("%spset%s-%s_answer.pdf"%(path, options.pset, questionNum), "wb") as outStream:
                out.write(outStream)
        print("Done!")
    finally:
        # The source is closed once every question has been written.
        answers_file.close()
def _merge_pdf(self, documents):
    """Merge PDF files into one.

    :param documents: list of path of pdf files
    :returns: path of the merged pdf (a new temporary file)
    """
    writer = PdfFileWriter()
    # We have to close the streams *after* PdfFileWriter's call to write()
    streams = []
    try:
        for document in documents:
            pdfreport = open(document, 'rb')
            streams.append(pdfreport)
            reader = PdfFileReader(pdfreport)
            for page in range(0, reader.getNumPages()):
                writer.addPage(reader.getPage(page))

        # NOTE(review): the temp file holds PDF data but is created with an
        # '.html' suffix -- kept as is; confirm whether callers rely on it.
        merged_file_fd, merged_file_path = tempfile.mkstemp(
            suffix='.html', prefix='report.merged.tmp.')
        # PDF content is binary, so the descriptor is opened in 'wb' mode.
        with closing(os.fdopen(merged_file_fd, 'wb')) as merged_file:
            writer.write(merged_file)
    finally:
        # Runs on errors as well, so no source stream is leaked.
        for stream in streams:
            stream.close()

    return merged_file_path
class cleanpdf:
    """Copy a PDF to "<name>5.pdf" with a probe string in the writer's info.

    Instantiating the class does all the work: every entry already present
    in the new writer's document-info dictionary is replaced by the markup
    string '<h1 onmouseover=alert(1)>', all pages of the source are copied,
    and the result is written next to the source file.
    """

    def __init__(self, pathFile):
        self.pathFile = pathFile
        self.inputFile = open(self.pathFile, "rb")
        try:
            self.pdfInput = PdfFileReader(self.inputFile)
            self.pyPdfOutput = PdfFileWriter()
            self.dataToUpdate = self.pyPdfOutput._info.getObject()
            self.__modifyData()
            self.__copyPDF()
        finally:
            # The copy is complete (or failed), so release the source file.
            self.inputFile.close()

    def __modifyData(self):
        # Overwrite the value of every existing info entry.
        for data in self.dataToUpdate:
            self.dataToUpdate[data] = createStringObject(('<h1 onmouseover=alert(1)>').encode('ascii'))

    def __copyPDF(self):
        for page in range(0, self.pdfInput.getNumPages()):
            self.pyPdfOutput.addPage(self.pdfInput.getPage(page))
        # Closing the output makes sure the data reaches the disk.
        with open(self.__changeName(), "wb") as outputFile:
            self.pyPdfOutput.write(outputFile)

    def __changeName(self):
        # Replace everything from the last '.' with "5.pdf"; a path without
        # any '.' simply gets "5.pdf" appended instead of losing a character.
        dot = self.pathFile.rfind(".")
        stem = self.pathFile if dot == -1 else self.pathFile[:dot]
        return stem + "5.pdf"
def splitXPDF(pdfFileName):
    """Split a PDF into one file per page, named from 'nameFile.base'.

    'nameFile.base' is read from the current directory and split on
    newlines; entry i names page i. Pages are only written when the number
    of entries equals the page count. Files go to
    "resault\\<setpath>\\<name>.pdf", where <setpath> is the part of
    pdfFileName between its first backslash and its first dot. An IOError
    is reported with a message instead of being raised.
    """
    try:
        with open(pdfFileName, "rb") as pdf_file:
            inputpdf = PdfFileReader(pdf_file)
            page_count = inputpdf.getNumPages()
            print('[+] Total Page : ' + str(page_count))
            setpath = pdfFileName[pdfFileName.find('\\') + 1:pdfFileName.find('.')]
            with open("nameFile.base", "r") as nameFile:
                lstName = nameFile.read().split('\n')
            if page_count != len(lstName):
                print('[-] Number of Name in \'nameFile.base\' is not match with Number Page in PDF.')
                return

            # Create the output directories once instead of once per page.
            target_dir = 'resault\\' + setpath
            if not os.path.isdir('resault'):
                os.mkdir('resault')
            if not os.path.isdir(target_dir):
                os.mkdir(target_dir)

            for i in range(page_count):
                output = PdfFileWriter()
                output.addPage(inputpdf.getPage(i))
                with open(target_dir + '\\' + lstName[i] + '.pdf', 'wb') as outputStream:
                    output.write(outputStream)
                print('[+] Generate Page ' + str(i + 1) + ' with File : ' + lstName[i] + '.pdf')
    except IOError:
        print('[-] Cannot Openfile.')
def save(self, to):
    """Render every page class onto the origin PDF's first page and save it.

    to -- a file path, or an already opened writable stream; in both cases
        the stream is closed after writing

    Each entry of ``self.pages`` is instantiated with ``self.instance`` and
    its save() result is merged onto a copy of the first page of the origin
    PDF; the merged pages are written in order.

    Raises RuntimeError when no origin is configured or it cannot be opened.
    """
    import copy

    origin = self.get_origin()
    if not origin:
        raise RuntimeError("Please implement get_origin method or origin attribute")

    origin_file = None
    try:
        origin_file = open(origin, "rb")
        existing_pdf = PdfFileReader(origin_file)
    except IOError:
        if origin_file is not None:
            origin_file.close()
        raise RuntimeError(u"Failed to open origin file")

    try:
        string_types = basestring
    except NameError:  # no separate basestring type on this interpreter
        string_types = str

    try:
        output = PdfFileWriter()
        for page_class in self.pages:
            new_page = page_class(self.instance).save()
            # Merge onto a copy: merging onto the reader's own page object
            # every time would stack all overlays on one shared page.
            base_page = copy.copy(existing_pdf.getPage(0))
            base_page.mergePage(new_page)
            output.addPage(base_page)

        if isinstance(to, string_types):
            outputStream = open(to, "wb")
        else:
            outputStream = to
        try:
            output.write(outputStream)
        finally:
            outputStream.close()
    finally:
        origin_file.close()
def make_pdf_2(dni=''):
    """Stamp the date line on the second contract page and save that page.

    dni -- identifier used as prefix of the output file name

    The text '28 de Julio de 2018' is drawn at (257, 568) and merged onto
    page index 1 of "contracte_voluntariat.pdf". That single page is written
    to "<dni>file1.pdf" in the current directory. Returns the file name.
    """
    packet = StringIO.StringIO()
    can = canvas.Canvas(packet, pagesize=letter)
    can.drawString(257, 568, '28 de Julio de 2018')
    can.save()

    # move to the beginning of the StringIO buffer
    packet.seek(0)
    new_pdf = PdfFileReader(packet)

    filename = str(dni) + 'file' + str(1) + '.pdf'
    # read your existing PDF; it stays open until the output is written
    with open("contracte_voluntariat.pdf", "rb") as contract_file:
        existing_pdf = PdfFileReader(contract_file)
        output = PdfFileWriter()
        # add the "watermark" (which is the new pdf) on the existing page
        page = existing_pdf.getPage(1)
        page.mergePage(new_pdf.getPage(0))
        output.addPage(page)
        # finally, write "output" to a real file
        with open(filename, "wb") as outputStream:
            output.write(outputStream)
    return filename
def print_danfe(xml_nfe):
    """Generate the DANFE PDF for an NF-e XML and return its content.

    xml_nfe -- XML assigned to a ProcNFe_310 instance

    The DANFE is generated under "/tmp/" as "<chave>.pdf", its pages are
    copied into a new PDF held in memory, and that PDF's content is
    returned.
    """
    from pysped.nfe.leiaute import ProcNFe_310
    from pysped.nfe.danfe import DANFE

    procnfe = ProcNFe_310()
    procnfe.xml = xml_nfe

    danfe = DANFE()
    danfe.NFe = procnfe.NFe
    danfe.protNFe = procnfe.protNFe
    danfe.caminho = "/tmp/"
    danfe.gerar_danfe()
    pdf_path = danfe.caminho + danfe.NFe.chave + '.pdf'

    output = PdfFileWriter()
    s = StringIO()
    # The generated file stays open until the copy has been written.
    with open(pdf_path, "rb") as pdf_file:
        pdf = PdfFileReader(pdf_file)
        for i in range(pdf.getNumPages()):
            output.addPage(pdf.getPage(i))
        output.write(s)

    str_pdf = s.getvalue()
    s.close()
    return str_pdf
def createPDFHttpResponse(filepath, output_filename, user, access_time):
    """
    Creates a HttpResponse from a watermarked PDF file.

    The watermark is the text "Downloaded by <user> at <access_time>" drawn
    at position (0, 0) and merged onto every page.

    :param filepath: Path to the file
    :param output_filename: File name sent to the user
    :param user: value inserted into the watermark text
    :param access_time: object whose isoformat(' ') goes into the watermark
    :return: HttpResponse with the file content

    NOTE(review): earlier documentation promised HttpResponseNotFound for a
    missing file, but nothing here produces one -- a file that cannot be
    opened raises from open(); callers must handle that themselves.
    """
    # Add access watermark
    buffer = StringIO()
    p = canvas.Canvas(buffer)
    p.drawString(0, 0, "Downloaded by %s at %s" % (user, access_time.isoformat(' ')))
    p.showPage()
    p.save()
    buffer.seek(0)
    watermark = PdfFileReader(buffer)

    response = HttpResponse(mimetype='application/pdf')
    response['Content-Disposition'] = 'inline; filename=%s' % output_filename.encode('utf-8')

    # Read the PDF to be accessed; it stays open until the response body has
    # been written and is closed afterwards.
    with open(filepath, 'rb') as attachment_file:
        attachment = PdfFileReader(attachment_file)
        output = PdfFileWriter()
        # Attach watermark to each page
        for page in attachment.pages:
            page.mergePage(watermark.getPage(0))
            output.addPage(page)
        output.write(response)
    return response
def duplicated_pdf(stream):
    """Build a PDF holding the converted HTML twice: original, then duplicate.

    The HTML in *stream* is converted once with the "-- Original --" banner
    and, after rewinding the stream, once more with the "-- Duplicado --"
    banner (each banner is passed for both banner arguments). The pages of
    both conversions are concatenated and the PDF content is returned.
    """
    original_banner = "<center><h3>-- Original --</h3></center>"
    duplicate_banner = "<center><h3>-- Duplicado --</h3></center>"
    converter = html_to_pdf.HTMLToPDFConverter()

    original_pdf = converter.convert(stream, original_banner, original_banner)
    original_reader = PdfFileReader(StringIO(original_pdf))
    # Rewind so the second conversion reads the same HTML again.
    stream.seek(0)
    duplicate_pdf = converter.convert(stream, duplicate_banner, duplicate_banner)
    duplicate_reader = PdfFileReader(StringIO(duplicate_pdf))

    writer = PdfFileWriter()
    for reader in (original_reader, duplicate_reader):
        for page_index in range(reader.getNumPages()):
            writer.addPage(reader.getPage(page_index))

    result = StringIO()
    writer.write(result)
    return result.getvalue()
def convert(filename):
    """Split every page of a PDF into a left half and a right half.

    For each page two copies are added to the output: the first with its
    media box limited to the left half, the second to the right half. The
    result is written to "<filename>.2".
    """
    with open(filename, 'rb') as source:
        inp = PdfFileReader(source)
        outp = PdfFileWriter()
        for page in inp.pages:
            # NOTE(review): the copies are taken before the page's media box
            # is read, as in the original code; presumably the order matters
            # for keeping the two boxes independent -- keep it.
            page1 = copy.copy(page)
            page2 = copy.copy(page)

            UL = page.mediaBox.upperLeft
            UR = page.mediaBox.upperRight
            LL = page.mediaBox.lowerLeft
            LR = page.mediaBox.lowerRight

            # left column
            page1.mediaBox.upperLeft = (UL[0], UL[1])
            page1.mediaBox.upperRight = (UR[0]/2, UR[1])
            page1.mediaBox.lowerLeft = (LL[0], LL[1])
            page1.mediaBox.lowerRight = (LR[0]/2, LR[1])
            outp.addPage(page1)

            # right column
            page2.mediaBox.upperLeft = (UR[0]/2, UL[1])
            page2.mediaBox.upperRight = (UR[0], UR[1])
            page2.mediaBox.lowerLeft = (LR[0]/2, LR[1])
            page2.mediaBox.lowerRight = (LR[0], LR[1])
            outp.addPage(page2)

        # Closing the target explicitly guarantees the data is flushed.
        with open(filename+'.2', 'wb') as target:
            outp.write(target)
def split_chapters(*t_args):
    """Split a large pdf into chunks (i.e. chapters).

    The first positional argument is an argument list whose first element
    is the path of a JSON config file with the keys:

    source -- path of the PDF to split
    first_chapter_index -- number given to the first chapter
    chapters_ends -- for each chapter, the page index one past its end
    firstpage -- page index where the first chapter starts
    chapter_fmt -- %-format applied to the chapter number
    chapter_prefix -- prefix of each output file name
    outputdir -- directory for the chapter files (created when missing)

    Prints a usage line and returns when no config file is given, including
    when the function is called with no arguments at all.
    """
    args = t_args[0] if t_args else []
    if len(args) < 1:
        print("usage: utils_pdf split_chapters configfile")
        return
    from pyPdf import PdfFileWriter, PdfFileReader

    with open(args[0]) as f:
        P = json.load(f)

    # The source stays open until every chapter has been written.
    with open(P["source"], "rb") as source:
        reader = PdfFileReader(source)
        i0 = P["first_chapter_index"]
        ends = P["chapters_ends"]
        for i, chapter_end in enumerate(ends):
            ch_num = i0 + i
            fmt = P["chapter_fmt"] % (ch_num, )
            output = PdfFileWriter()
            if not os.path.exists(P["outputdir"]):
                os.mkdir(P["outputdir"])
            fn_out = "%s/%s%s" % (P["outputdir"], P["chapter_prefix"], fmt)
            # A chapter starts where the previous one ended.
            j0 = P["firstpage"] if i == 0 else ends[i - 1]
            for j in range(j0, chapter_end):
                output.addPage(reader.getPage(j))
            with open(fn_out, "wb") as outputStream:
                output.write(outputStream)
            print("wrote %s" % (fn_out, ))