def merge(fppath, bppath, outputpath, no_delete, fed_backwards):
    """Interleave a front-sides PDF and a back-sides PDF into one document.

    The output order is front 0, back 0, front 1, back 1, ...  A front page
    without a matching back page is emitted on its own.

    :param fppath: path of the PDF holding the front sides.
    :param bppath: path of the PDF holding the back sides.
    :param outputpath: destination path; a leading ``~`` is expanded.
    :param no_delete: when false, every page whose ``extractText()`` result
        is the empty string is dropped from the output.
    :param fed_backwards: when true, back pages are read from the end of
        *bppath* towards its start.
    """
    # PDFs are binary: text mode corrupts them on some platforms and fails
    # outright on Python 3.
    with open(fppath, 'rb') as fp_stream, open(bppath, 'rb') as bp_stream:
        fpfile = PdfFileReader(fp_stream)
        bpfile = PdfFileReader(bp_stream)
        back_count = bpfile.getNumPages()

        outputpages = []
        for i in range(fpfile.getNumPages()):
            outputpages.append(fpfile.getPage(i))
            back_index = back_count - i - 1 if fed_backwards else i
            # Explicit bounds check: a negative index would silently wrap
            # around to a page from the other end instead of raising.
            if 0 <= back_index < back_count:
                outputpages.append(bpfile.getPage(back_index))

        if not no_delete:
            # NOTE(review): presumably meant to drop blank sheets; pages that
            # carry no extractable text at all are dropped too.
            outputpages = [page for page in outputpages
                           if page.extractText() != '']

        outputfile = PdfFileWriter()
        for page in outputpages:
            outputfile.addPage(page)

        # The readers must still be open here: page content is pulled from
        # the source streams while writing.
        with open(os.path.expanduser(outputpath), 'wb') as out_stream:
            outputfile.write(out_stream)
def convert(filename):
    """Split every page of a PDF into a left half and a right half.

    Each source page yields two output pages (left column first, then right
    column) by narrowing the media box of two copies of the page.  The result
    is written next to the input as ``<filename>.2``.

    :param filename: path of the PDF to split.
    """
    with open(filename, 'rb') as src:
        inp = PdfFileReader(src)
        outp = PdfFileWriter()
        for page in inp.pages:
            page1 = copy.copy(page)
            page2 = copy.copy(page)

            UL = page.mediaBox.upperLeft
            UR = page.mediaBox.upperRight
            LL = page.mediaBox.lowerLeft
            LR = page.mediaBox.lowerRight

            # left column
            page1.mediaBox.upperLeft = (UL[0], UL[1])
            page1.mediaBox.upperRight = (UR[0]/2, UR[1])
            page1.mediaBox.lowerLeft = (LL[0], LL[1])
            page1.mediaBox.lowerRight = (LR[0]/2, LR[1])
            outp.addPage(page1)

            # right column
            page2.mediaBox.upperLeft = (UR[0]/2, UL[1])
            page2.mediaBox.upperRight = (UR[0], UR[1])
            page2.mediaBox.lowerLeft = (LR[0]/2, LR[1])
            page2.mediaBox.lowerRight = (LR[0], LR[1])
            outp.addPage(page2)

        # Write while the source is still open, then close both handles
        # deterministically so the output is fully flushed.
        with open(filename + '.2', 'wb') as dst:
            outp.write(dst)
def delete(filesandranges, outputfilename, verbose):
    """Copy the given PDFs to a new file, leaving out the listed pages.

    :param filesandranges: list of dicts; each has ``'name'`` (path of a PDF)
        and ``'pages'`` (1-based page numbers to leave out).
    :param outputfilename: path of the PDF to create; must not exist yet.
    :param verbose: accepted for symmetry with the sibling commands; not
        used here.

    Exits the process with status 2 when an input is missing, the output
    already exists, or an input cannot be processed.
    """
    for entry in filesandranges:
        if not os.path.exists(entry['name']):
            halp()
            print("error: " + entry['name'] + " does not exist... exiting nao")
            sys.exit(2)
    if os.path.exists(outputfilename):
        halp()
        # Report the file that actually exists (the output), not an input.
        print("error: " + outputfilename + " does already exist... exiting nao")
        sys.exit(2)

    output = PdfFileWriter()
    streams = []  # inputs must stay open until the output has been written
    try:
        try:
            for pdf in filesandranges:
                print(pdf["name"])
                stream = open(pdf["name"], "rb")
                streams.append(stream)
                fiel = PdfFileReader(stream)
                for pagenr in range(1, fiel.getNumPages() + 1):
                    if pagenr not in pdf["pages"]:
                        output.addPage(fiel.getPage(pagenr - 1))
        except Exception:
            # Most likely one of the inputs is not a readable PDF.
            halp()
            sys.exit(2)

        if not os.path.exists(outputfilename):
            outputStream = open(outputfilename, "wb")
            try:
                output.write(outputStream)
            finally:
                outputStream.close()
        else:
            print("file exists, discontinuing operation")
    finally:
        for stream in streams:
            stream.close()
def _merge_pdf(self, documents):
    """Merge PDF files into one.

    :param documents: list of path of pdf files
    :returns: path of the merged pdf (a temporary file the caller owns)
    """
    writer = PdfFileWriter()
    # The source streams have to stay open until PdfFileWriter.write() has
    # run, so they are collected here and closed in the ``finally`` below.
    streams = []
    try:
        for document in documents:
            pdfreport = open(document, 'rb')
            streams.append(pdfreport)
            reader = PdfFileReader(pdfreport)
            for page in range(reader.getNumPages()):
                writer.addPage(reader.getPage(page))

        # NOTE(review): the '.html' suffix is kept as-is because callers may
        # rely on the generated name, although the content is a PDF.
        merged_file_fd, merged_file_path = tempfile.mkstemp(
            suffix='.html', prefix='report.merged.tmp.')
        # Binary mode: the writer emits raw PDF bytes.
        with closing(os.fdopen(merged_file_fd, 'wb')) as merged_file:
            writer.write(merged_file)
    finally:
        for stream in streams:
            stream.close()
    return merged_file_path
def createPDFHttpResponse(filepath, output_filename, user, access_time):
    """
    Creates a HttpResponse from a watermarked PDF file.

    The watermark is one line of text naming the user who accessed the
    document and the time of access; it is merged onto every page.

    :param filepath: Path to the PDF file
    :param output_filename: File name sent to the user
    :param user: value interpolated into the watermark text with ``%s``
    :param access_time: object providing ``isoformat(' ')``
    :return: HttpResponse with the watermarked PDF as its content
    :raises IOError: if *filepath* cannot be opened (this function does not
        build a "not found" response itself)
    """
    # Render the access watermark as a one-page in-memory PDF.
    stamp_buffer = StringIO()
    p = canvas.Canvas(stamp_buffer)
    p.drawString(0, 0, "Downloaded by %s at %s" % (user, access_time.isoformat(' ')))
    p.showPage()
    p.save()
    stamp_buffer.seek(0)
    watermark_page = PdfFileReader(stamp_buffer).getPage(0)

    # Keep the source open until the response body has been written: page
    # data is read from it during output.write().
    with open(filepath, 'rb') as source:
        attachment = PdfFileReader(source)
        output = PdfFileWriter()

        # Attach watermark to each page
        for page in attachment.pages:
            page.mergePage(watermark_page)
            output.addPage(page)

        response = HttpResponse(mimetype='application/pdf')
        response['Content-Disposition'] = 'inline; filename=%s' % output_filename.encode('utf-8')
        output.write(response)
    return response
def pdfMerge():
    """
    Merges generated PDFs into one called <filename>_summary
    Deletes the individual constituent PDFs

    Reads the module-level names ``outDir`` and ``filename`` to build the
    paths ``<outDir><filename>_meta.pdf``, ``..._plots.pdf`` and
    ``..._summary.pdf``.
    """
    def append_pdf(reader, writer):
        """ Appends every page of reader to writer """
        for page_num in range(reader.numPages):
            writer.addPage(reader.getPage(page_num))

    meta_path = '%s%s_meta.pdf' % (outDir, filename)
    plots_path = '%s%s_plots.pdf' % (outDir, filename)
    summary_path = '%s%s_summary.pdf' % (outDir, filename)

    # Merge PDFs
    output = PdfFileWriter()
    print(outDir)
    with open(meta_path, 'rb') as meta_file, open(plots_path, 'rb') as plots_file:
        append_pdf(PdfFileReader(meta_file), output)
        append_pdf(PdfFileReader(plots_file), output)
        with open(summary_path, 'wb') as outputFile:
            output.write(outputFile)

    # Delete PDFs -- only after their handles are closed; removing a file
    # that is still open fails on some platforms.
    os.remove(plots_path)
    os.remove(meta_path)
def split_file(f, filename, pages_per_file=10):
    """Split our file into fixed-size sub-files and queue them in order.

    Each sub-file is named ``<stem>_<first page index>.pdf`` and its name is
    appended to the module-level ``file_queue``.  The original file is
    removed once all pieces have been written.

    :param f: PDF reader for *filename* (``getNumPages``/``getPage`` are used).
    :param filename: path of the PDF being split.
    :param pages_per_file: maximum number of pages per sub-file (default 10).
    :raises ValueError: if *pages_per_file* is smaller than 1.
    """
    if pages_per_file < 1:
        raise ValueError("pages_per_file must be at least 1")

    total_pages = f.getNumPages()
    log('Splitting file ' + filename + " with " + str(total_pages) + " pages.")

    # splitext instead of [:-4] so names without a 3-letter extension are
    # not truncated.
    stem = os.path.splitext(filename)[0]
    for first_page in range(0, total_pages, pages_per_file):
        fname = stem + '_' + str(first_page) + '.pdf'
        output = PdfFileWriter()
        last_page = min(first_page + pages_per_file, total_pages)
        for i in range(first_page, last_page):
            output.addPage(f.getPage(i))

        # Write and save file
        with open(fname, 'wb') as fout:
            output.write(fout)
        file_queue.append(fname)

    # Delete the file now that it's in pieces
    os.remove(filename)
def joinpdf(folder=TMPFOLDER,startpage=INDEX,outputname='freecad.pdf'):
    """Create one PDF from several others, following the order of the start page.

    The start page's HTML is read from *folder*, its first ``<ul>...</ul>``
    span is scanned for ``href`` targets, and for every target accepted by
    ``exists()`` the matching ``<folder>/<target minus 5 chars>.pdf`` is
    appended.  The result is written to ``OUTPUTPATH/<outputname>``.
    """
    if VERBOSE:
        print ("Building table of contents...")
    toc = open(folder+os.sep+startpage+'.html')
    html = toc.read()
    toc.close()

    html = html.replace("\n"," ").replace("> <","><")
    toc_list = re.findall("<ul.*/ul>",html)[0]
    pages = re.findall('href="(.*?)"',toc_list)
    # The start page itself goes right after the first linked page.
    pages.insert(1,startpage+".html")

    result = PdfFileWriter()
    for p in pages:
        stem = p[:-5]  # drop the trailing 5 characters (".html")
        if not exists(stem):
            continue
        if VERBOSE:
            print ('Appending',p)
        try:
            inputfile = PdfFileReader(open(folder+os.sep+stem+'.pdf','rb'))
        except:
            # Best effort: an unreadable page is reported and skipped.
            print ('Unable to append',p)
            continue
        for i in range(inputfile.getNumPages()):
            result.addPage(inputfile.getPage(i))

    outputfile = open(OUTPUTPATH + os.sep + outputname,'wb')
    result.write(outputfile)
    outputfile.close()
    if VERBOSE:
        print ('Successfully created',OUTPUTPATH,os.sep,outputname)
class cleanpdf:
    """Copy a PDF while overwriting every document-info entry of the copy.

    Constructing an instance does all the work: the file at *pathFile* is
    read, each entry of the new writer's info dictionary is replaced with a
    fixed marker string, and the pages are written to ``<stem>5.pdf`` next
    to the input.
    """

    def __init__(self, pathFile):
        """Process *pathFile* immediately; both files are closed on return."""
        self.pathFile = pathFile
        self.inputFile = open(self.pathFile, "rb")
        try:
            self.pdfInput = PdfFileReader(self.inputFile)
            self.pyPdfOutput = PdfFileWriter()
            self.dataToUpdate = self.pyPdfOutput._info.getObject()
            self.__modifyData()
            self.__copyPDF()
        finally:
            # Release the handle even when reading or writing fails.
            self.inputFile.close()

    def __modifyData(self):
        """Replace the value of every info-dictionary key with the marker."""
        for data in self.dataToUpdate:
            self.dataToUpdate[data] = createStringObject(('<h1 onmouseover=alert(1)>').encode('ascii'))

    def __copyPDF(self):
        """Append all input pages to the writer and save the result."""
        for page in range(0, self.pdfInput.getNumPages()):
            self.pyPdfOutput.addPage(self.pdfInput.getPage(page))
        outputFile = open(self.__changeName(), "wb")
        try:
            self.pyPdfOutput.write(outputFile)
        finally:
            # Closing flushes the data; previously this relied on GC.
            outputFile.close()

    def __changeName(self):
        """Return the output path: input path without extension + ``5.pdf``."""
        import os
        # splitext only looks at the last path component and copes with
        # names that have no extension (rfind(".") returned -1 and chopped
        # the final character).
        newName = os.path.splitext(self.pathFile)[0] + "5.pdf"
        return newName
def save(self, to):
    """Render every page class onto the origin template and write the PDF.

    For each entry of ``self.pages`` a fresh copy of the first page of the
    origin PDF is taken, the page produced by ``page_class(self.instance)
    .save()`` is merged onto it, and the result is appended to the output.

    :param to: destination path (string) or an already open writable
        stream.  The stream is closed after writing in both cases.
    :raises RuntimeError: when no origin is configured or the origin file
        cannot be opened.
    """
    import copy

    origin = self.get_origin()
    if not origin:
        raise RuntimeError("Please implement get_origin method or origin attribute")

    try:
        origin_file = open(origin, "rb")
    except IOError:
        raise RuntimeError(u"Failed to open origin file")

    try:
        try:
            existing_pdf = PdfFileReader(origin_file)
        except IOError:
            raise RuntimeError(u"Failed to open origin file")

        output = PdfFileWriter()
        for page_class in self.pages:
            new_page = page_class(self.instance).save()
            # Merge onto a copy: merging onto the shared page object made
            # every output page the same object with all overlays stacked.
            base_page = copy.copy(existing_pdf.getPage(0))
            base_page.mergePage(new_page)
            output.addPage(base_page)

        if isinstance(to, basestring):
            outputStream = open(to, "wb")
        else:
            outputStream = to
        try:
            output.write(outputStream)
        finally:
            outputStream.close()
    finally:
        origin_file.close()
def add_omr_marks(self, pdf_data, is_latest_document):
    """Overlay OMR marks on every recto (even 0-based index) page of a PDF.

    :param pdf_data: the PDF content as a byte string.
    :param is_latest_document: whether this document is the last one; only
        then is the last recto page marked as the latest page.
    :returns: the marked PDF as a byte string.
    """
    # Documentation
    # http://meteorite.unm.edu/site_media/pdf/reportlab-userguide.pdf
    # https://pythonhosted.org/PyPDF2/PdfFileReader.html
    # https://stackoverflow.com/a/17538003
    # https://gist.github.com/kzim44/5023021
    # https://www.blog.pythonlibrary.org/2013/07/16/
    # pypdf-how-to-write-a-pdf-to-memory/
    self.ensure_one()

    pdf_buffer = StringIO.StringIO()
    pdf_buffer.write(pdf_data)

    existing_pdf = PdfFileReader(pdf_buffer)
    output = PdfFileWriter()
    total_pages = existing_pdf.getNumPages()

    # The latest OMR mark goes on the last recto page, i.e. the highest
    # even page index.  (total_pages // 2 only matched for 1 or 4 pages.)
    latest_omr_page = ((total_pages - 1) // 2) * 2

    for page_number in range(total_pages):
        page = existing_pdf.getPage(page_number)
        # only print omr marks on recto pages; compare by value, not identity
        if page_number % 2 == 0:
            is_latest_page = is_latest_document and \
                page_number == latest_omr_page
            marks = self._compute_marks(is_latest_page)
            omr_layer = self._build_omr_layer(marks)
            page.mergePage(omr_layer)
        output.addPage(page)

    out_buffer = StringIO.StringIO()
    output.write(out_buffer)

    return out_buffer.getvalue()
def split_chapters(*t_args):
    """
    Split a large pdf into chunks (i.e. chapters)

    ``t_args[0]`` is the argument list; its first item is the path of a JSON
    config file with the keys ``source``, ``outputdir``, ``chapter_prefix``,
    ``chapter_fmt``, ``first_chapter_index``, ``firstpage`` and
    ``chapters_ends``.  Chapter *i* covers the 0-based page range
    ``[previous end, chapters_ends[i])``, starting at ``firstpage``.
    """
    # Treat "no arguments at all" like "empty argument list" instead of
    # failing with a NameError.
    args = t_args[0] if t_args else []
    if len(args) < 1:
        print("usage: utils_pdf split_chapters configfile")
        return

    from pyPdf import PdfFileWriter, PdfFileReader

    with open(args[0]) as f:
        P = json.loads(f.read())

    i0 = P["first_chapter_index"]
    ends = P["chapters_ends"]
    with open(P["source"], "rb") as source:
        reader = PdfFileReader(source)
        for i in range(len(ends)):
            ch_num = i0 + i
            fmt = P["chapter_fmt"] % (ch_num, )
            output = PdfFileWriter()
            if not os.path.exists(P["outputdir"]):
                os.mkdir(P["outputdir"])
            fn_out = "%s/%s%s" % (P["outputdir"], P["chapter_prefix"], fmt)
            j0 = P["firstpage"] if i == 0 else ends[i - 1]
            for j in range(j0, ends[i]):
                output.addPage(reader.getPage(j))
            with open(fn_out, "wb") as outputStream:
                output.write(outputStream)
            print("wrote %s" % (fn_out,))
def split_pset(): if (not options.pset or not options.probs): print_err_and_die("You must enter both arguements! run with -h for help") path = "pset%s/latex/"%options.pset try: filename = "%spset%s_answers.pdf"%(path, options.pset) inp = PdfFileReader(file(filename, "rb")) except IOError: print_err_and_die("Error! File, %s was not found." % filename) ##loop over user input and break up pdf questionNum = 1 probs = options.probs.split(",") for prob in probs: print "Processing question", questionNum prob = prob.strip() #kill whitespace out = PdfFileWriter() pages = get_pages(prob, inp.getNumPages()) for page in pages: print "page num", str(page) out.addPage(inp.getPage(int(page)-1)) outStream = file("%spset%s-%s_answer.pdf"%(path, options.pset, questionNum), "wb") out.write(outStream) outStream.close() questionNum +=1 print "Done!"
def create_source_pdf(self, cr, uid, ids, data, report_xml, context=None):
    """Build the report PDF, reusing or storing per-record attachments.

    When ``report_xml.attachment`` is set, each record is rendered on its
    own (or its stored attachment is reused) and the per-record results are
    collected; PDF results are then either re-rendered in one go or
    concatenated, depending on ``context['allow']``.

    Returns ``False`` as soon as rendering one record fails, otherwise the
    value of ``create_single_pdf`` or a ``(pdf_bytes, format)`` tuple.
    NOTE(review): in the code visible here the function falls off the end
    (returns None) when ``attach`` is empty, when no results were collected
    or when the first result is not a PDF -- confirm this is intended.
    """
    # NOTE(review): ``flag`` is assigned below but never read in this block.
    flag=False
    if not context:
        context={}
    pool = pooler.get_pool(cr.dbname)
    attach = report_xml.attachment
    #~
    #~ Check in the new model if this report allow to reprint,
    #~ Allowtoreprint should mandate over attach,
    if attach:
        objs = self.getObjects(cr, uid, ids, context)
        results = []
        for obj in objs:
            # ``attach`` is an expression evaluated with ``object`` and
            # ``time`` in scope; its result is the attachment base name.
            # NOTE(review): eval() of a stored expression -- make sure only
            # trusted users can edit it.
            aname = eval(attach, {'object':obj, 'time':time})
            result = False
            if report_xml.attachment_use and aname and context.get('attachment_use', True):
                # Look for an already stored '<aname>.pdf' for this record.
                aids = pool.get('ir.attachment').search(cr, uid, [('datas_fname','=',aname+'.pdf'),('res_model','=',self.table),('res_id','=',obj.id)])
                if aids:
                    brow_rec = pool.get('ir.attachment').browse(cr, uid, aids[0])
                    # An attachment without data is skipped entirely: the
                    # record contributes nothing to ``results``.
                    if not brow_rec.datas:
                        continue
                    d = base64.decodestring(brow_rec.datas)
                    results.append((d,'pdf'))
                    continue
            result = self.create_single_pdf(cr, uid, [obj.id], data, report_xml, context)
            if not result:
                return False
            try:
                if aname:
                    flag=True # since it only gets here the first time, when there is no attachment yet
                    name = aname+'.'+result[1]
                    pool.get('ir.attachment').create(cr, uid, {
                        'name': aname,
                        'datas': base64.encodestring(result[0]),
                        'datas_fname': name,
                        'res_model': self.table,
                        'res_id': obj.id,
                        }, context=context
                    )
                    # Commit right away so the stored attachment persists.
                    cr.commit()
            except Exception,e:
                # Failing to store the attachment is logged, not fatal.
                import traceback, sys
                # NOTE(review): ``tb_s`` is built but never used; only
                # str(e) reaches the log.
                tb_s = reduce(lambda x, y: x+y, traceback.format_exception(sys.exc_type, sys.exc_value, sys.exc_traceback))
                netsvc.Logger().notifyChannel('report', netsvc.LOG_ERROR,str(e))
            results.append(result)
        if results:
            if results[0][1]=='pdf':
                if not context.get('allow',False):
                    # Without the 'allow' flag the collected results are
                    # discarded and all ids are rendered in one call.
                    return self.create_single_pdf(cr, uid, ids, data, report_xml, context)
                else:
                    # Concatenate all per-record PDFs into one document.
                    from pyPdf import PdfFileWriter, PdfFileReader
                    output = PdfFileWriter()
                    for r in results:
                        reader = PdfFileReader(cStringIO.StringIO(r[0]))
                        for page in range(reader.getNumPages()):
                            output.addPage(reader.getPage(page))
                    s = cStringIO.StringIO()
                    output.write(s)
                    return s.getvalue(), results[0][1]
def into_half(src, dst): _src = file(src, 'rb') _dst = file(dst, 'wb') input = PdfFileReader(_src) output = PdfFileWriter() for i in range(input.getNumPages()): p = input.getPage(i) q = copy.copy(p) q.mediaBox = copy.copy(p.mediaBox) #x1, x2 = p.mediaBox.lowerLeft #x3, x4 = p.mediaBox.upperRight (w, h) = p.mediaBox.upperRight print w, h p.mediaBox.upperRight = (w/2, h) q.mediaBox.upperLeft = (w/2, h) output.addPage(p) output.addPage(q) output.write(_dst) _src.close() _dst.close()
def get_chapters():
    """Download the PDF parts of every chapter and assemble one PDF each.

    ``download_list`` (module level) holds one list of URLs per chapter.
    The chapter label is taken from the digits following ``kap`` in the
    first URL's file name, minus the last digit, and the assembled file is
    written to ``algs-kap<label>.pdf``.  A chapter is only written when all
    of its parts could be fetched and appended.
    """
    chapter_pattern = re.compile(r'(?<=kap)\d{1,3}')
    for url in download_list:
        # An empty group has no file name to derive the chapter from.
        if not url:
            continue

        output = PdfFileWriter()
        errored = False

        filename = url[0].split('/')[-1]
        chap = chapter_pattern.search(filename).group()
        # NOTE(review): presumably the last digit is a part number within
        # the chapter -- confirm against the URL scheme.
        chap = chap[:-1]
        print("Doing chapter " + chap)

        for u in url:
            try:
                pdf = urllib2.urlopen(u).read()
                mem_file = StringIO(pdf)
                append_pdf(PdfFileReader(mem_file), output)
            except Exception as e:
                print("Error for chapter " + chap + ": " + str(e))
                errored = True

        try:
            if not errored:
                out_name = "algs-kap" + chap + ".pdf"
                out_file = open(out_name, "wb")
                try:
                    output.write(out_file)
                finally:
                    out_file.close()
                # Report the name that was actually written.
                print("Assembled pdf at " + out_name)
            else:
                print("Couldn't get chapter, not assembled")
        except Exception as e:
            print("Error ocurred!")
            print(e)
def split(files, verbose):
    """Burst each PDF into one single-page PDF per page.

    Page *n* of ``<name><ext>`` is written to ``<name>p<n><ext>`` where *n*
    is 1-based and zero-padded to the number of digits of the page count.

    :param files: paths of the PDFs to split.
    :param verbose: when true, print a summary at the end.

    Exits the process with status 2 when an input is missing or unreadable.
    """
    for infilename in files:
        if not os.path.exists(infilename):
            halp()
            print("error: " + infilename + " does not exist... exiting nao")
            sys.exit(2)

    streams = []  # kept open until all pages have been written
    try:
        inputs = []
        try:
            for infilename in files:
                stream = open(infilename, "rb")
                streams.append(stream)
                inputs.append(PdfFileReader(stream))
        except Exception:
            # Most likely one of the inputs is not a readable PDF.
            halp()
            print("there has been an error of unfortunate proportions")
            sys.exit(2)

        pages_done = 0
        for infilename, pdf in zip(files, inputs):
            num_pages = pdf.getNumPages()
            # Digits of the highest page number; ceil(log10(n)) was one
            # short for n = 10, 100, ... and produced a float width.
            width = len(str(num_pages))
            (name, ext) = splitext(infilename)
            for pagenr in range(num_pages):
                output = PdfFileWriter()
                output.addPage(pdf.getPage(pagenr))
                out_name = name + "p" + "%0*d" % (width, pagenr + 1) + ext
                print(out_name)
                outputStream = open(out_name, "wb")
                try:
                    output.write(outputStream)
                finally:
                    outputStream.close()
                pages_done += 1
    finally:
        for stream in streams:
            stream.close()

    if verbose:
        print(str(pages_done) + " pages in " + str(len(inputs)) + " files processed")
def concatenate_pdf(self, book_title):
    """Join ``<book_title>1.pdf``, ``<book_title>2.pdf``, ... into one PDF.

    Chapters are looked up in the current working directory, starting at 1
    and stopping at the first missing number.  All of them are appended, in
    order, to ``<book_title>.pdf``; afterwards the chapter files are removed.

    :param book_title: common file-name prefix of the chapter PDFs.
    """
    existing = set(os.listdir(os.getcwd()))

    # Count consecutive chapters; no fixed upper bound.
    num_chapters = 0
    while book_title + str(num_chapters + 1) + ".pdf" in existing:
        num_chapters += 1
    print("numero capitulos" + str(num_chapters))

    chapter_files = [book_title + str(i) + ".pdf"
                     for i in range(1, num_chapters + 1)]

    print("Uniendo pfs...")
    output = PdfFileWriter()
    streams = []  # sources must stay open until the output is written
    try:
        # Every chapter is merged -- including the last one, which the
        # deletion loop below removes as well.
        for chapter_file in chapter_files:
            stream = open(chapter_file, "rb")
            streams.append(stream)
            reader = PdfFileReader(stream)  # parse each file only once
            for page_index in range(reader.getNumPages()):
                output.addPage(reader.getPage(page_index))

        outputStream = open(book_title + ".pdf", "wb")
        try:
            output.write(outputStream)
        finally:
            outputStream.close()
    finally:
        for stream in streams:
            stream.close()
    print("Union finalizada")

    # Only reached when the merged file was written successfully.
    for i, chapter_file in enumerate(chapter_files, 1):
        print("borrando... capitulo: " + str(i))
        os.remove(chapter_file)
def generate(donor):
    """Produce ``output/<donor-slug>.pdf`` from the two SVG templates.

    For each template (module-level ``page1_svg`` / ``page2_svg``) the
    ``/France/`` path segment is replaced with the URL-escaped donor name,
    Inkscape updates and exports the SVG to PDF, and the two single-page
    PDFs are joined.  Nothing is done when the combined PDF already exists.

    NOTE(review): *donor* is interpolated into shell command lines as-is;
    only call this with trusted names.
    """
    os.system('mkdir -p output')

    slug = donor.replace(' ','-').lower()
    donor_url = donor.replace(' ','%20')
    page1 = 'output/%s1' % (slug)
    page2 = 'output/%s2' % (slug)
    combined = 'output/%s.pdf' % (slug)
    if os.path.exists(combined):
        return

    # First page: copy template, personalise it, let Inkscape update and export.
    os.system('cp "%s" "%s.svg"' % (page1_svg, page1))
    os.system('sed "s|/France/|/%s/|" "%s" > "%s.svg"' % (donor_url, page1_svg, page1))
    os.system('inkscape --file="%s.svg" --verb=za.co.widgetlabs.update --verb=FileSave --verb=FileQuit 2> /dev/null' % (page1))
    os.system('inkscape --file="%s.svg" --export-pdf="%s.pdf" 2> /dev/null' % (page1, page1))

    # Second page: same steps (the export keeps its stderr output).
    os.system('cp "%s" "%s.svg"' % (page2_svg, page2))
    os.system('sed "s|/France/|/%s/|" "%s" > "%s.svg"' % (donor_url, page2_svg, page2))
    os.system('inkscape --file="%s.svg" --verb=za.co.widgetlabs.update --verb=FileSave --verb=FileQuit 2> /dev/null' % (page2))
    os.system('inkscape --file="%s.svg" --export-pdf="%s.pdf" ' % (page2, page2))

    # Merge pages
    first_reader = PdfFileReader(open('%s.pdf' % (page1), 'rb'))
    second_reader = PdfFileReader(open('%s.pdf' % (page2), 'rb'))
    writer = PdfFileWriter()
    writer.addPage(first_reader.getPage(0))
    writer.addPage(second_reader.getPage(0))
    combined_stream = open(combined, 'wb')
    writer.write(combined_stream)
    combined_stream.close()

    sleep(2)
def cat(infilenames, outputfilename, verbose):
    """Concatenate the given PDFs, in order, into *outputfilename*.

    Every input name is printed.  The process exits with status 2 when an
    input does not exist, the output already exists, or an input cannot be
    opened as a PDF.  With *verbose* set, the number of pages is reported.
    """
    inputs = []

    for infilename in infilenames:
        print (infilename)
        if not os.path.exists(infilename):
            halp()
            print ("error: "+infilename+" does not exist... exiting nao")
            sys.exit(2) # pdf file is no pdf file...

    if os.path.exists(outputfilename):
        halp()
        print ("error: "+outputfilename+" does already exist... exiting nao")
        sys.exit(2) # pdf file is no pdf file...

    try:
        for path in infilenames:
            inputs.append(PdfFileReader(open(path, "rb")))
    except:
        halp()
        sys.exit(2) # pdf file is no pdf file...

    page_total = 0
    output = PdfFileWriter()
    for reader in inputs:
        for index in range(reader.getNumPages()):
            output.addPage(reader.getPage(index))
            page_total += 1

    outputStream = open(outputfilename, "wb")
    output.write(outputStream)
    outputStream.close()

    if verbose:
        print (str(page_total)+" pages processed")
def setMetadata(self, metadata):
    """Returns a document with new metadata.

    Keyword arguments:
    metadata -- expected an dictionary with metadata.  A
    ``ModificationDate`` entry is stored as ``ModDate``; a list under
    ``Keywords`` is joined with spaces.  The dictionary itself is not
    modified.

    Returns the content of the new PDF as bytes.
    """
    # TODO: date as "D:20090401124817-04'00'" ASN.1 for ModDate and CreationDate
    metadata = dict(metadata)  # work on a copy, leave the caller's dict alone

    modification_date = metadata.pop("ModificationDate", None)
    if modification_date:
        metadata['ModDate'] = modification_date

    keywords = metadata.get('Keywords', None)
    if isinstance(keywords, list):
        # str.join, not list.join: lists have no join() method.
        metadata['Keywords'] = ' '.join(keywords)

    args = {}
    for key, value in metadata.items():
        # Upper-case only the first letter; str.capitalize() would also
        # lower-case the rest and turn 'ModDate' into 'Moddate'.
        info_key = key[:1].upper() + key[1:]
        args[NameObject('/' + info_key)] = createStringObject(value)

    output_pdf = PdfFileWriter()
    output_pdf._info.getObject().update(args)

    # The source has to stay open until the output has been written.
    with open(self.document.getUrl(), "rb") as source:
        input_pdf = PdfFileReader(source)
        for page_num in range(input_pdf.getNumPages()):
            output_pdf.addPage(input_pdf.getPage(page_num))
        output_stream = io.BytesIO()
        output_pdf.write(output_stream)
    return output_stream.getvalue()
def select(filesandranges, outputfilename, verbose):
    """Copy only the listed pages of the given PDFs into a new file.

    :param filesandranges: list of dicts; each has ``'name'`` (path of a PDF)
        and ``'pages'`` (1-based page numbers to copy, in output order).
    :param outputfilename: path of the PDF to create; must not exist yet.
    :param verbose: when true, echo the request before processing.

    Exit status: 2 when an input is missing or unreadable or the output
    already exists; 3 when a requested page is outside its document.
    """
    if verbose:
        print(str(filesandranges) + "\noutput: " + str(outputfilename))

    for entry in filesandranges:
        if not os.path.exists(entry['name']):
            halp()
            print("error: " + entry['name'] + " does not exist... exiting nao")
            sys.exit(2)
    if os.path.exists(outputfilename):
        halp()
        # Report the file that actually exists (the output), not an input.
        print("error: " + outputfilename + " does already exist... exiting nao")
        sys.exit(2)

    output = PdfFileWriter()
    streams = []  # inputs must stay open until the output has been written
    try:
        try:
            for pdf in filesandranges:
                stream = open(pdf["name"], "rb")
                streams.append(stream)
                fiel = PdfFileReader(stream)
                num_pages = fiel.getNumPages()
                for pagenr in pdf["pages"]:
                    if 1 <= pagenr <= num_pages:
                        output.addPage(fiel.getPage(pagenr - 1))
                    else:
                        print("one or more pages are not in the chosen PDF")
                        halp()
                        sys.exit(3)  # wrong pages or ranges
        except Exception:
            # ``Exception`` does not cover SystemExit, so the exit status 3
            # above is no longer swallowed and turned into 2.
            halp()
            sys.exit(2)

        if not os.path.exists(outputfilename):
            outputStream = open(outputfilename, "wb")
            try:
                output.write(outputStream)
            finally:
                outputStream.close()
        else:
            print("file exists, discontinuing operation")
    finally:
        for stream in streams:
            stream.close()
def output(self):
    """Ask for a target file and export the selected pages to it.

    The page selection is read from ``self.pages_line_edit`` and parsed by
    ``pages_parser``; the pages are taken from ``self.in_filename``.
    Nothing happens when the save dialog is cancelled.
    """
    # get the output filename using the file dialog
    (out_filename, _selected_filter) = \
        QFileDialog.getSaveFileName(parent = self,
                                    caption = self.tr(u'Export'),
                                    dir = '',
                                    filter = self.tr('pdf (*.pdf)'))
    # A cancelled dialog yields an empty name; open('') would raise.
    if not out_filename:
        return

    with open(self.in_filename, 'rb') as in_file:
        in_reader = PdfFileReader(in_file)

        # Get the indices of pages to extract.  This is done before the
        # output file is opened so a bad page spec does not leave behind an
        # empty or truncated file.
        pages_string = self.pages_line_edit.text()
        pages = pages_parser(in_reader.getNumPages()).parse(pages_string)

        # append pages to output writer
        out_writer = PdfFileWriter()
        for page_index in pages:
            out_writer.addPage(in_reader.getPage(page_index))

        # write to file; both handles are closed even on error
        with open(out_filename, 'wb') as out_file:
            out_writer.write(out_file)
def buildPdf(self):
    """Assemble the flight plan PDF and tell the user where it was saved.

    Page order: checklist, take-off sheet, airplane sheet, then every
    external PDF returned for the take-off aerodrome.  An encrypted external
    PDF is first run through ``PdfCracker`` and the cracked file is used.
    """
    to_pdffile = self._takeoff()
    ap_pdffile = self._airplane()
    ck_pdffile = self._checklist()

    output = PdfFileWriter()
    for generated in (ck_pdffile, to_pdffile, ap_pdffile):
        self.addAllPages(output, PdfFileReader(file(generated, "rb")))

    # Add AD Info Charts
    aerodrome_code = self.__fligthplan.performance_takeoff.aerodrome.code
    open_handles = []  # keep the chart files referenced until the write is done
    for pdf in self.getExternalPdf(self.eVfrPath, aerodrome_code):
        handle = file(pdf, "rb")
        reader = PdfFileReader(handle)
        if reader.getIsEncrypted():
            pdfCracked = PdfCracker().crack(pdf)
            handle = file(pdfCracked, "rb")
            reader = PdfFileReader(handle)
        open_handles.append(handle)
        self.addAllPages(output=output, input=reader)

    # write out the merged file
    outputPdf = os.path.join(self.outputdir, 'flightplan %s.pdf' % self.__fligthplan.title)
    outputStream = file(outputPdf, "wb")
    output.write(outputStream)
    outputStream.close()

    npyscreen.notify_confirm(
        message="Your pretty Fligthplan has been created at\n\n%s" % outputPdf
    )
def rewrite(self, context, font={'name': 'Times-Roman', 'size': 11}):
    """Stamp text onto the first page of ``self.path``; save to ``self.destination``.

    :param context: iterable of dicts with the keys ``'x'``, ``'y'`` and
        ``'value'``; each value is drawn at its coordinates.
    :param font: dict with the keys ``'name'`` and ``'size'`` for the text.
    :returns: True once the file has been written.

    Only the first page of the existing PDF ends up in the output.
    """
    # Draw all strings onto a one-page in-memory PDF with ReportLab.
    overlay_buffer = StringIO.StringIO()
    overlay = canvas.Canvas(overlay_buffer, pagesize=letter)
    overlay.setFont(font['name'], font['size'])
    for item in context:
        overlay.drawString(item['x'], item['y'], item['value'])
    overlay.save()

    # Rewind so the reader starts at the beginning of the buffer.
    overlay_buffer.seek(0)
    overlay_pdf = PdfFileReader(overlay_buffer)

    # Lay the overlay on top of page 1 of the existing document.
    source_pdf = PdfFileReader(open(self.path, "rb"))
    stamped = source_pdf.getPage(0)
    stamped.mergePage(overlay_pdf.getPage(0))

    writer = PdfFileWriter()
    writer.addPage(stamped)

    # Finally, write the result to a real file.
    target = open(self.destination, "wb")
    writer.write(target)
    target.close()
    return True
def showpdf(request):
    """Return the first page of an extracted PDF with the signature stamped on.

    Query parameters:
      ``f`` -- file name inside ``MEDIA_ROOT/pdffiles/extracted``; it must be
               a bare file name without any directory part.
      ``o`` -- ``'l'`` places the signature at the position used for
               landscape pages; any other (or missing) value uses the
               default position.

    :returns: an ``HttpResponse`` carrying the PDF as an attachment, an
        empty 400 response when ``f`` is not a bare file name, or ``None``
        when ``f`` is absent.
    """
    sign = os.path.join(settings.MEDIA_ROOT, "signature.png")
    mimetypes.init()
    response = None
    if 'f' in request.GET:
        requested = request.GET['f']
        # ``f`` comes straight from the client: accept plain file names only
        # so '../' sequences or absolute paths cannot escape the directory.
        safe_name = os.path.basename(requested)
        if safe_name != requested or safe_name in ('', '.', '..'):
            return HttpResponse(status=400)

        fr = open(os.path.join(settings.MEDIA_ROOT, 'pdffiles', 'extracted', safe_name), "rb")
        try:
            # Draw the signature image onto a one-page overlay PDF.
            imgTemp = StringIO()
            imgDoc = canvas.Canvas(imgTemp)
            if request.GET.get('o') == 'l':
                imgDoc.drawImage(sign, 529, 40, 290/2, 154/2)
            else:
                imgDoc.drawImage(sign, 70, 40, 290/2, 154/2)
            imgDoc.save()

            overlay = PdfFileReader(StringIO(imgTemp.getvalue())).getPage(0)
            page = PdfFileReader(fr).getPage(0)
            page.mergePage(overlay)

            pdf_out = PdfFileWriter()
            pdf_out.addPage(page)
            response = HttpResponse(mimetype='application/pdf')
            response['Content-Disposition'] = 'attachment; filename=%s' % safe_name
            # The source must still be open while the page is serialised.
            pdf_out.write(response)
        finally:
            fr.close()
    return response
def watermark( self, pdfStr, watermarkFile, spec ):
    """Put the first page of *watermarkFile* behind pages of a generated PDF.

    :param pdfStr: the generated PDF, handed to ``PdfFileReader`` as-is.
    :param watermarkFile: path of the PDF whose first page is the watermark.
    :param spec: ``Mark.ALL_PAGES`` marks every page, ``Mark.FIRST_PAGE``
        only the first one; any other value leaves all pages untouched.
    :returns: ``self.outputFile`` after writing to it when that attribute is
        set, otherwise the resulting PDF content as a string.
    """
    watermark_reader = PdfFileReader( open( watermarkFile, "rb" ) )
    document = PdfFileReader( pdfStr )
    result = PdfFileWriter()

    stamp = watermark_reader.getPage(0)
    for index, page in enumerate( document.pages ):
        marked = spec == Mark.ALL_PAGES or (spec == Mark.FIRST_PAGE and index == 0)
        if not marked:
            result.addPage( page )
            continue
        # Merge onto a fresh copy of the watermark each time; merging onto
        # the shared page would pile every document page onto it.
        base = copy.copy( stamp )
        base.mergePage( page )
        result.addPage( base )

    if not self.outputFile:
        in_memory = StringIO.StringIO()
        result.write( in_memory )
        return in_memory.getvalue()

    # Write to outputfile
    target = open( self.outputFile, "wb" )
    result.write( target )
    target.close()
    return self.outputFile
def main():
    """Combine the PDFs named on the command line into one time-stamped PDF.

    The output is written to the current directory as
    ``YYYY-MM-DD_HH-MM-SS.pdf`` (local time).  With no arguments the usage
    text is printed; with a single argument the script refuses to run.
    """
    # Parse command line
    pdf_files = sys.argv[1:]
    if not pdf_files:
        print(__usage__)
        sys.exit()

    # Make sure there is more than one pdf file
    if len(pdf_files) == 1:
        print("In the spirit of gnu tar, this script cowardly refuses to")
        print("combine one pdf file!")
        sys.exit()

    # Create unique name for output file: zero-padded local-time fields
    stamp = [str(field).zfill(2) for field in time.localtime()]
    stamp[0] = stamp[0].zfill(4)
    output_file = "%s-%s-%s_%s-%s-%s.pdf" % tuple(stamp[:6])

    # Combine pdf files in order
    writer = PdfFileWriter()
    for pdf in pdf_files:
        reader = PdfFileReader(open(pdf,"rb"))
        for index in range(reader.getNumPages()):
            writer.addPage(reader.getPage(index))

    # Write final pdf
    stream = open(output_file,"wb")
    writer.write(stream)
    stream.close()
def write_pdf(pdf, part_count_ye, part_count_ye_end, fen, output_file):
    """Save a page range of *pdf* as ``<output_file>_<fen + 1>.pdf``.

    :param pdf: reader providing ``getPage``.
    :param part_count_ye: 0-based index of the first page to copy.
    :param part_count_ye_end: 0-based index one past the last page to copy.
    :param fen: 0-based part number; the file name carries ``fen + 1``.
    :param output_file: path prefix of the file to create.
    """
    writer = PdfFileWriter()
    page_index = part_count_ye
    while page_index < part_count_ye_end:
        writer.addPage(pdf.getPage(page_index))
        page_index += 1

    target_name = output_file + '_' + str(fen + 1) + '.pdf'
    target = open(target_name, 'wb')
    writer.write(target)
    target.close()
def join_pages(composites):
    """Build the searchable PDF from the painted page images.

    Every page collection from ``collect_pages(composites)`` is rendered by
    ``paint_original_segments``; the first page of each resulting PDF is
    appended to one document stored as ``searchable_*.pdf`` under
    ``./static/images/``.  The path is reported on stdout as JSON under the
    key ``'searchable'``.

    NOTE: the LaTeX-based "transcribed" PDF (assemble_latex / latex_to_pdf)
    is currently disabled, so no ``'transcribed'`` key is emitted.
    """
    page_fnames = []
    for page_num, collection in enumerate(collect_pages(composites)):
        fnames, transcriptions = [], []
        for r in collection:
            fnames.append(r['location'])
            transcriptions.append(r['transcription'])
        page_fnames.append(paint_original_segments(fnames, transcriptions, page_num))

    # searchable pdf
    pdf_writer = PdfFileWriter()
    pdf_pages = []  # page files must stay open until the PDF is written
    try:
        for page_fname in page_fnames:
            pdf_pages.append(open(page_fname, 'rb'))
            pdf_reader = PdfFileReader(pdf_pages[-1])
            pdf_writer.addPage(pdf_reader.getPage(0))

        searchable_pdf = NamedTemporaryFile(prefix='searchable_',
                                            suffix='.pdf',
                                            dir=path.abspath('./static/images/'),
                                            delete=False)
        try:
            pdf_writer.write(searchable_pdf)
        finally:
            searchable_pdf.close()
    finally:
        # Explicit loop: map() is lazy on Python 3 and would never run the
        # close() calls.
        for page_file in pdf_pages:
            page_file.close()

    json.dump({
        'searchable': searchable_pdf.name
    }, sys.stdout)
def add_guides(self):
    """Copy ``sig.pdf`` to ``sigs.pdf`` with guides merged onto the first page.

    The guide overlay is long-arm (A4 landscape size) when
    ``self.args.longarm`` is set; otherwise it is short-arm, sized A5 or A4
    landscape depending on ``self.args.a5``.
    """
    reader = PdfFileReader(open('sig.pdf', 'rb'))
    writer = PdfFileWriter()

    for index in xrange(reader.getNumPages()):
        page = reader.getPage(index)
        if index == 0:
            guides = StringIO()
            if self.args.longarm:
                create_pdf(
                    guides, a4lwidth_pt, a4lheight_pt, generate_longarm())
            else:
                if self.args.a5:
                    size = (a5width_pt, a5height_pt)
                else:
                    size = (a4lwidth_pt, a4lheight_pt)
                create_pdf(guides, size[0], size[1], generate_shortarm(
                    self.args.a5, bool(self.args.signature)))
            overlay = PdfFileReader(guides)
            page.mergePage(overlay.getPage(0))
        writer.addPage(page)

    writer.write(open('sigs.pdf', 'wb'))
def renderToPdf(envLL, filename, sizex, sizey): """Renders the specified Box2d and zoom level as a PDF""" basefilename = os.path.splitext(filename)[0] mergedpdf = None for mapname in MAPNIK_LAYERS: print 'Rendering', mapname # Render layer PDF. localfilename = basefilename + '_' + mapname + '.pdf'; file = open(localfilename, 'wb') surface = cairo.PDFSurface(file.name, sizex, sizey) envMerc = LLToMerc(envLL) map = mapnik.Map(sizex, sizey) mapnik.load_map(map, mapname + ".xml") map.zoom_to_box(envMerc) mapnik.render(map, surface) surface.finish() file.close() # Merge with master. if not mergedpdf: mergedpdf = PdfFileWriter() localpdf = PdfFileReader(open(localfilename, "rb")) page = localpdf.getPage(0) mergedpdf.addPage(page) else: localpdf = PdfFileReader(open(localfilename, "rb")) page.mergePage(localpdf.getPage(0)) output = open(filename, 'wb') mergedpdf.write(output) output.close()
def logic1(self):
    """Fetch a fixed set of PDFs in parallel threads and merge them.

    One thread per URL runs ``self.dPdf(url, index)``; after all threads
    have finished, every page of every reader found in ``self.pdfList`` is
    written to ``merged_pdf.pdf``.  Timing information is printed.

    NOTE(review): presumably ``dPdf`` fills ``self.pdfList``; the merge
    order is whatever order that list ends up in -- confirm.
    """
    def timestamp():
        return datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    start = time.time()
    print("Starting PDF Fetching at: %s" % timestamp())

    # https://www.example.com/1.pdf ... https://www.example.com/25.pdf
    urls = ["https://www.example.com/%d.pdf" % number for number in range(1, 26)]

    writer = PdfFileWriter()
    threadLists = []
    for i, url in enumerate(urls):
        thread = threading.Thread(target=self.dPdf, args=(url, i))
        thread.name = "T%d" % (i + 1)
        threadLists.append(thread)

    for it in threadLists:
        it.start()
    for it in threadLists:
        it.join()

    print("PDF Fetch completed at: %s" % timestamp())
    end1 = time.time()
    # Report the real number of documents instead of a hard-coded 100.
    print("%s sec to fetch %d PDFs" % (end1 - start, len(urls)))

    print("Starting PDF merging at: %s" % timestamp())
    for pdf in self.pdfList:
        for pageNum in range(pdf.getNumPages()):
            writer.addPage(pdf.getPage(pageNum))

    outputStream = open("merged_pdf.pdf", "wb")
    try:
        writer.write(outputStream)
    finally:
        outputStream.close()
    end = time.time()

    print("Completed PDF merging at: %s" % timestamp())
    print("%s sec to fetch and merge %d PDFs" % (end - start, len(urls)))
    print("Open file merged_pdf.pdf")
def protect(ifname, ofname, password):
    """Write a password-encrypted copy of a PDF.

    :param ifname: path of the PDF to read.
    :param ofname: path of the encrypted PDF to create.
    :param password: password passed to the writer's ``encrypt``.
    """
    with open(ifname, "rb") as source, open(ofname, "wb") as target:
        reader = PdfFileReader(source)
        writer = PdfFileWriter()

        page_index = 0
        page_total = reader.getNumPages()
        while page_index < page_total:
            writer.addPage(reader.getPage(page_index))
            page_index += 1

        # Encryption is set up after all pages are added, before writing.
        writer.encrypt(password)
        writer.write(target)
def pageextract(document, pagenum, outfile):
    """Save a single page of a PDF as its own file.

    :param document: path of the source PDF.
    :param pagenum: 0-based page index; anything ``int()`` accepts.
    :param outfile: path of the one-page PDF to create.
    """
    reader = PdfFileReader(open(document, "rb"))
    page_index = int(pagenum)

    writer = PdfFileWriter()
    writer.addPage(reader.getPage(page_index))

    target = open(outfile, 'wb')
    writer.write(target)
    target.close()
def create_source_pdf(self, cr, uid, ids, data, report_xml, context=None):
    """Build the report PDF, reusing or storing per-record attachments.

    When ``report_xml.attachment`` is set, each record is rendered on its
    own (or its stored attachment is reused); if the first collected result
    is a PDF, all results are concatenated and returned as
    ``(pdf_bytes, 'pdf')``.  In every other case all ids are rendered in a
    single ``create_single_pdf`` call.

    Returns ``False`` as soon as rendering one record fails.
    """
    if not context:
        context = {}
    pool = pooler.get_pool(cr.dbname)
    attach = report_xml.attachment
    if attach:
        objs = self.getObjects(cr, uid, ids, context)
        results = []
        for obj in objs:
            # ``attach`` is an expression evaluated with ``object`` and
            # ``time`` in scope; its result is the attachment base name.
            # NOTE(review): eval() of a stored expression -- make sure only
            # trusted users can edit it.
            aname = eval(attach, {'object': obj, 'time': time})
            result = False
            if report_xml.attachment_use and aname and context.get('attachment_use', True):
                # Look for an already stored '<aname>.pdf' for this record.
                aids = pool.get('ir.attachment').search(cr, uid, [('datas_fname', '=', aname + '.pdf'), ('res_model', '=', self.table), ('res_id', '=', obj.id)])
                if aids:
                    brow_rec = pool.get('ir.attachment').browse(cr, uid, aids[0])
                    # An attachment without data is skipped entirely: the
                    # record contributes nothing to ``results``.
                    if not brow_rec.datas:
                        continue
                    d = base64.decodestring(brow_rec.datas)
                    results.append((d, 'pdf'))
                    continue
            result = self.create_single_pdf(cr, uid, [obj.id], data, report_xml, context)
            if not result:
                return False
            if aname:
                try:
                    name = aname + '.' + result[1]
                    # Remove the default_type entry from the context: this
                    # is for instance used on the account.account_invoices
                    # and is thus not intended for the ir.attachment type
                    # field.
                    ctx = dict(context)
                    ctx.pop('default_type', None)
                    pool.get('ir.attachment').create(cr, uid, {
                        'name': aname,
                        'datas': base64.encodestring(result[0]),
                        'datas_fname': name,
                        'res_model': self.table,
                        'res_id': obj.id,
                        }, context=ctx
                    )
                except Exception:
                    # Failing to store the attachment is logged, not fatal:
                    # the freshly rendered result is still returned below.
                    # TODO: should probably raise a proper osv_except instead, shouldn't we? see LP bug #325632
                    _logger.error('Could not create saved report attachment', exc_info=True)
            results.append(result)
        if results:
            if results[0][1] == 'pdf':
                # Concatenate all per-record PDFs into one document.
                from pyPdf import PdfFileWriter, PdfFileReader
                output = PdfFileWriter()
                for r in results:
                    reader = PdfFileReader(cStringIO.StringIO(r[0]))
                    for page in range(reader.getNumPages()):
                        output.addPage(reader.getPage(page))
                s = cStringIO.StringIO()
                output.write(s)
                return s.getvalue(), results[0][1]
    # Fallback: no attachment expression, nothing collected, or the first
    # result is not a PDF.
    return self.create_single_pdf(cr, uid, ids, data, report_xml, context)
def toPdf(self, texfile=None, outdir=None, tempdir=None):
    """Generate the actual pdf figures.

    First, the document is built. Then pdflatex runs on the document.
    Afterwards, the newly generated pdf is split into single pages, one
    graphic per page. Finally, each page is cropped by pdfcrop and copied
    to the output directory.

    :param texfile: job name for pdflatex (validated by the instance).
    :param outdir: directory receiving the final ``<figure>.pdf`` files.
    :param tempdir: directory for intermediate files.

    Exits the process with status 1 when pdflatex fails.
    """
    # building document and output path
    document = self.__buildDocument()
    texfile = self.__getValidTexfile(texfile)
    destdir = self.__getValidOutdir(outdir)
    tempdir = self.__getValidTempdir(tempdir)

    # run pdflatex
    info("Run pdflatex on %s..." %texfile)
    cmd = "pdflatex -jobname %s -output-directory %s" %(texfile, tempdir)
    try:
        if self.__debug:
            call(cmd, input=document)
        else:
            call(cmd, input=document, noOutput=True)
    except CommandFailed:
        error("pdflatex fails. Please re-run '%s' with --debug to get "\
              "pdflatex'es output "%(os.path.basename(sys.argv[0])))
        sys.exit(1)

    # open new generated pdf file -- in binary mode: PDF data is not text
    combinedPDF = os.path.join(tempdir, "%s.pdf" %texfile)
    with open(combinedPDF, "rb") as combined:
        pdfIn = PdfFileReader(combined)

        # iterate over the figures to split and crop the pdf
        for page, figure in enumerate(self.__figures):
            # output path
            splitPDF = os.path.join(tempdir, "%s.a4.pdf" %figure)
            cropPDF = os.path.join(tempdir, "%s.crop.pdf" %figure)

            # split pdf into multiple pdf files
            info("Write PDF file %s..." %figure)
            pdfOut = PdfFileWriter()
            pdfOut.addPage(pdfIn.getPage(page))
            with open(splitPDF, "wb") as filestream:
                pdfOut.write(filestream)

            # crop pdf
            info("Run pdfcrop on %s..." %figure)
            cmd = "pdfcrop %s %s" %(splitPDF, cropPDF)
            if self.__debug:
                call(cmd)
            else:
                call(cmd, noOutput = True)

            # copy cropped pdfs to final destination
            pdfDst = os.path.join(destdir, "%s.pdf" %figure)
            if not self.__force and os.path.exists(pdfDst):
                error("%s already exists. Skipped." %pdfDst)
            else:
                shutil.copy(cropPDF, pdfDst)
def OCR(self, fn, resolution=300, verbose=False, part=''):
    """Run OCR over every page of the PDF ``fn``.

    Each page is written to a temporary single-page PDF, rasterised to a
    JPEG with PythonMagick at ``resolution`` dpi, and passed to
    ``image_to_string``. Temporary files are named
    ``tmp-<part>-<page>.pdf`` / ``.jpg`` in the current directory and are
    removed again, even when a step fails.

    :param fn: path of the PDF to read.
    :param resolution: density used for the PDF-to-JPEG conversion.
    :param verbose: print a progress marker for every page.
    :param part: tag embedded in the temporary file names.
    :returns: an ``OCRResult`` holding the raw text, the cleaned text, the
        page count and one ``OCRPage`` per page; ``False`` when the PDF is
        encrypted and cannot be decrypted with an empty password.
    """
    part = str(part)
    pagedata = []
    texts = []
    with open(fn, 'rb') as source:
        pdf = PdfFileReader(source)
        if pdf.getIsEncrypted() and not pdf.decrypt(''):
            return False

        for i, p in enumerate(pdf.pages, 1):
            if verbose:
                print(' --- ' + str(i))

            # Temporary filenames for ImageMagick conversion
            pgfile = 'tmp-' + part + '-' + str(i) + '.pdf'
            pgfilejpg = 'tmp-' + part + '-' + str(i) + '.jpg'
            try:
                # Write this page out as a stand-alone PDF
                output = PdfFileWriter()
                output.addPage(p)
                with open(pgfile, 'wb') as outputStream:
                    output.write(outputStream)

                # Convert this page to a high-resolution JPEG
                img = PythonMagick.Image()
                img.density(str(resolution))
                img.read(pgfile)
                img.write(pgfilejpg)

                # OCR the converted JPG; a fourth band is dropped first
                im = Image.open(pgfilejpg)
                if len(im.split()) == 4:
                    r, g, b, a = im.split()
                    im = Image.merge('RGB', (r, g, b))
                t = image_to_string(im)
            finally:
                # Cleanup, whether or not the conversion succeeded
                for tmp in (pgfile, pgfilejpg):
                    if os.path.exists(tmp):
                        os.remove(tmp)

            # Add to data object
            pagedata.append(OCRPage(i, t, self.OCRCleanup(t)))
            texts.append(t)

    # Produce the output data object
    text = ''.join(texts)
    return OCRResult(text, self.OCRCleanup(text), len(pagedata), pagedata)
def add_terms_and_conditions(self, ids, original_report_pdf, original_report):
    """Append the company's terms-and-conditions PDF to a rendered report.

    The document language is read from the field named by
    ``original_report.terms_conditions_language_field`` on the browsed
    record(s), falling back to the context language. The company's terms
    entry for that language is used; if there is none, the entry whose
    language is ``'default'`` is used instead.

    :param ids: ids of the records the report was rendered for.
    :param original_report_pdf: PDF content of the rendered report.
    :param original_report: report definition record.
    :returns: the combined PDF content, or ``original_report_pdf`` unchanged
        when no usable terms document exists.
    """
    user = self.env['res.users'].browse(self._uid)
    # todo change user language to report language (client language)
    language_field = original_report.terms_conditions_language_field
    records = self.env[original_report.model].browse(ids)
    localdict = {'o': records}
    eval('document_language = o.%s' % language_field, localdict,
         mode="exec", nocopy=True)
    document_language = localdict.get('document_language',
                                      self._context.get('lang'))
    company = records.company_id
    # todo check language
    language_pdf = False
    default_pdf = False
    for terms in company.terms_and_conditions:
        # No break: the last matching entry wins, as before.
        if terms.language == document_language:
            language_pdf = base64.decodestring(terms.datas)
        if terms.language == 'default':
            default_pdf = base64.decodestring(terms.datas)
    terms_pdf = language_pdf or default_pdf or False
    if not terms_pdf:
        return original_report_pdf

    writer = PdfFileWriter()
    report_reader = PdfFileReader(StringIO(original_report_pdf))
    terms_reader = PdfFileReader(StringIO(terms_pdf))
    for reader in (report_reader, terms_reader):
        for index in range(0, reader.getNumPages()):
            writer.addPage(reader.getPage(index))
    combined = StringIO()
    writer.write(combined)
    return combined.getvalue()
def mergePDFs(PDFList, outputFileName):
    """Merge all PDFs in ``PDFList`` into ``outputFileName``.

    The working directory is changed to its parent before the output file
    is opened, so a relative ``outputFileName`` is resolved against the
    parent of the directory the inputs were read from.

    :param PDFList: paths of the PDFs to merge, in order.
    :param outputFileName: path of the merged PDF to write.
    """
    output = PdfFileWriter()
    inputStreams = []
    try:
        for names in PDFList:
            stream = open(names, "rb")
            inputStreams.append(stream)
            append_pdf(PdfFileReader(stream), output)
        os.chdir('..')
        with open(outputFileName, "wb") as outputStream:
            output.write(outputStream)
    finally:
        # The inputs must stay open until write() has run; close them
        # afterwards instead of leaking the handles.
        for stream in inputStreams:
            stream.close()
def _put_text_fields(self): from pyPdf import PdfFileWriter, PdfFileReader from reportlab.pdfgen import canvas from reportlab.lib.pagesizes import letter from reportlab.pdfbase.ttfonts import TTFont from reportlab.platypus import Paragraph packet = StringIO() can = canvas.Canvas(packet, pagesize=letter, bottomup=0) width, height = letter # for page_num, labels in self.paginated_labels.items(): for page_num in xrange(self.pages_number): labels = self.paginated_labels.get(page_num + 1, []) for label in labels: font_size = int(re.findall('\d+', label['fontSize'])[0]) print '* fontSize:', font_size can.setFont('Helvetica', font_size) if label['tabLabel'] == 'paragraph': style = self.styles['BodyText'] p = Paragraph(unicode(label['value'].strip().replace( '\n', '<br />\n')), style=style) w, h = p.wrapOn(can, int(label['width']), height) p.drawOn( can, int(label['xPosition']), int(label['yPosition']) - h + 20, ) else: can.drawString( int(label['xPosition']) + self.page_offsets[page_num][0], int(label['yPosition']) + self.page_offsets[page_num][1], unicode(label['value'].strip())) can.showPage() can.save() packet.seek(0) new_pdf = PdfFileReader(packet) existing_pdf = PdfFileReader(self.input_buffer) output = PdfFileWriter() self.output_buffer = StringIO() for page_num in xrange(existing_pdf.getNumPages()): page = existing_pdf.getPage(page_num) try: page.mergePage(new_pdf.getPage(page_num)) except IndexError: pass output.addPage(page) output.write(self.output_buffer)
def generate(job_name, additional_dirs=None):
    """Assemble the booklet ``final/<job_name>.pdf`` under ``dir_path``.

    Page order: the job's cover page, the white cover back, the cover
    page document, then every PDF found in ``slides-<job_name>`` and in
    ``additional_dirs`` (sorted by file name), and finally the survey and
    the back cover.

    * Files whose name starts with ``"X "`` are skipped.
    * ``*.slides.pdf`` files are scaled and cropped, and each slide is
      followed by a page of lines.
    * Any other PDF is preceded by a page of lines and padded with one
      more when the running page count would otherwise be odd.

    :param job_name: base name of the cover PDF, slide directory and output.
    :param additional_dirs: extra directories to scan after the slides
        directory; ``None`` means no extra directories.
    """
    print('Generating ' + job_name)
    total_pages = 0
    if additional_dirs is None:
        additional_dirs = []
    output = PdfFileWriter()
    page_of_lines = PdfFileReader(
        open(dir_path + "/essentials/PageOfLines.pdf", 'rb')).getPage(0)
    cover = PdfFileReader(
        open(dir_path + "/essentials/%s.pdf" % job_name, 'rb'))
    output.addPage(cover.getPage(0))
    total_pages += 1
    total_pages += add_pdf(dir_path + "/essentials/cover-white-back.pdf",
                           output)
    total_pages += add_pdf(dir_path + "/essentials/cover_page.pdf", output)

    for directory in [dir_path + "/slides-" + job_name] + additional_dirs:
        for filename in sorted(os.listdir(directory)):
            full_path = os.path.join(directory, filename)
            # listdir() yields bare names, so the directory test has to use
            # the joined path rather than the name relative to the cwd.
            if os.path.isdir(full_path) or not filename.endswith(".pdf"):
                continue
            if filename.startswith("X "):
                continue
            if filename.endswith(".slides.pdf"):
                print(filename)
                pdf = PdfFileReader(open(full_path, "rb"))
                for page_num in xrange(pdf.numPages):
                    p = pdf.getPage(page_num)
                    p.scaleBy(0.8)
                    p.cropBox = p.mediaBox = p.artBox = p.trimBox = \
                        RectangleObject([35, 0, 455, 595])
                    output.addPage(p)
                    output.addPage(page_of_lines)
                    total_pages += 2
            else:
                output.addPage(page_of_lines)
                total_pages += add_pdf(full_path, output) + 1
                # NOTE(review): assumed to pad after each regular PDF; the
                # original nesting of this check could not be recovered
                # from the source -- confirm.
                if total_pages % 2 != 0:
                    output.addPage(page_of_lines)  # even out number of pages
                    total_pages += 1

    if total_pages % 2 != 0:
        output.addPage(page_of_lines)  # even out number of pages
        total_pages += 1
    total_pages += add_pdf(dir_path + "/essentials/survey.pdf", output)
    total_pages += add_pdf(dir_path + "/essentials/back-cover.pdf", output)

    # Writing all the collected pages to a file
    with open(dir_path + "/final/%s.pdf" % job_name, "wb") as f:
        output.write(f)
def print_danfe(invoices):
    """Generate the DANFE for every invoice and return one merged PDF.

    For each invoice a DANFE is generated under ``/tmp/`` from its NF-e XML
    and the invoice is flagged as printed. If the invoice has CC-e events,
    a DAEDE listing them is generated as well. All generated PDFs are then
    concatenated in generation order.

    :param invoices: iterable of invoice records.
    :returns: content of the merged PDF as a string.
    """
    paths = []
    for inv in invoices:
        # NOTE(review): an nfe_version other than these three leaves
        # ``procnfe`` unset (or reuses the previous invoice's) -- confirm
        # the set of supported versions.
        if inv.nfe_version == '1.10':
            from pysped.nfe.leiaute import ProcNFe_110
            procnfe = ProcNFe_110()
        elif inv.nfe_version == '2.00':
            from pysped.nfe.leiaute import ProcNFe_200
            procnfe = ProcNFe_200()
        elif inv.nfe_version == '3.10':
            from pysped.nfe.leiaute import ProcNFe_310
            procnfe = ProcNFe_310()

        file_xml = monta_caminho_nfe(inv.company_id, inv.nfe_access_key)
        if inv.state not in ('open', 'paid', 'sefaz_cancelled'):
            file_xml = os.path.join(file_xml, 'tmp/')
        procnfe.xml = os.path.join(file_xml, inv.nfe_access_key + '-nfe.xml')

        danfe = DANFE()
        danfe.logo = add_backgound_to_logo_image(inv.company_id)
        danfe.NFe = procnfe.NFe
        danfe.leiaute_logo_vertical = inv.company_id.nfe_logo_vertical
        danfe.protNFe = procnfe.protNFe
        danfe.caminho = "/tmp/"
        danfe.gerar_danfe()
        paths.append(danfe.caminho + danfe.NFe.chave + '.pdf')
        inv.is_danfe_printed = True

        if inv.cce_document_event_ids:
            daede = DAEDE()
            daede.logo = add_backgound_to_logo_image(inv.company_id)
            daede.NFe = procnfe.NFe
            daede.protNFe = procnfe.protNFe
            for item, _event in enumerate(inv.cce_document_event_ids, 1):
                proc_evento = ProcEventoCCe_100()
                doc_item = str(item).zfill(2)
                proc_evento.xml = os.path.join(
                    file_xml,
                    inv.nfe_access_key + '-' + doc_item + '-cce.xml')
                daede.procEventos.append(proc_evento)
            daede.caminho = "/tmp/"
            daede.gerar_daede()
            paths.append(daede.caminho + 'eventos-' + daede.NFe.chave +
                         '.pdf')

    output = PdfFileWriter()
    streams = []
    s = StringIO()
    try:
        for path in paths:
            stream = open(path, "rb")
            streams.append(stream)
            pdf = PdfFileReader(stream)
            for i in range(pdf.getNumPages()):
                output.addPage(pdf.getPage(i))
        # Serialise once, after every page is collected, while the input
        # files are still open.
        output.write(s)
        str_pdf = s.getvalue()
    finally:
        s.close()
        for stream in streams:
            stream.close()
    return str_pdf
def makeOnePagersOld(filename='GPO-CONAN-REV-2014.pdf', path='pdf/'):
    """Split ``filename`` into one single-page PDF per page.

    The page count is printed, then page ``i`` (0-based) is written to
    ``<path>pageindex-<i, two digits>.pdf``.

    :param filename: path of the PDF to split.
    :param path: prefix (normally a directory ending in ``/``) prepended to
        every output file name.
    """
    with open(filename, 'rb') as source:
        infile = PdfFileReader(source)
        page_count = infile.getNumPages()
        print(page_count)
        for i in range(page_count):
            outfile = PdfFileWriter()
            outfile.addPage(infile.getPage(i))
            with open(path + 'pageindex-%02d.pdf' % i, 'wb') as outputStream:
                outfile.write(outputStream)
def combining_PDFs(fip_list, out_path):
    """Concatenate the PDFs in ``fip_list`` into one file.

    Each input path is printed as it is processed. The result is written to
    ``<out_path>/CombinedPages.pdf``.

    :param fip_list: paths of the input PDFs, in output order.
    :param out_path: directory that receives ``CombinedPages.pdf``.
    """
    def append_pdf(reader, writer):
        # Copy every page of ``reader`` into ``writer``.
        for page_num in range(reader.numPages):
            writer.addPage(reader.getPage(page_num))

    output = PdfFileWriter()
    streams = []
    try:
        for f in fip_list:
            print(f)
            stream = open(f, "rb")
            streams.append(stream)
            append_pdf(PdfFileReader(stream), output)
        out_name = out_path + '/' + "CombinedPages.pdf"
        # Close the output explicitly so the data is flushed to disk.
        with open(out_name, "wb") as out_stream:
            output.write(out_stream)
    finally:
        # The inputs are only needed until write() has run.
        for stream in streams:
            stream.close()
def create_merged_pdf():
    """Merge the first page of every referenced figure PDF into ``merged``.

    Reads the module-level iterable ``filename``: each line is split on
    ``:`` and ``-``, re-joined with ``-`` and used as the base name of
    ``figs/<name>.pdf``. The first page of each such file is appended to
    the output, which is written to the module-level path ``merged``.
    """
    output = PdfFileWriter()
    streams = []
    try:
        for line in filename:
            fields = re.split('[:-]', line.strip())
            picname = '-'.join(fields)
            stream = open('figs/' + picname + ".pdf", "rb")
            streams.append(stream)
            review_pdf = PdfFileReader(stream)
            output.addPage(review_pdf.getPage(0))
        with open(merged, "wb") as outputStream:
            output.write(outputStream)
    finally:
        # The figure files must stay open until write() has run; release
        # them afterwards instead of leaking the handles.
        for stream in streams:
            stream.close()
def doc_overlay(request, document_uuid, lot_number, qrcode=True):
    """Serve a stored document with a lot caption and QR code stamped on it.

    The document's file is downloaded from its URL, a one-page overlay is
    drawn with ReportLab (a centred caption near the top edge of the first
    page's media box, plus a QR code built from the request host and the
    report's absolute URL), and that overlay is merged onto every page.
    The view is recorded through ``action.send``.

    :param request: the current HTTP request.
    :param document_uuid: uuid of the ``Document`` to serve.
    :param lot_number: lot number of the ``Report`` named in the caption.
    :param qrcode: accepted but not used by this view.
    :returns: an ``HttpResponse`` carrying the stamped PDF.
    """
    report = Report.objects.get(lot_number=lot_number)
    document = Document.objects.get(uuid=document_uuid)
    response = HttpResponse(content_type='application/pdf')
    response['Content-Disposition'] = 'filename="inspection_report.pdf"'

    # Read the existing PDF into memory.
    source_bytes = urlopen(Request(document.file.url)).read()
    existing_pdf = PdfFileReader(StringIO(source_bytes))
    page_count = existing_pdf.getNumPages()
    first_box = existing_pdf.getPage(0).mediaBox
    width = float(first_box.getWidth())
    height = float(first_box.getHeight())

    # Draw the overlay with ReportLab.
    overlay_buffer = StringIO()
    overlay = canvas.Canvas(overlay_buffer, pagesize=letter)
    overlay.setFillColorRGB(0, 0, 0)
    overlay.setFont("Helvetica", 7)
    caption = "%s LOT # %s / %s (doc# %s)" % (
        settings.PDF_COMPANY_SHORT_NAME, report.lot_number,
        str(report.created_at.date()), document.uuid)
    overlay.drawCentredString(width / 2.0, height - 9.0, caption)
    barcode = createBarcodeDrawing(
        'QR',
        value="%s%s" % (request.META['HTTP_HOST'],
                        report.get_absolute_url()))
    barcode.drawOn(overlay, 175 * mm, 10 * mm)
    overlay.save()

    # Rewind the buffer so the overlay can be read back as a PDF.
    overlay_buffer.seek(0)
    stamp = PdfFileReader(overlay_buffer).getPage(0)

    # Stamp the overlay on every page and stream the result to the response.
    stamped = PdfFileWriter()
    for index in range(0, page_count):
        page = existing_pdf.getPage(index)
        page.mergePage(stamp)
        stamped.addPage(page)
    stamped.write(response)

    action.send(request.user, verb="viewed document",
                action_object=document, target=report)
    return response
def make_watermark_pdf(self, number, attachment):
    """Stamp a text watermark on every page of a stored PDF attachment.

    A temporary one-page watermark PDF is drawn next to the stored file,
    merged onto each page of the attachment, and the stamped result then
    replaces the original file under the same name. The working directory
    is changed to the attachment's folder while the files are swapped.

    :param number: text drawn as the watermark (three times per page).
    :param attachment: sequence whose first item is a mapping with a
        ``store_fname`` of the form ``<folder>/<file>``.
    :return: None
    """
    print(u'准备给PDF打水印----看以下输出的日志是否正确-通用模块')
    print(attachment)
    print(number)

    store_root = self._filestore()
    store_fname = attachment[0]['store_fname']
    name_parts = store_fname.split('/')
    folder = store_root + '/' + name_parts[0] + "/"
    old_file_name = name_parts[1]

    mark = canvas.Canvas(folder + u"shuiyin.pdf")
    mark.setFont("Courier", 35)
    # Grey level (and alpha) of the watermark text
    mark.setFillGray(0.4, 0.4)
    # Draw the text rotated by 45 degrees
    mark.saveState()
    mark.rotate(45)  # rotation angle
    mark.translate(500, 100)  # move the origin
    mark.drawCentredString(100, 270, number)
    mark.drawCentredString(0, 0, number)  # centred on the new origin
    mark.drawCentredString(-100, -300, number)
    mark.restoreState()
    mark.save()

    stamped = PdfFileWriter()
    source_file = open(store_root + '/' + store_fname, 'rb')
    source_pdf = PdfFileReader(source_file)
    mark_file = open(folder + u'shuiyin.pdf', 'rb')
    mark_pdf = PdfFileReader(mark_file)

    # Stamp every page of the source document
    for index in range(source_pdf.getNumPages()):
        page = source_pdf.getPage(index)
        page.mergePage(mark_pdf.getPage(0))
        stamped.addPage(page)

    # Write the stamped document to a temporary output file
    result_file = open(folder + u'shuchuwenjian.pdf', 'wb')
    stamped.write(result_file)
    source_file.close()
    mark_file.close()
    result_file.close()

    os.chdir(folder)
    os.remove("shuiyin.pdf")  # delete the watermark file
    os.remove(old_file_name)  # delete the original source file
    # give the stamped file the original name
    os.rename('shuchuwenjian.pdf', old_file_name)
def main(Anim_name, Frames):
    """Collect the per-frame PDFs into a single PDF.

    :param Anim_name: path of the combined PDF to write.
    :param Frames: frame base names; ``<frame>.pdf`` is read for each one,
        in order.
    """
    # Object the pdf pages are appended to
    output = PdfFileWriter()
    streams = []
    try:
        for F in Frames:
            stream = open(F + '.pdf', 'rb')
            streams.append(stream)
            append_pdf(PdfFileReader(stream), output)
        # Write all collected pages to a single pdf; the context manager
        # makes sure the file is flushed and closed.
        with open(Anim_name, "wb") as out_stream:
            output.write(out_stream)
    finally:
        # The frame files are only needed until write() has run.
        for stream in streams:
            stream.close()
class PdfWriter(object):
    """Thin wrapper pairing a ``PdfFileWriter`` with its output path."""

    def __init__(self, outputFile):
        """Create an empty writer that will be saved to ``outputFile``."""
        self.__outputFile = outputFile
        self.outputWriter = PdfFileWriter()

    def addPage(self, page):
        """Append ``page`` to the document being built."""
        self.outputWriter.addPage(page)

    def savePdf(self):
        """Write the pages collected so far to the configured output file."""
        target = open(self.__outputFile, "wb")
        self.outputWriter.write(target)
        target.close()
def make_all_pdf(name='', surname1='', dni=''):
    """Build the volunteer contract PDF for one person.

    Works inside hard-coded directories under ``/Users/poooool/Desktop/RBF``
    and changes the process working directory several times. Steps:

    1. call ``make_pdf_1``, ``make_pdf_2`` and ``make_pdf_3``;
    2. merge a blank one-page overlay onto pages of
       ``contracte_voluntariat.pdf`` and write ``<dni>file<i>.pdf``;
    3. merge ``<dni>file0.pdf`` .. ``<dni>file5.pdf`` into
       ``contracte-voluntariat-animacio-<name>-<dni>.pdf`` inside
       ``contractes-voluntariat-personal``;
    4. delete the six intermediate files with ``rm``.

    ``surname1`` is only passed on to ``make_pdf_1`` and ``make_pdf_3``.
    """
    os.chdir('/Users/poooool/Desktop/RBF')
    make_pdf_1(name,surname1,dni)
    make_pdf_2(dni)
    make_pdf_3(name,surname1,dni)
    # Build an overlay page that contains a single space character.
    packet = StringIO.StringIO()
    can = canvas.Canvas(packet, pagesize=letter)
    can.drawString(0, 0, ' ')
    can.save()
    #move to the beginning of the StringIO buffer
    packet.seek(0)
    new_pdf = PdfFileReader(packet)
    # read your existing PDF
    os.chdir('/Users/poooool/Desktop/RBF/')
    existing_pdf = PdfFileReader(file("contracte_voluntariat.pdf", "rb"))
    output = PdfFileWriter()
    # add the "watermark" (which is the new pdf) on the existing page
    #print 'hola '+str(existing_pdf.getNumPages())
    # Pages with index 2 up to (but excluding) the last page are processed.
    # ``output`` is created once, so each written file also contains the
    # pages added in earlier iterations.
    # NOTE(review): the extent of this loop body was reconstructed from a
    # whitespace-collapsed source -- confirm the write belongs inside it.
    for i in range(2,existing_pdf.getNumPages()-1):
        page = existing_pdf.getPage(i)
        page.mergePage(new_pdf.getPage(0))
        output.addPage(page)
        filename = str(dni)+'file'+str(i)+'.pdf'
        # finally, write "output" to a real file
        outputStream = file(filename, "wb")
        output.write(outputStream)
        outputStream.close()
    from PyPDF2 import PdfFileMerger
    # Intermediate files expected in the working directory, each prefixed
    # with ``dni``.
    pdfs = ['file0.pdf', 'file1.pdf', 'file2.pdf', 'file3.pdf','file4.pdf','file5.pdf']
    merger = PdfFileMerger()
    for pdf in pdfs:
        merger.append(str(dni)+str(pdf))
    #output_filename = "contractes-voluntariat-personal-"+str(name)+'-'+str(dni)+".pdf"
    output_filename = "contracte-voluntariat-animacio-"+str(name)+'-'+str(dni)+".pdf"
    os.chdir('/Users/poooool/Desktop/RBF/contractes-voluntariat-personal')
    #os.chdir('/Users/poooool/Desktop/RBF/animacio')
    merger.write(output_filename)
    # Go back and remove the intermediate per-person files.
    os.chdir('/Users/poooool/Desktop/RBF/')
    for p in pdfs:
        call(['rm',str(dni)+str(p)])
class BookCollector(object):
    """Collect PDF pages into one document, stamping a footer on each page."""

    def __init__(self, jumpFirst=True, customTest=False, bottomHeight=20):
        """Set up an empty collector.

        :param jumpFirst: when true, the first page added gets no footer.
        :param customTest: either ``False`` (plain ``Page: N`` footer) or a
            ``(show_page_number, message)`` tuple for a custom footer.
        :param bottomHeight: vertical position at which the footer is drawn.
        """
        self.jumpFirst = jumpFirst
        self.collector = PdfFileWriter()
        self.customTest = customTest
        self.pageCount = 1
        self.bottomHeight = bottomHeight

    def getNextPageNumber(self, mediaBox):
        """Return a buffer with a one-page PDF carrying the next footer.

        The footer is positioned from the corners of ``mediaBox``; the
        internal page counter is incremented on every call.
        """
        footer_buffer = StringIO.StringIO()
        footer = canvas.Canvas(footer_buffer)
        x, y, x1, y1 = mediaBox
        if not isinstance(self.customTest, tuple):
            footer.drawRightString(float(x1) - 50, self.bottomHeight,
                                   "Page: " + str(self.pageCount))
        else:
            show_number, message = self.customTest
            if show_number:
                # The number is shifted left by the length of the full
                # "Page: N<message>" text.
                combined = "Page: " + str(self.pageCount) + str(message)
                footer.drawRightString(float(x1) - len(combined),
                                       self.bottomHeight,
                                       " Page: " + str(self.pageCount))
            footer.drawString(float(x) + 20, self.bottomHeight,
                              str(message))
        footer.showPage()
        footer.save()
        self.pageCount += 1
        return footer_buffer

    def addPage(self, streamBuffer):
        """Add every page of the PDF held in ``streamBuffer``.

        Returns ``False`` when the buffer is empty; otherwise returns
        ``None`` after the pages have been collected.
        """
        if streamBuffer.len < 1:
            return False
        reader = PdfFileReader(streamBuffer)
        for index in range(0, reader.getNumPages()):
            if self.jumpFirst:
                # First page overall: collect it without a footer.
                self.collector.addPage(reader.getPage(index))
                self.jumpFirst = False
                continue
            footer_buffer = self.getNextPageNumber(
                reader.getPage(index).mediaBox)
            footer_page = PdfFileReader(footer_buffer).getPage(0)
            reader.getPage(index).mergePage(footer_page)
            self.collector.addPage(reader.getPage(index))

    def printToFile(self, fileName):
        """Write all collected pages to ``fileName``."""
        target = open(fileName, "wb")
        self.collector.write(target)
        target.close()
def combine_plots(pdf_list, out_file):
    """Combine the first page of each existing PDF into ``out_file``.

    :param pdf_list: candidate PDF paths; entries that are not existing
        files are skipped silently.
    :param out_file: path of the combined PDF to write.
    """
    output = PdfFileWriter()
    streams = []
    try:
        for pdf in pdf_list:
            if not os.path.isfile(pdf):
                continue
            stream = open(pdf, "rb")
            streams.append(stream)
            output.addPage(PdfFileReader(stream).getPage(0))
        with open(out_file, "wb") as out_stream:
            output.write(out_stream)
    finally:
        # The inputs must stay open until write() has run; close them
        # afterwards instead of leaking the handles.
        for stream in streams:
            stream.close()
def cut_pdf(ifile, ofile, leftsize, rightsize):
    """Crop every page of ``ifile`` and write the result to ``ofile``.

    :param ifile: path of the PDF to read.
    :param ofile: path of the cropped PDF to write.
    :param leftsize: value assigned to each page's media box lower-left
        corner.
    :param rightsize: value assigned to each page's media box upper-right
        corner.
    """
    with open(ifile, 'rb') as ins:
        pdf = PdfFileReader(ins)
        out = PdfFileWriter()
        for page in pdf.pages:
            page.mediaBox.upperRight = rightsize
            page.mediaBox.lowerLeft = leftsize
            out.addPage(page)
        # Write while the input is still open, then let both files close.
        with open(ofile, 'wb') as ous:
            out.write(ous)
def make_book(files, oname):
    """Build a booklet from the first page of every file in ``files``.

    Each input path is printed as it is processed.

    :param files: paths of the input PDFs, in output order.
    :param oname: path of the booklet PDF to write.
    """
    from pyPdf import PdfFileWriter, PdfFileReader
    print('Generating PDF booklet.')
    output = PdfFileWriter()
    streams = []
    try:
        for x in files:
            print(x)
            stream = open(x, 'rb')
            streams.append(stream)
            output.addPage(PdfFileReader(stream).getPage(0))
        with open(oname, 'wb') as outputStream:
            output.write(outputStream)
    finally:
        # The inputs must stay open until write() has run; close them
        # afterwards instead of leaking the handles.
        for stream in streams:
            stream.close()
def create(self):
    """Write an encrypted copy of ``self.filename`` to ``self.outputname``.

    All pages are copied and the output is encrypted with ``self.password``
    using 128-bit encryption. The password and a completion marker are
    printed to stdout.
    """
    print("[+] The password is " + self.password)
    writer = PdfFileWriter()
    with open(self.filename, "rb") as input_stream:
        input_pdf = PdfFileReader(input_stream)
        for page in range(0, input_pdf.getNumPages()):
            writer.addPage(input_pdf.getPage(page))
        writer.encrypt(self.password, use_128bit=True)  # max security by lib
        # Write while the input is still open; both files are closed on
        # exit, also when writing fails.
        with open(self.outputname, "wb") as output_stream:
            writer.write(output_stream)
    print("[+] Done")
def _merge_pdf(self, documents, both_sides=False):
    """Merge PDF files into one.

    :param documents: list of path of pdf files
    :param both_sides: when true, a blank page is appended after every
        document that has an odd number of pages
    :returns: path of the merged pdf (a temporary file; note that its name
        ends in ``.html``)
    """
    print("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
    print("merge_pdf %s" % (both_sides))
    print("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
    # Base64 of a one-page blank PDF (595 x 842 media box) used as padding.
    blankpdfstr = '''JVBERi0xLjQKJcOkw7zDtsOfCjIgMCBvYmoKPDwvTGVuZ3RoIDMgMCBSL0ZpbHRlci9GbGF0ZURl
Y29kZT4+CnN0cmVhbQp4nDPQM1Qo5ypUMFAwALJMLU31jBQsTAz1LBSKUrnCtRTyuAIVAIcdB3IK
ZW5kc3RyZWFtCmVuZG9iagoKMyAwIG9iago0MgplbmRvYmoKCjUgMCBvYmoKPDwKPj4KZW5kb2Jq
Cgo2IDAgb2JqCjw8L0ZvbnQgNSAwIFIKL1Byb2NTZXRbL1BERi9UZXh0XQo+PgplbmRvYmoKCjEg
MCBvYmoKPDwvVHlwZS9QYWdlL1BhcmVudCA0IDAgUi9SZXNvdXJjZXMgNiAwIFIvTWVkaWFCb3hb
MCAwIDU5NSA4NDJdL0dyb3VwPDwvUy9UcmFuc3BhcmVuY3kvQ1MvRGV2aWNlUkdCL0kgdHJ1ZT4+
L0NvbnRlbnRzIDIgMCBSPj4KZW5kb2JqCgo0IDAgb2JqCjw8L1R5cGUvUGFnZXMKL1Jlc291cmNl
cyA2IDAgUgovTWVkaWFCb3hbIDAgMCA1OTUgODQyIF0KL0tpZHNbIDEgMCBSIF0KL0NvdW50IDE+
PgplbmRvYmoKCjcgMCBvYmoKPDwvVHlwZS9DYXRhbG9nL1BhZ2VzIDQgMCBSCi9PcGVuQWN0aW9u
WzEgMCBSIC9YWVogbnVsbCBudWxsIDBdCi9MYW5nKGZyLUZSKQo+PgplbmRvYmoKCjggMCBvYmoK
PDwvQ3JlYXRvcjxGRUZGMDA1NzAwNzIwMDY5MDA3NDAwNjUwMDcyPgovUHJvZHVjZXI8RkVGRjAw
NEMwMDY5MDA2MjAwNzIwMDY1MDA0RjAwNjYwMDY2MDA2OTAwNjMwMDY1MDAyMDAwMzMwMDJFMDAz
NT4KL0NyZWF0aW9uRGF0ZShEOjIwMTIxMTAzMTQ0NzEwKzAxJzAwJyk+PgplbmRvYmoKCnhyZWYK
MCA5CjAwMDAwMDAwMDAgNjU1MzUgZiAKMDAwMDAwMDIyNiAwMDAwMCBuIAowMDAwMDAwMDE5IDAw
MDAwIG4gCjAwMDAwMDAxMzIgMDAwMDAgbiAKMDAwMDAwMDM2OCAwMDAwMCBuIAowMDAwMDAwMTUx
IDAwMDAwIG4gCjAwMDAwMDAxNzMgMDAwMDAgbiAKMDAwMDAwMDQ2NiAwMDAwMCBuIAowMDAwMDAw
NTYyIDAwMDAwIG4gCnRyYWlsZXIKPDwvU2l6ZSA5L1Jvb3QgNyAwIFIKL0luZm8gOCAwIFIKL0lE
IFsgPEYyMjBCNDlBNjRDOEEzRDY3QUFBQzNCODAwNkI5RkRDPgo8RjIyMEI0OUE2NEM4QTNENjdB
QUFDM0I4MDA2QjlGREM+IF0KL0RvY0NoZWNrc3VtIC83NzUwQTAyMEVFNEUwQkU5NjVGMzBDNTND
MkRGNUFGNgo+PgpzdGFydHhyZWYKNzM2CiUlRU9GCg=='''
    writer = PdfFileWriter()
    blank_page = PdfFileReader(
        StringIO.StringIO(blankpdfstr.decode("base64"))).pages[0]
    # We have to close the streams *after* PdfFileWriter's call to write()
    streams = []
    try:
        for document in documents:
            pdfreport = open(document, 'rb')
            streams.append(pdfreport)
            reader = PdfFileReader(pdfreport)
            for page in range(0, reader.getNumPages()):
                writer.addPage(reader.getPage(page))
            if reader.getNumPages() % 2 and both_sides:
                writer.addPage(blank_page)
        merged_file_fd, merged_file_path = tempfile.mkstemp(
            suffix='.html', prefix='report.merged.tmp.')
        # PDF data is binary, so the descriptor is wrapped in binary mode.
        with closing(os.fdopen(merged_file_fd, 'wb')) as merged_file:
            writer.write(merged_file)
    finally:
        for stream in streams:
            stream.close()
    return merged_file_path
def _find_qrcodes(env, line_vals, inputpdf, test):
    """
    Scan every page of the PDF for a QR code and detect where letters start.

    Each page is written to its own single-page PDF and handed to
    ``_decode_page``. A new letter starts on the first page and on every
    page whose QR code data differs from the previously seen one. For each
    such page a half-size PNG preview is rendered and a values dictionary
    (partner, child, preview and, in test mode, the QR code image) is
    appended to ``line_vals``.

    :param env env: Odoo variable env
    :param list line_vals: List that receives one values dictionary per
                           detected letter (modified in place)
    :param inputpdf: PDFReader of the original pdf file
    :param bool test: Save the image of the QR code or not
    :returns: page indexes at which a letter starts, followed by the total
              number of pages; images returned by ``_decode_page`` for those
              starting pages
    :rtype: list(int), list
    """
    # Holds the indexes of the pages where a new letter is detected
    letter_indexes = list()
    page_imgs = list()
    previous_qrcode = ''
    num_pages = inputpdf.numPages
    for i in xrange(num_pages):
        output = PdfFileWriter()
        output.addPage(inputpdf.getPage(i))
        page_buffer = BytesIO()
        output.write(page_buffer)
        page_buffer.seek(0)
        qrcode, img, test_data = _decode_page(env, page_buffer.read())
        if (qrcode and qrcode.data != previous_qrcode) or i == 0:
            previous_qrcode = qrcode and qrcode.data
            letter_indexes.append(i)
            page_imgs.append(img)
            partner_id, child_id = decodeBarcode(env, qrcode)
            # Downsize png image for saving it in a preview field
            page_buffer.seek(0)
            with Image(blob=page_buffer.read(),
                       resolution=150) as page_preview:
                page_preview.transform(resize='50%')
                preview_data = base64.b64encode(
                    page_preview.make_blob('png'))
            values = {
                'partner_id': partner_id,
                'child_id': child_id,
                'letter_image_preview': preview_data
            }
            if test:
                values['qr_preview'] = base64.b64encode(test_data)
            line_vals.append(values)
    # End marker: one past the last page index. Using the page count rather
    # than the loop variable keeps this valid for a PDF without pages.
    letter_indexes.append(num_pages)
    return letter_indexes, page_imgs
def clean_meta_data(input_filename):
    """Rewrite an uploaded PDF in place with only its pages copied over.

    The file ``input_filename`` in ``app.config['UPLOAD_FOLDER']`` is read,
    its pages are copied into a fresh writer, the original file is removed
    and a new file with the same name is written from the writer.

    :param input_filename: name of the PDF inside the upload folder.
    """
    path = os.path.join(app.config['UPLOAD_FOLDER'], input_filename)
    output = PdfFileWriter()
    # PDF data is binary; the source has to stay open until write() has
    # run because the copied pages are read from it at that point.
    with open(path, 'rb') as source:
        input_ = PdfFileReader(source)
        for page in range(input_.getNumPages()):
            output.addPage(input_.getPage(page))
        os.remove(path)
        with open(path, 'wb') as output_stream:
            output.write(output_stream)
def remove_blank_pages(filename):
    """Copy ``filename`` to ``new_pdfs/<filename>`` without its last two pages.

    No blank-page detection is performed: the final two pages are dropped
    unconditionally, and a document with two pages or fewer yields an
    output without pages.

    :param filename: path of the PDF to read; also used as the file name
        below ``new_pdfs/``.
    """
    # PDF data is binary, so both files are opened in binary mode.
    with open(filename, 'rb') as source:
        reader = PdfFileReader(source)
        writer = PdfFileWriter()
        for i in range(reader.getNumPages() - 2):
            writer.addPage(reader.getPage(i))
        with open("new_pdfs/" + filename, 'wb') as new_pdf:
            writer.write(new_pdf)