def __call__(self, data, attachments=None, pages=None):
    """Render the templated fields, fill the PDF form and assemble the result.

    Args:
        data: Source data passed to ``self.template_args`` to build the
            keyword arguments for each field template.
        attachments: Optional iterable of objects exposing ``pdf()``; each
            one is merged onto a blank page appended after the form pages.
        pages: Optional iterable of zero-based page numbers to keep from the
            filled form. A falsy value keeps every page.

    Returns:
        The ``PdfFileWriter`` holding the selected pages and attachments;
        nothing is written to disk here.
    """
    # ``None`` replaces the former shared mutable ``[]`` default.
    if attachments is None:
        attachments = ()

    self.rendered = {}
    for field, ctx in self.fields.items():
        if "template" not in ctx:
            continue
        self.context = ctx
        kwargs = self.template_args(data)
        template = self.context["template"]
        try:
            rendered_field = template.render(**kwargs)
        except Exception as err:
            # Best effort: a failing field is logged and left unfilled.
            logger.error("%s: %s %s", field, template, err)
        else:
            # Skip the field if it is already rendered by filter
            if field not in self.rendered:
                self.rendered[field] = rendered_field

    filled = PdfFileReader(self.exec_pdftk(self.rendered))
    for pagenumber, watermark in self.watermarks:
        filled.getPage(pagenumber).mergePage(watermark)

    output = PdfFileWriter()
    pages = pages or xrange(filled.getNumPages())
    for p in pages:
        output.addPage(filled.getPage(p))
    for attachment in attachments:
        output.addBlankPage().mergePage(attachment.pdf())
    return output
def test_two_on_one_page(self):
    """Combining an uploaded two-page PDF with ``pages_sheet='2'`` gives one page."""
    # Build a document with two pages
    source = PdfFileReader(self.get_pdf_stream())
    two_pages = PdfFileWriter()
    for _ in range(2):
        two_pages.addPage(source.getPage(0))
    assert two_pages.getNumPages() == 2
    assert two_pages.getPage(0).extractText().count('Test') == 1

    upload = StringIO()
    two_pages.write(upload)
    upload.seek(0)

    index_response = self.app.get('/')
    self.assertEquals(index_response.status_code, 200)
    self.app.post('/handleform', data={'file': (upload, 'test.pdf')})

    download = self.combine_and_download(pages_sheet='2')
    combined = PdfFileReader(StringIO(download.data))
    # Both source pages end up on the single output page.
    self.assertEquals(combined.getPage(0).extractText().count('Test'), 2)
    self.assertEquals(combined.getNumPages(), 1)
    self.clean_up()
def renderToPdf(envLL, filename, sizex, sizey): """Renders the specified Box2d and zoom level as a PDF""" basefilename = os.path.splitext(filename)[0] mergedpdf = None for mapname in MAPNIK_LAYERS: print 'Rendering', mapname # Render layer PDF. localfilename = basefilename + '_' + mapname + '.pdf'; file = open(localfilename, 'wb') surface = cairo.PDFSurface(file.name, sizex, sizey) envMerc = LLToMerc(envLL) map = mapnik.Map(sizex, sizey) mapnik.load_map(map, mapname + ".xml") map.zoom_to_box(envMerc) mapnik.render(map, surface) surface.finish() file.close() # Merge with master. if not mergedpdf: mergedpdf = PdfFileWriter() localpdf = PdfFileReader(open(localfilename, "rb")) page = localpdf.getPage(0) mergedpdf.addPage(page) else: localpdf = PdfFileReader(open(localfilename, "rb")) page.mergePage(localpdf.getPage(0)) output = open(filename, 'wb') mergedpdf.write(output) output.close()
def test_concat_pdf_files(self):
    """Concatenating docs/a.pdf and docs/b.pdf keeps every page of both."""
    target = r"docs/c.pdf"
    # Remove a result left over from an earlier run; a missing file is fine.
    try:
        os.unlink(target)
    except OSError:
        pass

    with open(r"docs/a.pdf", 'rb') as stream_a:
        with open(r"docs/b.pdf", 'rb') as stream_b:
            input_a = PdfFileReader(stream_a)
            input_b = PdfFileReader(stream_b)
            output = PdfFileWriter()
            for reader in (input_a, input_b):
                for x in range(reader.getNumPages()):
                    output.addPage(reader.getPage(x))
            # Inputs must still be open here: page data is read on write.
            with open(target, 'wb') as outputStream:
                output.write(outputStream)
            count = input_a.getNumPages() + input_b.getNumPages()

    with open(target, 'rb') as check_stream:
        check = PdfFileReader(check_stream)
        self.assertEqual(count, check.getNumPages())
    os.unlink(target)
def addPdfOverlay(self, pdf_doc, overlay_doc, output_doc, repeatOverlay=False):
    '''
    Essentially merging two PDF documents.

    pdf_doc: (string) Path to PDF document.
    overlay_doc: (string) Path to PDF overlay document to overlay pdf_doc.
    output_doc: (string) Path the merged PDF is written to.
    repeatOverlay: (boolean) If set to True, page 1 of the overlay document
                   is repeated for each page of the pdf_doc. (default: False)

    When repeatOverlay is False, page n of the overlay is merged onto page n
    of pdf_doc, so the overlay needs at least as many pages as pdf_doc.
    '''
    # ``with`` guarantees both inputs are closed even if merging or writing
    # fails; they stay open until the output has been written.
    with open(pdf_doc, "rb") as pdf_stream:
        with open(overlay_doc, "rb") as overlay_stream:
            pdf = PdfFileReader(pdf_stream)
            pdf_overlay = PdfFileReader(overlay_stream)
            page_cnt = pdf.numPages

            if repeatOverlay:
                overlay_pages = [pdf_overlay.getPage(0) for n in range(page_cnt)]
            else:
                overlay_pages = pdf_overlay.pages

            outputWriter = PdfFileWriter()
            for n in range(page_cnt):
                pg = pdf.getPage(n)
                pg.mergePage(overlay_pages[n])
                outputWriter.addPage(pg)

            # Output
            with open(output_doc, "wb") as outputStream:
                outputWriter.write(outputStream)
def save_ready_template(request, id):
    """Stamp a person's name onto the two-page template and render the page.

    Looks up the ``FIO`` record with the given ``id``, draws
    "<name> <surname>" at a fixed position on each of the first two template
    pages, writes the result to ``readytemplate.pdf`` and renders
    ``template_page.html`` with the record as ``person_template``.
    """
    person_print = FIO.objects.get(id=id)
    full_name = "{} {}".format(person_print.name, person_print.surname)

    # create a new PDF with Reportlab: one overlay page per template page
    packet = StringIO.StringIO()
    can = canvas.Canvas(packet, pagesize=letter)
    can.drawString(284, 579, full_name)
    can.showPage()
    can.drawString(260, 494, full_name)
    can.showPage()
    can.save()

    # move to the beginning of the StringIO buffer
    packet.seek(0)
    new_pdf = PdfFileReader(packet)

    # read your existing PDF; kept open until the output is written because
    # page data is pulled from it while writing
    with open("/Users/danilakimov/Desktop/template1.pdf", "rb") as template_stream:
        existing_pdf = PdfFileReader(template_stream)
        output = PdfFileWriter()
        # add the "watermark" (which is the new pdf) on the existing pages
        for page_number in (0, 1):
            page = existing_pdf.getPage(page_number)
            page.mergePage(new_pdf.getPage(page_number))
            output.addPage(page)
        # finally, write "output" to a real file
        with open("/Users/danilakimov/Desktop/readytemplate.pdf", "wb") as outputStream:
            output.write(outputStream)

    return render(request, 'template_page.html', {'person_template': person_print})
def merge(fppath, bppath, outputpath, no_delete, fed_backwards):
    """Interleave front-page and back-page scans into one PDF.

    Args:
        fppath: Path of the PDF holding the front pages.
        bppath: Path of the PDF holding the back pages.
        outputpath: Destination path; ``~`` is expanded.
        no_delete: When false, pages whose extracted text is empty are
            dropped from the result.
        fed_backwards: When true, back pages are taken in reverse order
            (last back page pairs with the first front page).

    Front pages without a matching back page are kept on their own.
    """
    # PDFs are binary: open in 'rb'/'wb' so no newline translation occurs.
    with open(fppath, 'rb') as fpstream:
        with open(bppath, 'rb') as bpstream:
            fpfile = PdfFileReader(fpstream)
            bpfile = PdfFileReader(bpstream)
            back_count = bpfile.getNumPages()

            outputpages = []
            for i in range(fpfile.getNumPages()):
                outputpages.append(fpfile.getPage(i))
                # Explicit bounds check: with fed_backwards the computed index
                # would go negative once the back pages run out and silently
                # wrap around to a wrong page instead of raising IndexError.
                if i < back_count:
                    back_index = back_count - i - 1 if fed_backwards else i
                    outputpages.append(bpfile.getPage(back_index))

            if not no_delete:
                outputpages = [page for page in outputpages
                               if page.extractText() != '']

            outputfile = PdfFileWriter()
            for page in outputpages:
                outputfile.addPage(page)
            with open(os.path.expanduser(outputpath), 'wb') as outstream:
                outputfile.write(outstream)
def rewrite(self, context, font=None):
    """Draw text items over page 0 of ``self.path`` and save the result.

    Args:
        context: Iterable of dicts with keys ``'x'``, ``'y'`` and ``'value'``;
            each value is drawn at the given coordinates.
        font: Dict with keys ``'name'`` and ``'size'``. Defaults to
            Times-Roman at size 11.

    Returns:
        True once ``self.destination`` has been written.
    """
    # Build the default per call instead of sharing one mutable dict.
    if font is None:
        font = {'name': 'Times-Roman', 'size': 11}

    # create a new PDF with Reportlab
    packet = StringIO.StringIO()
    can = canvas.Canvas(packet, pagesize=letter)
    can.setFont(font['name'], font['size'])
    for i in context:
        can.drawString(i['x'], i['y'], i['value'])
    can.save()

    # move to the beginning of the StringIO buffer
    packet.seek(0)
    new_pdf = PdfFileReader(packet)

    # read your existing PDF; it stays open until the output is written
    with open(self.path, "rb") as source_stream:
        existing_pdf = PdfFileReader(source_stream)
        output = PdfFileWriter()
        # merge the new file with the existing
        page = existing_pdf.getPage(0)
        page.mergePage(new_pdf.getPage(0))
        output.addPage(page)
        # finally, write "output" to a real file
        with open(self.destination, "wb") as outputStream:
            output.write(outputStream)
    return True
def pdf_watermark_fast_first_page(self, pathname, Wm_f, wt1='', **kwargs):
    """Write page 0 of ``pathname`` to ``Wm_f`` with a text watermark on it.

    Args:
        pathname: Path of the source PDF.
        Wm_f: Path of the watermarked PDF to create.
        wt1: Accepted for compatibility; not used by this method.
        **kwargs: ``url_wtm`` is the text drawn at (10, 100). When it is
            missing an empty string is drawn.

    Returns:
        ``Wm_f``.
    """
    from pyPdf import PdfFileWriter, PdfFileReader
    import StringIO
    from reportlab.pdfgen import canvas
    from reportlab.lib.pagesizes import letter

    # Previously a bare ``except: pass`` hid the missing key and the name was
    # then unbound at drawString(); fall back to an empty watermark instead.
    url_watermark = kwargs.get('url_wtm', '')

    # create a new PDF with Reportlab
    packet = StringIO.StringIO()
    can = canvas.Canvas(packet, pagesize=letter)
    can.drawString(10, 100, url_watermark)
    can.save()

    # move to the beginning of the StringIO buffer
    packet.seek(0)
    new_pdf = PdfFileReader(packet)

    # read your existing PDF; it stays open until the output is written
    with open(pathname, "rb") as source_stream:
        existing_pdf = PdfFileReader(source_stream)
        output = PdfFileWriter()
        # add the "watermark" (which is the new pdf) on the existing page
        page = existing_pdf.getPage(0)
        page.mergePage(new_pdf.getPage(0))
        output.addPage(page)
        # finally, write "output" to a real file
        with open(Wm_f, "wb") as outputStream:
            output.write(outputStream)
    return Wm_f
def add_guides(self):
    """Copy ``sig.pdf`` to ``sigs.pdf`` with guide marks merged onto page 0.

    The guide overlay is generated with ``generate_longarm()`` when
    ``self.args.longarm`` is set, otherwise with ``generate_shortarm()`` on
    an A5 or landscape-A4 sized page depending on ``self.args.a5``.
    """
    # Both files are closed deterministically; the input stays open until
    # the output has been written because pages are read while writing.
    with open('sig.pdf', 'rb') as sig_stream:
        pdf_in = PdfFileReader(sig_stream)
        pdf_out = PdfFileWriter()
        for i in xrange(pdf_in.getNumPages()):
            page = pdf_in.getPage(i)
            if not i:
                # Only the first page receives the guides.
                guides = StringIO()
                if self.args.longarm:
                    create_pdf(
                        guides, a4lwidth_pt, a4lheight_pt, generate_longarm())
                else:
                    if self.args.a5:
                        w, h = a5width_pt, a5height_pt
                    else:
                        w, h = a4lwidth_pt, a4lheight_pt
                    create_pdf(guides, w, h, generate_shortarm(
                        self.args.a5, bool(self.args.signature)))
                pdf_guides = PdfFileReader(guides)
                page.mergePage(pdf_guides.getPage(0))
            pdf_out.addPage(page)
        with open('sigs.pdf', 'wb') as out_stream:
            pdf_out.write(out_stream)
def duplicated_pdf(stream):
    """Creates a duplicated pdf, from html stream (A.K.A. StringIO).

    The HTML is converted twice, once with an "Original" banner and once
    with a "Duplicado" banner, and the pages of both conversions are
    returned as one encoded PDF string (original first).
    """
    original_banner = "<center><h3>-- Original --</h3></center>"
    copy_banner = "<center><h3>-- Duplicado --</h3></center>"
    converter = html_to_pdf.HTMLToPDFConverter()

    original_reader = PdfFileReader(
        StringIO(converter.convert(stream, original_banner, original_banner)))
    # The first conversion consumed the stream; rewind before converting again.
    stream.seek(0)
    copy_reader = PdfFileReader(
        StringIO(converter.convert(stream, copy_banner, copy_banner)))

    writer = PdfFileWriter()
    for reader in (original_reader, copy_reader):
        for index in xrange(0, reader.getNumPages()):
            writer.addPage(reader.getPage(index))

    result = StringIO()
    writer.write(result)
    return result.getvalue()
def pdf(coursesid, examsid):
    ''' Creates a blank PDF of this exam.

    Draws a placeholder string over page 0 of the template PDF and returns
    it as an ``application/pdf`` response. ``coursesid`` and ``examsid`` are
    not used yet.
    '''
    # TODO: Obviously fix this up to generate actual PDFs; this is just a proof of concept
    from reportlab.pdfgen import canvas
    from reportlab.lib.pagesizes import letter
    from pyPdf import PdfFileWriter, PdfFileReader
    from io import BytesIO

    overlay = BytesIO()
    p = canvas.Canvas(overlay, pagesize=letter)
    p.drawString(100, 100, 'Hello')
    p.save()
    overlay.seek(0)
    new_pdf = PdfFileReader(overlay)

    merged = BytesIO()
    # The template stays open until the merged document has been written.
    with open('/home/treece/src/web/bubbleck/res/Template.pdf', 'rb') as template_stream:
        existing_pdf = PdfFileReader(template_stream)
        out = PdfFileWriter()
        page = existing_pdf.getPage(0)
        page.mergePage(new_pdf.getPage(0))
        out.addPage(page)
        out.write(merged)

    # write() returns None, so the body has to come from the buffer itself.
    response = make_response(merged.getvalue())
    # The original header value had an unterminated quote.
    response.headers['Content-Disposition'] = 'inline; filename="sakulaci.pdf"'
    response.mimetype = 'application/pdf'
    return response
class Packet(object):
    """Line-oriented access to the text extracted from a results PDF."""

    def __init__(self, results_pdf):
        """Open the PDF at path ``results_pdf`` and split its text into lines."""
        reader = PdfFileReader(open(results_pdf, "rb"))
        self.results_pdf = reader
        self.num_pages = reader.getNumPages()
        self.result_list = self.result_string.split('\n')
        self.entry_start = 0

    @property
    def num_rounds(self):
        """Last digit character on the first text line of page 0, as an int.

        Returns None when that line contains no digit.
        """
        header = self.results_pdf.getPage(0).extractText().split('\n')[0]
        for char in reversed(header):
            if char in digits:
                return int(char)

    @property
    def result_string(self):
        """Extracted text of all pages, concatenated in page order."""
        return "".join(
            self.results_pdf.getPage(index).extractText()
            for index in range(0, self.num_pages))

    def next_entry(self):
        """Return the next entry and advance the cursor past it.

        The entry is ``school_codes(<first line>)`` followed by the next
        ``num_rounds + 1`` lines of ``result_list``.
        """
        first = self.entry_start
        school_code = school_codes(self.result_list[first])
        span = self.num_rounds + 2
        rest = self.result_list[first + 1:first + span]
        self.entry_start = first + span
        return [school_code] + rest
def add_footer_pdf(in_fname, op_fname, imgPath):
    """Copy a PDF, stamping a footer image on every page except the first.

    Args:
        in_fname: Path of the source PDF.
        op_fname: Path of the PDF to create.
        imgPath: Image drawn at (210, 20) with size 155x35 on the overlay.
    """
    from pyPdf import PdfFileWriter, PdfFileReader
    from reportlab.pdfgen import canvas
    from StringIO import StringIO

    # Using ReportLab to insert image into PDF
    imgTemp1 = StringIO()
    imgDoc1 = canvas.Canvas(imgTemp1)
    # Draw image on Canvas and save PDF in buffer
    imgDoc1.drawImage(imgPath, 210, 20, 155, 35)
    imgDoc1.save()
    overlay1 = PdfFileReader(StringIO(imgTemp1.getvalue())).getPage(0)

    output = PdfFileWriter()
    with open(in_fname, "rb") as in_stream:
        in_file = PdfFileReader(in_stream)
        # The first page is copied without the footer.
        output.addPage(in_file.getPage(0))
        # Use PyPDF to merge the image-PDF into the remaining pages
        for i in range(1, in_file.getNumPages()):
            page = in_file.getPage(i)
            page.mergePage(overlay1)
            output.addPage(page)
        # Save the result. Binary mode: text mode would translate newline
        # bytes on some platforms and corrupt the PDF.
        with open(op_fname, "wb") as outputStream:
            output.write(outputStream)
def make_Cert(code, redeem_for):
    """Create a certificate PDF from ``cert.pdf`` for one code.

    ``code`` is drawn at (280, 540) and ``redeem_for`` at (220, 300) in
    32-point Helvetica over page 0 of ``cert.pdf``. The result is written to
    ``<redeem_for with spaces replaced by underscores>_cert.pdf``.
    """
    # create a new PDF with Reportlab
    packet = StringIO.StringIO()
    can = canvas.Canvas(packet, pagesize=letter)
    can.setFont('Helvetica', 32)  # applies to both strings below
    can.drawString(280, 540, code)
    can.drawString(220, 300, redeem_for)
    can.save()

    # move to the beginning of the StringIO buffer
    packet.seek(0)
    new_pdf = PdfFileReader(packet)

    # read your existing PDF; it stays open until the output is written
    with open("cert.pdf", "rb") as template_stream:
        existing_pdf = PdfFileReader(template_stream)
        output = PdfFileWriter()
        # add the "watermark" (which is the new pdf) on the existing page
        page = existing_pdf.getPage(0)
        page.mergePage(new_pdf.getPage(0))
        output.addPage(page)
        # finally, write "output" to a real file
        with open(redeem_for.replace(" ", "_") + "_cert.pdf", "wb") as outputStream:
            output.write(outputStream)
def render(self):
    """Assemble the document and return it as a rewound temporary file.

    Layout of the result:
      1. page 0 of ``self.generate_page_one()`` with page 0 of
         ``lib/kfza_base.pdf`` merged onto it,
      2. every page of ``lib/<self.base_pdf>`` (decrypted with an empty
         password when encrypted),
      3. when ``self.context.course.extra_questions`` is truthy, page 1 of a
         fresh ``self.generate_page_one()`` with the same base page merged.

    Returns:
        A ``TemporaryFile`` positioned at offset 0.
    """
    output = PdfFileWriter()
    base1 = open("%s/lib/%s" % (path.dirname(__file__), "kfza_base.pdf"), 'rb')
    try:
        wm = PdfFileReader(base1).getPage(0)
        page1 = PdfFileReader(self.generate_page_one()).getPage(0)
        page1.mergePage(wm)
        output.addPage(page1)

        bpdf = "%s/lib/%s" % (path.dirname(__file__), self.base_pdf)
        with open(bpdf, 'rb') as pdf:
            pf = PdfFileReader(pdf)
            if pf.isEncrypted:
                pf.decrypt('')
            for page in range(pf.getNumPages()):
                output.addPage(pf.getPage(page))

            if self.context.course.extra_questions:
                wm = PdfFileReader(base1).getPage(0)
                extra_page = PdfFileReader(self.generate_page_one()).getPage(1)
                extra_page.mergePage(wm)
                output.addPage(extra_page)

            # Write while every source stream is still open: page contents
            # are pulled from the readers during write().
            ntf = TemporaryFile()
            output.write(ntf)
        ntf.seek(0)
        return ntf
    finally:
        base1.close()
def generate(donor):
    """Build ``output/<donor-slug>.pdf`` from the two SVG page templates.

    For each of ``page1_svg`` and ``page2_svg`` the first ``/France/`` on
    each line is replaced by the URL-escaped donor name, Inkscape refreshes
    and exports the page to PDF, and the two one-page PDFs are combined.
    Nothing is done when the combined file already exists.
    """
    os.system('mkdir -p output')
    donor_url = donor.replace(' ', '%20')
    slug = donor.replace(' ', '-').lower()
    page1 = 'output/%s1' % slug
    page2 = 'output/%s2' % slug
    combined = 'output/%s.pdf' % slug
    if os.path.exists(combined):
        return

    # NOTE(review): donor is interpolated unescaped into shell command lines;
    # confirm donor names never contain quotes or shell metacharacters.
    os.system('cp "%s" "%s.svg"' % (page1_svg, page1))
    os.system('sed "s|/France/|/%s/|" "%s" > "%s.svg"' % (donor_url, page1_svg, page1))
    os.system('inkscape --file="%s.svg" --verb=za.co.widgetlabs.update --verb=FileSave --verb=FileQuit 2> /dev/null' % (page1))
    os.system('inkscape --file="%s.svg" --export-pdf="%s.pdf" 2> /dev/null' % (page1, page1))

    os.system('cp "%s" "%s.svg"' % (page2_svg, page2))
    os.system('sed "s|/France/|/%s/|" "%s" > "%s.svg"' % (donor_url, page2_svg, page2))
    os.system('inkscape --file="%s.svg" --verb=za.co.widgetlabs.update --verb=FileSave --verb=FileQuit 2> /dev/null' % (page2))
    os.system('inkscape --file="%s.svg" --export-pdf="%s.pdf" ' % (page2, page2))

    # Merge pages; the inputs are closed once the combined file is written.
    with open('%s.pdf' % (page1), 'rb') as stream1:
        with open('%s.pdf' % (page2), 'rb') as stream2:
            output = PdfFileWriter()
            output.addPage(PdfFileReader(stream1).getPage(0))
            output.addPage(PdfFileReader(stream2).getPage(0))
            with open(combined, 'wb') as outputStream:
                output.write(outputStream)
    sleep(2)
def reshuffle(output_directory, input_file, filename): output_pdf = PdfFileWriter() with open(input_file, 'rb') as readfile: input_pdf = PdfFileReader(readfile) total_pages = input_pdf.getNumPages() if total_pages == 7: print filename, if "hardest_jet_phi_all_linear" in filename or "hardest_jet_eta_all_linear" in filename or "hardest_jet_pT_all_linear" in filename or "hardest_jet_pT_jec_all_linear" in filename or "area" in filename or "jec" in filename or "pfc_neutral_0_100_pT" in filename or "pfc_charged_0_100_pT" in filename or "pfc_neutral_0_5_pT" in filename or "pfc_charged_0_5_pT" in filename: output_pdf.addPage(input_pdf.getPage(4)) output_pdf.addPage(input_pdf.getPage(5)) output_pdf.addPage(input_pdf.getPage(6)) print "85" else: output_pdf.addPage(input_pdf.getPage(5)) output_pdf.addPage(input_pdf.getPage(4)) output_pdf.addPage(input_pdf.getPage(6)) print "150" for i in range(0, 4): output_pdf.addPage(input_pdf.getPage(i)) with open(output_directory + filename, "wb") as writefile: output_pdf.write(writefile)
def add_terms_and_conditions(self, ids, original_report_pdf, original_report):
    """Append the company's terms-and-conditions PDF to a rendered report.

    The document language is read from the record through the field named by
    ``original_report.terms_conditions_language_field`` (falling back to the
    context ``lang``). The company's terms whose ``language`` matches are
    used, otherwise the entry whose language is ``'default'``.

    Returns:
        The combined PDF as a string, or ``original_report_pdf`` unchanged
        when no usable terms and conditions are found.
    """
    user = self.env['res.users'].browse(self._uid)
    # todo change user language to report language (client language)
    language_field = original_report.terms_conditions_language_field
    record = self.env[original_report.model].browse(ids)
    localdict = {'o': record}
    eval('document_language = o.%s' % language_field, localdict,
         mode="exec", nocopy=True)
    document_language = localdict.get('document_language',
                                      self._context.get('lang'))

    # todo check language
    matched = False
    fallback = False
    for entry in record.company_id.terms_and_conditions:
        if entry.language == document_language:
            matched = base64.decodestring(entry.datas)
        if entry.language == 'default':
            fallback = base64.decodestring(entry.datas)
    chosen = matched or fallback or False

    if not chosen:
        return original_report_pdf

    writer = PdfFileWriter()
    # Report pages first, then the terms and conditions.
    for payload in (original_report_pdf, chosen):
        reader = PdfFileReader(StringIO(payload))
        for index in range(0, reader.getNumPages()):
            writer.addPage(reader.getPage(index))
    combined = StringIO()
    writer.write(combined)
    return combined.getvalue()
def slice(self, ifile, ofile=None, marginv=0, marginh=0, columnwidth=0,
          centerwidth=0, scale=0.9):
    """Split every page of a two-column PDF into a left and a right page.

    Args:
        ifile: Path of the source PDF.
        ofile: Destination path; defaults to ``PdfSlicer.getOutName(ifile)``.
        marginv: Amount trimmed from the top and bottom of each column.
        marginh: Amount trimmed from the outer left and right edges.
        columnwidth: Column width. Only used together with ``centerwidth``;
            when either is 0 the page is cut at half of its right edge.
        centerwidth: Width of the gap between the two columns.
        scale: Scale factor applied to each resulting page.
    """
    explicit_columns = columnwidth != 0 and centerwidth != 0
    output = PdfFileWriter()
    # The source is closed when done; it stays open until the output has
    # been written because page data is read during write().
    with open(ifile, "rb") as source_stream:
        source = PdfFileReader(source_stream)

        # print the title of document1.pdf
        print("title = %s" % (source.getDocumentInfo().title))
        print("Processing page: ")
        for i in xrange(source.getNumPages()):
            print(i + 1)

            # add left column as page
            page = PageObject.createBlankPage(source)
            page.mergePage(source.getPage(i))
            if explicit_columns:
                page.mediaBox.upperRight = (
                    page.mediaBox.getUpperLeft_x() + marginh + columnwidth,
                    page.mediaBox.getUpperRight_y() - marginv
                )
            else:
                page.mediaBox.upperRight = (
                    page.mediaBox.getUpperRight_x() / 2,
                    page.mediaBox.getUpperRight_y() - marginv
                )
            page.mediaBox.lowerLeft = (
                page.mediaBox.getLowerLeft_x() + marginh,
                page.mediaBox.getLowerLeft_y() + marginv,
            )
            page.scale(scale, scale)
            output.addPage(page)

            # add right column as page
            page = PageObject.createBlankPage(source)
            page.mergePage(source.getPage(i))
            if explicit_columns:
                page.mediaBox.lowerLeft = (
                    page.mediaBox.getLowerLeft_x() + marginh + columnwidth + centerwidth,
                    page.mediaBox.getLowerLeft_y() + marginv,
                )
            else:
                page.mediaBox.lowerLeft = (
                    page.mediaBox.getUpperRight_x() / 2,
                    page.mediaBox.getLowerLeft_y() + marginv,
                )
            page.mediaBox.upperRight = (
                page.mediaBox.getUpperRight_x() - marginh,
                page.mediaBox.getUpperRight_y() - marginv
            )
            page.scale(scale, scale)
            output.addPage(page)

        # finally, write "output"
        target = ofile if ofile is not None else PdfSlicer.getOutName(ifile)
        with open(target, "wb") as outputStream:
            output.write(outputStream)
def pdf2text(source_pdf, target_pages):
    """Return the extracted text of the requested pages of a PDF.

    Args:
        source_pdf: Path of the PDF file.
        target_pages: A single 1-based page number, or an iterable of them.

    Returns:
        The text of the pages concatenated in the order given.
    """
    # Decide "one page or many" up front. The previous bare ``except`` around
    # the whole loop also swallowed genuine extraction errors and then failed
    # with an unrelated TypeError.
    try:
        page_numbers = list(target_pages)
    except TypeError:
        # just one page
        page_numbers = [target_pages]

    with open(source_pdf, "rb") as stream:
        pdf = PdfFileReader(stream)
        return ''.join(pdf.getPage(int(number - 1)).extractText()
                       for number in page_numbers)
def test_watermark(self):
    """A text overlay shows up in the download next to the original text."""
    marker = 'TEST_WATERMARK'

    # Make sure our Test string is available in the original document
    original_text = PdfFileReader(self.get_pdf_stream()).getPage(0).extractText()
    assert 'Test' in original_text
    assert marker not in original_text

    download = self.combine_and_download(text_overlay=marker)
    downloaded_text = PdfFileReader(StringIO(download.data)).getPage(0).extractText()
    self.assert_('Test' in downloaded_text)
    self.assert_(marker in downloaded_text)
    self.clean_up()
def merge_pdf(url_list):
    """Download each schedule PDF, merge them and publish the merged file.

    Args:
        url_list: Iterable of mutable ``[url, filename]`` pairs. Pairs whose
            URL is a known bad zero-padded name are corrected in place.

    For every entry the first two pages are added, plus the third when the
    document has exactly three pages. The merged PDF is written to
    ``/tmp/bus.pdf``, uploaded with curl to anonfiles.com, and the reply is
    stored in the scraperwiki database.
    """
    # Download each PDF and merge them into one giant PDF, post this giant PDF
    # to anonfiles.com, add URL to scraperwiki database
    base = "http://www.cota.com/assets/Riding-Cota/Schedules/Current/"
    # zero-padded file name on the site -> file name that actually exists
    corrected_names = {
        "083.pdf": "83.pdf",
        "039.pdf": "39.pdf",
        "021.pdf": "21.pdf",
        "016S.pdf": "16S.pdf",
        "015.pdf": "15.pdf",
    }
    corrections = dict((base + old, new) for old, new in corrected_names.items())

    output = PdfFileWriter()
    # Readers pull page data lazily, so the downloads stay open until the
    # merged file is written and are closed afterwards.
    streams = []
    try:
        for url in url_list:
            if url[0] in corrections:
                new_name = corrections[url[0]]
                url[0] = base + new_name
                url[1] = new_name
            # NOTE(review): wget saves into the current directory while the
            # file is read from /tmp — presumably the process runs in /tmp.
            os.system("wget %s" % url[0])
            stream = open('/tmp/%s' % url[1], "rb")
            streams.append(stream)
            input1 = PdfFileReader(stream)
            numPages = input1.getNumPages()
            print("number of pages = %s" % (numPages))
            output.addPage(input1.getPage(0))
            output.addPage(input1.getPage(1))
            if numPages == 3:
                output.addPage(input1.getPage(2))

        final_page_count = output.getNumPages()
        print("Number of Pages in Final = %s" % (final_page_count))
        with open("/tmp/bus.pdf", "wb") as outputStream:
            output.write(outputStream)
    finally:
        for stream in streams:
            stream.close()

    os.system('curl -kF "[email protected];filename=bus.pdf" https://anonfiles.com/api/hotlink -o "reply.txt"')
    with open('reply.txt', 'r') as f:
        read_data = f.read()
    data_dict = {
        'Title': 'Link to COTA Bus Schedule',
        'URL': read_data,
    }
    scraperwiki.sqlite.save(unique_keys=['Title', 'URL'], data=data_dict)
def run(self):
    """Stack pages 0, 1 and 3 of the input PDF onto one output page.

    The destination page size comes from ``self.opts.size`` and
    ``self.opts.orientation``; each source page is scaled so that
    ``self.opts.count`` pages fit the destination height, then merged at a
    fixed vertical offset. The result is written to ``self.outfile``.
    """
    def getSrcDim(srcPage):
        """(width, height) of the page's media box as floats."""
        return (float(srcPage.mediaBox.getWidth()),
                float(srcPage.mediaBox.getHeight()))

    def getDestDim():
        """Destination (width, height); None for an unknown orientation."""
        if self.opts.orientation == const.PORTRAIT:
            return self.opts.size
        elif self.opts.orientation == const.LANDSCAPE:
            return (self.opts.size[1], self.opts.size[0])

    def getScale(srcPage):
        """Ratio of the source width to the destination width."""
        destWidth, destHeight = getDestDim()
        return (getSrcDim(srcPage)[const.WIDTH] / float(destWidth))

    def getScaledDestDim(srcPage):
        # NOTE(review): int() truncates the scale (a ratio below 1 becomes 0)
        # — confirm this is intended.
        return [x * int(getScale(srcPage)) for x in getDestDim()]

    # Both files are closed deterministically; the input stays open until
    # the output is written because pages are read during write().
    with open(self.infile, "rb") as in_stream:
        reader = PdfFileReader(in_stream)
        writer = PdfFileWriter(
            documentInfo=reader.getDocumentInfo(), authors=["Vimala"])

        srcPage = reader.getPage(0)
        height = getSrcDim(srcPage)[const.HEIGHT]
        totalHeight = self.opts.count * height
        destPage = writer.addBlankPage(*getScaledDestDim(srcPage))
        print(totalHeight)
        fitScale = getScaledDestDim(srcPage)[const.HEIGHT] / float(totalHeight)
        print(fitScale)

        # (source page index, vertical offset on the destination page)
        # NOTE(review): page index 2 is skipped — confirm this is intended.
        placements = (
            (0, height * 2 - .2 * height),
            (1, height - .1 * height),
            (3, 0),
        )
        for page_index, offset in placements:
            srcPage = reader.getPage(page_index)
            srcPage.scale(fitScale, fitScale)
            destPage.mergeTranslatedPage(srcPage, 0, offset)

        with open(self.outfile, "wb") as out_stream:
            writer.write(out_stream)
def doc_overlay(request, document_uuid, lot_number, qrcode=True):
    """Return a stored document with a lot banner and QR code on every page.

    The document's file is fetched from its URL, a one-page overlay is drawn
    (centred banner near the top edge, QR code pointing at the report's
    absolute URL) and merged onto each page. The view is recorded through
    ``action.send`` before the PDF response is returned.

    ``qrcode`` is accepted but not consulted; the QR code is always drawn.
    """
    report = Report.objects.get(lot_number=lot_number)
    document = Document.objects.get(uuid=document_uuid)

    response = HttpResponse(content_type='application/pdf')
    response['Content-Disposition'] = 'filename="inspection_report.pdf"'

    # read your existing PDF
    source_bytes = urlopen(Request(document.file.url)).read()
    existing_pdf = PdfFileReader(StringIO(source_bytes))
    page_count = existing_pdf.getNumPages()
    media_box = existing_pdf.getPage(0).mediaBox
    width = float(media_box.getWidth())
    height = float(media_box.getHeight())

    banner = "%s LOT # %s / %s (doc# %s)" % (
        settings.PDF_COMPANY_SHORT_NAME,
        report.lot_number,
        str(report.created_at.date()),
        document.uuid,
    )
    qr_value = "%s%s" % (request.META['HTTP_HOST'], report.get_absolute_url())

    # create a new PDF with Reportlab
    overlay_buffer = StringIO()
    p = canvas.Canvas(overlay_buffer, pagesize=letter)
    p.setFillColorRGB(0, 0, 0)
    p.setFont("Helvetica", 7)
    p.drawCentredString(width / 2.0, height - 9.0, banner)
    createBarcodeDrawing('QR', value=qr_value).drawOn(p, 175 * mm, 10 * mm)
    p.save()

    # move to the beginning of the StringIO buffer
    overlay_buffer.seek(0)
    overlay_pdf = PdfFileReader(overlay_buffer)

    # add the "watermark" (which is the new pdf) on every existing page
    outputPDF = PdfFileWriter()
    for index in range(0, page_count):
        page = existing_pdf.getPage(index)
        page.mergePage(overlay_pdf.getPage(0))
        outputPDF.addPage(page)

    # finally, write "output" into the HTTP response
    outputPDF.write(response)
    action.send(request.user, verb="viewed document",
                action_object=document, target=report)
    return response
def ProcessPDF(filename, largeformatsize):
    """
    Open a PDF to perform a page count and check for corrupt files.
    Count small pages and large pages.

    Args:
        filename: Path of the PDF file.
        largeformatsize: Area threshold in square inches; a page whose art
            box area exceeds it is counted as large.

    Returns:
        ``(True, counts)`` with keys ``npages``, ``nlargepages`` and
        ``nsmallpages`` on success, or ``(False, message)`` when the file
        cannot be opened or read.
    """
    pdf_count = {
        "npages": 0,
        "nlargepages": 0,
        "nsmallpages": 0,
    }
    filestream = None
    try:
        try:
            # Open file
            filestream = open(filename, "rb")
            # load into pypdf
            pdfFile = PdfFileReader(filestream)
            # First access into pdf contents
            # Raises decryption/security exceptions here
            npages = pdfFile.getNumPages()
        except IOError:
            err = '<{}> :: Could not open file. Check permissions?'.format(filename)
            return False, err
        except Exception as e:
            err = '<{}> :: {}'.format(filename, e)
            return False, err

        for ii in range(npages):
            pdf_count["npages"] += 1
            # Calculate page dimensions and pricing category.
            # Dimensions are returned by pyPDF in Points (72 points = 1 inch)
            # See:
            #   http://en.wikipedia.org/wiki/Point_(typography)
            #   http://en.wikipedia.org/wiki/Paper_size
            box = pdfFile.getPage(ii).artBox  # looked up once per page
            width = (box.getUpperRight_x()/72 - box.getLowerLeft_x()/72)
            height = (box.getUpperRight_y()/72 - box.getLowerLeft_y()/72)
            if (width * height) > largeformatsize:
                pdf_count["nlargepages"] += 1
            else:
                pdf_count["nsmallpages"] += 1
        return True, pdf_count
    finally:
        # The handle was previously never closed.
        if filestream is not None:
            filestream.close()
def pdf(request, region, name, period_name):
    """Generate and return a PDF for the given tax form.

    The form's ``pdf_fields`` are drawn on a ReportLab canvas from the first
    page of the tallied filing and merged onto page 0 of the form's PDF.
    Each field spec is ``(x, y, attribute_name[, step])``:

    * ``Decimal`` values are split at the decimal point, dollars
      right-aligned before ``x`` and cents drawn after it;
    * specs with a ``step`` draw one character every ``step`` units;
    * anything else is drawn as plain text at ``(x, y)``.

    Raises:
        Http404: when the form or the period is unknown.
    """
    company = request.company
    form = registry.get_form(region, name)
    period = get_period(period_name, None)
    if form is None or period is None:
        raise Http404

    filing = form.tally(company, period)
    c = canvas.Canvas("hello.pdf")

    for spec in form.pdf_fields:
        # ``field_name`` avoids shadowing the ``name`` parameter.
        x, y, field_name = spec[:3]
        value = getattr(filing.pages[0], field_name, None)
        if value is None:
            value = u''
        if isinstance(value, Decimal):
            dollars, cents = unicode(value).split('.')
            c.drawString(x - 8 - c.stringWidth(dollars), y, dollars)
            c.drawString(x + 4, y, cents)
        elif len(spec) > 3:
            value = unicode(value)
            step = spec[3]
            for i, char in enumerate(value):
                c.drawString(x + i * step, y, char)
        else:
            value = unicode(value)
            c.drawString(x, y, value)

    c.showPage()

    datadir = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'data')
    pdfpath = os.path.join(datadir, form.filename)

    pdfdata = StringIO()
    # The form file is closed once the merged page has been written.
    with open(pdfpath, 'rb') as taxform_stream:
        taxform = PdfFileReader(taxform_stream)
        rendering = PdfFileReader(StringIO(c.getpdfdata()))
        output = PdfFileWriter()
        watermark = rendering.getPage(0)
        page1 = taxform.getPage(0)
        page1.mergePage(watermark)
        output.addPage(page1)
        output.write(pdfdata)

    return HttpResponse(pdfdata.getvalue(), content_type='application/pdf')
def AddWatermark(self, watermark, filein, fileout):
    """Write page 0 of ``filein`` to ``fileout`` with a diagonal watermark.

    The watermark page is produced with ReportLab in ``temp_watermark.pdf``
    and removed afterwards. ``filein`` is deleted once the output has been
    written.

    NOTE(review): ``watermark`` is not used; the text is the fixed string
    "A WATERMARK!". Confirm whether it should be the drawn text.
    """
    # Use reportlab to create a PDF that will be used
    # as a watermark on another PDF.
    c = canvas.Canvas("temp_watermark.pdf")
    c.setFont("Courier", 60)
    # This next setting will make the text of our
    # watermark gray, nice touch for a watermark.
    c.setFillGray(0.5, 0.5)
    # Set up our watermark document. Our watermark
    # will be rotated 45 degrees from the direction
    # of our underlying document.
    c.saveState()
    c.translate(500, 100)
    c.rotate(45)
    c.drawCentredString(0, 0, "A WATERMARK!")
    c.drawCentredString(0, 300, "A WATERMARK!")
    c.drawCentredString(0, 600, "A WATERMARK!")
    c.restoreState()
    c.save()

    output = PdfFileWriter()
    # Both inputs are closed before the os.remove() calls below; removing a
    # file that is still open fails on some platforms.
    with open(filein, "rb") as source_stream:
        with open("temp_watermark.pdf", "rb") as watermark_stream:
            # Read in the PDF that will have the watermark applied to it.
            input1 = PdfFileReader(source_stream)
            # Just to demo this function from pyPdf.
            # If the PDF has a title, this will print it out.
            print("title = %s" % (input1.getDocumentInfo().title))
            page1 = input1.getPage(0)
            # Read in the file created above by ReportLab for our watermark.
            twatermark = PdfFileReader(watermark_stream)
            # Apply the watermark by merging the two PDF pages.
            page1.mergePage(twatermark.getPage(0))
            output.addPage(page1)
            # Just to demo this function from pyPdf: page count of the input.
            print("watermarked_pdf.pdf has %s pages." % input1.getNumPages())
            # write the output of our new, watermarked PDF.
            with open(fileout, "wb") as outputStream:
                output.write(outputStream)

    os.remove("temp_watermark.pdf")
    os.remove(filein)
def main(argv=None):
    """Copy one page, or an inclusive range of pages, of a PDF to a new file.

    arguments:
        input_file_name output_file_name first_page_number [last_page_number]

    Page numbers are passed to ``getPage`` unchanged. ``argv`` defaults to
    ``sys.argv[1:]``.
    """
    if argv is None:
        argv = sys.argv[1:]
    first = int(argv[2])
    last = int(argv[3]) if len(argv) >= 4 else first

    # Input and output are closed deterministically; the input stays open
    # until the output is written because pages are read during write().
    with open(argv[0], "rb") as in_stream:
        reader = PdfFileReader(in_stream)
        output = PdfFileWriter()
        output.addPage(reader.getPage(first))
        for i in range(first + 1, last + 1):
            output.addPage(reader.getPage(i))
        with open(argv[1], "wb") as out_stream:
            output.write(out_stream)
def Merge():
    """Merge the pages of ``input.pdf`` pairwise into ``output.pdf``.

    Pages are processed from the end: each page has its predecessor merged
    onto it and is added to the output. With an odd page count the first
    page has no partner and is added unmerged.
    """
    from pyPdf import PdfFileWriter, PdfFileReader

    output_pdf = PdfFileWriter()
    with open(r'input.pdf', 'rb') as readfile:
        input_pdf = PdfFileReader(readfile)
        total_pages = input_pdf.getNumPages()
        for page in xrange(total_pages - 1, -1, -2):
            print(page)
            page1 = input_pdf.getPage(page)
            # Guard the partner index: for page 0 it would be -1, which
            # silently wraps around to the last page.
            if page > 0:
                page1.mergePage(input_pdf.getPage(page - 1))
            output_pdf.addPage(page1)
        # Written while the input is still open.
        with open(r'output.pdf', "wb") as writefile:
            output_pdf.write(writefile)
def download_pdf(url):
    """Download a PDF and re-save it under ``pdf_folder/<basename of url>``.

    Returns:
        False when the URL answers with HTTP 404 (nothing is written),
        True once the file has been saved.
    """
    # Only the status code is needed from this request; close the streaming
    # response so its connection is released instead of leaking.
    probe = requests.get(url, stream=True)
    try:
        code = probe.status_code
    finally:
        probe.close()
    if code == 404:
        return (False)

    remoteFile = urlopen(Request(url)).read()
    pdfFile = PdfFileReader(StringIO(remoteFile))

    writer = PdfFileWriter()
    for pageNum in xrange(pdfFile.getNumPages()):
        writer.addPage(pdfFile.getPage(pageNum))

    with open('pdf_folder/%s' % basename(url), "wb") as outputStream:
        writer.write(outputStream)
    return (True)
def getDocData(Fdoc):
    """Extract the text of every page of a PDF into a UTF-8 text file.

    The output path is ``Fdoc`` with ``input_PDF`` replaced by
    ``output_text`` and the last four characters (the extension) replaced
    by ``.txt``.
    """
    from pyPdf import PdfFileReader
    import codecs

    name = Fdoc.replace("input_PDF", "output_text")[:-4] + ".txt"
    with open(Fdoc, "rb") as source:
        reader = PdfFileReader(source)
        g = codecs.open(name, encoding='utf-8', mode='wb')
        try:
            # One pass over all pages. The previous nested for/while loops
            # never ran for single-page documents, leaving the output empty.
            for index in range(reader.getNumPages()):
                g.write(reader.getPage(index).extractText())
        finally:
            g.close()
def split_pages(input_filename, prefix=None):
    """
    Splits up a PDF file into single page PDF files.

    Each page is written to ``<prefix><3-digit page index>.pdf``. When
    ``prefix`` is None a temporary directory is created and ``page-`` inside
    it is used as the prefix.

    Returns the directory (a path string) where the resulting PDF files are
    located. It is the caller's responsibility to clean the disk when the
    files are no longer necessary, e.g. by removing that directory tree.

    On any error the exception is logged, a temporary directory created here
    is removed, and the exception is re-raised.
    """
    output_dir = None
    try:
        if prefix is None:
            # create a temporary directory
            output_dir = mkdtemp('donomo')
            prefix = os.path.join(output_dir, 'page-')

        # open PDF; closed again once every page has been written
        with open(input_filename, 'rb') as input_stream:
            pdf_input = PdfFileReader(input_stream)

            # iterate over pages in the input PDF
            for i in xrange(pdf_input.getNumPages()):
                # create a one-page pdf writer
                pdf_output = PdfFileWriter()
                pdf_output.addPage(pdf_input.getPage(i))

                # save it in a new file
                page_filename = '%s%03d.pdf' % (prefix, i)
                with open(page_filename, "wb") as page_filestream:
                    pdf_output.write(page_filestream)

        # return the directory name to the caller
        return os.path.dirname(prefix)

    except Exception as e:
        logging.error(str(e))

        # delete a temporary directory along with all its contents
        if output_dir:
            rmtree(output_dir)
        raise
def createForm(info):
    """Pre-fill the form template of a class with the given entry data.

    Args:
        info: Dict of field name -> text. ``'Luokka'`` selects the template
            and placement table, ``'Koiran sukupuoli'`` decides which of the
            ``'Uros'`` / ``'Narttu'`` boxes is marked (the key is added to
            ``info``), and ``'Rekisterinumero'`` names the output file.

    Every field with an entry in ``placements[luokka]`` is drawn at that
    position (shifted vertically by ``offsets[luokka]``) on a stamp page,
    which pdftk then stamps onto ``pohjat/<luokka>.pdf``.
    """
    luokka = info['Luokka']
    if info['Koiran sukupuoli'] == 'uros':
        info['Uros'] = 'x'
    elif info['Koiran sukupuoli'] == 'narttu':
        info['Narttu'] = 'x'

    packet = StringIO.StringIO()
    c = canvas.Canvas(packet, pagesize=A4)
    class_placements = placements[luokka]
    for k, text in info.items():
        if k in class_placements:
            x, y = class_placements[k]
            y = y + offsets[luokka]
            c.drawString(x * cm, y * cm, text)
    c.save()
    packet.seek(0)

    new_pdf = PdfFileReader(packet)
    output = PdfFileWriter()
    output.addPage(new_pdf.getPage(0))
    with open("/tmp/stamp.pdf", "wb") as outputStream:
        output.write(outputStream)

    # The directory usually exists already; only filesystem errors are
    # ignored here (previously a bare ``except`` swallowed everything).
    try:
        os.mkdir("esitaytetyt")
    except OSError:
        pass

    # Built once so the echoed command and the executed one cannot diverge.
    command = [
        'pdftk', "pohjat/%s.pdf" % luokka,
        'stamp', '/tmp/stamp.pdf',
        'output', 'esitaytetyt/%s.pdf' % info['Rekisterinumero'].replace('/', '-')
    ]
    print(" ".join(command))
    call(command)
def add_omr_marks(self, pdf_data, is_latest_document):
    """Return ``pdf_data`` with an OMR mark layer merged onto its pages.

    :param pdf_data: content of the PDF document
    :param is_latest_document: forwarded to ``_compute_marks`` for the last
        marked page only; every other page gets ``False``
    :returns: content of the resulting PDF

    Marks go on every page when ``self.omr_single_sided`` is set, otherwise
    only on even-indexed pages (0, 2, ...), i.e. the recto sides.
    """
    # Documentation
    # http://meteorite.unm.edu/site_media/pdf/reportlab-userguide.pdf
    # https://pythonhosted.org/PyPDF2/PdfFileReader.html
    # https://stackoverflow.com/a/17538003
    # https://gist.github.com/kzim44/5023021
    # https://www.blog.pythonlibrary.org/2013/07/16/
    # pypdf-how-to-write-a-pdf-to-memory/
    self.ensure_one()

    pdf_buffer = StringIO.StringIO()
    pdf_buffer.write(pdf_data)
    existing_pdf = PdfFileReader(pdf_buffer)
    output = PdfFileWriter()
    total_pages = existing_pdf.getNumPages()

    def lastpair(a):
        # Largest index below ``a`` that receives marks.
        b = a - 1
        if self.omr_single_sided or b % 2 == 0:
            return b
        return lastpair(b)

    # the final OMR mark goes on the last marked (recto) page
    latest_omr_page = lastpair(total_pages)

    for page_number in range(total_pages):
        page = existing_pdf.getPage(page_number)
        # only even-indexed pages (recto) are marked when double sided;
        # compare by value, not identity
        if self.omr_single_sided or page_number % 2 == 0:
            is_latest_page = is_latest_document and \
                page_number == latest_omr_page
            marks = self._compute_marks(is_latest_page)
            omr_layer = self._build_omr_layer(marks)
            page.mergePage(omr_layer)
        output.addPage(page)

    out_buffer = StringIO.StringIO()
    output.write(out_buffer)
    return out_buffer.getvalue()
def pdf2Text(pdf):
    """Extract the text of a PDF given as raw content.

    The content is written to ``/tmp/temp_crawler.pdf`` and read back with
    pyPdf.  The text of all pages is concatenated, non-breaking spaces are
    turned into spaces, runs of whitespace are collapsed to single spaces
    and the result is passed through ``unidecode``.

    Returns ``" "`` (after printing a message) when the PDF cannot be read
    or its text cannot be normalised.
    """
    from pyPdf import PdfFileReader

    with open("/tmp/temp_crawler.pdf", "wb") as file_pdf:
        file_pdf.write(pdf)
    try:
        with open("/tmp/temp_crawler.pdf", "rb") as stream:
            reader = PdfFileReader(stream)
            content = "".join(
                reader.getPage(i).extractText() + "\n"
                for i in range(reader.getNumPages()))
        content = u" ".join(content.replace(u"\xa0", u" ").strip().split())
    except Exception:
        print("[ Error con el PDF ]")
        return " "
    return unidecode(content)
def _merge_pdf(self, documents):
    """Merge PDF files into one.

    :param documents: list of path of pdf files
    :returns: path of the merged pdf (a new temporary file that the caller
        has to remove)
    """
    writer = PdfFileWriter()
    # The source streams have to be closed *after* PdfFileWriter's call to
    # write(), so they are collected here and closed in the finally clause,
    # also when merging fails half-way.
    streams = []
    try:
        for document in documents:
            pdfreport = open(document, 'rb')
            streams.append(pdfreport)
            reader = PdfFileReader(pdfreport)
            for page in range(reader.getNumPages()):
                writer.addPage(reader.getPage(page))

        merged_file_fd, merged_file_path = tempfile.mkstemp(
            suffix='.pdf', prefix='report.merged.tmp.')
        # Binary mode: PDF content must not go through text-mode newline
        # translation.
        with closing(os.fdopen(merged_file_fd, 'wb')) as merged_file:
            writer.write(merged_file)
    finally:
        for stream in streams:
            stream.close()
    return merged_file_path
def remove_pages(pdf_file, max_pages=1):
    """Truncate the PDF at ``pdf_file`` to at most its first ``max_pages`` pages.

    The shortened document is written to ``pdf_file + '.tmp'`` which then
    replaces the original file.  Returns ``pdf_file``.
    """
    output = PdfFileWriter()
    tmp_file = pdf_file + '.tmp'
    # Binary mode on both sides: PDF content is not text.
    with open(pdf_file, 'rb') as source:
        reader = PdfFileReader(source)
        for i in xrange(min(max_pages, reader.getNumPages())):
            output.addPage(reader.getPage(i))
        # Written while the source is still open.
        with open(tmp_file, 'wb') as target:
            output.write(target)
    os.remove(pdf_file)
    os.rename(tmp_file, pdf_file)
    return pdf_file
def create_source_pdf(self, cr, uid, ids, data, report_xml, context=None):
    """Return the report as a ``(content, format)`` pair.

    ``_create_source`` is asked for the individual documents first.  A
    single result is returned unchanged, several results are concatenated
    into one PDF (all of them must be in 'pdf' format), and with no result
    the work is delegated to ``create_single_pdf``.

    Raises ``osv.except_osv`` when several results are not all PDF.
    """
    results = self._create_source(cr, uid, ids, data, report_xml, context)
    if not results:
        return self.create_single_pdf(cr, uid, ids, data, report_xml, context)
    if len(results) == 1:
        return results[0]

    if deferred:
        deferred.set_status(_('Concatenating single documents'))
    if any(entry[1] != 'pdf' for entry in results):
        raise osv.except_osv(_('Error!'),
                             _('Unsupported combination of formats!'))

    merged = PdfFileWriter()
    for entry in results:
        reader = PdfFileReader(StringIO(entry[0]))
        for number in range(reader.getNumPages()):
            merged.addPage(reader.getPage(number))
    buf = StringIO()
    merged.write(buf)
    return buf.getvalue(), results[0][1]
def kesit(dosya_yolu , sayfa1 , sayfa2=0):
    """Copy pages ``sayfa1`` up to (excluding) ``sayfa2`` of a PDF to ``data.pdf``.

    Takes the part of the pdf file from sayfa1 to sayfa2; when sayfa2 is not
    given (0) everything from sayfa1 to the end is taken.  Page numbers are
    the zero-based indexes handed to ``getPage``.  A negative ``sayfa1`` is
    replaced by its absolute value, and when ``sayfa2 <= sayfa1`` only page
    ``sayfa1`` is copied.

    Nothing is raised: a message telling whether the PDF was created is
    printed instead.
    """
    try:
        with open(dosya_yolu, "rb") as kaynak_dosya:
            kaynak = PdfFileReader(kaynak_dosya)
            islem = PdfFileWriter()
            if sayfa1 < 0:
                # the original computed (-1)*sayfa1 but discarded the result
                sayfa1 = -sayfa1
            if sayfa2 == 0:
                sayfa2 = kaynak.getNumPages()
            if sayfa2 <= sayfa1:
                sayfa2 = sayfa1 + 1
            for i in range(int(sayfa1), int(sayfa2)):
                islem.addPage(kaynak.getPage(i))
            # Opened only once all pages are collected, so a bad page range
            # does not truncate an existing data.pdf.
            with open("data.pdf", "wb") as hedef:
                islem.write(hedef)
        print("»» pdf oluşturuldu")
    except Exception:
        print("»» pdf oluşturulamadı")
def genfile(srcfile, desfile, startpage, endpage):
    """Split a PDF: write pages ``startpage``..``endpage`` of ``srcfile`` to ``desfile``.

    :param srcfile: path of the source PDF
    :param desfile: path of the PDF to create; a missing parent directory
        is created
    :param startpage: first page to copy (1-based)
    :param endpage: last page to copy (1-based, inclusive)
    :return: None
    """
    output = PdfFileWriter()
    filepath = os.path.dirname(desfile)
    with open(srcfile, "rb") as src_stream:
        src = PdfFileReader(src_stream)
        # An empty directory part means "current directory":
        # os.makedirs('') would raise, so there is nothing to create.
        if filepath and not os.path.exists(filepath):
            os.makedirs(filepath)
        for i in range(startpage - 1, endpage):
            output.addPage(src.getPage(i))
        # Written while the source is still open.
        with open(desfile, "wb") as des:
            output.write(des)
def fisk_pdf(pdffile, directory):
    """Split ``pdffile`` into pages and write a Markdown notes skeleton.

    ``name`` is ``pdffile`` without its last four characters.  For every
    page ``j`` (counted from 1), below ``<directory>/<name>/``:

    * ``images/file_<j>.pdf`` receives the single page,
    * ``pdftotext`` extracts its text to ``texts/file_<j>.txt``,
    * ``iconv`` reduces that to ASCII in ``texts/file_<j>_ascii.txt``,
    * ``convert`` renders ``images/file_<j>.png``,
    * ``<name>.md`` gets a page heading, the image link, the page text in
      chunks of 80 characters and a "Notes" heading.

    The ``images`` and ``texts`` directories are not created here.  The
    external tools are run through ``os.system`` with unquoted paths, so
    paths containing spaces or shell metacharacters are not supported.
    """
    name = pdffile[:-4]
    n = 80  # number of characters per text line in the notes file
    with open(os.path.join(directory, name, name + ".md"), "w") as g:
        print("# Notes on: ", file=g)
        with open(pdffile, "rb") as source:
            reader = PdfFileReader(source)
            print("Number of pages %s" % reader.getNumPages())
            pages = [reader.getPage(i)
                     for i in range(0, reader.getNumPages())]
            for j, p in enumerate(pages, 1):
                output = PdfFileWriter()
                output.addPage(p)
                print("### Page " + str(j), file=g)
                imagefile = os.path.join(directory, name, "images",
                                         "file_" + str(j) + ".pdf")
                imagefilePNG = os.path.join(directory, name, "images",
                                            "file_" + str(j) + ".png")
                # binary mode: this is PDF content, not text
                with open(imagefile, "wb") as f:
                    output.write(f)
                textfile = os.path.join(directory, name, "texts",
                                        "file_" + str(j) + ".txt")
                textfileASCII = os.path.join(directory, name, "texts",
                                             "file_" + str(j) + "_ascii.txt")
                # extracts text from the pdffile
                cmd = "pdftotext " + imagefile + " " + textfile
                os.system(cmd)
                cmd = "iconv -c -f utf8 -t ascii " + textfile + " > " + textfileASCII
                os.system(cmd)
                cmd = "convert -density 100 " + imagefile + " -quality 100 " + imagefilePNG
                os.system(cmd)
                print("![](./images/file_" + str(j) + ".png)", file=g)
                print("", file=g)
                print("### Text from page " + str(j), file=g)
                with open(textfileASCII, "r") as t:
                    txt = t.read()
                # NOTE(review): the third replace had an empty search string,
                # which is a no-op; the form feed that pdftotext puts at page
                # breaks is assumed to be what was meant - confirm.
                txt = txt.replace('\n', ' ').replace('\r', '').replace('\x0c', '')
                chunks = [txt[i:i+n] for i in range(0, len(txt), n)]
                for c in chunks:
                    print(" " + c, file=g)
                print("", file=g)
                print("### Notes on page " + str(j), file=g)
def Split(d):
    """Split every input PDF into one output PDF per page.

    ``d["args"]`` holds the output prefix followed by the input file names
    and ``d["-n"]`` the number given to the first output file.  Outputs are
    named ``<prefix><number>.pdf``; the number keeps counting across all
    input files.  ``Error`` is called when fewer than two arguments are
    given.
    """
    args = d["args"]
    if len(args) < 2:
        Error("split needs at least two arguments")
    prefix, count = args[0], d["-n"]
    for filename in args[1:]:
        with open(filename, "rb") as source:
            reader = PdfFileReader(source)
            numpages = reader.getNumPages()
            if numpages == 0:
                # nothing to write (log10(0) used to raise here)
                continue
            # Get a format string that allows the integer numbers to have
            # leading zeros so that they will sort in natural order.  The
            # width is taken from the largest number used for this file,
            # which also covers a one-page file and a non-zero start number.
            width = len(str(count + numpages - 1))
            fmt = "%%0%dd" % width
            for i in range(numpages):
                output_file = prefix + (fmt % count) + ".pdf"
                writer = GetOutputWriter(output_file, d)
                page = reader.getPage(i)
                Sentinel(d)
                writer.addPage(page)
                # opened once and always closed
                with open(output_file, "wb") as output_stream:
                    writer.write(output_stream)
                count += 1
def test1(self):
    "Test generating several 'n-up' docs, n = (m**2) / 2..."
    for src_path in ("samples/test-a4-l.pdf", "samples/test-a4-p.pdf"):
        stem = os.path.splitext(src_path)[0]
        for n in (2, 8, 18):
            dst_path = os.path.join(".", stem + "-%dup.pdf" % n)
            generateNup(src_path, n, dst_path, verbose=False)

            # the n-up document needs ceil(source pages / n) pages
            src_count = PdfFileReader(open(src_path, "rb")).getNumPages()
            result = PdfFileReader(open(dst_path, "rb"))
            dst_count = result.getNumPages()
            self.assertEqual(dst_count, math.ceil(src_count / float(n)))

            # each output page carries the numbers of the source pages
            # that were placed on it
            for pn in range(dst_count):
                found = result.getPage(pn).extractText().split()
                numbers = [str(num) for num in range(src_count)]
                self.assertEqual(found, group(numbers, n)[pn])
def append_file(out, input_file_name):
    """Shrink the media box of a PDF's first page and append the page to ``out``.

    Only page 0 of ``input_file_name`` is used.  Which corners of the media
    box are moved depends on the module-level flag ``backpage``.  Nothing is
    returned; progress is printed.

    The input file is opened here and not closed.
    NOTE(review): presumably it has to stay open until ``out`` is written -
    confirm before adding a close.
    """
    print("Open %s" % input_file_name)
    input1 = PdfFileReader(file(input_file_name, "rb"))
    page = input1.getPage(0)
    # The four values below are not used anywhere further down.
    upperleft_x = page.mediaBox.getUpperLeft_x()
    upperleft_y = page.mediaBox.getUpperLeft_y()
    upperright_x = page.mediaBox.getUpperRight_x()
    upperright_y = page.mediaBox.getUpperRight_y()
    # print(page.mediaBox)
    # print(page.mediaBox.getUpperLeft_x())
    # print(page.mediaBox.getUpperLeft_y())
    # print(page.mediaBox.getUpperRight_x())
    # print(page.mediaBox.getUpperRight_y())
    # print(page.mediaBox.lowerLeft)
    # print(page.mediaBox.upperLeft)
    # print(page.mediaBox.lowerRight)
    # print(page.mediaBox.upperRight)
    if backpage:
        # set the crop area for the reverse side of the notice
        page.mediaBox.upperRight = (
            float(page.mediaBox.getUpperRight_x()),
            float(page.mediaBox.getUpperRight_y()) - (float(page.mediaBox.getUpperLeft_y()) * 0.325))
        # The getters below run after the assignment above, so they read
        # the media box as already modified; do not reorder.
        page.mediaBox.lowerRight = (
            float(page.mediaBox.getLowerRight_x()) * 0.7,
            float(page.mediaBox.getUpperRight_y()) - (float(page.mediaBox.getUpperLeft_y()) * 0.48))
    else:
        # set the crop area for the main side of the notice
        page.mediaBox.lowerRight = (
            float(page.mediaBox.getLowerRight_x()) * 0.7,
            float(page.mediaBox.getUpperLeft_y()) - (float(page.mediaBox.getUpperLeft_y()) * 0.325))
    #print(page.mediaBox)
    out.addPage(page)
    print('Croped and added to output file')
def test_rotation_different_to_unrotated(self):
    # A build rotated by 180 degrees must not equal the unrotated build.

    # The marker text has to be present in the source document.
    source = PdfFileReader(self.get_pdf_stream())
    assert 'Test' in source.getPage(0).extractText()

    response = self.app.get('/')
    self.assertEqual(response.status_code, 200)
    upload = {'file': (self.get_pdf_stream(), 'test.pdf')}
    response = self.app.post('/handleform', data=upload)

    # Build once without and once with rotation, then compare the bytes.
    plain = self.combine_and_download().data
    rotated = self.combine_and_download(rotate='180').data
    self.assertTrue(plain != rotated)
    self.clean_up()
def _combine_pdf_files(self, tmp_folder_name, output_report):
    """Combine the watermarked PDF files of a folder into one report.

    Ghostscript concatenates every file matching
    ``<tmp_folder_name>*water*.pdf`` into ``<tmp_folder_name>temp.pdf``.
    All pages of that file except the last one are then written to
    ``tmp_folder_name + output_report``.

    Both paths are built by plain string concatenation, and the command is
    run through the shell with unquoted paths.
    """
    output_path = tmp_folder_name + output_report
    output_temp_path = tmp_folder_name + 'temp.pdf'
    cmd = """gs -q -dNOPAUSE -sDEVICE=pdfwrite -sOUTPUTFILE=%s \
        -dBATCH %s*water*.pdf""" % (output_temp_path, tmp_folder_name)
    os.system(cmd)

    # remove the last empty page
    with open(output_temp_path, 'rb') as temp_file:
        input_stream = PdfFileReader(temp_file)
        output_stream = PdfFileWriter()
        for i in range(input_stream.getNumPages() - 1):
            output_stream.addPage(input_stream.getPage(i))
        # Written while the ghostscript result is still open.
        with open(output_path, 'wb') as out_stream:
            output_stream.write(out_stream)
def parse_pdf(self, pdfname):
    """Fill ``self.config`` from the XML examples found in a PDF.

    When ``pdfname`` does not exist it is first downloaded from the
    module-level ``pdfurl``.  For every page whose text contains a
    ``<methodCall>`` element with a ``UDNS_*`` method name (names matching
    ``[Cc]onnect`` are skipped):

    * a config section named after the method is created if missing,
    * every ``<value><A>B</C></value>`` match is stored as option ``A+B``
      with value ``C``, and the option names are recorded, in order, under
      ``order``,
    * ``fault`` is set to True when the page contains ``<fault>``,
    * ``array`` is set to the first tag name found inside
      ``<array><data><value>`` of a ``<methodResponse>`` on that page.
    """
    if not os.path.exists(pdfname):
        print('Missing API documentation, downloading from: %s' % pdfurl)
        urllib.urlretrieve(pdfurl, pdfname)
    print('Compiling API methods...')

    # compiled once instead of once per page
    patt = re.compile(r'<value><([^\s]+)>([^\s]+)</([^\s]+)></value>')

    with open(pdfname, 'rb') as pdf_file:
        apidoc = PdfFileReader(pdf_file)
        for p in range(0, apidoc.getNumPages()):
            doc = apidoc.getPage(p).extractText()
            mCall = re.search(r'<methodCall>.+</methodCall>', doc)
            mResp = re.search(r'<methodResponse>.+</methodResponse>', doc)
            section = None
            if mCall:
                xml = mCall.group()
                method = re.search(r'<methodName>(UDNS_\w+)</methodName>', xml)
                if not method:
                    continue
                section = method.group(1)
                if re.search(r'[Cc]onnect', section):
                    continue  # exclude connection-related methods
                if section not in self.config._sections:
                    self.config.add_section(section)
                order = []
                for m in patt.findall(xml):
                    key = '%s+%s' % m[0:2]
                    self.config.set(section, key, m[2])
                    order.append(key)
                if order:
                    self.config.set(section, 'order', ','.join(order))
            if section and re.search(r'<fault>', doc):
                self.config.set(section, 'fault', True)
            if section and mResp:
                xml = mResp.group()
                record = re.search(r'<array>\s*<data>\s*<value>\s*<([_\w]+)>', xml)
                if record:
                    self.config.set(section, 'array', record.group(1))
def print_danfe(inv):
    """Generate the DANFE PDF of invoice ``inv`` and return its content.

    The ``ProcNFe_*`` class matching ``inv.nfe_version`` ('1.10', '2.00' or
    '3.10') is loaded with ``<access key>-nfe.xml`` from the directory given
    by ``monta_caminho_nfe`` (its ``tmp/`` sub-directory unless the invoice
    state is 'open', 'paid' or 'sefaz_cancelled').  ``DANFE`` writes the PDF
    below ``/tmp/``, and that file is read back and returned as a string.

    Raises ``ValueError`` for any other ``nfe_version``.
    """
    if inv.nfe_version == '1.10':
        from pysped.nfe.leiaute import ProcNFe_110
        procnfe = ProcNFe_110()
    elif inv.nfe_version == '2.00':
        from pysped.nfe.leiaute import ProcNFe_200
        procnfe = ProcNFe_200()
    elif inv.nfe_version == '3.10':
        from pysped.nfe.leiaute import ProcNFe_310
        procnfe = ProcNFe_310()
    else:
        # Used to surface later as an UnboundLocalError on ``procnfe``.
        raise ValueError(
            "Unsupported NF-e version: %r" % (inv.nfe_version,))

    file_xml = monta_caminho_nfe(inv.company_id, inv.nfe_access_key)
    if inv.state not in ('open', 'paid', 'sefaz_cancelled'):
        file_xml = os.path.join(file_xml, 'tmp/')
    procnfe.xml = os.path.join(file_xml, inv.nfe_access_key + '-nfe.xml')

    danfe = DANFE()
    danfe.logo = add_backgound_to_logo_image(inv.company_id)
    danfe.NFe = procnfe.NFe
    danfe.leiaute_logo_vertical = inv.company_id.nfe_logo_vertical
    danfe.protNFe = procnfe.protNFe
    danfe.caminho = "/tmp/"
    danfe.gerar_danfe()
    path = danfe.caminho + danfe.NFe.chave + '.pdf'

    output = PdfFileWriter()
    s = StringIO()
    # The generated file stays open until the copy has been written.
    with open(path, "rb") as pdf_file:
        pdf = PdfFileReader(pdf_file)
        for i in range(pdf.getNumPages()):
            output.addPage(pdf.getPage(i))
        output.write(s)
    str_pdf = s.getvalue()
    s.close()
    return str_pdf
def pdf_watermark_fast(self, pathname, Wm_f, wt1='', **kwargs):
    """Watermark ``pathname`` via ``watter_marker.op_w_input`` and return ``Wm_f``.

    :param pathname: passed as first argument to ``op_w_input``
    :param Wm_f: passed as third argument to ``op_w_input`` and returned
        unchanged (described below as a full address)
    :param wt1: path of the watermark PDF; when empty one is chosen here
    :param kwargs: ``url_wtm`` is read as the watermark URL
    """
    try:
        url_watermark = kwargs['url_wtm']
    except:
        # NOTE(review): a missing 'url_wtm' is swallowed here, yet
        # url_watermark is used unconditionally just below, so the call
        # would fail there with an unbound name instead.
        pass
    from pyPdf import PdfFileWriter, PdfFileReader
    # fo=os.getcwd()
    # CurrentDir=os.path.dirname(os.path.realpath(__file__))
    import watter_marker
    # File-name form of the URL: "." becomes "_", then "://" becomes "__".
    url_watermark2 = url_watermark.replace(".", "_")
    url_watermark2 = url_watermark2.replace("://", "__")
    # CurrentDir=os.path.dirname(os.path.realpath(__file__)).replace('\\','/')
    if wt1 == '':
        # NOTE(review): when the per-URL "watermarker_slow" file is missing
        # it is generated; when it exists, "watermarker_fast.pdf" is used
        # instead of it - confirm that this is intended.
        if not os.path.isfile(self.Watermarked_PDF_Dir + "/" + "watermarker_slow" + url_watermark2 + ".pdf"):
            wt1 = self.watermark_file(self.Watermarked_PDF_Dir + "/" + "watermarker_slow" + url_watermark2 + ".pdf", url_watermark, center_text=False)
        else:
            wt1 = self.Watermarked_PDF_Dir + "/" + "watermarker_fast.pdf"
    if True:
        # Opens the watermark file; the handle is not closed here.
        watermark1 = PdfFileReader(file(wt1, 'rb'))
    else:
        # Unreachable: the condition above is the constant True.
        wt1 = self.watermark_file(self.Watermarked_PDF_Dir + "/" + "watermarker_slow" + url_watermark2 + ".pdf", url_watermark, center_text=False)
        watermark1 = PdfFileReader(file(wt1, 'rb'))
    # wtt is not used afterwards; reading page 0 still fails early when the
    # watermark file cannot be parsed.
    wtt = watermark1.getPage(0)
    watter_marker.op_w_input(pathname, wt1, Wm_f)  # Wm_f is full address
    return Wm_f
def collate(self, remove_temp=True, remove_sources=False):
    """Collate the SVG files in ``self.sources`` into the PDF ``self.dest``.

    Each source is copied by ``hack_svg_viewbox`` to
    ``<dest dir>/tmp/page<i>.tmp``, rendered in place to a one-page PDF and
    the first page of each is appended, in order, to ``self.dest``.

    :param remove_temp: remove ``<dest dir>/tmp`` afterwards (the whole
        directory, also when it existed before the call)
    :param remove_sources: delete the files listed in ``self.sources``
    :returns: True
    :raises RuntimeError: when the directory of ``self.dest`` does not exist
    """
    from pyPdf import PdfFileWriter, PdfFileReader
    from svglib.svglib import svg2rlg
    from reportlab.graphics import renderPDF

    # Make temporary folder
    dest_dir = os.path.dirname(self.dest)
    if not os.path.exists(dest_dir):
        raise RuntimeError("output place %s d.n.e." % dest_dir)
    temp_dir = dest_dir + '/tmp'
    if not os.path.exists(temp_dir):
        os.mkdir(temp_dir)

    # Fix SVG windows for PDFing
    temp_page = ['%s/page%i.tmp' % (temp_dir, i)
                 for i, _unused in enumerate(self.sources)]
    for s, d in zip(self.sources, temp_page):
        hack_svg_viewbox(s, d)

    # Generate single PDF pages (each temp file is overwritten by its PDF)
    for s in temp_page:
        drawing = svg2rlg(s)
        renderPDF.drawToFile(drawing, s, autoSize=1)

    # Concatenate the PDF pages into a single document.  The page files are
    # kept open until the document is written and closed explicitly before
    # the temporary folder is removed.
    output = PdfFileWriter()
    streams = []
    try:
        for s in temp_page:
            stream = open(s, 'rb')
            streams.append(stream)
            output.addPage(PdfFileReader(stream).getPage(0))
        with open(self.dest, 'wb') as fout:
            output.write(fout)
    finally:
        for stream in streams:
            stream.close()

    # Remove the temporary folder
    if remove_temp:
        shutil.rmtree(temp_dir)

    # Remove the source images
    if remove_sources:
        for source in self.sources:
            os.remove(source)
    return True
def createForm(dogs, filename):
    """Stamp the entries of ``dogs`` onto the two-page record template.

    For dog number ``i`` every key that has a position in the module-level
    ``placements`` is drawn at that position, shifted vertically by
    ``yoffset[i]``.  After the sixth dog (``i == 5``) a new page is started
    and 'Luokka' texts are shifted right by 0.3 cm from then on.  With fewer
    than seven dogs one more page break is emitted before saving.

    The canvas is saved as ``/tmp/stamp.pdf`` (its pages 0 and 1) and
    ``pdftk multistamp`` applies it to ``pohjat/koepoytakirja.pdf``, writing
    ``esitaytetyt/<filename>``.
    """
    packet = StringIO.StringIO()
    c = canvas.Canvas(packet, pagesize=A4)
    extraoffset = {'Luokka': 0}
    for i, info in enumerate(dogs):
        for k, text in info.items():
            if k in placements:
                extra = extraoffset.get(k, 0)
                x, y = placements[k]
                c.drawString(extra + x*cm, yoffset[i]*cm + y*cm, text)
        if i == 5:
            c.showPage()
            extraoffset['Luokka'] = 0.3*cm
    if len(dogs) < 7:
        c.showPage()
    c.save()

    packet.seek(0)
    new_pdf = PdfFileReader(packet)
    output = PdfFileWriter()
    output.addPage(new_pdf.getPage(0))
    output.addPage(new_pdf.getPage(1))
    with open("/tmp/stamp.pdf", "wb") as outputStream:
        output.write(outputStream)

    try:
        os.mkdir("esitaytetyt")
    except OSError:
        # Best effort, as before: typically the directory already exists.
        pass
    call(['pdftk', 'pohjat/koepoytakirja.pdf',
          'multistamp', '/tmp/stamp.pdf',
          'output', 'esitaytetyt/%s' % filename])
def writePDF(linkPaths):
    """Combine ``./Tmp/1.pdf`` ... ``./Tmp/<N>.pdf`` into one dated PDF.

    ``N`` is ``len(linkPaths)``; the entries themselves are not read.  Only
    the first page of each file is taken.  The result is written to
    ``GDN <YYYYMMDD>.pdf`` (today's date) in the current directory.

    When ``linkPaths`` is not a list a message is printed and nothing is
    written.  Returns None in every case.
    """
    # isinstance instead of comparing str(type(...)) with "<type 'list'>",
    # which depends on the interpreter's repr and rejects list subclasses.
    if not isinstance(linkPaths, list):
        print("Invalid parameter passed.\n")
        return

    output = PdfFileWriter()
    streams = []
    try:
        for i in range(len(linkPaths)):
            stream = open("./Tmp/" + str(i + 1) + ".pdf", "rb")
            streams.append(stream)
            output.addPage(PdfFileReader(stream).getPage(0))

        print("Generating newspaper...\n")
        fileName = "GDN " + datetime.now().strftime("%Y%m%d") + ".pdf"
        with open(fileName, "wb") as outputStream:
            output.write(outputStream)
    finally:
        # the page files are closed only after the result has been written
        for stream in streams:
            stream.close()
    return
def merge_pdf(lpdf):
    """
    Concatenate the pages of several PDF into one in-memory PDF

    :param lpdf: PDF to merge, as File Object; ``None`` entries are skipped
    :type lpdf: list
    :return: file object holding the merged PDF (not rewound after writing)
    :rtype: File Object
    """
    merged = PdfFileWriter()
    for source in (pdf for pdf in lpdf if pdf is not None):
        # Read every source from its beginning, wherever it was left
        source.seek(0)
        reader = PdfFileReader(source)
        for number in range(reader.getNumPages()):
            merged.addPage(reader.getPage(number))

    # The merged document is kept in memory
    buf = StringIO()
    merged.write(buf)
    return buf
def cropNzoom(inputFile, pageNumber, zoomFactor):
    """Scale one page of a PDF and save it as a separate file.

    :param inputFile: path of the source PDF; it must contain a '.'
    :param pageNumber: page to extract, counted from 1
    :param zoomFactor: factor passed to the page's ``scaleBy``
    :returns: path of the written file,
        ``<inputFile without extension>_<pageNumber>test.pdf``
    """
    print("Cropping and scaling pdf")
    page_index = pageNumber - 1
    outputFile = (inputFile[:inputFile.rindex('.')] + '_' +
                  str(page_index + 1) + 'test.pdf')
    output = PdfFileWriter()
    with open(inputFile, "rb") as source:
        input1 = PdfFileReader(source)
        page = input1.getPage(page_index)
        page.scaleBy(zoomFactor)
        output.addPage(page)
        # rsplit instead of rindex: a path without a backslash (any
        # non-Windows path) is printed as is instead of raising ValueError.
        print("Saving cropped pdf as: " + outputFile.rsplit('\\', 1)[-1])
        with open(outputFile, "wb") as outputStream:
            output.write(outputStream)
    return outputFile
def main(output_file, input_files):
    """Concatenate the ``*.pdf`` files of ``input_files`` into ``output_file``.

    Names that do not end in ``.pdf`` are reported and skipped.  The page
    count of every file and the total are printed.
    """
    print("concat all files:")
    output = PdfFileWriter()
    total_pages = 0
    for f in input_files:
        # expect filename as "*.pdf"
        if not f.endswith(".pdf"):
            print("skipped file:  %s" % f)
            continue
        reader = PdfFileReader(open(f, 'rb'))
        num_pages = reader.getNumPages()
        total_pages += num_pages
        print("%s -> %spages" % (f, num_pages))
        for i in xrange(num_pages):
            output.addPage(reader.getPage(i))

    outputStream = open(output_file, 'wb')
    output.write(outputStream)
    print("%spages written" % total_pages)
    outputStream.close()
def create(self, cr, uid, ids, datas, context=None):
    """Build one PDF from the printouts of the checked-out documents.

    The 'plm.checkout' records ``ids`` are browsed; for every distinct
    document that has a ``printout`` (base64 encoded PDF) the first page is
    appended.  The result is passed through ``external_pdf`` and returned
    as ``(pdf, 'pdf')``.  ``self.pool`` and ``self.obj`` are set on the way.
    """
    self.pool = pooler.get_pool(cr.dbname)
    checkout_model = self.pool.get('plm.checkout')
    output = PdfFileWriter()
    packed = []  # ids of the documents already added
    for checkout in checkout_model.browse(cr, uid, ids):
        document = checkout.documentid
        if not document.printout or document.id in packed:
            continue
        raw = base64.decodestring(document.printout)
        reader = PdfFileReader(StringIO.StringIO(raw))
        output.addPage(reader.getPage(0))
        packed.append(document.id)

    buf = StringIO.StringIO()
    output.write(buf)
    self.obj = external_pdf(buf.getvalue())
    self.obj.render()
    buf.close()
    return (self.obj.pdf, 'pdf')
def crop_image(box, pdf_page, filename, count):
    """Crop the first page of a PDF to ``box`` and save it.

    ``box`` holds four values (x, y, x, y); the y values are subtracted from
    ``pdf_metadata.page_height`` before being applied.  The page is read
    from ``<filename>_data/<pdf_page>`` and, with its trim and crop boxes
    set, written to ``OCR_DATASET/<filename>_me_<count>``.
    """
    print("BOX")
    print(box)
    source_path = filename + "_data/" + pdf_page
    target_path = "OCR_DATASET/" + filename + "_me_" + str(count)
    with open(source_path, "rb") as in_f:
        reader = PdfFileReader(in_f)
        writer = PdfFileWriter()
        page = reader.getPage(0)

        x0 = float(box[0])
        y0 = pdf_metadata.page_height - float(box[1])
        x1 = float(box[2])
        y1 = pdf_metadata.page_height - float(box[3])
        lower_left = (x0, y1)
        upper_right = (x1, y0)

        # trim box first, crop box second
        page.trimBox.lowerLeft = lower_left
        page.trimBox.upperRight = upper_right
        page.cropBox.lowerLeft = lower_left
        page.cropBox.upperRight = upper_right

        writer.addPage(page)
        with open(target_path, "wb") as out_f:
            writer.write(out_f)
def get_pdf_pagesize(fn, page=0):
    """Return ``(width, height)`` of a page of the PDF file ``fn``.

    The size is the extent of the union of the page's ``/TrimBox``,
    ``/CropBox``, ``/MediaBox`` and ``/ArtBox`` entries.  All four bounds
    start at 0.0, so the origin is always part of that union.  Boxes that
    are missing or cannot be read are skipped; with no readable box the
    result is ``(0.0, 0.0)``.

    :param fn: path of the PDF file
    :param page: zero-based page index
    """
    x0 = x1 = y0 = y1 = 0.0
    # The file stays open while the boxes are looked up; it used to be
    # closed right after getPage(), with a bare except hiding any failure
    # that caused.
    with open(fn, "rb") as f:
        p = PdfFileReader(f).getPage(page)
        for k in ('/TrimBox', '/CropBox', '/MediaBox', '/ArtBox'):
            try:
                _dim = p[k]
            except Exception:
                # best effort: skip a box this page does not provide
                continue
            _x0, _y0, _x1, _y1 = [float(v) for v in _dim]
            y0 = min(y0, _y0)
            y1 = max(y1, _y1)
            x0 = min(x0, _x0)
            x1 = max(x1, _x1)
    width = x1 - x0
    height = y1 - y0
    return (width, height)
def MergePDF(filepath,outfile):
    """Merge the PDF files listed by ``getFileName(filepath)`` into one.

    Entries whose name contains '.DS_Store' are skipped.  Encrypted files
    are decrypted with the password "map".  The result is written to
    ``filepath + outfile`` (plain string concatenation); progress is printed.
    """
    output = PdfFileWriter()
    outputPages = 0
    pdf_fileName = getFileName(filepath)
    print('总的 %s' % (pdf_fileName,))

    for each in pdf_fileName:
        if '.DS_Store' in each:
            continue
        # read the source pdf file
        reader = PdfFileReader(open(each, "rb"))

        # an encrypted pdf has to be decrypted before pyPdf can use it
        if reader.isEncrypted:
            print('input.isEncrypted %s' % (reader.isEncrypted,))
            reader.decrypt("map")

        # number of pages in the source pdf
        pageCount = reader.getNumPages()
        outputPages += pageCount
        print(pageCount)

        # add every page to the output
        for iPage in range(pageCount):
            output.addPage(reader.getPage(iPage))

    print("All Pages Number:"+str(outputPages))
    # finally write the pdf file
    outputStream = open(filepath+outfile,"wb")
    output.write(outputStream)
    outputStream.close()
    print("finished")