예제 #1
0
def getannots(pdfannots, pageno, fh):
    """Collect the annotations of one page and export Ink/Square regions.

    Args:
        pdfannots: Iterable of raw annotation dictionaries; each is read
            through ``.get()`` for 'Subtype', 'Rect', 'C', 'Contents' and
            'QuadPoints'.
        pageno: Page index handed to ``PdfFileReader.getPage``.
        fh: Binary file object of the PDF the annotations belong to.

    Returns:
        A list of ``Annotation`` objects, one per accepted annotation.

    Side effects:
        Advances the module-level ``index`` counter once per accepted
        annotation. For "Ink" and "Square" annotations the page is cropped
        to the annotation rectangle and saved as ``./images/<index>.png``.
    """
    global index

    # Known highlight colours, compared against the raw 'C' entry.
    known_colours = (
        ([1.0, 0.90196, 0.0], "yellow"),
        ([0.26667, 0.78431, 0.96078], "blue"),
        ([0.92549, 0.0, 0.54902], "pink"),
        ([0.90196, 0.10588, 0.10588], "red"),
    )

    def getcolour(colour):
        """Map an annotation colour to its name, or "none" if unknown."""
        for rgb, colour_name in known_colours:
            if colour == rgb:
                return colour_name
        return "none"

    annots = []
    input1 = PdfFileReader(fh)
    targetPage = input1.getPage(pageno)
    newpath = "./images/"
    for pa in pdfannots:
        subtype = pa.get('Subtype')
        # An annotation without a subtype cannot be classified; skipping it
        # avoids an AttributeError on ``subtype.name`` further down.
        if subtype is None or subtype.name not in ANNOT_SUBTYPES:
            continue
        print(subtype)
        if subtype.name in ("Ink", "Square"):
            print("yes")
            coord = pa.get('Rect')
            print(type(coord))
            lower_left = (coord[0], coord[1])
            upper_right = (coord[2], coord[3])
            for box in (targetPage.cropBox, targetPage.trimBox,
                        targetPage.mediaBox):
                box.lowerLeft = lower_left
            for box in (targetPage.cropBox, targetPage.trimBox,
                        targetPage.mediaBox):
                box.upperRight = upper_right
            # Use a fresh writer for every annotation: a writer shared
            # across iterations keeps every page added so far, so later
            # exports would contain the earlier pages as well.
            output = PdfFileWriter()
            output.addPage(targetPage)
            pdf_bytes = io.BytesIO()
            output.write(pdf_bytes)
            pdf_bytes.seek(0)
            img = Image(file=pdf_bytes, resolution=300)
            img.convert("png")
            if not os.path.exists(newpath):
                os.makedirs(newpath)
            img.save(filename=newpath + str(index) + ".png")

        colour = pa.get('C')
        contents = pa.get('Contents')
        if contents is not None:
            contents = str(contents, 'iso8859-15')  # 'utf-8'
            # Normalise all line endings to "\n".
            contents = contents.replace('\r\n', '\n').replace('\r', '\n')
        a = Annotation(index, pageno, subtype.name.lower(),
                       pa.get('QuadPoints'), pa.get('Rect'), contents,
                       getcolour(colour))
        annots.append(a)

        index += 1

    return annots
예제 #2
0
def joinpdf(folder=TMPFOLDER,startpage=INDEX,outputname='freecad.pdf'):
    """Create one PDF from several others, following the order of startpage.

    The start page's HTML is read from ``folder``, the first ``<ul>…</ul>``
    span is searched for ``href`` targets, and the PDF named after each
    target (minus its 5-character suffix) is appended to the result, which
    is written to ``OUTPUTPATH``/``outputname``.

    Raises:
        IndexError: if the start page contains no ``<ul>…</ul>`` span.
    """
    if VERBOSE: print ("Building table of contents...")
    with open(folder+os.sep+startpage+'.html') as f:
        html = f.read()
    html = html.replace("\n"," ")
    html = html.replace("> <","><")
    html = re.findall("<ul.*/ul>",html)[0]
    pages = re.findall('href="(.*?)"',html)
    pages.insert(1,startpage+".html")
    result = PdfFileWriter()
    # Source files must stay open until the result has been written, so the
    # handles are collected here and closed at the very end.
    sources = []
    try:
        for p in pages:
            if not exists(p[:-5]):
                continue
            if VERBOSE: print ('Appending',p)
            try:
                handle = open(folder+os.sep+p[:-5]+'.pdf','rb')
                sources.append(handle)
                inputfile = PdfFileReader(handle)
            except Exception:
                # Best effort: an unreadable chapter is reported and skipped.
                print ('Unable to append',p)
            else:
                for i in range(inputfile.getNumPages()):
                    result.addPage(inputfile.getPage(i))
        with open(OUTPUTPATH + os.sep + outputname,'wb') as outputfile:
            result.write(outputfile)
    finally:
        for handle in sources:
            handle.close()
    if VERBOSE: print ('Successfully created',OUTPUTPATH,os.sep,outputname)
예제 #3
0
파일: jobs.py 프로젝트: sachazyto/nbproject
def split_chapters(*t_args):
    """Split a large PDF into chunks (i.e. chapters).

    ``t_args[0]`` is an argument list whose first item is the path of a
    JSON config file with the keys "source", "first_chapter_index",
    "chapters_ends", "chapter_fmt", "outputdir", "chapter_prefix" and
    "firstpage". Chapter ``i`` covers the pages from the previous chapter's
    end (or "firstpage" for the first one) up to, but excluding,
    ``chapters_ends[i]``.

    Nothing happens when called without arguments; a usage line is printed
    when the argument list is empty.
    """
    if len(t_args) == 0:
        return
    args = t_args[0]
    if len(args) < 1:
        print("usage: utils_pdf split_chapters configfile")
        return
    from pyPdf import PdfFileWriter, PdfFileReader
    with open(args[0]) as f:
        P = json.load(f)
    i0 = P["first_chapter_index"]
    ends = P["chapters_ends"]
    outputdir = P["outputdir"]
    # Keep the source open for the whole run and close it afterwards.
    with open(P["source"], "rb") as source:
        reader = PdfFileReader(source)
        for i, end in enumerate(ends):
            fmt = P["chapter_fmt"] % (i0 + i, )
            output = PdfFileWriter()
            if not os.path.exists(outputdir):
                os.mkdir(outputdir)
            fn_out = "%s/%s%s" % (outputdir, P["chapter_prefix"], fmt)
            j0 = P["firstpage"] if i == 0 else ends[i - 1]
            for j in range(j0, end):
                output.addPage(reader.getPage(j))
            with open(fn_out, "wb") as outputStream:
                output.write(outputStream)
            print("wrote %s" % (fn_out,))
예제 #4
0
def split_file(f, filename, pages_per_file=10):
    """Split a PDF into sub-files and add those to the queue in order.

    Args:
        f: Reader object exposing ``getNumPages()`` and ``getPage(i)``.
        filename: Path of the PDF being split; its last four characters are
            replaced by ``_<first page index>.pdf`` for each piece, and the
            file itself is deleted once all pieces are written.
        pages_per_file: Maximum number of pages per piece (default 10).

    Raises:
        ValueError: if ``pages_per_file`` is not positive.
    """
    global file_queue
    if pages_per_file < 1:
        raise ValueError("pages_per_file must be at least 1")
    total_pages = f.getNumPages()
    log('Splitting file ' + filename + " with " + str(total_pages) + " pages.")
    for first_page in range(0, total_pages, pages_per_file):
        fname = filename[:-4] + '_' + str(first_page) + '.pdf'
        output = PdfFileWriter()
        last_page = min(first_page + pages_per_file, total_pages)
        for i in range(first_page, last_page):
            output.addPage(f.getPage(i))
        # The context manager closes the piece even if writing fails.
        with open(fname, 'wb') as fout:
            output.write(fout)
        file_queue.append(fname)
    # Delete the file now that it's in pieces
    os.remove(filename)
예제 #5
0
def cat(infilenames, outputfilename, verbose):
    """Concatenate all pages of several PDFs into a new file.

    Args:
        infilenames: Paths of the PDFs to join, in output order.
        outputfilename: Path of the result; it must not exist yet.
        verbose: When true, print the number of pages processed.

    Exits with status 2 (after calling ``halp()``) when an input is
    missing, the output already exists, or an input cannot be opened as a
    PDF.
    """
    for infilename in infilenames:
        print(infilename)
        if not os.path.exists(infilename):
            halp()
            print("error: "+infilename+" does not exist... exiting nao")
            sys.exit(2)  # missing input file
    if os.path.exists(outputfilename):
        halp()
        print("error: "+outputfilename+" does already exist... exiting nao")
        sys.exit(2)  # refuse to overwrite

    handles = []
    try:
        inputs = []
        try:
            for name in infilenames:
                handle = open(name, "rb")
                handles.append(handle)
                inputs.append(PdfFileReader(handle))
        except Exception:
            # Not a bare except: Ctrl-C and SystemExit must pass through.
            halp()
            sys.exit(2)  # input is not a readable PDF

        pages_done = 0
        output = PdfFileWriter()
        for pdf in inputs:
            for pagenr in range(pdf.getNumPages()):
                output.addPage(pdf.getPage(pagenr))
                pages_done += 1
        with open(outputfilename, "wb") as outputStream:
            output.write(outputStream)
    finally:
        # The inputs are only closed once the output has been written.
        for handle in handles:
            handle.close()
    if verbose: print(str(pages_done)+" pages processed")
  def concatenate_pdf(self,book_title):
      """Merge ``<book_title>1.pdf`` … ``<book_title>N.pdf`` into one file.

      Chapters are looked up in the current working directory, numbered
      consecutively from 1 (at most 39). All pages of every chapter are
      written to ``<book_title>.pdf`` and the chapter files are then
      deleted.
      """
      fileList = os.listdir(os.getcwd())
      # Count consecutive chapters; this also works when all 39 exist.
      num_chapters = 0
      for i in range(1, 40):
          if book_title + str(i) + ".pdf" not in fileList:
              break
          num_chapters = i
      print("numero capitulos" + str(num_chapters))

      print("Uniendo pfs...")
      output = PdfFileWriter()
      handles = []
      try:
          # Include the last chapter (num_chapters + 1 is exclusive): the
          # delete loop below removes it, so leaving it out of the merge
          # would lose it.
          for i in range(1, num_chapters + 1):
              f = open(book_title + str(i) + ".pdf", "rb")
              handles.append(f)
              reader = PdfFileReader(f)
              for a in range(reader.getNumPages()):
                  output.addPage(reader.getPage(a))

          outputStream = open(book_title + ".pdf", "wb")
          try:
              output.write(outputStream)
          finally:
              outputStream.close()
      finally:
          # Chapters must be closed before they can be deleted on every
          # platform, and only after the merged file has been written.
          for f in handles:
              f.close()

      print("Union finalizada")
      for i in range(1, num_chapters + 1):
          print("borrando... capitulo: " + str(i))
          os.remove(book_title + str(i) + ".pdf")
예제 #7
0
파일: views.py 프로젝트: nisiotis/dideman
def showpdf(request):
    """Serve an extracted PDF with the signature image stamped on page one.

    Query parameters:
        f: File name inside ``MEDIA_ROOT/pdffiles/extracted``.
        o: Orientation; ``'l'`` places the signature at x=529, anything
            else (or a missing value) at x=70.

    Returns:
        An ``HttpResponse`` carrying the stamped first page as a PDF
        attachment, a 404 response when ``f`` points outside the extracted
        directory, or ``None`` when ``f`` is absent.
    """
    sign = os.path.join(settings.MEDIA_ROOT, "signature.png")
    mimetypes.init()
    response = None
    if 'f' in request.GET:
        # ``f`` comes straight from the query string: resolve it and make
        # sure it cannot escape the extracted-files directory ("../").
        base_dir = os.path.realpath(
            os.path.join(settings.MEDIA_ROOT, 'pdffiles', 'extracted'))
        pdf_path = os.path.realpath(
            os.path.join(base_dir, '%s' % request.GET['f']))
        if not pdf_path.startswith(base_dir + os.sep):
            return HttpResponse(status=404)

        fr = open(pdf_path, "rb")
        try:
            imgTemp = StringIO()
            imgDoc = canvas.Canvas(imgTemp)
            # A missing 'o' parameter falls back to the default placement
            # instead of raising a KeyError.
            if request.GET.get('o') == 'l':
                imgDoc.drawImage(sign, 529, 40, 290/2, 154/2)
            else:
                imgDoc.drawImage(sign, 70, 40, 290/2, 154/2)
            imgDoc.save()

            overlay = PdfFileReader(StringIO(imgTemp.getvalue())).getPage(0)
            page = PdfFileReader(fr).getPage(0)
            page.mergePage(overlay)
            pdf_out = PdfFileWriter()
            pdf_out.addPage(page)
            response = HttpResponse(mimetype='application/pdf')
            response['Content-Disposition'] = (
                'attachment; filename=%s' % os.path.basename(pdf_path))
            # The source must still be open while the page is written.
            pdf_out.write(response)
        finally:
            fr.close()

    return response
예제 #8
0
def applica_firma(file_firma, pdf_file):
    """Stamp a signature image onto the first page of a PDF, in place.

    Args:
        file_firma: Path of the signature image.
        pdf_file: Path of the PDF; it is overwritten with a single page,
            the original first page plus the signature overlay.
    """
    # Using ReportLab to insert image into PDF
    imgTemp = StringIO()
    imgDoc = canvas.Canvas(imgTemp)

    buff = 50

    # Draw image on Canvas and save PDF in buffer:
    # at (200, 190 - buff) with size 200x75
    imgDoc.drawImage(file_firma, 200, 190 - buff, 200, 75)

    # Signature line above the caption
    p = imgDoc.beginPath()
    p.moveTo(200, 210 - buff)
    p.lineTo(400, 210 - buff)

    imgDoc.drawPath(p, stroke=1, fill=1)
    imgDoc.setFont("Helvetica", 8)
    imgDoc.drawString(260, 195 - buff, "(Firma del Richiedente)")
    imgDoc.save()

    # Use PyPDF to merge the image-PDF into the template. The result is
    # built in memory first: opening pdf_file for writing truncates it, and
    # the reader still has to pull objects from it while the writer runs.
    source = open(pdf_file, "rb")
    try:
        page = PdfFileReader(source).getPage(0)
        overlay = PdfFileReader(StringIO(imgTemp.getvalue())).getPage(0)
        page.mergePage(overlay)

        output = PdfFileWriter()
        output.addPage(page)
        merged = StringIO()
        output.write(merged)
    finally:
        source.close()

    # Save the result (binary mode, PDF data is not text)
    target = open(pdf_file, "wb")
    try:
        target.write(merged.getvalue())
    finally:
        target.close()
    def add_omr_marks(self, pdf_data, is_latest_document):
        """Overlay OMR marks on every even-indexed page of a PDF.

        Args:
            pdf_data: Raw content of the PDF to mark.
            is_latest_document: Flag forwarded (combined with the page
                check below) to ``self._compute_marks``.

        Returns:
            The raw content of the marked PDF.
        """
        # Documentation
        # http://meteorite.unm.edu/site_media/pdf/reportlab-userguide.pdf
        # https://pythonhosted.org/PyPDF2/PdfFileReader.html
        # https://stackoverflow.com/a/17538003
        # https://gist.github.com/kzim44/5023021
        # https://www.blog.pythonlibrary.org/2013/07/16/
        #   pypdf-how-to-write-a-pdf-to-memory/
        self.ensure_one()

        pdf_buffer = StringIO.StringIO()
        pdf_buffer.write(pdf_data)

        existing_pdf = PdfFileReader(pdf_buffer)
        output = PdfFileWriter()
        total_pages = existing_pdf.getNumPages()

        # print latest omr mark on latest pair page (recto)
        # NOTE(review): total_pages // 2 is only an even page index for
        # some page counts (e.g. it is 1 for a 2-page document), so the
        # "latest" mark may never be selected - confirm the intended index.
        latest_omr_page = total_pages // 2

        for page_number in range(total_pages):
            page = existing_pdf.getPage(page_number)
            # only print omr marks on pair pages (recto);
            # compare by value, not identity
            if page_number % 2 == 0:
                is_latest_page = is_latest_document and \
                    page_number == latest_omr_page
                marks = self._compute_marks(is_latest_page)
                omr_layer = self._build_omr_layer(marks)
                page.mergePage(omr_layer)
            output.addPage(page)

        out_buffer = StringIO.StringIO()
        output.write(out_buffer)

        return out_buffer.getvalue()
예제 #10
0
def delete(filesandranges, outputfilename, verbose):
    """Copy the given PDFs into a new file, leaving out selected pages.

    Args:
        filesandranges: Sequence of dicts with a "name" (PDF path) and
            "pages" (1-based page numbers to drop from that file).
        outputfilename: Path of the result; it must not exist yet.
        verbose: Accepted for interface parity; not used here.

    Exits with status 2 (after calling ``halp()``) when an input is
    missing, the output already exists, or an input cannot be processed.
    """
    for entry in filesandranges:
        if not os.path.exists(entry['name']):
            halp()
            print("error: " + entry['name'] +
                  " does not exist... exiting nao")
            sys.exit(2)  # missing input file
    if os.path.exists(outputfilename):
        halp()
        # Report the output file, which is the one that already exists.
        print("error: " + outputfilename +
              " does already exist... exiting nao")
        sys.exit(2)  # refuse to overwrite

    output = PdfFileWriter()
    handles = []
    try:
        try:
            for pdf in filesandranges:
                print(pdf["name"])
                handle = open(pdf["name"], "rb")
                handles.append(handle)
                fiel = PdfFileReader(handle)

                for pagenr in range(1, fiel.getNumPages() + 1):
                    if pagenr not in pdf["pages"]:
                        output.addPage(fiel.getPage(pagenr - 1))
        except Exception:
            # Not a bare except: Ctrl-C and SystemExit must pass through.
            halp()
            sys.exit(2)  # input is not a readable PDF
        if not os.path.exists(outputfilename):
            with open(outputfilename, "wb") as outputStream:
                output.write(outputStream)
        else:
            print("file exists, discontinuing operation")
    finally:
        # The inputs are only closed once the output has been written.
        for handle in handles:
            handle.close()
예제 #11
0
def cat(infilenames, outputfilename, verbose):
    """Join every page of the input PDFs into one new output PDF.

    Args:
        infilenames: Input PDF paths, in the order they should appear.
        outputfilename: Destination path; the function refuses to
            overwrite an existing file.
        verbose: When true, report how many pages were copied.

    Calls ``halp()`` and exits with status 2 if an input does not exist,
    the destination exists, or an input cannot be read as a PDF.
    """
    for infilename in infilenames:
        print(infilename)
        if not os.path.exists(infilename):
            halp()
            print("error: " + infilename + " does not exist... exiting nao")
            sys.exit(2)  # missing input file
    if os.path.exists(outputfilename):
        halp()
        print("error: " + outputfilename +
              " does already exist... exiting nao")
        sys.exit(2)  # refuse to overwrite

    open_files = []
    try:
        inputs = []
        try:
            for path in infilenames:
                source = open(path, "rb")
                open_files.append(source)
                inputs.append(PdfFileReader(source))
        except Exception:
            # Catch real errors only, so Ctrl-C is not turned into exit(2).
            halp()
            sys.exit(2)  # input is not a readable PDF

        output = PdfFileWriter()
        copied = 0
        for pdf in inputs:
            for pagenr in range(pdf.getNumPages()):
                output.addPage(pdf.getPage(pagenr))
                copied += 1
        with open(outputfilename, "wb") as outputStream:
            output.write(outputStream)
    finally:
        # Sources have to stay open until the writer has finished.
        for source in open_files:
            source.close()
    if verbose: print(str(copied) + " pages processed")
예제 #12
0
def split(files, verbose):
    """Write every page of each input PDF to its own single-page file.

    Page ``k`` of ``name.ext`` becomes ``name`` + "p" + zero-padded ``k`` +
    ``.ext``; the padding is as wide as the file's page count.

    Args:
        files: Paths of the PDFs to split.
        verbose: When true, print the page and file totals.

    Calls ``halp()`` and exits with status 2 if an input is missing or
    cannot be read as a PDF.
    """
    for infilename in files:
        if not os.path.exists(infilename):
            halp()
            print("error: " + infilename + " does not exist... exiting nao")
            sys.exit(2)  # missing input file

    handles = []
    try:
        inputs = []
        try:
            for path in files:
                handle = open(path, "rb")
                handles.append(handle)
                inputs.append(PdfFileReader(handle))
        except Exception:
            halp()
            print("there has been an error of unfortunate proportions")
            sys.exit(2)  # input is not a readable PDF

        pages_written = 0
        for infilename, pdf in zip(files, inputs):
            (name, ext) = splitext(infilename)
            num_pages = pdf.getNumPages()
            # Digits in the page count. ceil(log10(n)) is one short for
            # exact powers of ten (10, 100, ...), which broke the padding.
            width = len(str(num_pages))
            for pagenr in range(num_pages):
                output = PdfFileWriter()
                output.addPage(pdf.getPage(pagenr))
                target = name + "p" + "%0*d" % (width, pagenr + 1) + ext
                print(target)
                with open(target, "wb") as outputStream:
                    output.write(outputStream)
                pages_written += 1
    finally:
        for handle in handles:
            handle.close()
    if verbose:
        print(str(pages_written) + " pages in " + str(len(inputs)) +
              " files processed")
예제 #13
0
    def run(self):
        """Build the report and merge its first page onto the template.

        The story is rendered to ``test.pdf``; page one of it is merged
        onto page one of ``report_template.pdf`` and, followed by the
        remaining report pages, saved as ``final_report.pdf``.
        """
        self.doc = SimpleDocTemplate("test.pdf")
        self.story = [Spacer(1, 1*inch)]
        self.createLineItems()

        self.doc.build(self.story,
                       onFirstPage=self.first_page,
                       onLaterPages=self.later_page)
        print("finished!")

        with open("test.pdf", "rb") as generated:
            print("merginig")
            report = PdfFileReader(generated)
            template = PdfFileReader(open("report_template.pdf", "rb"))
            writer = PdfFileWriter()

            # First page: template with the report drawn on top.
            cover = template.getPage(0)
            cover.mergePage(report.getPage(0))
            writer.addPage(cover)

            # Remaining report pages are copied unchanged.
            for page_no in range(1, report.getNumPages()):
                writer.addPage(report.getPage(page_no))

            result_file = open("final_report.pdf", "wb")
            writer.write(result_file)
            result_file.close()
예제 #14
0
    def uploadFile(self):
        '''Wrap the PDF at self.path in a Resource stored as self.file.

        If self.pages still names a page selection after filtering, only
        those pages are written to a temporary PDF, which is used instead
        of the original file.
        '''
        filePath = self.path
        log.debug(u"uploadFile " + unicode(filePath))
        # NOTE(review): the problem is only logged; processing continues.
        if not self.parentNode or not self.parentNode.package:
            log.error('something is wrong with the file')
        # Keep only digits, commas and hyphens of the page selection.
        self.pages = sub('[^\d,-]', '', self.pages)
        if self.pages != "":
            input = PdfFileReader(file(filePath, "rb"))
            lastPage = input.getNumPages() - 1 # index of the last page
            # Turn the selection string into the page indexes to import.
            toimport = PdfIdevice.__parseImportPages(self.pages, lastPage)
            log.debug("Parsed pages: " + str(toimport))
            output = PdfFileWriter()

            for page in toimport:
                output.addPage(input.getPage(page))
            log.debug("Found pages to import %s" % toimport)
            # Write the selected pages to a temporary file.
            tmp = os.tmpnam() + ".pdf"
            log.debug('Tempfile is %s' % tmp)
            outputStream = file(tmp, "wb")
            output.write(outputStream)
            outputStream.close()
            resourceFile = Path(tmp)
            self.file = Resource(self, resourceFile)
            log.debug("Uploaded %s, pages: %s" % (tmp, toimport)) 
            # The temporary file is deleted right away; filePath now points
            # at the removed file, so the isfile() check below presumably
            # fails and self.file keeps the Resource created above.
            os.remove(tmp)
            filePath = tmp
        # Without a page selection the original file is used as is.
        resourceFile = Path(filePath)
        if resourceFile.isfile():
            self.file = Resource(self, resourceFile)
            log.debug(u"uploaded " + self.path)
def into_half(src, dst):
    """Cut every page of ``src`` vertically in two and write them to ``dst``.

    For each input page two output pages are produced: the original with
    the upper-right corner of its media box moved to half the width, and a
    copy with the upper-left corner of its media box moved there instead.
    The upper-right corner of each page is printed on the way.
    """
    source_file = open(src, 'rb')
    target_file = open(dst, 'wb')

    reader = PdfFileReader(source_file)
    writer = PdfFileWriter()

    for page_no in range(reader.getNumPages()):
        left = reader.getPage(page_no)
        # The copy needs its own media box, otherwise both halves would
        # share one rectangle.
        right = copy.copy(left)
        right.mediaBox = copy.copy(left.mediaBox)

        (w, h) = left.mediaBox.upperRight
        print("%s %s" % (w, h))

        left.mediaBox.upperRight = (w/2, h)
        right.mediaBox.upperLeft = (w/2, h)

        writer.addPage(left)
        writer.addPage(right)

    writer.write(target_file)
    source_file.close()
    target_file.close()
예제 #16
0
def concatenate_pdfs(output_fn, input_fns):
    """Write the first page of every input PDF into one output PDF.

    Args:
        output_fn: Path of the PDF to create.
        input_fns: Paths of the source PDFs; only page 0 of each is used.
    """
    from pyPdf import PdfFileWriter, PdfFileReader
    outfile = PdfFileWriter()
    sources = []
    try:
        for fn in input_fns:
            source = open(fn, 'rb')
            sources.append(source)
            outfile.addPage(PdfFileReader(source).getPage(0))
        # Closing the output explicitly guarantees it is flushed to disk.
        with open(output_fn, "wb") as target:
            outfile.write(target)
    finally:
        # Sources stay open until the writer is done with their pages.
        for source in sources:
            source.close()
예제 #17
0
def renderToPdf(envLL, filename, sizex, sizey):
    """Render every Mapnik layer to a PDF and merge them into ``filename``.

    Each entry of ``MAPNIK_LAYERS`` is rendered to
    ``<basename>_<layer>.pdf``. The first layer's page becomes the page of
    the merged document; every later layer's page is merged onto it.
    """
    stem = os.path.splitext(filename)[0]
    mergedpdf = None
    for mapname in MAPNIK_LAYERS:
        print('Rendering %s' % (mapname,))

        # Render layer PDF.
        localfilename = stem + '_' + mapname + '.pdf'
        layer_file = open(localfilename, 'wb')
        surface = cairo.PDFSurface(layer_file.name, sizex, sizey)
        envMerc = LLToMerc(envLL)
        layer_map = mapnik.Map(sizex, sizey)
        mapnik.load_map(layer_map, mapname + ".xml")
        layer_map.zoom_to_box(envMerc)
        mapnik.render(layer_map, surface)
        surface.finish()
        layer_file.close()

        # Merge with master.
        if mergedpdf:
            localpdf = PdfFileReader(open(localfilename, "rb"))
            page.mergePage(localpdf.getPage(0))
        else:
            mergedpdf = PdfFileWriter()
            localpdf = PdfFileReader(open(localfilename, "rb"))
            page = localpdf.getPage(0)
            mergedpdf.addPage(page)

    merged_file = open(filename, 'wb')
    mergedpdf.write(merged_file)
    merged_file.close()
예제 #18
0
def split(file_name):
    """Halve every page of ``file_name`` via crop boxes into ``out.pdf``.

    Each page yields two output pages: the original cropped to the lower
    half of its media box and a copy cropped to the upper half.
    """
    reader = PdfFileReader(open(file_name, "rb"))
    writer = PdfFileWriter()

    numPages = reader.getNumPages()
    print("document has %s pages." % numPages)

    for page_no in range(numPages):
        lower = reader.getPage(page_no)
        # Copy before any crop box is touched so the halves stay
        # independent.
        upper = copy.copy(lower)
        y_max = lower.mediaBox.getUpperRight_y()
        x_max = lower.mediaBox.getUpperRight_x()

        lower.cropBox.lowerLeft = (0, 0)
        lower.cropBox.upperRight = (x_max, y_max/2)

        upper.cropBox.lowerLeft = (0, y_max/2)
        upper.cropBox.upperRight = (x_max, y_max)

        writer.addPage(lower)
        writer.addPage(upper)

    result_file = open("out.pdf", "wb")
    writer.write(result_file)
    result_file.close()

    print('Finished')
예제 #19
0
파일: main.py 프로젝트: pb-/mkbooklet
    def add_guides(self):
        """Copy ``sig.pdf`` to ``sigs.pdf`` with guides merged onto page one.

        The guide page is generated in memory: long-arm guides on the
        landscape A4 size when ``self.args.longarm`` is set, otherwise
        short-arm guides on A5 or landscape A4 depending on
        ``self.args.a5``.
        """
        pdf_in = PdfFileReader(open('sig.pdf', 'rb'))
        pdf_out = PdfFileWriter()

        for page_no in range(pdf_in.getNumPages()):
            page = pdf_in.getPage(page_no)
            if page_no == 0:
                guides = StringIO()

                if self.args.longarm:
                    width, height = a4lwidth_pt, a4lheight_pt
                    marks = generate_longarm()
                else:
                    if self.args.a5:
                        width, height = a5width_pt, a5height_pt
                    else:
                        width, height = a4lwidth_pt, a4lheight_pt
                    marks = generate_shortarm(
                        self.args.a5, bool(self.args.signature))
                create_pdf(guides, width, height, marks)

                # Only the first page receives the guides.
                page.mergePage(PdfFileReader(guides).getPage(0))
            pdf_out.addPage(page)

        pdf_out.write(open('sigs.pdf', 'wb'))
예제 #20
0
def joinpdf(folder=TMPFOLDER, startpage=INDEX, outputname='freecad.pdf'):
    """Create one PDF file from several others, in start-page order.

    The HTML of ``startpage`` (read from ``folder``) supplies the order:
    every ``href`` inside its first ``<ul>…</ul>`` span names a page whose
    PDF (target minus the 5-character suffix, plus ``.pdf``) is appended.
    The result is written to ``OUTPUTPATH``/``outputname``.

    Raises:
        IndexError: if the start page contains no ``<ul>…</ul>`` span.
    """
    if VERBOSE: print("Building table of contents...")
    with open(folder + os.sep + startpage + '.html') as f:
        html = f.read()
    html = html.replace("\n", " ")
    html = html.replace("> <", "><")
    html = re.findall("<ul.*/ul>", html)[0]
    pages = re.findall('href="(.*?)"', html)
    pages.insert(1, startpage + ".html")
    result = PdfFileWriter()
    # Readers pull page data lazily, so the inputs stay open until the
    # result is written.
    opened = []
    try:
        for p in pages:
            if not exists(p[:-5]):
                continue
            if VERBOSE: print('Appending', p)
            try:
                # open() rather than file(): the latter only exists on
                # Python 2 and its NameError used to be silently swallowed.
                source = open(folder + os.sep + p[:-5] + '.pdf', 'rb')
                opened.append(source)
                inputfile = PdfFileReader(source)
            except Exception:
                # Best effort: an unreadable chapter is reported and skipped.
                print('Unable to append', p)
            else:
                for i in range(inputfile.getNumPages()):
                    result.addPage(inputfile.getPage(i))
        with open(OUTPUTPATH + os.sep + outputname, 'wb') as outputfile:
            result.write(outputfile)
    finally:
        for source in opened:
            source.close()
    if VERBOSE: print('Successfully created', OUTPUTPATH, os.sep, outputname)
예제 #21
0
def pdf(coursesid,examsid):
    '''Create a blank PDF of this exam.

    Draws a placeholder string with ReportLab, merges it onto the first
    page of the template PDF and returns the result as a PDF response.
    Both arguments are currently unused.
    '''
    # TODO: Obviously fix this up to generate actual PDFs; this is just a proof of concept
    from reportlab.pdfgen import canvas
    from reportlab.lib.pagesizes import letter
    from pyPdf import PdfFileWriter, PdfFileReader
    from io import BytesIO

    overlay_buffer = BytesIO()

    p = canvas.Canvas(overlay_buffer, pagesize=letter)
    p.drawString(100, 100, 'Hello')
    p.save()

    overlay_buffer.seek(0)
    new_pdf = PdfFileReader(overlay_buffer)
    with open('/home/treece/src/web/bubbleck/res/Template.pdf', 'rb') as template:
        existing_pdf = PdfFileReader(template)
        out = PdfFileWriter()
        page = existing_pdf.getPage(0)
        page.mergePage(new_pdf.getPage(0))
        out.addPage(page)
        merged = BytesIO()
        # write() fills the buffer and returns nothing; the response body
        # has to be taken from the buffer itself.
        out.write(merged)

    response = make_response(merged.getvalue())
    response.headers['Content-Disposition'] = 'inline; filename="sakulaci.pdf"'
    response.mimetype = 'application/pdf'

    return response
예제 #22
0
 def create_source_pdf(self, cr, uid, ids, data, report_xml, context=None):
     """Create the report PDF, reusing or saving attachments when configured.

     When ``report_xml.attachment`` is set, each object is handled on its
     own: a stored attachment is reused if allowed, otherwise a single
     report is generated and, if it has an attachment name, saved. PDF
     results are merged into one document and returned as
     ``(content, 'pdf')``. Returns ``False`` as soon as one single report
     cannot be created. In every other case the call falls through to
     ``create_single_pdf`` for all ``ids``.
     """
     if not context:
         context = {}
     pool = pooler.get_pool(cr.dbname)
     attach = report_xml.attachment
     if attach:
         objs = self.getObjects(cr, uid, ids, context)
         results = []
         for obj in objs:
             # NOTE(review): eval() runs the report's attachment expression
             # as Python code; report definitions must be trusted.
             aname = eval(attach, {'object': obj, 'time': time})
             result = False
             # Reuse a stored attachment when the report and the context
             # both allow it.
             if report_xml.attachment_use and aname and context.get(
                     'attachment_use', True):
                 aids = pool.get('ir.attachment').search(
                     cr, uid, [('datas_fname', '=', aname + '.pdf'),
                               ('res_model', '=', self.table),
                               ('res_id', '=', obj.id)])
                 if aids:
                     brow_rec = pool.get('ir.attachment').browse(
                         cr, uid, aids[0])
                     # An attachment without data contributes no result
                     # for this object.
                     if not brow_rec.datas:
                         continue
                     d = base64.decodestring(brow_rec.datas)
                     results.append((d, 'pdf'))
                     continue
             # No reusable attachment: render the report for this object.
             result = self.create_single_pdf(cr, uid, [obj.id], data,
                                             report_xml, context)
             if not result:
                 return False
             if aname:
                 # Saving the attachment is best effort: a failure is
                 # logged and the rendered result is still used.
                 try:
                     name = aname + '.' + result[1]
                     pool.get('ir.attachment').create(
                         cr,
                         uid, {
                             'name': aname,
                             'datas': base64.encodestring(result[0]),
                             'datas_fname': name,
                             'res_model': self.table,
                             'res_id': obj.id,
                         },
                         context=context)
                 except Exception:
                     #TODO: should probably raise a proper osv_except instead, shouldn't we? see LP bug #325632
                     logging.getLogger('report').error(
                         'Could not create saved report attachment',
                         exc_info=True)
             results.append(result)
         if results:
             # Only the first result's format is checked before merging.
             if results[0][1] == 'pdf':
                 from pyPdf import PdfFileWriter, PdfFileReader
                 output = PdfFileWriter()
                 for r in results:
                     reader = PdfFileReader(cStringIO.StringIO(r[0]))
                     for page in range(reader.getNumPages()):
                         output.addPage(reader.getPage(page))
                 s = cStringIO.StringIO()
                 output.write(s)
                 return s.getvalue(), results[0][1]
     # Reached without an attachment expression, with no results, or when
     # the first result is not a PDF.
     return self.create_single_pdf(cr, uid, ids, data, report_xml, context)
예제 #23
0
def select(filesandranges, outputfilename, verbose):
    """Copy selected pages of the given PDFs into a new file.

    Args:
        filesandranges: Sequence of dicts with a "name" (PDF path) and
            "pages" (1-based page numbers to copy, in output order).
        outputfilename: Path of the result; it must not exist yet.
        verbose: When true, print the selection and the output path.

    Exits with status 2 (after calling ``halp()``) when an input is
    missing, the output already exists, or an input cannot be processed,
    and with status 3 when a requested page is outside a document.
    """
    if verbose: print (str(filesandranges)+"\noutput: "+str(outputfilename))

    for entry in filesandranges:
        if not os.path.exists(entry['name']):
            halp()
            print ("error: "+entry['name']+" does not exist... exiting nao")
            sys.exit(2)  # missing input file
    if os.path.exists(outputfilename):
        halp()
        # Report the output file, which is the one that already exists.
        print ("error: "+outputfilename+" does already exist... exiting nao")
        sys.exit(2)  # refuse to overwrite

    output = PdfFileWriter()
    handles = []
    try:
        try:
            for pdf in filesandranges:
                handle = open(pdf["name"], "rb")
                handles.append(handle)
                fiel = PdfFileReader(handle)
                num_pages = fiel.getNumPages()
                for pagenr in pdf["pages"]:
                    if 1 <= pagenr <= num_pages:
                        output.addPage(fiel.getPage(pagenr-1))
                    else:
                        print("one or more pages are not in the chosen PDF")
                        halp()
                        sys.exit(3)  # wrong pages or ranges
        except Exception:
            # Not a bare except: it must not swallow the SystemExit(3)
            # raised above and turn it into exit status 2.
            halp()
            sys.exit(2)  # input is not a readable PDF
        if not os.path.exists(outputfilename):
            with open(outputfilename, "wb") as outputStream:
                output.write(outputStream)
        else:
            print ("file exists, discontinuing operation")
    finally:
        # The inputs are only closed once the output has been written.
        for handle in handles:
            handle.close()
예제 #24
0
def editPDF(filename):
    """Copy a PDF, adding /Tags and /Keywords metadata to the copy.

    Both entries are taken from ``tag_dict[filename]``. The copy is written
    to ``directory`` under the input name with its last four characters
    replaced by ``_updated.pdf``.
    """
    updated_name = filename[:-4] + '_updated.pdf'

    writer = PdfFileWriter()
    source = open(filename, 'rb')
    reader = PdfFileReader(source)
    info = writer._info.getObject()

    # Metadata entries to add; adjust the keys to whatever is needed.
    extra_info = {
        NameObject('/Tags'): createStringObject(tag_dict[filename]),
        NameObject('/Keywords'): createStringObject(tag_dict[filename]),
    }
    info.update(extra_info)

    for page_no in range(reader.getNumPages()):
        writer.addPage(reader.getPage(page_no))

    target = open(os.path.join(directory, updated_name), 'wb')
    writer.write(target)
    source.close()
    target.close()
예제 #25
0
    def __call__(self, data, attachments=[], pages=None):
        """Render the templated fields, fill the form and assemble a writer.

        Args:
            data: Input handed to ``self.template_args`` for each field.
            attachments: Objects whose ``pdf()`` result is merged onto a
                blank page appended after the form pages.
            pages: Page indexes of the filled form to keep; all pages when
                empty or ``None``.

        Returns:
            A ``PdfFileWriter`` holding the selected pages and attachments.
        """
        self.rendered = {}
        for field, ctx in self.fields.items():
            if "template" in ctx:
                self.context = ctx
                kwargs = self.template_args(data)
                template = self.context["template"]

                try:
                    rendered_field = template.render(**kwargs)
                except Exception as err:
                    # A failing field is logged and left out.
                    logger.error("%s: %s %s", field, template, err)
                    continue

                # Skip the field if it is already rendered by filter
                if field not in self.rendered:
                    self.rendered[field] = rendered_field

        filled = PdfFileReader(self.exec_pdftk(self.rendered))
        for pagenumber, watermark in self.watermarks:
            filled.getPage(pagenumber).mergePage(watermark)

        output = PdfFileWriter()
        page_numbers = pages or range(filled.getNumPages())
        for page_no in page_numbers:
            output.addPage(filled.getPage(page_no))

        for attachment in attachments:
            blank = output.addBlankPage()
            blank.mergePage(attachment.pdf())

        return output
예제 #26
0
	def addPdfOverlay(self, pdf_doc, overlay_doc, output_doc, repeatOverlay=False):
		'''
			Essentially merging two PDF documents.

			pdf_doc: (string)
				Path to PDF document.
			overlay_doc: (string)
				Path to PDF overlay document to overlay pdf_doc.
			output_doc: (string)
				Path the merged PDF document is written to.
			repeatOverlay: (boolean)
				If set to True, page 1 of the overlay document is repeated
				for each page of the pdf_doc. (default: False)

			When repeatOverlay is False and the overlay document has fewer
			pages than pdf_doc, the remaining pages are copied without
			overlay.
		'''
		# The context managers close both inputs even when merging fails.
		with open(pdf_doc, "rb") as pdf_stream:
			with open(overlay_doc, "rb") as overlay_stream:
				pdf = PdfFileReader(pdf_stream)
				pdf_overlay = PdfFileReader(overlay_stream)
				page_cnt = pdf.numPages
				overlay_cnt = pdf_overlay.numPages
				outputWriter = PdfFileWriter()
				for n in range(page_cnt):
					pg = pdf.getPage(n)
					if repeatOverlay:
						pg.mergePage(pdf_overlay.getPage(0))
					elif n < overlay_cnt:
						# Guard against an overlay shorter than the document.
						pg.mergePage(pdf_overlay.getPage(n))
					outputWriter.addPage(pg)

				# Output (written while both inputs are still open)
				with open(output_doc, "wb") as outputStream:
					outputWriter.write(outputStream)
예제 #27
0
def parse_file(pdfFile,nameFile):
  """Split a PDF into one document per matched name and e-mail each one.

  The names come from get_names(nameFile). Pages are scanned in order; a
  page whose extracted text contains a name (case-insensitive) starts a new
  document, and the following pages are appended to it until the next match.
  Each finished document is handed to send_email. Pages before the first
  match are dropped.
  """
  reader = PdfFileReader(file(pdfFile,"rb"))

  # names (and whatever get_names associates with them) from the csv file
  recipients = get_names(nameFile)

  # connection to the local SMTP server
  mailer = smtplib.SMTP('localhost')

  writer = None          # document being assembled; None until the first match
  current_name = ""
  for page_no in range(reader.getNumPages()):
    page = reader.getPage(page_no)
    lowered_text = page.extractText().lower()
    for candidate in recipients.keys():
      if lowered_text.find(candidate.lower()) >= 0:
        if writer is not None:
          # a new name starts here: ship what was collected so far
          send_email(mailer, writer, current_name, recipients)
        writer = PdfFileWriter()
        current_name = candidate
        break
    if writer is not None:
      writer.addPage(page)

  # ship the last document
  if writer is not None:
    send_email(mailer, writer, current_name, recipients)

  mailer.quit()
예제 #28
0
	def appendDocuments(self, pdf_docs, output_doc):
		'''
		Append PDF documents together.

		pdf_docs: (list)
			List of PDF document paths.

		output_doc: (string)
			Path to the outputed PDF document.

		Returns True on success and False if any step raised an error.
		'''
		# Input streams must stay open until the writer has written the
		# pages, so they are collected here and closed in the finally clause.
		input_streams = []
		try:
			outputWriter = PdfFileWriter()
			for doc in pdf_docs:
				stream = open(doc, "rb")
				input_streams.append(stream)
				for pg in PdfFileReader(stream).pages:
					outputWriter.addPage(pg)
			# Output
			outputStream = open(output_doc, "wb")
			try:
				outputWriter.write(outputStream)
			finally:
				outputStream.close()
			return True
		except Exception:
			# Keep the boolean contract, but let KeyboardInterrupt and
			# SystemExit propagate instead of being swallowed.
			return False
		finally:
			for stream in input_streams:
				stream.close()
예제 #29
0
    def rewrite(self, context, font={'name': 'Times-Roman', 'size': 11}):
        """Stamp text onto the first page of self.path and save the result.

        context is an iterable of mappings with 'x', 'y' and 'value' keys;
        each value is drawn at (x, y) on a letter-sized overlay using the
        given font. The overlay is merged onto page 0 of the PDF at
        self.path and that single page is written to self.destination.
        Always returns True.
        """
        # Draw the overlay into an in-memory PDF with Reportlab.
        overlay_buffer = StringIO.StringIO()
        overlay = canvas.Canvas(overlay_buffer, pagesize=letter)
        overlay.setFont(font['name'], font['size'])
        for entry in context:
            overlay.drawString(entry['x'], entry['y'], entry['value'])
        overlay.save()

        # Rewind so the reader sees the buffer from the start.
        overlay_buffer.seek(0)
        overlay_reader = PdfFileReader(overlay_buffer)
        source_reader = PdfFileReader(file(self.path, "rb"))
        writer = PdfFileWriter()

        # Merge the overlay onto the first source page.
        first_page = source_reader.getPage(0)
        first_page.mergePage(overlay_reader.getPage(0))
        writer.addPage(first_page)

        # Persist the merged page.
        target = file(self.destination, "wb")
        writer.write(target)
        target.close()

        return True
예제 #30
0
	def replicatePage(self, pdf_doc, count=1, pageNumber=1):
		'''
			Replicate a page in a document, appends replicated page to
			the end of the document. The document is rewritten in place
			(via a temporary file in the same directory).

			pdf_doc: (string)
				Path to PDF document.
			count: (integer)
				Number of times to replicate page. (default 1)
			pageNumber: (integer)
				Page number to replicate, 1-based. (default 1)
		'''
		input_stream = open(pdf_doc, "rb")
		try:
			pdf_reader = PdfFileReader(input_stream)
			# Fetch the page from the reader; the original referenced an
			# undefined name here and raised NameError.
			page = pdf_reader.getPage(pageNumber-1)
			pdf_dir = os.path.dirname(pdf_doc)
			unique_filename = self.__uniqueName()
			outputWriter = PdfFileWriter()
			# Copy original pages to new document.
			for pg in pdf_reader.pages:
				outputWriter.addPage(pg)

			# Add replicated pages.
			for n in range(count):
				outputWriter.addPage(page)

			# Output
			temp_file = os.path.join(pdf_dir, unique_filename+".pdf")
			outputStream = open(temp_file, "wb")
			try:
				outputWriter.write(outputStream)
			finally:
				outputStream.close()
		finally:
			# The source must be closed before it is replaced below.
			input_stream.close()

		shutil.move(temp_file, pdf_doc)
예제 #31
0
 def create_source_pdf(self, cr, uid, ids, data, report_xml, context=None):
     """Render the report per object, reusing or storing attachments.

     Everything happens only when report_xml.attachment is set; otherwise
     the function falls through and returns None.

     For each object the attachment expression is evaluated to a name. If
     attachment reuse is enabled and a stored '<name>.pdf' attachment with
     data exists, its decoded content is used; otherwise the object is
     rendered with create_single_pdf and, when a name was produced, stored
     as a new ir.attachment (committed immediately).

     Returns False as soon as one create_single_pdf call yields a falsy
     result. When the first collected result is a 'pdf': if
     context['allow'] is falsy the whole id list is re-rendered with
     create_single_pdf and that is returned; otherwise all collected PDFs
     are merged and (merged_bytes, 'pdf') is returned. In every other case
     the function returns None implicitly.
     """
     # NOTE(review): flag is assigned here and below but never read in this block.
     flag=False
     if not context:
         context={}
     pool = pooler.get_pool(cr.dbname)
     attach = report_xml.attachment
     #~
     #~ Check in the new model if this report allow to reprint,
     #~ Allowtoreprint should mandate over attach,
     if attach: 
         objs = self.getObjects(cr, uid, ids, context)
         results = []
         for obj in objs:
             # NOTE(review): eval() of the attachment expression - make sure
             # report_xml.attachment can only be set by trusted users.
             aname = eval(attach, {'object':obj, 'time':time})
             result = False
             if report_xml.attachment_use and aname and context.get('attachment_use', True):
                 # Look for an attachment already stored for this object.
                 aids = pool.get('ir.attachment').search(cr, uid, [('datas_fname','=',aname+'.pdf'),('res_model','=',self.table),('res_id','=',obj.id)])                  
                 if aids:
                     brow_rec = pool.get('ir.attachment').browse(cr, uid, aids[0])
                     if not brow_rec.datas:
                         # Attachment record without data: the object is
                         # skipped entirely (nothing is appended to results).
                         continue
                     d = base64.decodestring(brow_rec.datas)
                     results.append((d,'pdf'))
                     continue
             # No reusable attachment: render this single object.
             result = self.create_single_pdf(cr, uid, [obj.id], data, report_xml, context)
             if not result:
                 return False
             try:
                 if aname:
                     flag=True # since it only gets here the first time, without an attachment
                     name = aname+'.'+result[1]
                     pool.get('ir.attachment').create(cr, uid, {
                         'name': aname,
                         'datas': base64.encodestring(result[0]),
                         'datas_fname': name,
                         'res_model': self.table,
                         'res_id': obj.id,
                         }, context=context
                     )
                     cr.commit()
                     
             except Exception,e:
                  # Storing the attachment is best effort: the error is
                  # logged and the rendered result is still used.
                  import traceback, sys
                  # NOTE(review): tb_s is built but never used; only str(e) is logged.
                  tb_s = reduce(lambda x, y: x+y, traceback.format_exception(sys.exc_type, sys.exc_value, sys.exc_traceback))
                  netsvc.Logger().notifyChannel('report', netsvc.LOG_ERROR,str(e))
             results.append(result) 
         if results:
             if results[0][1]=='pdf':
                 if not context.get('allow',False):
                     # Re-render all ids in one go; the per-object results
                     # collected above are discarded.
                     return self.create_single_pdf(cr, uid, ids, data, report_xml, context)
                 else:
                     # Concatenate every collected PDF into one document.
                     from pyPdf import PdfFileWriter, PdfFileReader
                     output = PdfFileWriter()
                     for r in results:
                         reader = PdfFileReader(cStringIO.StringIO(r[0]))
                         for page in range(reader.getNumPages()):
                             output.addPage(reader.getPage(page))
                     s = cStringIO.StringIO()
                     output.write(s)
                     return s.getvalue(), results[0][1]
예제 #32
0
파일: views.py 프로젝트: mi235/dideman
def showpdf(request):
    """Return page 1 of an extracted PDF with the signature image stamped on it.

    GET parameters:
      f -- file name, relative to MEDIA_ROOT/pdffiles/extracted
      o -- orientation; 'l' places the signature at x=529, anything else
           at x=70

    Returns an HttpResponse carrying the stamped page as an attachment.
    Returns None when 'f' is missing or resolves outside the extracted
    directory.
    """
    sign = os.path.join(settings.MEDIA_ROOT, "signature.png")
    mimetypes.init()
    response = None
    if 'f' in request.GET:
        # 'f' comes straight from the request: resolve it and refuse any
        # path that escapes the extracted-files directory (e.g. '../..').
        base_dir = os.path.abspath(
            os.path.join(settings.MEDIA_ROOT, 'pdffiles', 'extracted'))
        pdf_path = os.path.abspath(
            os.path.join(base_dir, '%s' % request.GET['f']))
        if not pdf_path.startswith(base_dir + os.sep):
            return response

        with open(pdf_path, "rb") as fr:
            # Build a one-page overlay holding only the signature image.
            imgTemp = StringIO()
            imgDoc = canvas.Canvas(imgTemp)
            if request.GET['o'] == 'l':
                imgDoc.drawImage(sign, 529, 40, 290 / 2, 154 / 2)
            else:
                imgDoc.drawImage(sign, 70, 40, 290 / 2, 154 / 2)

            imgDoc.save()
            overlay = PdfFileReader(StringIO(imgTemp.getvalue())).getPage(0)
            page = PdfFileReader(fr).getPage(0)

            page.mergePage(overlay)
            pdf_out = PdfFileWriter()
            pdf_out.addPage(page)
            response = HttpResponse(mimetype='application/pdf')
            response[
                'Content-Disposition'] = 'attachment; filename=%s' % request.GET[
                    'f']

            # The source file must still be open while the page is written.
            pdf_out.write(response)

    return response
예제 #33
0
def join_pages(composites):
  """Build a searchable PDF from the composites and print its path as JSON.

  Each page collection from collect_pages(composites) is rendered to a
  one-page PDF by paint_original_segments. The first page of every such
  file is concatenated into a temporary 'searchable_*.pdf' under
  ./static/images/, and {'searchable': <path>} is dumped to stdout.
  """
  # latex_buf = StringIO()
  page_fnames = []
  for page_num, collection in enumerate(collect_pages(composites)):
    fnames, transcriptions, types = [], [], []
    for r in collection:
      fnames.append(r['location'])
      transcriptions.append(r['transcription'])
      types.append(r['type'])
    page_fnames.append(paint_original_segments(fnames, transcriptions, page_num))
  #   latex_buf.write(assemble_latex(fnames, transcriptions, types))
  #   latex_buf.write(LATEX_NEWPAGE_SNIPPET)
  # raw_latex = LATEX_WRAP.format(raw_latex=latex_buf.getvalue(), font_size=LATEX_FONT_SIZE)
  # # transcribed pdf
  # latex_pdf_fname = latex_to_pdf(raw_latex)
  # ---
  # searchable pdf
  pdf_writer = PdfFileWriter()
  pdf_pages = []
  try:
    for page_fname in page_fnames:
      page_file = open(page_fname, 'rb')
      pdf_pages.append(page_file)
      pdf_writer.addPage(PdfFileReader(page_file).getPage(0))
    searchable_pdf = NamedTemporaryFile(prefix='searchable_', suffix='.pdf', dir=path.abspath('./static/images/'), delete=False)
    try:
      pdf_writer.write(searchable_pdf)
    finally:
      searchable_pdf.close()
  finally:
    # Explicit loop: map() is lazy on Python 3 and would never call close().
    for page_file in pdf_pages:
      page_file.close()
  json.dump({
    # 'transcribed': latex_pdf_fname,
    'searchable': searchable_pdf.name
  }, sys.stdout)
예제 #34
0
파일: views.py 프로젝트: Dpetters/Umeqo
def receipts_view(request):
    """Serve one PDF holding the first page of every receipt of the customer.

    Up to 100 Stripe charges of the customer found in request.META are
    fetched. A receipt PDF is obtained per charge through
    get_or_create_receipt_pdf, the first page of each is concatenated into
    '<MEDIA_ROOT>employer/receipts/Umeqo <employer> Charges.pdf', and that
    file is returned inline.
    """
    customer = request.META.get('customer', None)
    employer = request.user.recruiter.employer

    charges = stripe.Charge.all(count=100, customer = customer.id).data

    pdf_name = "Umeqo %s Charges.pdf" % (employer)
    path = "%semployer/receipts/" % (s.MEDIA_ROOT)
    pdf_path = "%s%s" % (path, pdf_name)

    output = PdfFileWriter()
    # Every receipt file must stay open until the merged PDF is written.
    receipt_files = []
    try:
        for charge in charges:
            try:
                invoice = stripe.Invoice.retrieve(charge.invoice)
            except InvalidRequestError:
                # Do not reuse the invoice of the previous charge (or hit an
                # unbound name on the first one).
                # NOTE(review): assumes get_or_create_receipt_pdf accepts
                # invoice=None - confirm against its implementation.
                invoice = None
            receipt_path = get_or_create_receipt_pdf(charge, invoice, employer.name)
            receipt_file = open(receipt_path, "rb")
            receipt_files.append(receipt_file)
            output.addPage(PdfFileReader(receipt_file).getPage(0))
        if not os.path.exists(path):
            os.makedirs(path)
        with open(pdf_path, "wb") as outputStream:
            output.write(outputStream)
    finally:
        for receipt_file in receipt_files:
            receipt_file.close()

    mimetype = "application/pdf"
    with open(pdf_path, "rb") as merged_file:
        content = merged_file.read()
    response = HttpResponse(content, mimetype=mimetype)
    response["Content-Disposition"] = 'inline; filename="%s"' % pdf_name
    return response
예제 #35
0
파일: handler.py 프로젝트: Nexedi/cloudooo
  def setMetadata(self, metadata):
    """Returns a document with new metadata.

    Keyword arguments:
    metadata -- expected an dictionary with metadata. A 'ModificationDate'
    entry is stored as 'ModDate'; a list under 'Keywords' is joined with
    spaces. The caller's dictionary is not modified.

    Returns the bytes of a copy of the document carrying the new metadata.
    """
    # TODO: date as "D:20090401124817-04'00'" ASN.1 for ModDate and CreationDate
    # Work on a copy so the pop/assignments below do not leak to the caller.
    metadata = dict(metadata)

    modification_date = metadata.pop("ModificationDate", None)
    if modification_date:
      metadata['ModDate'] = modification_date
    keywords = metadata.get('Keywords', None)
    if isinstance(keywords, list):
      # str.join on the separator; lists have no join() method.
      metadata['Keywords'] = ' '.join(keywords)
    args = {}
    for key, value in list(metadata.items()):
      # NOTE(review): capitalize() lower-cases everything after the first
      # letter, so 'ModDate' is written as '/Moddate' - confirm intended.
      args[NameObject('/' + key.capitalize())] = createStringObject(value)

    with open(self.document.getUrl(), "rb") as input_file:
      input_pdf = PdfFileReader(input_file)
      output_pdf = PdfFileWriter()
      output_pdf._info.getObject().update(args)

      for page_num in range(input_pdf.getNumPages()):
        output_pdf.addPage(input_pdf.getPage(page_num))

      # The input file must still be open while the pages are written.
      output_stream = io.BytesIO()
      output_pdf.write(output_stream)
    return output_stream.getvalue()
예제 #36
0
def split_file(f, filename):
    """Cut a PDF into pieces of at most 10 pages and queue the pieces.

    f is the opened reader for filename. Every piece is written next to the
    original as '<filename without its last 4 chars>_<first page index>.pdf'
    and its name is appended to the global file_queue in page order. The
    original file is removed afterwards.
    """
    global file_queue
    total_pages = f.getNumPages()
    log('Splitting file ' + filename + " with " + str(total_pages) + " pages.")
    first_page = 0
    while first_page < total_pages:
        # At most 10 pages per piece; fewer for the final one.
        piece_len = min(10, total_pages - first_page)
        piece_name = filename[:-4] + '_' + str(first_page) + '.pdf'
        writer = PdfFileWriter()
        for page_no in range(first_page, first_page + piece_len):
            writer.addPage(f.getPage(page_no))
        # Write and save the piece
        piece_file = file(piece_name, 'wb')
        writer.write(piece_file)
        piece_file.flush()
        piece_file.close()
        file_queue.append(piece_name)
        first_page += piece_len
    # The original is no longer needed once it is in pieces
    os.remove(filename)
예제 #37
0
def generate(donor):
    """Build the two-page PDF 'output/<donor>.pdf' for one donor.

    The donor name (spaces replaced by '-', lower-cased) names the output
    files. Both SVG templates (page1_svg / page2_svg, defined outside this
    function) are copied with '/France/' replaced by the URL-escaped donor
    name, processed and exported to PDF by inkscape, and the first pages of
    the two PDFs are combined. Nothing is done when the combined PDF
    already exists.
    """
    os.system('mkdir -p output')
    # Only spaces are escaped for the URL form of the name.
    donor_url = donor.replace(' ','%20')
    page1 = 'output/%s1' % (donor.replace(' ','-').lower())
    page2 = 'output/%s2' % (donor.replace(' ','-').lower())

    combined = 'output/%s.pdf' % (donor.replace(' ','-').lower())
    # Already generated on an earlier run: skip.
    if os.path.exists(combined): return

    # NOTE(review): donor is interpolated into shell command lines without
    # escaping - confirm it never comes from untrusted input.
    # The copy made by cp is overwritten by the sed redirect on the next line.
    os.system('cp "%s" "%s.svg"' % (page1_svg, page1))
    os.system('sed "s|/France/|/%s/|" "%s" > "%s.svg"' % (donor_url, page1_svg, page1))
    # Run the za.co.widgetlabs.update verb, save and quit, then export to PDF.
    os.system('inkscape  --file="%s.svg" --verb=za.co.widgetlabs.update --verb=FileSave --verb=FileQuit 2> /dev/null' % (page1))
    os.system('inkscape --file="%s.svg" --export-pdf="%s.pdf" 2> /dev/null' % (page1, page1))
    # Same steps for the second page (the final export does not silence stderr).
    os.system('cp "%s" "%s.svg"' % (page2_svg, page2))
    os.system('sed "s|/France/|/%s/|" "%s" > "%s.svg"' % (donor_url, page2_svg, page2))
    os.system('inkscape  --file="%s.svg" --verb=za.co.widgetlabs.update --verb=FileSave --verb=FileQuit 2> /dev/null' % (page2))
    os.system('inkscape --file="%s.svg" --export-pdf="%s.pdf" ' % (page2, page2))
    # Merge pages
    input1 = PdfFileReader(file('%s.pdf' % (page1), 'rb'))
    input2 = PdfFileReader(file('%s.pdf' % (page2), 'rb'))
    output = PdfFileWriter()
    output.addPage(input1.getPage(0))
    output.addPage(input2.getPage(0))
    outputStream = file(combined, 'wb')
    output.write(outputStream)
    outputStream.close()
    # NOTE(review): the reason for this two-second pause is not visible here.
    sleep(2)
예제 #38
0
    def build_output_pdf(self, output_stream):
        """Merge the modification PDF onto the original and write the result.

        Page i of the modification stream is merged onto page i of the
        original for every index both documents share. Original pages
        without a counterpart are copied unchanged, and modification pages
        beyond the end of the original are appended as they are. The merged
        document is written to output_stream.
        """
        # Rewind both buffers before handing them to the readers.
        self._modification_stream.seek(0)
        overlay_reader = PdfFileReader(self._modification_stream)

        self._original_pdf_stream.seek(0)
        base_reader = self._get_pdf_reader(self._original_pdf_stream)

        writer = PdfFileWriter()

        # Pages of the original, overlaid where an overlay page exists.
        for index in range(base_reader.numPages):
            merged = base_reader.getPage(index)
            if index < overlay_reader.numPages:
                merged.mergePage(overlay_reader.getPage(index))
            writer.addPage(merged)

        # Surplus overlay pages go to the end untouched.
        for index in range(base_reader.numPages, overlay_reader.numPages):
            writer.addPage(overlay_reader.getPage(index))

        writer.write(output_stream)
예제 #39
0
 def output(self):
     """Export the pages listed in the page field to a PDF chosen by the user.

     Opens a save dialog, parses the page specification from
     self.pages_line_edit against the page count of self.in_filename and
     writes the selected pages to the chosen file. Does nothing when the
     dialog is cancelled.
     """
     # get the output filename using the file dialog
     (out_filename, _selected_filter) = \
         QFileDialog.getSaveFileName(parent = self, 
                                     caption = self.tr(u'Export'),
                                     dir = '',
                                     filter = self.tr('pdf (*.pdf)'))
     if not out_filename:
         # Dialog cancelled: there is no file to write to.
         return

     # extract input
     pages_string = self.pages_line_edit.text()

     with open(self.in_filename, 'rb') as in_file:
         in_reader = PdfFileReader(in_file)

         # Get the indices of pages to extract
         pages = pages_parser(in_reader.getNumPages()).parse(pages_string)

         # append pages to output writer
         out_writer = PdfFileWriter()
         for page_index in pages:
             out_writer.addPage(in_reader.getPage(page_index))

         # Open the target only now, so a bad page specification does not
         # leave an empty output file behind.
         with open(out_filename, 'wb') as out_file:
             out_writer.write(out_file)
예제 #40
0
파일: toposm.py 프로젝트: rpcarver/TopOSM
def renderToPdf(envLL, filename, sizex, sizey):
    """Renders the specified Box2d and zoom level as a PDF"""
    basefilename = os.path.splitext(filename)[0]
    mergedpdf = None
    for mapname in MAPNIK_LAYERS:
        print 'Rendering', mapname
        # Render layer PDF.
        localfilename = basefilename + '_' + mapname + '.pdf'
        file = open(localfilename, 'wb')
        surface = cairo.PDFSurface(file.name, sizex, sizey)
        envMerc = LLToMerc(envLL)
        map = mapnik.Map(sizex, sizey)
        mapnik.load_map(map, mapname + ".xml")
        map.zoom_to_box(envMerc)
        mapnik.render(map, surface)
        surface.finish()
        file.close()
        # Merge with master.
        if not mergedpdf:
            mergedpdf = PdfFileWriter()
            localpdf = PdfFileReader(open(localfilename, "rb"))
            page = localpdf.getPage(0)
            mergedpdf.addPage(page)
        else:
            localpdf = PdfFileReader(open(localfilename, "rb"))
            page.mergePage(localpdf.getPage(0))
    output = open(filename, 'wb')
    mergedpdf.write(output)
    output.close()
예제 #41
0
파일: erml2pdf.py 프로젝트: KDE/kraft
    def watermark( self, pdfStr, watermarkFile, spec ):
        """Put the generated PDF's pages on top of a watermark page.

        pdfStr        -- stream holding the generated PDF
        watermarkFile -- path of the watermark PDF; only its first page is used
        spec          -- Mark.FIRST_PAGE to watermark only the first page,
                         Mark.ALL_PAGES to watermark every page; any other
                         value copies the pages unchanged

        Returns self.outputFile after writing to it when it is set,
        otherwise the resulting PDF as a string.
        """
        # Keep the watermark file open until the result has been written.
        watermarkStream = open( watermarkFile, "rb" )
        try:
            inputWatermark = PdfFileReader( watermarkStream )
            generatedPdf = PdfFileReader( pdfStr )
            outputPdf = PdfFileWriter()

            # flag for the first page of the source file
            firstPage = True

            # Loop over source document pages and merge with the first page
            # of the watermark file.
            watermarkPage = inputWatermark.getPage(0)
            for page in generatedPdf.pages:
                if (spec == Mark.FIRST_PAGE and firstPage) or spec == Mark.ALL_PAGES:
                    # Copy (shallow) the watermark page here, otherwise the
                    # same watermark page object would be merged over and
                    # over because p would only be a reference.
                    p = copy.copy( watermarkPage )
                    p.mergePage( page )
                    outputPdf.addPage( p )
                    firstPage = False
                else:
                    outputPdf.addPage(page)

            if self.outputFile:
                # Write to outputfile
                outputStream = open( self.outputFile, "wb" )
                try:
                    outputPdf.write( outputStream )
                finally:
                    outputStream.close()
                return self.outputFile
            else:
                stringIO = StringIO.StringIO()
                outputPdf.write( stringIO )
                return stringIO.getvalue()
        finally:
            watermarkStream.close()
예제 #42
0
def scalePDF(inputFile,pageNumber,zoomFactor):
    #print "entered scalepdf"
    #print "SCALING PDF TO INCREASE IMAGE QUALITY FOR TESSERACT"
    #print "---------------------------------------------------"
    #Proper indexing
    pageNumber=pageNumber-1
    
    #Generate output filename (Puts everything in its own directory)
    outputDirectory=inputFile[:inputFile.rindex('.')]#+inputFile[inputFile.rindex('/'):inputFile.rindex('.')]+inputFile[inputFile.rindex('/'):inputFile.rindex('.')]
    #print outputDirectory
    if not os.path.exists(outputDirectory): os.makedirs(outputDirectory)

    outputFile=inputFile[:inputFile.rindex('/')]+inputFile[inputFile.rindex('/'):inputFile.rindex('.')]+inputFile[inputFile.rindex('/'):inputFile.rindex('.')]+'_'+str(pageNumber+1)+'.pdf'
    #outputFile=inputFile[:inputFile.rindex('.')]+'_'+str(pageNumber+1)+'.pdf'
    output=PdfFileWriter()
    input1=PdfFileReader(file(inputFile,"rb"))
    page = input1.getPage(pageNumber)
    # I ran into some trouble with scaling a certain page, I
    # still can't figure out what it was. So I use try here.
    try:
        page.scaleBy(zoomFactor)
    except:
        print "---PAGE WAS NOT SCALED: "+str(pageNumber+1)
        #print "---------------------------------------------------"
    #Add page to output
    output.addPage(page)
    #Print just the file name
    #print "SAVING   SCALED    PDF   AS: "+outputFile[outputFile.rindex('/')+1:]
    #print "---------------------------------------------------"
    outputStream = file(outputFile, "wb")
    output.write(outputStream)
    outputStream.close()
    return outputFile
예제 #43
0
def main():
    """
    """

    # Parse command line
    pdf_files = sys.argv[1:]
    if len(pdf_files) == 0:
        print __usage__
        sys.exit()

    # Make sure there is more than one pdf file
    if len(pdf_files) == 1:
        print "In the spirit of gnu tar, this script cowardly refuses to"
        print "combine one pdf file!"
        sys.exit()

    # Create unique name for output file
    localtime = time.localtime()
    localtime = [str(x) for x in localtime]
    localtime = [x.zfill(2) for x in localtime]
    localtime[0] = localtime[0].zfill(4)
    output_file = "%s-%s-%s_%s-%s-%s.pdf" % tuple(localtime[:6])

    # Combine pdf files in order 
    output = PdfFileWriter()
    for pdf in pdf_files:
        input = PdfFileReader(file(pdf,"rb"))
        num_pages = input.getNumPages()
        for i in range(num_pages):
            output.addPage(input.getPage(i))

    # Write final pdf  
    stream = file(output_file,"wb")
    output.write(stream) 
    stream.close()
    def pdf_watermark_fast_first_page(self, pathname, Wm_f, wt1='', **kwargs):
        """Stamp a text watermark on the first page of a PDF.

        pathname -- path of the source PDF
        Wm_f     -- path the watermarked first page is written to
        wt1      -- not used by this method
        url_wtm  -- (keyword) text drawn at (10, 100); empty when omitted

        Only the first page of the source ends up in the output.
        Returns Wm_f.
        """
        from pyPdf import PdfFileWriter, PdfFileReader
        import StringIO
        from reportlab.pdfgen import canvas
        from reportlab.lib.pagesizes import letter

        # Fall back to an empty text instead of leaving the name unbound
        # (which raised NameError) when 'url_wtm' is not passed.
        url_watermark = kwargs.get('url_wtm', '')

        packet = StringIO.StringIO()
        # create a new PDF with Reportlab
        can = canvas.Canvas(packet, pagesize=letter)
        can.drawString(10, 100, url_watermark)
        can.save()

        # move to the beginning of the StringIO buffer
        packet.seek(0)
        new_pdf = PdfFileReader(packet)
        # read the existing PDF; it must stay open until the page is written
        source = open(pathname, "rb")
        try:
            existing_pdf = PdfFileReader(source)
            output = PdfFileWriter()
            # add the "watermark" (which is the new pdf) on the existing page
            page = existing_pdf.getPage(0)
            page.mergePage(new_pdf.getPage(0))
            output.addPage(page)
            # finally, write "output" to a real file
            outputStream = open(Wm_f, "wb")
            try:
                output.write(outputStream)
            finally:
                outputStream.close()
        finally:
            source.close()
        return Wm_f
예제 #45
0
def split(files, verbose):
	"""Write every page of every given PDF to its own file.

	files   -- list of paths of the PDFs to split
	verbose -- when true, print a summary line at the end

	Page N of 'name.ext' is written to 'namepN.ext', with N zero-padded to
	the number of digits of the file's page count. Exits with status 2
	when a file does not exist or cannot be opened as a PDF.
	"""
	for infilename in files:
		if not os.path.exists(infilename):
			halp()
			print ("error: "+infilename+" does not exist... exiting nao")
			sys.exit(2) # pdf file is no pdf file...
	streams = []
	try:
		inputs = []
		try:
			for infilename in files:
				stream = open(infilename, "rb")
				streams.append(stream)
				inputs.append(PdfFileReader(stream))
		except Exception:
			halp()
			print ("there has been an error of unfortunate proportions")
			sys.exit(2) # pdf file is no pdf file...
		pages_written = 0
		for infilename, pdf in zip(files, inputs):
			(name, ext) = splitext(infilename)
			num_pages = pdf.getNumPages()
			# Pad to the digit count of the highest page number.
			# ceil(log10(n)) is one digit short for n = 10, 100, ...
			page_fmt = "%0" + str(len(str(num_pages))) + "d"
			for pagenr in range(num_pages):
				output = PdfFileWriter()
				output.addPage(pdf.getPage(pagenr))
				outname = name+"p"+(page_fmt % (pagenr+1))+ext
				print (outname)
				outputStream = open(outname, "wb")
				try:
					output.write(outputStream)
				finally:
					outputStream.close()
				pages_written += 1
	finally:
		for stream in streams:
			stream.close()
	if verbose: print (str(pages_written)+" pages in "+str(len(inputs))+" files processed")
예제 #46
0
def write_pdf(pdf, part_count_ye, part_count_ye_end, fen, output_file):
  """Write a page range of pdf to '<output_file>_<fen+1>.pdf'.

  Pages part_count_ye (inclusive) up to part_count_ye_end (exclusive) are
  copied from the given reader into the new file.
  """
  writer = PdfFileWriter()
  for page_no in range(part_count_ye, part_count_ye_end):
    writer.addPage(pdf.getPage(page_no))
  target_name = output_file + '_' + str(fen + 1) + '.pdf'
  target = file(target_name, 'wb')
  writer.write(target)
  target.close()
예제 #47
0
def delete(filesandranges, outputfilename, verbose):
	"""Copy the given PDFs into one file, leaving out the listed pages.

	filesandranges -- list of dicts with 'name' (path of a PDF) and
	                  'pages' (1-based page numbers to drop)
	outputfilename -- path of the PDF to create; must not exist yet
	verbose        -- not used by this function

	Exits with status 2 when an input is missing, when the output already
	exists, or when reading an input fails.
	"""
	for entry in filesandranges:
		if not os.path.exists(entry['name']):
			halp()
			print ("error: "+entry['name']+" does not exist... exiting nao")
			sys.exit(2) # pdf file is no pdf file...
	if os.path.exists(outputfilename):
		halp()
		# Report the output file: that is the one which already exists.
		print ("error: "+outputfilename+" does already exist... exiting nao")
		sys.exit(2) # pdf file is no pdf file...

	output = PdfFileWriter()
	# Inputs must stay open until the output has been written.
	streams = []
	try:
		try:
			for pdf in filesandranges:
				print (pdf["name"])
				stream = open(pdf["name"], "rb")
				streams.append(stream)
				fiel = PdfFileReader(stream)

				for pagenr in range(1,fiel.getNumPages()+1):
					if (pagenr not in pdf["pages"]):
						output.addPage(fiel.getPage(pagenr-1))
		except Exception:
			halp()
			sys.exit(2) # pdf file is no pdf file...
		# Checked again in case the file appeared in the meantime.
		if (not os.path.exists(outputfilename)):
			outputStream = open(outputfilename, "wb")
			try:
				output.write(outputStream)
			finally:
				outputStream.close()
		else:
			print ("file exists, discontinuing operation")
	finally:
		for stream in streams:
			stream.close()
예제 #48
0
def save_ready_template(request, id):
    """Stamp a person's name on the two template pages and render the page.

    The FIO record with the given id supplies name and surname. They are
    drawn at fixed positions on a two-page overlay, which is merged onto
    pages 0 and 1 of the template PDF. The result is written to
    readytemplate.pdf and 'template_page.html' is rendered with the record.
    """
    person_print = FIO.objects.get(id=id)

    # Build the two-page overlay in memory with Reportlab.
    overlay_buffer = StringIO.StringIO()
    overlay = canvas.Canvas(overlay_buffer, pagesize=letter)
    for x_pos, y_pos in ((284, 579), (260, 494)):
        overlay.drawString(x_pos, y_pos,
                           "{} {}".format(person_print.name,
                                          person_print.surname))
        overlay.showPage()
    overlay.save()

    # Rewind the buffer before reading it back.
    overlay_buffer.seek(0)
    overlay_reader = PdfFileReader(overlay_buffer)
    template_reader = PdfFileReader(
        file("/Users/danilakimov/Desktop/template1.pdf", "rb"))
    writer = PdfFileWriter()

    # Overlay page i goes onto template page i.
    for page_no in (0, 1):
        page = template_reader.getPage(page_no)
        page.mergePage(overlay_reader.getPage(page_no))
        writer.addPage(page)

    target = file("/Users/danilakimov/Desktop/readytemplate.pdf", "wb")
    writer.write(target)
    target.close()
    return render(request, 'template_page.html',
                  {'person_template': person_print})
예제 #49
0
def split_pset():
    if (not options.pset or not options.probs):
        print_err_and_die("You must enter both arguements! run with -h for help")

    path = "pset%s/latex/"%options.pset
    try:
        filename = "%spset%s_answers.pdf"%(path, options.pset)
        inp = PdfFileReader(file(filename, "rb"))
    except IOError:
        print_err_and_die("Error! File, %s was not found." % filename)
    
    ##loop over user input and break up pdf
    questionNum = 1
    probs = options.probs.split(",")
    for prob in probs:
        print "Processing question", questionNum

        prob = prob.strip() #kill whitespace

        out = PdfFileWriter()
        pages = get_pages(prob, inp.getNumPages())

        for page in pages:
            print "page num", str(page)
            out.addPage(inp.getPage(int(page)-1))

        outStream = file("%spset%s-%s_answer.pdf"%(path, options.pset, questionNum), "wb")
        out.write(outStream)
        outStream.close()
        questionNum +=1

    print "Done!"
예제 #50
0
    def _merge_pdf(self, documents):
        """Merge PDF files into one.

        :param documents: list of path of pdf files
        :returns: path of the merged pdf (a temporary file)
        """
        writer = PdfFileWriter()
        # The streams have to be closed *after* PdfFileWriter's call to
        # write(), so they are collected and released in the finally clause.
        streams = []
        try:
            for document in documents:
                pdfreport = open(document, 'rb')
                streams.append(pdfreport)
                reader = PdfFileReader(pdfreport)
                for page in range(0, reader.getNumPages()):
                    writer.addPage(reader.getPage(page))

            # NOTE(review): the temporary file carries a '.html' suffix
            # although it holds a PDF; kept in case callers rely on it.
            merged_file_fd, merged_file_path = tempfile.mkstemp(
                suffix='.html', prefix='report.merged.tmp.')
            # PDF data is binary: text mode can corrupt it on some platforms.
            with closing(os.fdopen(merged_file_fd, 'wb')) as merged_file:
                writer.write(merged_file)
        finally:
            for stream in streams:
                stream.close()

        return merged_file_path
예제 #51
0
class cleanpdf:
	"""Copy a PDF to '<name>5.pdf' with rewritten document-info entries.

	All the work happens in the constructor: every entry of the writer's
	info dictionary is overwritten with a fixed string, then all pages of
	the input are copied into the new file.
	"""

	def __init__(self,pathFile):
		"""Open pathFile, rewrite the info entries and write the copy."""
		self.pathFile = pathFile
		self.inputFile = open(self.pathFile,"rb")
		self.pdfInput = PdfFileReader(self.inputFile)
		self.pyPdfOutput = PdfFileWriter()
		self.dataToUpdate = self.pyPdfOutput._info.getObject()
		self.__modifyData()
		self.__copyPDF()

	def __modifyData(self):
		"""Overwrite every existing info entry with the fixed string."""
		# Only existing keys are reassigned, so iterating the dict is safe.
		for data in self.dataToUpdate:
			self.dataToUpdate[data] = createStringObject(('<h1 onmouseover=alert(1)>').encode('ascii'))

	def __copyPDF(self):
		"""Copy all input pages and write the output file."""
		for page in range(0,self.pdfInput.getNumPages()):
			self.pyPdfOutput.addPage(self.pdfInput.getPage(page))
		outputFile = open(self.__changeName(),"wb")
		try:
			self.pyPdfOutput.write(outputFile)
		finally:
			# Close explicitly so the data is flushed to disk.
			outputFile.close()

	def __changeName(self):
		"""Return the input path with its extension replaced by '5.pdf'."""
		newName = self.pathFile[0:self.pathFile.rfind(".")]+"5.pdf"
		return newName
예제 #52
0
def splitXPDF(pdfFileName):
    try:
        inputpdf = PdfFileReader(open(pdfFileName, "rb"))

        print '[+] Total Page : ' + str(inputpdf.getNumPages())

        setpath = pdfFileName[pdfFileName.find('\\') + 1:pdfFileName.find('.')]
        lstName = []
        with open("nameFile.base", "r") as nameFile:
            lstName = nameFile.read().split('\n')

        if (inputpdf.getNumPages() == len(lstName)):
            for i in xrange(inputpdf.numPages):
                output = PdfFileWriter()
                output.addPage(inputpdf.getPage(i))
                if (os.path.isdir('resault') != True):
                    os.mkdir('resault')
                if (os.path.isdir('resault\\' + setpath) != True):
                    os.mkdir('resault\\' + setpath)

                with open('resault\\' + setpath + '\\' + lstName[i] + '.pdf',
                          'wb') as outputStream:
                    output.write(outputStream)
                print '[+] Generate Page ' + str(
                    i + 1) + ' with File : ' + lstName[i] + '.pdf'

        else:
            print '[-] Number of Name in \'nameFile.base\' is not match with Number Page in PDF.'
    except IOError:
        print '[-] Cannot Openfile.'
예제 #53
0
    def save(self, to):
        """Render every page class onto the origin PDF and write the result.

        to is either a path (string) or an already opened writable stream;
        the stream is closed after writing in both cases.

        Raises RuntimeError when no origin is configured or the origin file
        cannot be opened.
        """
        origin = self.get_origin()
        if not origin:
            raise RuntimeError("Please implement get_origin method or origin attribute")

        try:
            template_reader = PdfFileReader(file(origin, "rb"))
        except IOError:
            raise RuntimeError(u"Failed to open origin file")

        writer = PdfFileWriter()
        for page_class in self.pages:
            overlay = page_class(self.instance).save()
            # NOTE(review): page 0 of the origin is fetched for every page
            # class, so each overlay is merged onto the same page - confirm
            # this is intended.
            merged = template_reader.getPage(0)
            merged.mergePage(overlay)
            writer.addPage(merged)

        target = file(to, "wb") if isinstance(to, basestring) else to
        writer.write(target)
        target.close()
def make_pdf_2(dni=''):
	"""Stamp the fixed date text on the second page of the contract PDF.

	The text is drawn at (257, 568) on a letter-sized overlay and merged
	onto page index 1 of 'contracte_voluntariat.pdf'. That single page is
	written to '<dni>file1.pdf' in the current directory.

	Returns the name of the written file.
	"""
	# Overlay with the date text, built in memory
	overlay_buffer = StringIO.StringIO()
	overlay = canvas.Canvas(overlay_buffer, pagesize=letter)
	overlay.drawString(257, 568, '28 de Julio de 2018')
	overlay.save()

	# Rewind the buffer before reading it back
	overlay_buffer.seek(0)
	overlay_reader = PdfFileReader(overlay_buffer)

	contract_reader = PdfFileReader(file("contracte_voluntariat.pdf", "rb"))
	writer = PdfFileWriter()

	# Merge the overlay onto the second page of the contract
	target_page = contract_reader.getPage(1)
	target_page.mergePage(overlay_reader.getPage(0))
	writer.addPage(target_page)

	filename = "".join([str(dni), 'file', str(1), '.pdf'])

	target = file(filename, "wb")
	writer.write(target)
	target.close()

	return filename
    def add_omr_marks(self, pdf_data, is_latest_document):
        """Merge an OMR mark layer onto every even-indexed page of a PDF.

        :param pdf_data: content of the PDF to mark
        :param is_latest_document: passed on (combined with the page check)
            to _compute_marks to flag the latest page
        :returns: content of the marked PDF
        """
        # Documentation
        # http://meteorite.unm.edu/site_media/pdf/reportlab-userguide.pdf
        # https://pythonhosted.org/PyPDF2/PdfFileReader.html
        # https://stackoverflow.com/a/17538003
        # https://gist.github.com/kzim44/5023021
        # https://www.blog.pythonlibrary.org/2013/07/16/
        #   pypdf-how-to-write-a-pdf-to-memory/
        self.ensure_one()

        pdf_buffer = StringIO.StringIO()
        pdf_buffer.write(pdf_data)

        existing_pdf = PdfFileReader(pdf_buffer)
        output = PdfFileWriter()
        total_pages = existing_pdf.getNumPages()

        # print latest omr mark on latest pair page (recto)
        # NOTE(review): for e.g. 2 or 3 pages this is 1, an odd index that
        # the loop below never tests, so no page is flagged as latest -
        # confirm the intended formula.
        latest_omr_page = total_pages // 2

        for page_number in range(total_pages):
            page = existing_pdf.getPage(page_number)
            # only print omr marks on pair pages (recto)
            # Compare by value: identity checks on ints are unreliable.
            if page_number % 2 == 0:
                is_latest_page = is_latest_document and \
                    page_number == latest_omr_page
                marks = self._compute_marks(is_latest_page)
                omr_layer = self._build_omr_layer(marks)
                page.mergePage(omr_layer)
            output.addPage(page)

        out_buffer = StringIO.StringIO()
        output.write(out_buffer)

        return out_buffer.getvalue()
예제 #56
0
def print_danfe(xml_nfe):
    """Generate the DANFE PDF for an NF-e XML and return the PDF content.

    The XML is loaded into a ``ProcNFe_310`` object, a ``DANFE`` is generated
    from it with ``/tmp/`` as its ``caminho``, and the resulting file is read
    back and re-serialised through a ``PdfFileWriter``.

    :param xml_nfe: XML content assigned to ``ProcNFe_310.xml``.
    :return: content of the assembled PDF, as returned by the in-memory
        buffer's ``getvalue()``.
    """
    from pysped.nfe.leiaute import ProcNFe_310
    from pysped.nfe.danfe import DANFE

    procnfe = ProcNFe_310()
    procnfe.xml = xml_nfe

    danfe = DANFE()
    danfe.NFe = procnfe.NFe
    danfe.protNFe = procnfe.protNFe
    danfe.caminho = "/tmp/"
    danfe.gerar_danfe()
    # Presumably gerar_danfe() writes "<caminho><chave>.pdf" - verify against
    # pysped; the path below is built on that assumption.
    paths = [danfe.caminho + danfe.NFe.chave + '.pdf']

    output = PdfFileWriter()
    s = StringIO()
    opened_files = []
    try:
        for path in paths:
            # The handle is kept open until output.write() has run and is
            # closed in the finally clause.
            pdf_file = open(path, "rb")
            opened_files.append(pdf_file)
            pdf = PdfFileReader(pdf_file)
            for i in range(pdf.getNumPages()):
                output.addPage(pdf.getPage(i))
        # Serialise once, after all pages are collected; writing inside the
        # loop would append one complete PDF per path to the same buffer.
        output.write(s)
        return s.getvalue()
    finally:
        s.close()
        for pdf_file in opened_files:
            pdf_file.close()
예제 #57
0
def createPDFHttpResponse(filepath, output_filename, user, access_time):
    """
    Creates a HttpResponse from a watermarked PDF file. Watermark contains the user who accessed the document
    and the time of access.

    :param filepath: Path to the PDF file to serve
    :param output_filename: File name sent to the user in the Content-Disposition header
    :param user: Value interpolated into the watermark text with ``%s``
    :param access_time: Object providing ``isoformat(' ')``; its result is interpolated into the watermark text
    :return: HttpResponse with content type ``application/pdf`` holding the watermarked PDF
    :raises IOError: if ``filepath`` cannot be opened (no not-found response is built here)
    """
    # Build a one-page PDF that carries the access watermark
    watermark_buffer = StringIO()
    p = canvas.Canvas(watermark_buffer)
    p.drawString(0, 0, "Downloaded by %s at %s" % (user, access_time.isoformat(' ')))
    p.showPage()
    p.save()
    watermark_buffer.seek(0)
    watermark = PdfFileReader(watermark_buffer)
    watermark_page = watermark.getPage(0)

    # The file stays open until the output has been written and is closed
    # by the with-block afterwards.
    with open(filepath, 'rb') as attachment_file:
        attachment = PdfFileReader(attachment_file)
        output = PdfFileWriter()

        # Attach watermark to each page
        for page in attachment.pages:
            page.mergePage(watermark_page)
            output.addPage(page)

        # content_type is used instead of the removed "mimetype" keyword
        response = HttpResponse(content_type='application/pdf')
        response['Content-Disposition'] = 'inline; filename=%s' % output_filename.encode('utf-8')
        output.write(response)
    return response
예제 #58
0
def duplicated_pdf(stream):
    """Return a PDF made of an "Original" rendering followed by a "Duplicado" one.

    The HTML in ``stream`` is converted twice with
    ``html_to_pdf.HTMLToPDFConverter``: once with the "Original" banner and
    once with the "Duplicado" banner (each banner is passed twice to
    ``convert``).  The pages of both results are concatenated, originals
    first.

    :param stream: seekable HTML stream (A.K.A. StringIO); it is rewound
        between the two conversions.
    :return: the content of the combined PDF.
    """
    converter = html_to_pdf.HTMLToPDFConverter()
    original_banner = "<center><h3>-- Original --</h3></center>"
    duplicate_banner = "<center><h3>-- Duplicado --</h3></center>"

    original_reader = PdfFileReader(
        StringIO(converter.convert(stream, original_banner, original_banner)))

    # Rewind so the second conversion reads the stream from its start again.
    stream.seek(0)
    duplicate_reader = PdfFileReader(
        StringIO(converter.convert(stream, duplicate_banner, duplicate_banner)))

    writer = PdfFileWriter()
    for reader in (original_reader, duplicate_reader):
        for page_index in xrange(0, reader.getNumPages()):
            writer.addPage(reader.getPage(page_index))

    combined = StringIO()
    writer.write(combined)
    return combined.getvalue()
예제 #59
0
def convert(filename):
    """Split every page of a PDF into a left half and a right half.

    Reads ``filename`` and writes the result to ``filename + '.2'``.  Each
    source page is emitted twice in a row: first with its media box reduced
    to the left half, then with its media box reduced to the right half.

    :param filename: path of the PDF to split.
    """
    with open(filename, 'rb') as in_file:
        inp = PdfFileReader(in_file)
        outp = PdfFileWriter()

        for page in inp.pages:
            # NOTE(review): copy.copy is shallow.  As in the original code,
            # the copies are taken before page.mediaBox is first read -
            # presumably so each copy ends up with its own box; confirm
            # before reordering these statements.
            left_half = copy.copy(page)
            right_half = copy.copy(page)

            box = page.mediaBox
            x_min, y_min = box.lowerLeft
            x_max, y_max = box.upperRight
            # Midpoint of the box itself, so boxes that do not start at x=0
            # are split in the middle too.  float() avoids integer flooring
            # on Python 2 and mixed-type arithmetic between coordinates.
            x_mid = (float(x_min) + float(x_max)) / 2

            # left column
            left_half.mediaBox.lowerLeft = (x_min, y_min)
            left_half.mediaBox.upperRight = (x_mid, y_max)
            outp.addPage(left_half)

            # right column
            right_half.mediaBox.lowerLeft = (x_mid, y_min)
            right_half.mediaBox.upperRight = (x_max, y_max)
            outp.addPage(right_half)

        # Written while the input file is still open; both handles are
        # closed deterministically by their with-blocks.
        with open(filename + '.2', 'wb') as out_file:
            outp.write(out_file)
예제 #60
0
def split_chapters(*t_args):
    """
    Split a large pdf into chunks (i.e. chapters)

    The first positional argument is a sequence whose first item is the path
    of a JSON config file.  Keys read from that config:

    - ``source``: path of the PDF to split
    - ``outputdir``: directory receiving the chapter files (created with
      ``os.mkdir`` when missing and at least one chapter is written)
    - ``chapter_prefix`` / ``chapter_fmt``: output name is
      ``chapter_prefix + chapter_fmt % (chapter_number,)``
    - ``first_chapter_index``: number given to the first chapter
    - ``firstpage``: page index where the first chapter starts
    - ``chapters_ends``: for each chapter, the page index where it stops
      (exclusive); each following chapter starts at the previous end

    Called without any argument the function does nothing; called with an
    empty sequence it prints a usage line.
    """
    if len(t_args) == 0:
        return
    args = t_args[0]
    if len(args) < 1:
        print("usage: utils_pdf split_chapters configfile")
        return
    from pyPdf import PdfFileWriter, PdfFileReader

    with open(args[0]) as config_file:
        P = json.loads(config_file.read())

    i0 = P["first_chapter_index"]
    ends = P["chapters_ends"]

    # The source file stays open while chapters are written and is closed
    # by the with-block afterwards.
    with open(P["source"], "rb") as source_file:
        reader = PdfFileReader(source_file)
        if ends and not os.path.exists(P["outputdir"]):
            os.mkdir(P["outputdir"])
        for i, chapter_end in enumerate(ends):
            ch_num = i0 + i
            fmt = P["chapter_fmt"] % (ch_num, )
            fn_out = "%s/%s%s" % (P["outputdir"], P["chapter_prefix"], fmt)
            # First chapter starts at "firstpage"; the others start where
            # the previous chapter ended.
            j0 = P["firstpage"] if i == 0 else ends[i - 1]
            output = PdfFileWriter()
            for j in range(j0, chapter_end):
                output.addPage(reader.getPage(j))
            with open(fn_out, "wb") as outputStream:
                output.write(outputStream)
            print("wrote %s" % (fn_out, ))