Example #1
0
    def __call__(self, data, attachments=None, pages=None):
        """Fill the form from ``data`` and return the assembled PDF writer.

        Every entry of ``self.fields`` that carries a ``"template"`` key is
        rendered with the keyword arguments built by
        ``self.template_args(data)``.  The rendered values are handed to
        ``self.exec_pdftk``; the watermarks in ``self.watermarks`` are then
        merged onto their pages, the selected pages are copied into a new
        writer and every attachment is merged onto a fresh blank page.

        Args:
            data: Source data passed to ``self.template_args``.
            attachments: Optional iterable of objects exposing ``pdf()``.
                Defaults to no attachments.
            pages: Optional iterable of zero-based page indexes to keep.
                Any falsy value (``None`` or an empty sequence) keeps
                every page of the filled document.

        Returns:
            The ``PdfFileWriter`` holding the resulting document.
        """
        # ``None`` replaces the former ``[]`` default so that no list object
        # is shared between calls.
        if attachments is None:
            attachments = ()

        self.rendered = {}
        for field, ctx in self.fields.items():
            if "template" not in ctx:
                continue

            # self.context is assigned before template_args() runs, exactly
            # as before (presumably template_args reads it -- verify).
            self.context = ctx
            kwargs = self.template_args(data)
            template = self.context["template"]

            try:
                rendered_field = template.render(**kwargs)
            except Exception as err:
                # Best effort: a field that fails to render is logged and
                # left out instead of aborting the whole document.
                logger.error("%s: %s %s", field, template, err)
            else:
                # Skip the field if it is already rendered by filter
                if field not in self.rendered:
                    self.rendered[field] = rendered_field

        filled = PdfFileReader(self.exec_pdftk(self.rendered))
        for pagenumber, watermark in self.watermarks:
            filled.getPage(pagenumber).mergePage(watermark)

        output = PdfFileWriter()
        for p in pages or xrange(filled.getNumPages()):
            output.addPage(filled.getPage(p))

        for attachment in attachments:
            output.addBlankPage().mergePage(attachment.pdf())

        return output
Example #2
0
    def test_two_on_one_page(self):
        """Upload a two-page PDF and check both pages end up on one sheet."""
        # Build the upload: the first page of the fixture, added twice.
        source = PdfFileReader(self.get_pdf_stream())
        two_pages = PdfFileWriter()
        for _ in range(2):
            two_pages.addPage(source.getPage(0))
        assert two_pages.getNumPages() == 2
        assert two_pages.getPage(0).extractText().count('Test') ==  1

        # Serialise the document into an in-memory buffer for the upload.
        upload = StringIO()
        two_pages.write(upload)
        upload.seek(0)

        index_response = self.app.get('/')
        self.assertEquals(index_response.status_code, 200)

        self.app.post('/handleform', data={'file': (upload, 'test.pdf')})

        download = self.combine_and_download(pages_sheet='2')
        combined = PdfFileReader(StringIO(download.data))

        # Both copies of the page text must now live on the single sheet.
        occurrences = combined.getPage(0).extractText().count('Test')
        self.assertEquals(occurrences, 2)
        self.assertEquals(combined.getNumPages(), 1)

        self.clean_up()
Example #3
0
def renderToPdf(envLL, filename, sizex, sizey):
    """Render every layer in MAPNIK_LAYERS and stack them into one PDF page.

    Each layer is rendered to ``<basename>_<layer>.pdf`` next to
    ``filename``; the first page of every layer PDF is then merged onto the
    first layer's page and the result is written to ``filename``.

    Args:
        envLL: Bounding box, converted with ``LLToMerc`` before zooming.
        filename: Path of the merged PDF to write.
        sizex: Page/map width handed to cairo and mapnik.
        sizey: Page/map height handed to cairo and mapnik.

    Raises:
        ValueError: If MAPNIK_LAYERS yields no layer, so there is nothing
            to write (previously an AttributeError on ``None``).
    """
    basefilename = os.path.splitext(filename)[0]
    # The projected envelope is the same for every layer.
    envMerc = LLToMerc(envLL)

    mergedpdf = None
    page = None
    # pyPdf reads page content lazily, so every layer stream has to stay
    # open until the merged document has been written.
    layer_streams = []
    try:
        for mapname in MAPNIK_LAYERS:
            print('Rendering %s' % mapname)
            # Render layer PDF; cairo creates the file from its path.
            localfilename = basefilename + '_' + mapname + '.pdf'
            surface = cairo.PDFSurface(localfilename, sizex, sizey)
            layer_map = mapnik.Map(sizex, sizey)
            mapnik.load_map(layer_map, mapname + ".xml")
            layer_map.zoom_to_box(envMerc)
            mapnik.render(layer_map, surface)
            surface.finish()

            # Merge with master.
            layer_stream = open(localfilename, "rb")
            layer_streams.append(layer_stream)
            layer_page = PdfFileReader(layer_stream).getPage(0)
            if mergedpdf is None:
                mergedpdf = PdfFileWriter()
                page = layer_page
                mergedpdf.addPage(page)
            else:
                page.mergePage(layer_page)

        if mergedpdf is None:
            raise ValueError("MAPNIK_LAYERS is empty; nothing to render")

        with open(filename, 'wb') as output:
            mergedpdf.write(output)
    finally:
        for layer_stream in layer_streams:
            layer_stream.close()
Example #4
0
    def test_concat_pdf_files(self):
        """Concatenate docs/a.pdf and docs/b.pdf and verify the page count.

        The combined document is written to docs/c.pdf, re-read to count
        its pages and removed again when the count matches.
        """
        target = r"docs/c.pdf"
        # Remove a stale result of an earlier run; a missing file is fine.
        try:
            os.unlink(target)
        except OSError:
            pass

        # The inputs must stay open until the writer has copied their pages.
        with open(r"docs/a.pdf", 'rb') as stream_a:
            with open(r"docs/b.pdf", 'rb') as stream_b:
                input_a = PdfFileReader(stream_a)
                input_b = PdfFileReader(stream_b)

                output = PdfFileWriter()
                for reader in (input_a, input_b):
                    for x in range(reader.getNumPages()):
                        output.addPage(reader.getPage(x))

                with open(target, 'wb') as outputStream:
                    output.write(outputStream)

                count = input_a.getNumPages() + input_b.getNumPages()

        # Close the check stream before unlinking: an open handle blocks
        # deletion on some platforms.
        with open(target, 'rb') as check_stream:
            check_pages = PdfFileReader(check_stream).getNumPages()

        self.assertEqual(count, check_pages)
        os.unlink(target)
Example #5
0
	def addPdfOverlay(self, pdf_doc, overlay_doc, output_doc, repeatOverlay=False):
		'''
			Essentially merging two PDF documents.

			pdf_doc: (string)
				Path to PDF document.
			overlay_doc: (string)
				Path to PDF overlay document to overlay pdf_doc.
			output_doc: (string)
				Path the merged PDF document is written to.
			repeatOverlay: (boolean)
				If set to True, page 1 of the overlay document is repeated
				for each page of the pdf_doc. (default: False)

			When repeatOverlay is False and the overlay document has fewer
			pages than pdf_doc, the remaining pages are copied without an
			overlay (this used to raise an IndexError).
		'''
		# Both inputs stay open until the output is written because pyPdf
		# reads page content lazily; "with" also closes them on errors.
		with open(pdf_doc, "rb") as pdf_stream:
			with open(overlay_doc, "rb") as overlay_stream:
				pdf = PdfFileReader(pdf_stream)
				pdf_overlay = PdfFileReader(overlay_stream)
				overlay_cnt = pdf_overlay.getNumPages()

				outputWriter = PdfFileWriter()
				for n in range(pdf.getNumPages()):
					pg = pdf.getPage(n)
					if repeatOverlay:
						pg.mergePage(pdf_overlay.getPage(0))
					elif n < overlay_cnt:
						pg.mergePage(pdf_overlay.getPage(n))
					outputWriter.addPage(pg)

				# Output
				with open(output_doc, "wb") as outputStream:
					outputWriter.write(outputStream)
Example #6
0
def save_ready_template(request, id):
    """Stamp a person's name onto the two-page template and render the page.

    The ``FIO`` record with primary key ``id`` is looked up, its name and
    surname are drawn on a two-page overlay, the overlay is merged onto the
    first two pages of the template PDF and the result is written to disk.

    Args:
        request: Request object handed through to ``render``.
        id: Primary key of the ``FIO`` record to print.

    Returns:
        The response produced by rendering ``template_page.html``.
    """
    person_print = FIO.objects.get(id=id)
    full_name = "{} {}".format(person_print.name, person_print.surname)

    # create a new PDF with Reportlab: one overlay page per template page
    packet = StringIO.StringIO()
    can = canvas.Canvas(packet, pagesize=letter)
    for x, y in ((284, 579), (260, 494)):
        can.drawString(x, y, full_name)
        can.showPage()
    can.save()

    # move to the beginning of the StringIO buffer
    packet.seek(0)
    new_pdf = PdfFileReader(packet)

    output = PdfFileWriter()
    # The template stream has to stay open until the output is written
    # (pyPdf reads lazily); "with" closes it afterwards, also on errors.
    with open("/Users/danilakimov/Desktop/template1.pdf", "rb") as template:
        existing_pdf = PdfFileReader(template)
        # add the "watermark" (which is the new pdf) on the existing pages
        for index in range(2):
            page = existing_pdf.getPage(index)
            page.mergePage(new_pdf.getPage(index))
            output.addPage(page)

        # finally, write "output" to a real file
        with open("/Users/danilakimov/Desktop/readytemplate.pdf", "wb") as outputStream:
            output.write(outputStream)

    return render(request, 'template_page.html', {'person_template': person_print})
Example #7
0
def merge(fppath, bppath, outputpath, no_delete, fed_backwards):
  """Interleave a front-pages scan and a back-pages scan into one PDF.

  For every front page the matching back page is appended right after it.
  Front pages without a matching back page are kept on their own.

  Args:
    fppath: Path of the PDF holding the front pages.
    bppath: Path of the PDF holding the back pages.
    outputpath: Path of the merged PDF; ``~`` is expanded.
    no_delete: When false, pages whose extracted text is empty are dropped.
    fed_backwards: When true, the back pages are in reverse order, so
      front page ``i`` pairs with back page ``count - i - 1``.
  """
  # PDFs are binary: text mode corrupts them on platforms that translate
  # line endings, so every stream is opened in binary mode.
  with open(fppath, 'rb') as fpstream:
    with open(bppath, 'rb') as bpstream:
      fpfile = PdfFileReader(fpstream)
      bpfile = PdfFileReader(bpstream)
      backcount = bpfile.getNumPages()

      outputpages = []
      for i in range(fpfile.getNumPages()):
        outputpages.append(fpfile.getPage(i))
        backindex = backcount - i - 1 if fed_backwards else i
        # An explicit range check replaces the IndexError handler: a
        # negative index must not wrap around to an already used page.
        if 0 <= backindex < backcount:
          outputpages.append(bpfile.getPage(backindex))

      if not no_delete:
        outputpages = [page for page in outputpages if page.extractText() != '']

      outputfile = PdfFileWriter()
      for page in outputpages:
        outputfile.addPage(page)

      with open(os.path.expanduser(outputpath), 'wb') as outputstream:
        outputfile.write(outputstream)
Example #8
0
    def rewrite(self, context, font=None):
        """Draw text items onto the first page of ``self.path``.

        Args:
            context: Iterable of dicts with the keys ``'x'``, ``'y'`` and
                ``'value'``; each value is drawn at its coordinates.
            font: Optional dict with the keys ``'name'`` and ``'size'``.
                Defaults to Times-Roman at size 11.

        Returns:
            True once the merged page has been written to
            ``self.destination``.
        """
        # A fresh dict per call replaces the former shared mutable default.
        if font is None:
            font = {'name': 'Times-Roman', 'size': 11}

        packet = StringIO.StringIO()
        # create a new PDF with Reportlab
        can = canvas.Canvas(packet, pagesize=letter)
        can.setFont(font['name'], font['size'])
        for item in context:
            can.drawString(item['x'], item['y'], item['value'])
        can.save()

        # move to the beginning of the StringIO buffer
        packet.seek(0)
        new_pdf = PdfFileReader(packet)

        output = PdfFileWriter()
        # The source stream stays open until the output has been written
        # (pyPdf reads lazily) and is closed afterwards, also on errors.
        with open(self.path, "rb") as source:
            existing_pdf = PdfFileReader(source)
            # merge the new file with the existing
            page = existing_pdf.getPage(0)
            page.mergePage(new_pdf.getPage(0))
            output.addPage(page)
            # finally, write "output" to a real file
            with open(self.destination, "wb") as outputStream:
                output.write(outputStream)

        return True
    def pdf_watermark_fast_first_page(self, pathname, Wm_f, wt1='',**kwargs):
        """Write the first page of ``pathname`` with a text watermark.

        Only the first page is copied to the output document.

        Args:
            pathname: Path of the source PDF.
            Wm_f: Path the watermarked single-page PDF is written to.
            wt1: Text used when ``url_wtm`` is not supplied (default '').
            **kwargs: ``url_wtm`` holds the watermark text to draw.

        Returns:
            ``Wm_f``, the path of the written file.
        """
        from pyPdf import PdfFileWriter, PdfFileReader
        import StringIO
        from reportlab.pdfgen import canvas
        from reportlab.lib.pagesizes import letter

        # A missing 'url_wtm' used to be swallowed by a bare except and then
        # crashed below with a NameError; fall back to wt1 instead.
        url_watermark = kwargs.get('url_wtm', wt1)

        packet = StringIO.StringIO()
        # create a new PDF with Reportlab
        can = canvas.Canvas(packet, pagesize=letter)
        can.drawString(10, 100, url_watermark)
        can.save()

        #move to the beginning of the StringIO buffer
        packet.seek(0)
        new_pdf = PdfFileReader(packet)

        output = PdfFileWriter()
        # Keep the source open until the output is written (pyPdf reads
        # lazily); "with" closes it afterwards, also on errors.
        with open(pathname, "rb") as source:
            existing_pdf = PdfFileReader(source)
            # add the "watermark" (which is the new pdf) on the existing page
            page = existing_pdf.getPage(0)
            page.mergePage(new_pdf.getPage(0))
            output.addPage(page)
            # finally, write "output" to a real file
            with open(Wm_f, "wb") as outputStream:
                output.write(outputStream)
        return Wm_f
Example #10
0
    def add_guides(self):
        """Copy sig.pdf to sigs.pdf with guide marks merged onto page one.

        The guide drawing depends on ``self.args``: ``longarm`` selects the
        long-arm guides on the ``a4l`` page size; otherwise short-arm guides
        are generated on the ``a5`` or ``a4l`` size depending on
        ``self.args.a5``.
        """
        # Both files are closed deterministically; the input has to stay
        # open until the output is written because pyPdf reads lazily.
        with open('sig.pdf', 'rb') as source:
            pdf_in = PdfFileReader(source)
            pdf_out = PdfFileWriter()

            for i in xrange(pdf_in.getNumPages()):
                page = pdf_in.getPage(i)
                if i == 0:
                    # Only the first page receives the guides.
                    if self.args.longarm:
                        width, height = a4lwidth_pt, a4lheight_pt
                        marks = generate_longarm()
                    else:
                        if self.args.a5:
                            width, height = a5width_pt, a5height_pt
                        else:
                            width, height = a4lwidth_pt, a4lheight_pt
                        marks = generate_shortarm(
                            self.args.a5, bool(self.args.signature))

                    guides = StringIO()
                    create_pdf(guides, width, height, marks)
                    page.mergePage(PdfFileReader(guides).getPage(0))
                pdf_out.addPage(page)

            with open('sigs.pdf', 'wb') as target:
                pdf_out.write(target)
Example #11
0
def duplicated_pdf(stream):
    """Return PDF data holding the converted HTML twice: original, then copy.

    ``stream`` is converted once with the "Original" banner and, after being
    rewound, once more with the "Duplicado" banner; the pages of both
    conversions are concatenated in that order.
    """
    converter = html_to_pdf.HTMLToPDFConverter()
    original_banner = "<center><h3>-- Original --</h3></center>"
    copy_banner = "<center><h3>-- Duplicado --</h3></center>"

    original_data = converter.convert(stream, original_banner, original_banner)
    original_reader = PdfFileReader(StringIO(original_data))

    # The first conversion consumed the stream; rewind before reusing it.
    stream.seek(0)
    copy_data = converter.convert(stream, copy_banner, copy_banner)
    copy_reader = PdfFileReader(StringIO(copy_data))

    writer = PdfFileWriter()
    for reader in (original_reader, copy_reader):
        for index in xrange(0, reader.getNumPages()):
            writer.addPage(reader.getPage(index))

    sink = StringIO()
    writer.write(sink)
    sink.seek(0)
    return sink.read()
Example #12
0
def pdf(coursesid,examsid):
	''' Creates a blank PDF of this exam

	A "Hello" overlay is merged onto the first page of the template PDF and
	the resulting single-page document is returned as an
	``application/pdf`` response.  ``coursesid`` and ``examsid`` are not
	used by this proof of concept.
	'''
	# TODO: Obviously fix this up to generate actual PDFs; this is just a proof of concept
	from reportlab.pdfgen import canvas
	from reportlab.lib.pagesizes import letter
	from pyPdf import PdfFileWriter, PdfFileReader
	from io import BytesIO

	overlay = BytesIO()
	p = canvas.Canvas(overlay, pagesize=letter)
	p.drawString(100, 100, 'Hello')
	p.save()
	overlay.seek(0)
	new_pdf = PdfFileReader(overlay)

	out = PdfFileWriter()
	merged = BytesIO()
	# The template has to stay open until the document is written (pyPdf
	# reads lazily); "with" closes it afterwards.
	with open('/home/treece/src/web/bubbleck/res/Template.pdf', 'rb') as template:
		existing_pdf = PdfFileReader(template)
		page = existing_pdf.getPage(0)
		page.mergePage(new_pdf.getPage(0))
		out.addPage(page)
		out.write(merged)

	# write() returns None; the PDF bytes live in the buffer.
	response = make_response(merged.getvalue())
	# The old header value had an unbalanced quote and no disposition type.
	response.headers['Content-Disposition'] = 'inline; filename="sakulaci.pdf"'
	response.mimetype = 'application/pdf'

	return response
Example #13
0
class Packet(object):
	"""Sequential reader for the entries of a results PDF.

	The text of all pages is split into lines once at construction time;
	``next_entry`` then walks through those lines entry by entry.
	"""

	def __init__(self, results_pdf):
		"""Open the PDF at path ``results_pdf`` and split its text into lines."""
		# The stream stays open for the lifetime of the reader because
		# page text is extracted lazily.
		self.results_pdf = PdfFileReader(open(results_pdf, "rb"))
		self.num_pages = self.results_pdf.getNumPages()
		self.result_list = self.result_string.split('\n')
		self.entry_start = 0

	@property
	def num_rounds(self):
		"""Last digit character on the first text line of page one, as an int.

		Returns None when that line contains no digit.  Only a single digit
		is read, so the value is always in the range 0-9.
		"""
		first_entry = self.results_pdf.getPage(0).extractText().split('\n')[0]
		for char in reversed(first_entry):
			if char in digits:
				return int(char)
		return None

	@property
	def result_string(self):
		"""Extracted text of all pages, concatenated in page order."""
		return "".join(
			self.results_pdf.getPage(i).extractText()
			for i in range(self.num_pages)
		)

	def next_entry(self):
		"""Return the next entry as ``[school_code] + following lines``.

		The entry spans ``num_rounds + 2`` lines starting at
		``self.entry_start``, which is advanced past the entry.
		"""
		school_code = school_codes(self.result_list[self.entry_start])
		# num_rounds re-extracts the first page on every access; read it
		# once per entry instead of three times.
		rounds = self.num_rounds
		start = self.entry_start + 1
		end = self.entry_start + rounds + 2
		rest = self.result_list[start:end]
		self.entry_start += rounds + 2
		return [school_code] + rest
Example #14
0
def add_footer_pdf(in_fname, op_fname, imgPath):
	"""Stamp a footer image on every page of a PDF except the first.

	Args:
		in_fname: Path of the source PDF.
		op_fname: Path the stamped PDF is written to.
		imgPath: Path of the image drawn as the footer.
	"""
	from pyPdf import PdfFileWriter, PdfFileReader
	from reportlab.pdfgen import canvas
	from StringIO import StringIO

	# Using ReportLab to insert image into PDF
	imgTemp1 = StringIO()
	imgDoc1 = canvas.Canvas(imgTemp1)

	# Draw image on Canvas and save PDF in buffer
	imgDoc1.drawImage(imgPath, 210, 20, 155, 35)
	imgDoc1.save()

	overlay1 = PdfFileReader(StringIO(imgTemp1.getvalue())).getPage(0)

	output = PdfFileWriter()
	with open(in_fname, "rb") as in_stream:
		in_file = PdfFileReader(in_stream)

		# The first page is copied without the footer.
		output.addPage(in_file.getPage(0))

		# Use PyPDF to merge the image-PDF into the remaining pages
		for i in range(1, in_file.getNumPages()):
			page = in_file.getPage(i)
			page.mergePage(overlay1)
			output.addPage(page)

		# Save the result once, after the loop.  Writing inside the loop
		# rewrote the file for every page and produced no file at all for
		# a single-page input.  PDFs are binary, hence "wb".
		with open(op_fname, "wb") as outputStream:
			output.write(outputStream)
Example #15
0
def make_Cert(code, redeem_for):
	"""Write a certificate PDF with ``code`` and ``redeem_for`` stamped on it.

	Both texts are drawn on an overlay that is merged onto the first page
	of cert.pdf.  The result is saved as ``<redeem_for>_cert.pdf`` with the
	spaces of ``redeem_for`` replaced by underscores.
	"""
	# Build the overlay with Reportlab.
	overlay_buffer = StringIO.StringIO()
	pen = canvas.Canvas(overlay_buffer, pagesize=letter)
	for (x, y), text in (((280, 540), code), ((220, 300), redeem_for)):
		pen.setFont('Helvetica', 32)
		pen.drawString(x, y, text)
	pen.save()

	# Rewind the buffer so that it can be parsed as a PDF.
	overlay_buffer.seek(0)
	overlay_pdf = PdfFileReader(overlay_buffer)
	template_pdf = PdfFileReader(file("cert.pdf", "rb"))
	result = PdfFileWriter()

	# Stamp the overlay onto the template's first page.
	stamped = template_pdf.getPage(0)
	stamped.mergePage(overlay_pdf.getPage(0))
	result.addPage(stamped)

	# Persist the certificate.
	target_name = redeem_for.replace(" ", "_") + "_cert.pdf"
	outputStream = file(target_name, "wb")
	result.write(outputStream)
	outputStream.close()
Example #16
0
 def render(self):
     """Assemble the output PDF and return it as a rewound temporary file.

     The document consists of the first generated page merged with the
     first page of lib/kfza_base.pdf, followed by all pages of
     ``self.base_pdf`` and, when ``self.context.course.extra_questions``
     is truthy, the second generated page merged with the same base page.

     Returns:
         A ``TemporaryFile`` positioned at offset 0 that holds the PDF.
     """
     output = PdfFileWriter()
     # base1 is first the path and then the open stream of the base PDF.
     base1 = "%s/lib/%s" % (path.dirname(__file__), "kfza_base.pdf")
     base1 = open(base1, 'rb')
     b1_pdf = PdfFileReader(base1)
     wm = b1_pdf.getPage(0)
     # generate_page_one() is defined elsewhere; its result is passed
     # straight to PdfFileReader.
     p1 = PdfFileReader(self.generate_page_one())
     page1 = p1.getPage(0)
     # The base page is merged onto the generated page.
     page1.mergePage(wm)
     output.addPage(page1)
     bpdf = "%s/lib/%s" % (path.dirname(__file__), self.base_pdf)
     with open(bpdf, 'rb') as pdf:
         pf = PdfFileReader(pdf)
         # Encrypted base documents are opened with the empty password.
         if pf.isEncrypted:
             pf.decrypt('')
         for page in range(pf.getNumPages()):
             output.addPage(pf.getPage(page))
         if self.context.course.extra_questions:
             # A second reader over the same base stream supplies a fresh
             # copy of the base page for the extra page.
             b1_pdf = PdfFileReader(base1)
             wm = b1_pdf.getPage(0)
             # NOTE(review): generate_page_one() is called a second time
             # here and its page index 1 is used -- confirm that it always
             # yields at least two pages in this case.
             p1 = PdfFileReader(self.generate_page_one())
             page1 = p1.getPage(1)
             page1.mergePage(wm)
             output.addPage(page1)
         # Written while ``pdf`` is still open: the pages added above are
         # read from that stream.
         ntf = TemporaryFile()
         output.write(ntf)
     ntf.seek(0)
     # NOTE(review): base1 is not closed if anything above raises.
     base1.close()
     return ntf
Example #17
0
def generate(donor):
    """Build output/<donor>.pdf from the two SVG page templates.

    The donor name (spaces replaced by ``%20``) is substituted for
    ``/France/`` in each template, inkscape exports both pages to PDF and
    the two single pages are combined into one document.  Nothing is done
    when the combined PDF already exists.

    ``page1_svg`` and ``page2_svg`` are names defined outside this function.

    NOTE(review): ``donor`` is interpolated into shell command lines
    without escaping -- confirm that it never comes from untrusted input.
    """
    os.system('mkdir -p output')
    donor_url = donor.replace(' ','%20')
    # Output paths without extension; ".svg" / ".pdf" are appended below.
    page1 = 'output/%s1' % (donor.replace(' ','-').lower())
    page2 = 'output/%s2' % (donor.replace(' ','-').lower())

    combined = 'output/%s.pdf' % (donor.replace(' ','-').lower())
    # Skip donors whose combined PDF was generated earlier.
    if os.path.exists(combined): return

    # The copy made by cp is overwritten by the sed redirect to the same
    # target on the next line.
    os.system('cp "%s" "%s.svg"' % (page1_svg, page1))
    os.system('sed "s|/France/|/%s/|" "%s" > "%s.svg"' % (donor_url, page1_svg, page1))
    # First inkscape run applies a verb and saves the SVG; the second run
    # exports that SVG to PDF.
    os.system('inkscape  --file="%s.svg" --verb=za.co.widgetlabs.update --verb=FileSave --verb=FileQuit 2> /dev/null' % (page1))
    os.system('inkscape --file="%s.svg" --export-pdf="%s.pdf" 2> /dev/null' % (page1, page1))
    # Same steps for page two (stderr of the export is not discarded here).
    os.system('cp "%s" "%s.svg"' % (page2_svg, page2))
    os.system('sed "s|/France/|/%s/|" "%s" > "%s.svg"' % (donor_url, page2_svg, page2))
    os.system('inkscape  --file="%s.svg" --verb=za.co.widgetlabs.update --verb=FileSave --verb=FileQuit 2> /dev/null' % (page2))
    os.system('inkscape --file="%s.svg" --export-pdf="%s.pdf" ' % (page2, page2))
    # Merge pages
    input1 = PdfFileReader(file('%s.pdf' % (page1), 'rb'))
    input2 = PdfFileReader(file('%s.pdf' % (page2), 'rb'))
    output = PdfFileWriter()
    output.addPage(input1.getPage(0))
    output.addPage(input2.getPage(0))
    outputStream = file(combined, 'wb')
    output.write(outputStream)
    outputStream.close()
    # NOTE(review): the reason for this two-second pause is not visible here.
    sleep(2)
Example #18
0
def reshuffle(output_directory, input_file, filename):
    output_pdf = PdfFileWriter()

    with open(input_file, 'rb') as readfile:

        input_pdf = PdfFileReader(readfile)
        total_pages = input_pdf.getNumPages()

        if total_pages == 7:

            print filename,

            if "hardest_jet_phi_all_linear" in filename or "hardest_jet_eta_all_linear" in filename or "hardest_jet_pT_all_linear" in filename or "hardest_jet_pT_jec_all_linear" in filename or "area" in filename or "jec" in filename or "pfc_neutral_0_100_pT" in filename or "pfc_charged_0_100_pT" in filename or "pfc_neutral_0_5_pT" in filename or "pfc_charged_0_5_pT" in filename:
                output_pdf.addPage(input_pdf.getPage(4))
                output_pdf.addPage(input_pdf.getPage(5))
                output_pdf.addPage(input_pdf.getPage(6))
                print "85"
            else:
                output_pdf.addPage(input_pdf.getPage(5))
                output_pdf.addPage(input_pdf.getPage(4))
                output_pdf.addPage(input_pdf.getPage(6))
                print "150"

            for i in range(0, 4):
                output_pdf.addPage(input_pdf.getPage(i))

            with open(output_directory + filename, "wb") as writefile:
                output_pdf.write(writefile)
Example #19
0
    def add_terms_and_conditions(self, ids, original_report_pdf,
                                 original_report):
        """Append the company's terms and conditions to a report PDF.

        The document language is read from the record field named by
        ``original_report.terms_conditions_language_field``, with the
        context language as fallback.  The terms entry for that language is
        used, or the entry tagged ``'default'`` when there is none.

        Returns:
            The combined PDF data, or ``original_report_pdf`` unchanged when
            no usable terms and conditions exist.
        """
        user = self.env['res.users'].browse(self._uid)

        # todo change user language to report language (client language)

        language_field = original_report.terms_conditions_language_field
        model = original_report.model

        record = self.env[model].browse(ids)
        localdict = {'o': record}
        # ``eval`` is called with mode/nocopy keywords here, so it is not
        # the builtin (presumably a safe-eval helper -- verify the import).
        eval('document_language = o.%s' % language_field, localdict,
             mode="exec", nocopy=True)
        document_language = localdict.get('document_language',
                                          self._context.get('lang'))

        # todo check language
        localized = False
        fallback = False
        for entry in record.company_id.terms_and_conditions:
            if entry.language == document_language:
                localized = base64.decodestring(entry.datas)
            if entry.language == 'default':
                fallback = base64.decodestring(entry.datas)

        chosen = localized or fallback or False
        if not chosen:
            return original_report_pdf

        # Report pages first, then the terms and conditions.
        report_reader = PdfFileReader(StringIO(original_report_pdf))
        terms_reader = PdfFileReader(StringIO(chosen))

        writer = PdfFileWriter()
        for reader in (report_reader, terms_reader):
            for index in range(0, reader.getNumPages()):
                writer.addPage(reader.getPage(index))

        combined = StringIO()
        writer.write(combined)
        return combined.getvalue()
Example #20
0
    def slice(self, ifile, ofile=None, marginv=0, marginh=0, columnwidth=0, centerwidth=0, scale=0.9):
        """Split every page of a two-column PDF into two single-column pages.

        Each source page is copied twice; the media box of the first copy is
        cropped to the left column and that of the second to the right one.
        When both ``columnwidth`` and ``centerwidth`` are non-zero they
        define the column geometry, otherwise the page is cut in the middle.
        Both copies are scaled by ``scale``.  The result is written to
        ``ofile`` or, when that is None, to ``PdfSlicer.getOutName(ifile)``.
        """
        writer = PdfFileWriter()
        reader = PdfFileReader(open(ifile, "rb"))
        explicit_columns = columnwidth != 0 and centerwidth != 0

        # print the title of document1.pdf
        print("title = %s" % reader.getDocumentInfo().title)
        print("Processing page: ")
        for i in xrange(reader.getNumPages()):
            print(i + 1)

            # add left column as page
            left = PageObject.createBlankPage(reader)
            left.mergePage(reader.getPage(i))
            if explicit_columns:
                right_edge = left.mediaBox.getUpperLeft_x() + marginh + columnwidth
            else:
                right_edge = left.mediaBox.getUpperRight_x() / 2
            left.mediaBox.upperRight = (
                right_edge,
                left.mediaBox.getUpperRight_y() - marginv,
            )
            left.mediaBox.lowerLeft = (
                left.mediaBox.getLowerLeft_x() + marginh,
                left.mediaBox.getLowerLeft_y() + marginv,
            )
            left.scale(scale, scale)
            writer.addPage(left)

            # add right column as page
            right = PageObject.createBlankPage(reader)
            right.mergePage(reader.getPage(i))
            if explicit_columns:
                left_edge = (right.mediaBox.getLowerLeft_x() + marginh
                             + columnwidth + centerwidth)
            else:
                left_edge = right.mediaBox.getUpperRight_x() / 2
            right.mediaBox.lowerLeft = (
                left_edge,
                right.mediaBox.getLowerLeft_y() + marginv,
            )
            right.mediaBox.upperRight = (
                right.mediaBox.getUpperRight_x() - marginh,
                right.mediaBox.getUpperRight_y() - marginv,
            )
            right.scale(scale, scale)
            writer.addPage(right)

        # finally, write "output"
        target = ofile if ofile is not None else PdfSlicer.getOutName(ifile)
        outputStream = open(target, "wb")
        writer.write(outputStream)
        outputStream.close()
Example #21
0
def pdf2text(source_pdf,target_pages):
	"""Return the text of the given one-based page(s) of a PDF.

	Args:
		source_pdf: Path of the PDF to read.
		target_pages: A single one-based page number or an iterable of them.

	Returns:
		The extracted text of the requested pages, concatenated in order.

	Raises:
		IndexError: If a requested page does not exist.  The former bare
			``except`` hid this behind a misleading TypeError.
	"""
	# Tell "one page" from "many pages" up front instead of treating every
	# failure inside the loop as the single-page case.
	try:
		page_numbers = list(target_pages)
	except TypeError:
		page_numbers = [target_pages]

	with open(source_pdf, "rb") as stream:
		pdf = PdfFileReader(stream)
		return ''.join(
			pdf.getPage(int(number - 1)).extractText()
			for number in page_numbers
		)
Example #22
0
    def test_watermark(self):
        """The text overlay must show up on the combined download."""
        # The fixture contains 'Test' but not the watermark text yet.
        original_text = PdfFileReader(self.get_pdf_stream()).getPage(0).extractText()
        assert 'Test' in original_text
        assert 'TEST_WATERMARK' not in original_text

        response = self.combine_and_download(text_overlay='TEST_WATERMARK')

        downloaded = PdfFileReader(StringIO(response.data))
        for expected in ('Test', 'TEST_WATERMARK'):
            self.assert_(expected in downloaded.getPage(0).extractText())

        self.clean_up()
def merge_pdf(url_list):
    """Download the schedule PDFs, merge them, upload and record the link.

    Download each PDF and merge them into one giant PDF, post this giant
    PDF to anonfiles.com, add URL to scraperwiki database.

    Args:
        url_list: Iterable of mutable ``[url, filename]`` pairs.  Known
            wrong URLs are corrected in place, as before.
    """
    base = "http://www.cota.com/assets/Riding-Cota/Schedules/Current/"
    # Schedules published without the leading zero the list carries.
    corrections = {
        base + "083.pdf": "83.pdf",
        base + "039.pdf": "39.pdf",
        base + "021.pdf": "21.pdf",
        base + "016S.pdf": "16S.pdf",
        base + "015.pdf": "15.pdf",
    }

    output = PdfFileWriter()
    # pyPdf reads lazily: the inputs stay open until the merged file is
    # written and are closed afterwards.
    open_inputs = []
    try:
        for url in url_list:
            fixed_name = corrections.get(url[0])
            if fixed_name is not None:
                url[0] = base + fixed_name
                url[1] = fixed_name

            # NOTE(review): the URL is passed to a shell unescaped, and
            # wget saves into the current directory while the file is read
            # from /tmp below -- confirm the working directory is /tmp.
            os.system("wget %s" % url[0])
            stream = open('/tmp/%s' % url[1], "rb")
            open_inputs.append(stream)
            input1 = PdfFileReader(stream)
            numPages = input1.getNumPages()
            print("number of pages = %s" % numPages)

            # The first two pages are always taken; a one-page schedule no
            # longer crashes.  A third page is added only for three-page
            # documents, as before.
            for index in range(min(numPages, 2)):
                output.addPage(input1.getPage(index))
            if numPages == 3:
                output.addPage(input1.getPage(2))

        final_page_count = output.getNumPages()
        print("Number of Pages in Final = %s" % final_page_count)

        with open("/tmp/bus.pdf", "wb") as outputStream:
            output.write(outputStream)
    finally:
        for stream in open_inputs:
            stream.close()

    # NOTE(review): the form argument looks mangled by e-mail obfuscation;
    # restore the real "-F" value before relying on this upload.
    os.system('curl -kF "[email protected];filename=bus.pdf" https://anonfiles.com/api/hotlink -o "reply.txt"')

    with open('reply.txt', 'r') as f:
        read_data = f.read()

    data_dict = {
                   'Title':'Link to COTA Bus Schedule',
                   'URL':read_data,
                }
    scraperwiki.sqlite.save(unique_keys=['Title', 'URL'], data=data_dict)
Example #24
0
    def run(self):
        """Stack source pages 0, 1 and 3 onto one blank destination page.

        The destination page is sized from ``self.opts.size`` (swapped for
        landscape) multiplied by the truncated source/destination width
        ratio.  Each source page is scaled so that ``self.opts.count`` page
        heights fit the destination height and is then merged at a fixed
        vertical offset.  The result is written to ``self.outfile``.
        """
        def source_size(page):
            # (width, height) of the page's media box as floats.
            return (float(page.mediaBox.getWidth()),
                    float(page.mediaBox.getHeight()))

        def target_size():
            # Configured size, swapped for landscape; None otherwise.
            orientation = self.opts.orientation
            if orientation == const.PORTRAIT:
                return self.opts.size
            if orientation == const.LANDSCAPE:
                return (self.opts.size[1], self.opts.size[0])

        def width_ratio(page):
            target_width, _target_height = target_size()
            return source_size(page)[const.WIDTH] / float(target_width)

        def scaled_target_size(page):
            return [side * int(width_ratio(page)) for side in target_size()]

        reader = PdfFileReader(file(self.infile, "rb"))
        writer = PdfFileWriter(
            documentInfo=reader.getDocumentInfo(), authors=["Vimala"])

        first_page = reader.getPage(0)
        height = source_size(first_page)[const.HEIGHT]
        totalHeight = self.opts.count * height

        destPage = writer.addBlankPage(*scaled_target_size(first_page))

        print(totalHeight)
        fitScale = scaled_target_size(first_page)[const.HEIGHT] / float(totalHeight)
        print(fitScale)

        # Top slot: the first page.
        first_page.scale(fitScale, fitScale)
        destPage.mergeTranslatedPage(first_page, 0, height * 2 - .2 * height)

        # Remaining slots: (source page index, vertical offset).
        for page_index, offset in ((1, height - .1 * height), (3, 0)):
            srcPage = reader.getPage(page_index)
            srcPage.scale(fitScale, fitScale)
            destPage.mergeTranslatedPage(srcPage, 0, offset)

        writer.write(open(self.outfile, "wb"))
Example #25
0
def doc_overlay(request, document_uuid, lot_number, qrcode=True):
    """Return a stored document with a lot header and a QR code on each page.

    The document file is fetched from its URL, a one-page overlay (header
    line plus a QR code pointing at the report) is generated and merged
    onto every page, and the result is streamed as an inline PDF response.
    A "viewed document" action is recorded for the requesting user.

    NOTE(review): ``qrcode`` is accepted but never read; the QR code is
    always drawn.
    """
    report = Report.objects.get(lot_number=lot_number)
    document = Document.objects.get(uuid=document_uuid)

    response = HttpResponse(content_type='application/pdf')
    response['Content-Disposition'] = 'filename="inspection_report.pdf"'

    outputPDF = PdfFileWriter()
    overlay_buffer = StringIO()

    # Fetch the stored PDF into memory and measure its first page.
    raw_pdf = urlopen(Request(document.file.url)).read()
    existing_pdf = PdfFileReader(StringIO(raw_pdf))
    page_count = existing_pdf.getNumPages()
    media_box = existing_pdf.getPage(0).mediaBox
    width = float(media_box.getWidth())
    height = float(media_box.getHeight())

    # Draw the overlay with Reportlab: centred header line near the top.
    p = canvas.Canvas(overlay_buffer, pagesize=letter)
    p.setFillColorRGB(0,0,0)
    p.setFont("Helvetica", 7)
    header = "%s LOT # %s / %s (doc# %s)" % (
        settings.PDF_COMPANY_SHORT_NAME,
        report.lot_number,
        str(report.created_at.date()),
        document.uuid,
    )
    p.drawCentredString(width/2.0, height-9.0, header)

    # QR code with the host-qualified URL of the report.
    target = "%s%s" % (request.META['HTTP_HOST'], report.get_absolute_url())
    barcode = createBarcodeDrawing('QR', value=target)
    barcode.drawOn(p, 175*mm, 10*mm)

    p.save()

    # Rewind the buffer so that the overlay can be parsed.
    overlay_buffer.seek(0)
    new_pdf = PdfFileReader(overlay_buffer)

    # Stamp the overlay page onto every page of the document.
    for index in range(0, page_count):
        page = existing_pdf.getPage(index)
        page.mergePage(new_pdf.getPage(0))
        outputPDF.addPage(page)

    # The response object serves as the output stream.
    outputPDF.write(response)
    action.send(request.user, verb="viewed document", action_object=document, target=report)
    return response
Example #26
0
def ProcessPDF(filename, largeformatsize):
    """
    Open a PDF to perform a page count and check for corrupt files
    Count small pages and large pages as defined above.

    Args:
        filename: Path of the PDF to inspect.
        largeformatsize: Area threshold in square inches; pages whose art
            box area exceeds it count as large.

    Returns:
        ``(True, counts)`` with the keys ``npages``, ``nlargepages`` and
        ``nsmallpages`` on success, or ``(False, message)`` when the file
        cannot be opened or parsed.
    """
    pdf_count = {
                "npages":0,
                "nlargepages":0,
                "nsmallpages":0,
               }

    filestream = None
    try:
        try:
            #Open file
            filestream = open(filename, "rb")
            #load into pypdf
            pdfFile = PdfFileReader(filestream)

            #First access into pdf contents
            #  Raises decryption/security exceptions here
            npages = pdfFile.getNumPages()
        except IOError:
            err = '<{}> :: Could not open file. Check permissions?'.format(filename)
            return False, err
        except Exception as e:
            err = '<{}> :: {}'.format(filename, e)
            return False, err

        for ii in range(npages):
            pdf_count["npages"] += 1

            # Calculate page dimensions and pricing category.
            # Dimensions are returned by pyPDF in Points (72 points = 1 inch)
            # See:
            #    http://en.wikipedia.org/wiki/Point_(typography)
            #    http://en.wikipedia.org/wiki/Paper_size
            box = pdfFile.getPage(ii).artBox  # looked up once per page
            width = box.getUpperRight_x()/72 - box.getLowerLeft_x()/72
            height = box.getUpperRight_y()/72 - box.getLowerLeft_y()/72

            if (width * height) > largeformatsize:
                pdf_count["nlargepages"] += 1
            else:
                pdf_count["nsmallpages"] += 1

        return True, pdf_count
    finally:
        # The stream was never closed before; release it on every path,
        # but only after the pages have been read (pyPdf reads lazily).
        if filestream is not None:
            filestream.close()
Example #27
0
def pdf(request, region, name, period_name):
    """Generate and return a PDF for the given tax form.

    The form is looked up through ``registry.get_form(region, name)`` and the
    period through ``get_period(period_name, None)``; ``Http404`` is raised
    when either lookup returns ``None``.  The values of the first filing page
    are drawn on a canvas at the coordinates listed in ``form.pdf_fields``
    and that drawing is merged onto page 0 of the form's template PDF.

    Each entry of ``form.pdf_fields`` is ``(x, y, attribute_name[, step])``.
    ``Decimal`` values are split at the decimal point and drawn on either
    side of ``x``; when ``step`` is given the text is drawn one character at
    a time, ``step`` units apart.

    Returns:
        An ``HttpResponse`` with content type ``application/pdf``.
    """

    company = request.company
    form = registry.get_form(region, name)
    period = get_period(period_name, None)
    if form is None or period is None:
        raise Http404

    filing = form.tally(company, period)

    # The file name is never written to: the bytes are fetched further down
    # with getpdfdata().
    c = canvas.Canvas("hello.pdf")
    for spec in form.pdf_fields:
        x, y, field_name = spec[:3]
        value = getattr(filing.pages[0], field_name, None)
        if value is None:
            value = u''
        if isinstance(value, Decimal):
            # partition() always yields three parts, so a Decimal without a
            # fractional part (e.g. Decimal('5')) no longer fails to unpack.
            dollars, _sep, cents = unicode(value).partition('.')
            c.drawString(x - 8 - c.stringWidth(dollars), y, dollars)
            c.drawString(x + 4, y, cents)
        elif len(spec) > 3:
            value = unicode(value)
            step = spec[3]
            for i, char in enumerate(value):
                c.drawString(x + i * step, y, char)
        else:
            value = unicode(value)
            c.drawString(x, y, value)

    c.showPage()

    datadir = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'data')
    pdfpath = os.path.join(datadir, form.filename)

    pdfdata = StringIO()
    # The template stays open until the merged page has been written out.
    with open(pdfpath, 'rb') as template:
        taxform = PdfFileReader(template)
        rendering = PdfFileReader(StringIO(c.getpdfdata()))
        output = PdfFileWriter()

        watermark = rendering.getPage(0)

        page1 = taxform.getPage(0)
        page1.mergePage(watermark)
        output.addPage(page1)

        output.write(pdfdata)

    return HttpResponse(pdfdata.getvalue(), content_type='application/pdf')
Example #28
0
 def AddWatermark(self,watermark,filein,fileout):
     """Stamp page 0 of ``filein`` with a text watermark and save ``fileout``.

     A one-page PDF ``temp_watermark.pdf`` is drawn in the current directory:
     the text "A WATERMARK!" three times, in gray 60pt Courier, rotated by
     45 degrees.  It is merged onto the first page of ``filein`` and only
     that page is written to ``fileout``.  Afterwards the temporary PDF and
     ``filein`` itself are deleted.

     NOTE(review): the ``watermark`` argument is accepted but never used;
     the watermark text is hard-coded.
     """
     temp_watermark = "temp_watermark.pdf"

     # Use reportlab to create the PDF that serves as the watermark.
     c = canvas.Canvas(temp_watermark)
     c.setFont("Courier", 60)
     # Gray text, a nice touch for a watermark.
     c.setFillGray(0.5,0.5)
     # The watermark is rotated 45 degrees relative to the underlying page.
     c.saveState()
     c.translate(500,100)
     c.rotate(45)
     c.drawCentredString(0, 0, "A WATERMARK!")
     c.drawCentredString(0, 300, "A WATERMARK!")
     c.drawCentredString(0, 600, "A WATERMARK!")
     c.restoreState()
     c.save()

     try:
         output = PdfFileWriter()
         # Both inputs stay open until the output has been written and are
         # closed before any file is removed.
         with open(filein, "rb") as source:
             input1 = PdfFileReader(source)

             # Just to demo this function from pyPdf: print the title.
             print("title = %s" % (input1.getDocumentInfo().title))

             page1 = input1.getPage(0)

             with open(temp_watermark, "rb") as stamp:
                 twatermark = PdfFileReader(stamp)
                 # Apply the watermark by merging the two pages.
                 page1.mergePage(twatermark.getPage(0))
                 output.addPage(page1)

                 # Just to demo this function from pyPdf: the page count
                 # printed here is that of the input document.
                 print("watermarked_pdf.pdf has %s pages." % input1.getNumPages())

                 with open(fileout, "wb") as outputStream:
                     output.write(outputStream)
     finally:
         # The temporary watermark is removed even when stamping failed.
         os.remove(temp_watermark)
     os.remove(filein)
     
Example #29
0
def main(argv = None):
    """
    Copy one page, or a range of pages, of a PDF into a new PDF.

    Arguments (read from ``sys.argv[1:]`` when ``argv`` is None):
        input_file_name output_file_name first_page_number [last_page_number]

    The first page is always copied.  When a last page number is given,
    the pages after the first one up to and including the last one are
    copied as well.  Page numbers are passed to ``getPage`` unchanged.
    """
    if argv is None:
        argv = sys.argv[1:]
    output = PdfFileWriter()
    # The input stays open until the output has been written.
    with open(argv[0], "rb") as source:
        reader = PdfFileReader(source)
        first = int(argv[2])
        output.addPage(reader.getPage(first))
        if len(argv) >= 4:
            for i in range(first + 1, int(argv[3]) + 1):
                output.addPage(reader.getPage(i))
        with open(argv[1], "wb") as target:
            output.write(target)
Example #30
0
def Merge():
    """Overlay the pages of ``input.pdf`` in pairs and save ``output.pdf``.

    The pages are walked from the last one backwards in steps of two.  Each
    visited page gets the page before it merged onto it and is appended to
    the output, so the pairs appear in reverse order.  With an odd page
    count the first page has no partner and is copied unmerged.
    """
    from pyPdf import PdfFileWriter, PdfFileReader
    output_pdf = PdfFileWriter()

    with open(r'input.pdf', 'rb') as readfile:
        input_pdf = PdfFileReader(readfile)
        total_pages = input_pdf.getNumPages()
        for page in xrange(total_pages - 1, -1, -2):
            print(page)
            page1 = input_pdf.getPage(page)
            # For page 0 the partner index would be -1, which is not a
            # preceding page (presumably it would wrap to the last page),
            # so the merge is skipped.
            if page > 0:
                page1.mergePage(input_pdf.getPage(page - 1))
            output_pdf.addPage(page1)
        with open(r'output.pdf', "wb") as writefile:
            output_pdf.write(writefile)
def download_pdf(url):
    """Download the PDF at ``url`` and re-save it under ``pdf_folder/``.

    The URL is probed with ``requests`` first.  Unless the probe answers
    404, the document is fetched with ``urlopen``, read page by page and
    written to ``pdf_folder/<basename of url>``.

    Returns:
        True after the file has been written, False when the probe
        returned status 404.
    """
    writer = PdfFileWriter()
    probe = requests.get(url, stream=True)
    try:
        code = probe.status_code
    finally:
        # Only the status is needed; release the streamed connection.
        probe.close()
    if code == 404:
        return (False)

    remoteFile = urlopen(Request(url)).read()

    memoryFile = StringIO(remoteFile)
    pdfFile = PdfFileReader(memoryFile)

    for pageNum in xrange(pdfFile.getNumPages()):
        currentPage = pdfFile.getPage(pageNum)
        writer.addPage(currentPage)

    with open('pdf_folder/%s'%basename(url),"wb") as outputStream:
        writer.write(outputStream)
    return (True)
Example #32
0
def getDocData(Fdoc):
    """Extract the text of every page of a PDF into a UTF-8 text file.

    The output path is derived from ``Fdoc``: ``"input_PDF"`` is replaced by
    ``"output_text"`` and the last four characters (the extension) are
    replaced by ``".txt"``.  The page texts are written one after another
    without a separator.

    Args:
        Fdoc: path of the PDF to read.
    """
    from pyPdf import PdfFileReader
    import codecs

    name = Fdoc.replace("input_PDF", "output_text")[:-4] + ".txt"

    with open(Fdoc, "rb") as source:
        reader = PdfFileReader(source)
        pages = reader.getNumPages()

        g = codecs.open(name, encoding='utf-8', mode='wb')
        try:
            # One pass over all pages; a one-page document is handled like
            # any other.
            for number in range(pages):
                g.write(reader.getPage(number).extractText())
        finally:
            g.close()
Example #33
0
def split_pages(input_filename, prefix=None):
    """
    Split a PDF file into single-page PDF files.

    Each page ``i`` (starting at 0) is written to ``'%s%03d.pdf' % (prefix, i)``.
    When ``prefix`` is None a temporary directory is created and the files
    are named ``page-NNN.pdf`` inside it.

    Returns the directory that holds the page files, as a path string.  It
    is the caller's responsibility to clean the disk when the files are no
    longer necessary, e.g. by removing that directory tree.

    Any error is logged and re-raised; a temporary directory created by
    this call is removed first.
    """
    output_dir = None
    try:
        if prefix is None:
            # create a temporary directory
            output_dir = mkdtemp('donomo')
            prefix = os.path.join(output_dir, 'page-')

        # The input stays open while the pages are being written.
        with open(input_filename, 'rb') as pdf_file:
            pdf_input = PdfFileReader(pdf_file)

            for i in xrange(pdf_input.getNumPages()):
                # a fresh one-page writer for every page
                pdf_output = PdfFileWriter()
                pdf_output.addPage(pdf_input.getPage(i))

                page_filename = '%s%03d.pdf' % (prefix, i)
                with open(page_filename, "wb") as page_filestream:
                    pdf_output.write(page_filestream)

        # return the directory name to the caller
        return os.path.dirname(prefix)
    except Exception as e:
        logging.error(str(e))
        # delete the temporary directory along with all its contents
        if output_dir:
            rmtree(output_dir)
        raise
Example #34
0
def createForm(info):
    """Pre-fill the form of one entry and stamp it onto its template PDF.

    ``info`` is a dict of field name -> text.  Depending on
    ``info['Koiran sukupuoli']`` the key ``'Uros'`` or ``'Narttu'`` is set to
    ``'x'`` (the dict is modified in place).  Every field that has a position
    in ``placements[info['Luokka']]`` is drawn on an A4 canvas, shifted
    vertically by ``offsets[...]`` for the same class; coordinates are
    multiplied by ``cm``.

    The drawing is saved as ``/tmp/stamp.pdf`` and ``pdftk`` stamps it onto
    ``pohjat/<Luokka>.pdf``, writing
    ``esitaytetyt/<Rekisterinumero with '/' replaced by '-'>.pdf``.
    The pdftk command line is printed before it is run.

    NOTE(review): ``/tmp/stamp.pdf`` is a fixed path, so concurrent runs
    would overwrite each other's stamp.
    """
    luokka = info['Luokka']
    if info['Koiran sukupuoli'] == 'uros':
        info['Uros'] = 'x'
    elif info['Koiran sukupuoli'] == 'narttu':
        info['Narttu'] = 'x'

    packet = StringIO.StringIO()
    c = canvas.Canvas(packet, pagesize=A4)
    for k, text in info.items():
        if k in placements[luokka]:
            x, y = placements[luokka][k]
            y = y + offsets[luokka]
            c.drawString(x * cm, y * cm, text)

    c.save()
    packet.seek(0)
    new_pdf = PdfFileReader(packet)
    output = PdfFileWriter()
    output.addPage(new_pdf.getPage(0))

    with open("/tmp/stamp.pdf", "wb") as outputStream:
        output.write(outputStream)

    try:
        os.mkdir("esitaytetyt")
    except OSError:
        # An already existing output directory is fine; anything else
        # (permissions, a plain file in the way) is a real error.
        if not os.path.isdir("esitaytetyt"):
            raise

    command = [
        'pdftk',
        "pohjat/%s.pdf" % luokka, 'stamp', '/tmp/stamp.pdf', 'output',
        'esitaytetyt/%s.pdf' % info['Rekisterinumero'].replace('/', '-')
    ]
    print(" ".join(command))
    call(command)
    def add_omr_marks(self, pdf_data, is_latest_document):
        """Merge an OMR-mark layer onto the pages of a PDF.

        Marks are added to every page when ``self.omr_single_sided`` is
        true, otherwise only to the pages with an even index (0, 2, ...),
        i.e. the front sides.  For each marked page the marks come from
        ``self._compute_marks(is_latest_page)`` and are turned into a page
        layer by ``self._build_omr_layer``; ``is_latest_page`` is true only
        for the last marked page of the document, and only when
        ``is_latest_document`` is true.

        :param pdf_data: content of the PDF as a string
        :param is_latest_document: whether this document is the last one
        :returns: content of the marked PDF as a string
        """
        # Documentation
        # http://meteorite.unm.edu/site_media/pdf/reportlab-userguide.pdf
        # https://pythonhosted.org/PyPDF2/PdfFileReader.html
        # https://stackoverflow.com/a/17538003
        # https://gist.github.com/kzim44/5023021
        # https://www.blog.pythonlibrary.org/2013/07/16/
        #   pypdf-how-to-write-a-pdf-to-memory/
        self.ensure_one()

        pdf_buffer = StringIO.StringIO()
        pdf_buffer.write(pdf_data)

        existing_pdf = PdfFileReader(pdf_buffer)
        output = PdfFileWriter()
        total_pages = existing_pdf.getNumPages()

        # Index of the last page that receives marks: the last page, or the
        # one before it when only even-indexed pages are marked and the
        # last index is odd.
        latest_omr_page = total_pages - 1
        if not self.omr_single_sided and latest_omr_page % 2 != 0:
            latest_omr_page -= 1

        for page_number in range(total_pages):
            page = existing_pdf.getPage(page_number)
            # only print omr marks on even-indexed pages (front sides)
            if self.omr_single_sided or page_number % 2 == 0:
                is_latest_page = is_latest_document and \
                    page_number == latest_omr_page
                marks = self._compute_marks(is_latest_page)
                omr_layer = self._build_omr_layer(marks)
                page.mergePage(omr_layer)
            output.addPage(page)

        out_buffer = StringIO.StringIO()
        output.write(out_buffer)

        return out_buffer.getvalue()
Example #36
0
File: spider.py Project: dmouse/nlp
    def pdf2Text(pdf):
        """Return the text of a PDF, given its raw content.

        The content is stored in ``/tmp/temp_crawler.pdf`` and read back with
        pyPdf.  The text of all pages is joined, non-breaking spaces are
        replaced by spaces, every run of whitespace is collapsed into a
        single space and the result is passed through ``unidecode``.

        Returns a single space when the PDF cannot be read; a message is
        printed in that case.

        NOTE(review): the temporary file has a fixed path, so concurrent
        calls would overwrite each other's data.
        """

        from pyPdf import PdfFileReader

        with open("/tmp/temp_crawler.pdf", "wb") as file_pdf:
            file_pdf.write(pdf)

        try:
            with open("/tmp/temp_crawler.pdf", "rb") as stored:
                reader = PdfFileReader(stored)
                content = "".join(
                    reader.getPage(i).extractText() + "\n"
                    for i in range(reader.getNumPages()))

            content = u" ".join(content.replace(u"\xa0", u" ").strip().split())

        except Exception:
            # Best effort: an unreadable PDF yields a blank result.
            print("[ Error con el PDF ]")
            return " "

        return unidecode(content)
 def _merge_pdf(self, documents):
     """Merge PDF files into one.

     The merged document is written to a new temporary file (prefix
     ``report.merged.tmp.``, suffix ``.pdf``) which is not deleted here.

     :param documents: list of path of pdf files
     :returns: path of the merged pdf
     """
     writer = PdfFileWriter()
     # The input streams must stay open until the writer's write() call,
     # so they are collected here and closed at the very end.
     streams = []
     try:
         for document in documents:
             pdfreport = open(document, 'rb')
             streams.append(pdfreport)
             reader = PdfFileReader(pdfreport)
             for page in range(reader.getNumPages()):
                 writer.addPage(reader.getPage(page))
         merged_file_fd, merged_file_path = tempfile.mkstemp(
             suffix='.pdf', prefix='report.merged.tmp.')
         # PDF content is binary, so the descriptor is opened in 'wb' mode.
         with closing(os.fdopen(merged_file_fd, 'wb')) as merged_file:
             writer.write(merged_file)
     finally:
         for stream in streams:
             stream.close()
     return merged_file_path
Example #38
0
def remove_pages(pdf_file, max_pages=1):
    """Truncate a PDF file in place to its first ``max_pages`` pages.

    The kept pages are written to ``pdf_file + '.tmp'``, which then
    replaces the original file.  A document with fewer pages than
    ``max_pages`` keeps all of them.

    Args:
        pdf_file: path of the PDF to truncate.
        max_pages: maximum number of leading pages to keep.

    Returns:
        ``pdf_file``, unchanged.
    """
    output = PdfFileWriter()
    tmp_file = pdf_file + '.tmp'

    # PDFs are binary: both files are opened in binary mode.
    with open(pdf_file, 'rb') as source:
        reader = PdfFileReader(source)

        kept = min(max_pages, reader.getNumPages())
        for i in range(kept):
            output.addPage(reader.getPage(i))

        with open(tmp_file, 'wb') as target:
            output.write(target)

    os.remove(pdf_file)
    os.rename(tmp_file, pdf_file)
    return pdf_file
Example #39
0
 def create_source_pdf(self, cr, uid, ids, data, report_xml, context=None):
     """Build the report and join several PDF results into one document.

     ``self._create_source`` is asked for the results first:

     * no results: fall back to ``self.create_single_pdf``;
     * exactly one result: return it as it is;
     * several results: each must have ``'pdf'`` as its second item,
       otherwise ``osv.except_osv`` is raised.  Their first items are read
       as PDF documents and all pages are concatenated.

     Returns, in the concatenation case, a tuple of the merged PDF content
     and the format of the first result.

     NOTE(review): ``deferred`` is not defined inside this method; it
     presumably comes from an enclosing scope. Confirm.
     """
     results = self._create_source(cr, uid, ids, data, report_xml, context)
     if not results:
         return self.create_single_pdf(cr, uid, ids, data, report_xml, context)
     if len(results) == 1:
         return results[0]

     if deferred:
         deferred.set_status(_('Concatenating single documents'))
     if any(r[1] != 'pdf' for r in results):
         raise osv.except_osv(_('Error!'),
                              _('Unsupported combination of formats!'))

     merged = PdfFileWriter()
     for r in results:
         document = PdfFileReader(StringIO(r[0]))
         for number in range(document.getNumPages()):
             merged.addPage(document.getPage(number))
     buf = StringIO()
     merged.write(buf)
     return buf.getvalue(), results[0][1]
Example #40
0
def kesit(dosya_yolu , sayfa1 , sayfa2=0):
    """Write the pages ``sayfa1`` up to (not including) ``sayfa2`` of a PDF
    to ``data.pdf``.

    A negative ``sayfa1`` is replaced by its absolute value.  When
    ``sayfa2`` is not given (0) everything from ``sayfa1`` to the end of the
    document is taken; when ``sayfa2`` is not greater than ``sayfa1`` only
    the page ``sayfa1`` is taken.

    Nothing is returned.  A success or failure message is printed; errors
    are not propagated.
    """
    try:
        # The source stays open until the output has been written.
        with open(dosya_yolu, "rb") as kaynak_dosya:
            kaynak = PdfFileReader(kaynak_dosya)
            islem = PdfFileWriter()
            if sayfa1 < 0:
                sayfa1 = -sayfa1
            if sayfa2 == 0:
                sayfa2 = kaynak.getNumPages()
            if sayfa2 <= sayfa1:
                sayfa2 = sayfa1 + 1
            for i in range(int(sayfa1), int(sayfa2)):
                islem.addPage(kaynak.getPage(i))
            with open("data.pdf", "wb") as hedef:
                islem.write(hedef)
        print("»» pdf oluşturuldu")
    except Exception:
        # Best effort by design: report the failure instead of raising.
        print("»» pdf oluşturulamadı")
Example #41
0
def genfile(srcfile, desfile, startpage, endpage):
    """
    Split a PDF file by page range.

    Copies the pages ``startpage`` to ``endpage`` (1-based, both included)
    of ``srcfile`` into the new file ``desfile``.  The directory of
    ``desfile`` is created when it does not exist yet.

    :param srcfile: path of the PDF to read
    :param desfile: path of the PDF to write
    :param startpage: number of the first page to copy, starting at 1
    :param endpage: number of the last page to copy
    :return: None
    """
    output = PdfFileWriter()
    # The source stays open until the output has been written.
    with open(srcfile, "rb") as src_stream:
        src = PdfFileReader(src_stream)
        filepath = os.path.dirname(desfile)
        # dirname is '' for a bare file name; makedirs('') would raise.
        if filepath and not os.path.exists(filepath):
            os.makedirs(filepath)
        for i in range(startpage - 1, endpage):
            output.addPage(src.getPage(i))
        with open(desfile, "wb") as des:
            output.write(des)
Example #42
0
def fisk_pdf(pdffile, directory):
    """Break a PDF into pages and build a Markdown notes skeleton for it.

    With ``name`` being ``pdffile`` minus its last four characters, the
    following files are written below ``directory/name`` (the ``images``
    and ``texts`` sub-directories are expected to exist):

    * ``name.md``: for every page a heading, an image link, the page text
      in 80-character chunks and an empty notes section;
    * ``images/file_<n>.pdf`` and ``images/file_<n>.png``: the single page
      and its rendering (via ``convert``);
    * ``texts/file_<n>.txt`` and ``texts/file_<n>_ascii.txt``: the page
      text (via ``pdftotext``) and its ASCII version (via ``iconv``).

    Pages are numbered from 1.

    NOTE(review): the shell commands are built by string concatenation, so
    paths containing spaces or shell metacharacters will break them.
    """
    name = pdffile[:-4]
    base = os.path.join(directory, name)
    with open(os.path.join(base, name + ".md"), "w") as g:
        print("# Notes on: ", file = g)
        with open(pdffile, "rb") as source:
            reader = PdfFileReader(source)
            numpages = reader.getNumPages()
            print("Number of pages %s" % numpages)
            for j in range(1, numpages + 1):
                output = PdfFileWriter()
                output.addPage(reader.getPage(j - 1))
                print("### Page " + str(j), file = g)
                imagefile = os.path.join(base, "images", "file_" + str(j) + ".pdf")
                imagefilePNG = os.path.join(base, "images", "file_" + str(j) + ".png")
                # PDF content is binary, hence "wb".
                with open(imagefile, "wb") as f:
                    output.write(f)
                textfile = os.path.join(base, "texts", "file_" + str(j) + ".txt")
                textfileASCII = os.path.join(base, "texts", "file_" + str(j) + "_ascii.txt")
                # extract the text from the single-page PDF
                cmd = "pdftotext " + imagefile + " " + textfile
                os.system(cmd)
                cmd = "iconv -c -f utf8 -t ascii " + textfile + " > " + textfileASCII
                os.system(cmd)
                cmd = "convert -density 100 " + imagefile + " -quality 100 " + imagefilePNG
                os.system(cmd)
                print("![](./images/file_" + str(j) + ".png)", file = g)
                print("", file = g)
                print("### Text from page " + str(j), file = g)
                with open(textfileASCII, "r") as t:
                    txt = t.read()
                # NOTE(review): the last replace() has empty arguments and
                # changes nothing; presumably a control character was lost.
                txt = txt.replace('\n', ' ').replace('\r', '').replace('', '')
                n = 80
                chunks = [txt[i:i+n] for i in range(0, len(txt), n)]
                for c in chunks:
                    print("    " + c, file = g)
                    print("", file = g)
                print("### Notes on page " + str(j), file = g)
Example #43
0
def Split(d):
    """Split PDF files into one output file per page.

    ``d["args"]`` holds the output prefix followed by one or more input
    files; ``d["-n"]`` is the number given to the first output file.  The
    outputs are named ``<prefix><number>.pdf``; the counter keeps running
    across all input files.  The number is zero-padded to as many digits as
    the page count of the current input has, so the files of one input sort
    in natural order.

    ``Error``, ``GetOutputWriter`` and ``Sentinel`` are helpers defined
    elsewhere and are called exactly as before.
    """
    args = d["args"]
    if len(args) < 2:
        Error("split needs at least two arguments")
    prefix, count = args[0], d["-n"]
    for filename in args[1:]:
        # The input stays open while its pages are being written.
        with open(filename, "rb") as source:
            reader = PdfFileReader(source)
            numpages = reader.getNumPages()
            # Digit count of the page total: 1 for 1..9, 2 for 10..99, ...
            # (a log10-based width is off for 1 and for powers of ten).
            fmt = "%%0%dd" % len(str(numpages))
            for i in range(numpages):
                output_file = prefix + (fmt % count) + ".pdf"
                writer = GetOutputWriter(output_file, d)
                page = reader.getPage(i)
                Sentinel(d)
                writer.addPage(page)
                with open(output_file, "wb") as output_stream:
                    writer.write(output_stream)
                count += 1
Example #44
0
    def test1(self):
        "Test generating several 'n-up' docs, n = (m**2) / 2..."

        samples = ("samples/test-a4-l.pdf", "samples/test-a4-p.pdf")
        for path0 in samples:
            for n in (2, 8, 18):
                stem = os.path.splitext(path0)[0]
                path1 = os.path.join(".", stem + "-%dup.pdf" % n)
                generateNup(path0, n, path1, verbose=False)  # , dirs="UL")

                # the n-up document must have ceil(pages / n) pages
                np0 = PdfFileReader(file(path0, "rb")).getNumPages()
                nup_doc = PdfFileReader(file(path1, "rb"))
                np1 = nup_doc.getNumPages()
                self.assertEqual(np1, math.ceil(np0 / float(n)))

                # every output page must carry the text of its group of
                # source pages
                labels = [str(num) for num in range(np0)]
                for pn in range(np1):
                    found = nup_doc.getPage(pn).extractText().split()
                    self.assertEqual(found, group(labels, n)[pn])
Example #45
0
def append_file(out, input_file_name):
    """Crop the first page of a PDF and append it to the writer ``out``.

    The crop is made by changing the page's media box.  Which region is
    kept depends on the module-level flag ``backpage``: the area of the
    back side of the notice when it is true, the area of the front side
    otherwise.

    The input file is left open on purpose: the page is only added to
    ``out`` here and is presumably read when ``out`` is written later.

    Args:
        out: PDF writer that receives the cropped page.
        input_file_name: path of the PDF whose first page is taken.
    """
    print("Open %s" % input_file_name)
    input1 = PdfFileReader(open(input_file_name, "rb"))
    page = input1.getPage(0)

    if backpage:
        # Crop to the back side of the notice.
        # The order matters: the second assignment reads the media box
        # after the first one has already changed it.
        page.mediaBox.upperRight = (
            float(page.mediaBox.getUpperRight_x()),
            float(page.mediaBox.getUpperRight_y()) -
            (float(page.mediaBox.getUpperLeft_y()) * 0.325))
        page.mediaBox.lowerRight = (
            float(page.mediaBox.getLowerRight_x()) * 0.7,
            float(page.mediaBox.getUpperRight_y()) -
            (float(page.mediaBox.getUpperLeft_y()) * 0.48))
    else:
        # Crop to the front side of the notice.
        page.mediaBox.lowerRight = (
            float(page.mediaBox.getLowerRight_x()) * 0.7,
            float(page.mediaBox.getUpperLeft_y()) -
            (float(page.mediaBox.getUpperLeft_y()) * 0.325))
    out.addPage(page)
    print('Croped and added to output file')
Example #46
0
    def test_rotation_different_to_unrotated(self):
        # Combining with rotate='180' must produce different bytes than
        # combining without rotation.

        # Sanity check: the fixture document contains the text 'Test'.
        fixture = PdfFileReader(self.get_pdf_stream())
        assert 'Test' in fixture.getPage(0).extractText()

        rv = self.app.get('/')
        self.assertEquals(rv.status_code, 200)

        upload = {'file': (self.get_pdf_stream(), 'test.pdf')}
        rv = self.app.post('/handleform', data=upload)

        # First build: no rotation.
        content_no_rotation = self.combine_and_download().data

        # Second build: rotated by 180 degrees.
        content = self.combine_and_download(rotate='180').data

        self.assert_(content_no_rotation != content)

        self.clean_up()
    def _combine_pdf_files(self, tmp_folder_name, output_report):
        """Join the ``*water*.pdf`` files of a folder into one report.

        Ghostscript (``gs``) concatenates every file matching
        ``<tmp_folder_name>*water*.pdf`` into ``<tmp_folder_name>temp.pdf``.
        That file is then copied to ``<tmp_folder_name><output_report>``
        without its last page.

        Both paths are built by plain string concatenation, so
        ``tmp_folder_name`` has to end with a path separator.

        NOTE(review): the paths are inserted into a shell command unquoted.
        """
        output_path = tmp_folder_name + output_report
        output_temp_path = tmp_folder_name + 'temp.pdf'

        cmd = """gs -q -dNOPAUSE -sDEVICE=pdfwrite -sOUTPUTFILE=%s \
                -dBATCH %s*water*.pdf""" % (output_temp_path, tmp_folder_name)
        os.system(cmd)

        # remove the last empty page
        output_stream = PdfFileWriter()
        # The combined file stays open until the report has been written.
        with open(output_temp_path, 'rb') as temp_file:
            input_stream = PdfFileReader(temp_file)

            pagenum = input_stream.getNumPages()
            for i in range(pagenum - 1):
                output_stream.addPage(input_stream.getPage(i))

            with open(output_path, 'wb') as out_stream:
                output_stream.write(out_stream)
Example #48
0
 def parse_pdf(self, pdfname):
     """Fill ``self.config`` with the API methods described in a PDF.

     When ``pdfname`` does not exist it is downloaded from ``pdfurl`` first.
     The text of every page is then searched for a ``<methodCall>`` and a
     ``<methodResponse>`` element:

     * a call whose method name matches ``UDNS_...`` and does not contain
       "connect"/"Connect" gets a config section of that name; each
       ``<value><type>name</...></value>`` of the call is stored under the
       option ``type+name`` and the option names are listed, in order,
       under ``order``;
     * ``fault`` is set to True when the page contains ``<fault>``;
     * ``array`` is set to the first element name found inside
       ``<array><data><value>`` of the response.

     Pages without a usable method call are skipped.
     """
     if not os.path.exists(pdfname):
         print('Missing API documentation, downloading from: %s' % pdfurl)
         urllib.urlretrieve(pdfurl, pdfname)
     print('Compiling API methods...')
     apidoc = PdfFileReader(file(pdfname, 'rb'))
     value_pattern = re.compile(
         r'<value><([^\s]+)>([^\s]+)</([^\s]+)></value>')
     for page_number in range(0, apidoc.getNumPages()):
         doc = apidoc.getPage(page_number).extractText()
         call_match = re.search(r'<methodCall>.+</methodCall>', doc)
         response_match = re.search(
             r'<methodResponse>.+</methodResponse>', doc)
         section = None
         if call_match:
             call_xml = call_match.group()
             method = re.search(r'<methodName>(UDNS_\w+)</methodName>',
                                call_xml)
             if not method:
                 continue
             section = method.group(1)
             if re.search(r'[Cc]onnect', section):
                 continue  # exclude connection-related methods
             if section not in self.config._sections:
                 self.config.add_section(section)
             order = []
             for m in re.findall(value_pattern, call_xml):
                 key = '%s+%s' % m[0:2]
                 self.config.set(section, key, m[2])
                 order.append(key)
             if order:
                 self.config.set(section, 'order', ','.join(order))
         if not section:
             continue
         if re.search(r'<fault>', doc):
             self.config.set(section, 'fault', True)
         if response_match:
             response_xml = response_match.group()
             record = re.search(r'<array>\s*<data>\s*<value>\s*<([_\w]+)>',
                                response_xml)
             if record:
                 self.config.set(section, 'array', record.group(1))
def print_danfe(inv):
    """Render the DANFE of an invoice and return it as PDF content.

    The processed-NFe class matching ``inv.nfe_version`` ('1.10', '2.00' or
    '3.10') is loaded with the invoice XML.  That file is looked up in the
    directory given by ``monta_caminho_nfe``, or in its ``tmp/``
    sub-directory when the invoice state is not 'open', 'paid' or
    'sefaz_cancelled'.  The DANFE is generated into ``/tmp/`` and its pages
    are copied into the returned document.

    Returns:
        The PDF content as a string.

    NOTE(review): for any other ``nfe_version`` no class is selected and
    the function fails on the first use of ``procnfe``.
    """
    paths = []

    if inv.nfe_version == '1.10':
        from pysped.nfe.leiaute import ProcNFe_110
        procnfe = ProcNFe_110()
    elif inv.nfe_version == '2.00':
        from pysped.nfe.leiaute import ProcNFe_200
        procnfe = ProcNFe_200()
    elif inv.nfe_version == '3.10':
        from pysped.nfe.leiaute import ProcNFe_310
        procnfe = ProcNFe_310()

    file_xml = monta_caminho_nfe(inv.company_id, inv.nfe_access_key)
    if inv.state not in ('open', 'paid', 'sefaz_cancelled'):
        file_xml = os.path.join(file_xml, 'tmp/')
    procnfe.xml = os.path.join(file_xml, inv.nfe_access_key + '-nfe.xml')
    danfe = DANFE()
    danfe.logo = add_backgound_to_logo_image(inv.company_id)
    danfe.NFe = procnfe.NFe
    danfe.leiaute_logo_vertical = inv.company_id.nfe_logo_vertical
    danfe.protNFe = procnfe.protNFe
    danfe.caminho = "/tmp/"
    danfe.gerar_danfe()
    paths.append(danfe.caminho + danfe.NFe.chave + '.pdf')

    output = PdfFileWriter()
    s = StringIO()

    # The inputs stay open until the single write() below.
    streams = []
    try:
        for path in paths:
            stream = open(path, "rb")
            streams.append(stream)
            pdf = PdfFileReader(stream)
            for i in range(pdf.getNumPages()):
                output.addPage(pdf.getPage(i))
        # Written once, after all pages are collected; writing inside the
        # loop would append one more complete document per path.
        output.write(s)
    finally:
        for stream in streams:
            stream.close()

    str_pdf = s.getvalue()
    s.close()
    return str_pdf
    def pdf_watermark_fast(self, pathname, Wm_f, wt1='', **kwargs):
        """Watermark ``pathname`` through ``watter_marker`` and return ``Wm_f``.

        ``pathname``, the watermark file ``wt1`` and ``Wm_f`` are handed to
        ``watter_marker.op_w_input`` in that order.  When ``wt1`` is empty a
        watermark file is chosen first; see the comments below.

        The keyword argument ``url_wtm`` is needed in practice: without it
        ``url_watermark`` is never assigned and the first use of it raises.

        NOTE(review): presumably ``pathname`` is the PDF to mark and ``Wm_f``
        the output path. Confirm against ``watter_marker.op_w_input``.
        """
        try:
            url_watermark = kwargs['url_wtm']
        except:
            # A missing 'url_wtm' is ignored here; url_watermark then stays
            # unbound and the replace() call below fails.
            pass

        from pyPdf import PdfFileWriter, PdfFileReader
        # fo=os.getcwd()
        # CurrentDir=os.path.dirname(os.path.realpath(__file__))
        import watter_marker
        # File-name-safe form of the URL: "." becomes "_" and "://" becomes "__".
        url_watermark2 = url_watermark.replace(".", "_")
        url_watermark2 = url_watermark2.replace("://", "__")
        # CurrentDir=os.path.dirname(os.path.realpath(__file__)).replace('\\','/')
        if wt1 == '':
            # No watermark file given: create "watermarker_slow<url>.pdf"
            # when it is missing.
            if not os.path.isfile(self.Watermarked_PDF_Dir + "/" +
                                  "watermarker_slow" + url_watermark2 +
                                  ".pdf"):
                wt1 = self.watermark_file(self.Watermarked_PDF_Dir + "/" +
                                          "watermarker_slow" + url_watermark2 +
                                          ".pdf",
                                          url_watermark,
                                          center_text=False)
            else:
                # NOTE(review): the file checked above is
                # "watermarker_slow<url>.pdf" but the one used here is
                # "watermarker_fast.pdf". Confirm this is intended.
                wt1 = self.Watermarked_PDF_Dir + "/" + "watermarker_fast.pdf"
            if True:
                # Opening the file and reading page 0 below fails early when
                # the watermark file is missing or unreadable.  The file
                # object is never closed explicitly.
                watermark1 = PdfFileReader(file(wt1, 'rb'))
            else:
                # Unreachable: the condition above is the constant True.
                wt1 = self.watermark_file(self.Watermarked_PDF_Dir + "/" +
                                          "watermarker_slow" + url_watermark2 +
                                          ".pdf",
                                          url_watermark,
                                          center_text=False)
                watermark1 = PdfFileReader(file(wt1, 'rb'))
            # The page object itself is not used afterwards.
            wtt = watermark1.getPage(0)

        watter_marker.op_w_input(pathname, wt1, Wm_f)
        # Wm_f is full address
        return Wm_f
    def collate(self, remove_temp=True, remove_sources=False):
        """Convert the SVG sources to PDF pages and join them in ``self.dest``.

        Every file in ``self.sources`` is first passed through
        ``hack_svg_viewbox`` into a temporary file, rendered to a one-page
        PDF and appended to the output in source order.  The temporary
        files live in a ``tmp`` folder next to ``self.dest``.

        Args:
            remove_temp: delete the temporary folder afterwards.
            remove_sources: delete the source images afterwards.

        Returns:
            True.

        Raises:
            RuntimeError: the directory of ``self.dest`` does not exist.
        """
        from pyPdf import PdfFileWriter, PdfFileReader
        from svglib.svglib import svg2rlg
        from reportlab.graphics import renderPDF

        # Make temporary folder.  A destination without a directory part
        # lives in the current directory.
        dest_dir = os.path.dirname(self.dest) or '.'
        if not os.path.exists(dest_dir):
            raise RuntimeError("output place %s d.n.e." % dest_dir)
        temp_dir = dest_dir + '/tmp'
        if not os.path.exists(temp_dir):
            os.mkdir(temp_dir)
        # Fix SVG windows for PDFing
        temp_page = [
            '%s/page%i.tmp' % (temp_dir, i) for i, _ in enumerate(self.sources)
        ]
        for s, d in zip(self.sources, temp_page):
            hack_svg_viewbox(s, d)
        # Generate single PDF pages; each temporary file is overwritten by
        # its own rendering.
        for s in temp_page:
            drawing = svg2rlg(s)
            renderPDF.drawToFile(drawing, s, autoSize=1)
        # Concatenate the PDF pages into a single document.  The page files
        # stay open until the output is written and are closed before the
        # temporary folder is removed.
        output = PdfFileWriter()
        page_streams = []
        try:
            for s in temp_page:
                stream = open(s, 'rb')
                page_streams.append(stream)
                output.addPage(PdfFileReader(stream).getPage(0))
            with open(self.dest, 'wb') as fout:
                output.write(fout)
        finally:
            for stream in page_streams:
                stream.close()
        # Remove the temporary folder
        if remove_temp:
            shutil.rmtree(temp_dir)
        # Remove the source images
        if remove_sources:
            for source in self.sources:
                os.remove(source)
        return True
def createForm(dogs, filename):
    """Pre-fill the two-page trial record for a list of dogs.

    ``dogs`` is a list of dicts of field name -> text.  For the dog at
    index ``i`` every field that has a position in ``placements`` is drawn
    on an A4 canvas, shifted vertically by ``yoffset[i]``; coordinates are
    multiplied by ``cm``.  After the sixth dog a new page is started and
    the field ``'Luokka'`` is shifted 0.3 cm to the right from then on.

    The first two pages of the drawing are saved as ``/tmp/stamp.pdf`` and
    ``pdftk`` multistamps them onto ``pohjat/koepoytakirja.pdf``, writing
    ``esitaytetyt/<filename>``.

    NOTE(review): ``/tmp/stamp.pdf`` is a fixed path, so concurrent runs
    would overwrite each other's stamp.
    """
    packet = StringIO.StringIO()
    c = canvas.Canvas(packet, pagesize=A4)
    extraoffset = {'Luokka': 0}
    for i, info in enumerate(dogs):
        for k, text in info.items():
            if k in placements:
                extra = extraoffset.get(k, 0)
                x, y = placements[k]
                c.drawString(extra + x*cm, yoffset[i]*cm + y*cm, text)
        if i == 5:
            c.showPage()
            extraoffset['Luokka'] = 0.3*cm

    if len(dogs) < 7:
        c.showPage()
    c.save()
    packet.seek(0)
    new_pdf = PdfFileReader(packet)
    output = PdfFileWriter()
    output.addPage(new_pdf.getPage(0))
    output.addPage(new_pdf.getPage(1))

    with open("/tmp/stamp.pdf", "wb") as outputStream:
        output.write(outputStream)

    try:
        os.mkdir("esitaytetyt")
    except OSError:
        # An already existing output directory is fine; anything else
        # (permissions, a plain file in the way) is a real error.
        if not os.path.isdir("esitaytetyt"):
            raise

    call(['pdftk', 'pohjat/koepoytakirja.pdf', 'multistamp', '/tmp/stamp.pdf', 'output', 'esitaytetyt/%s' % filename])
def writePDF(linkPaths):
    """Join the downloaded pages into one dated newspaper PDF.

    For a list of ``n`` links the first page of each of the files
    ``./Tmp/1.pdf`` to ``./Tmp/<n>.pdf`` is collected, in that order, and
    written to ``GDN <YYYYMMDD>.pdf`` in the current directory, using
    today's date.  Only the length of ``linkPaths`` is used.

    When ``linkPaths`` is not a list a message is printed and nothing is
    written.  Returns None in both cases.
    """
    if not isinstance(linkPaths, list):
        print("Invalid parameter passed.\n")
        return

    output = PdfFileWriter()
    # The page files stay open until the output has been written.
    streams = []
    try:
        for number in range(1, len(linkPaths) + 1):
            stream = open("./Tmp/" + str(number) + ".pdf", "rb")
            streams.append(stream)
            output.addPage(PdfFileReader(stream).getPage(0))

        print("Generating newspaper...\n")
        fileName = "GDN " + datetime.now().strftime("%Y%m%d") + ".pdf"

        with open(fileName, "wb") as outputStream:
            output.write(outputStream)
    finally:
        for stream in streams:
            stream.close()

    return
Example #54
0
def merge_pdf(lpdf):
    """
    Concatenate the pages of several PDF documents into a single one.

    ``None`` entries of the list are skipped.  Every other entry is rewound
    to its beginning before it is read.  The returned buffer is left
    positioned after the written data.

    :param lpdf: List of PDF as File Object
    :type  lpdf: list
    :return: file object holding the merged PDF
    :rtype: File Object
    """
    merged = PdfFileWriter()
    for current_pdf in lpdf:
        if current_pdf is not None:
            # Make sure reading starts at the beginning of the file
            current_pdf.seek(0)
            reader = PdfFileReader(current_pdf)
            for number in range(reader.getNumPages()):
                merged.addPage(reader.getPage(number))

    # The merge result is stored in an in-memory file object
    fo_pdf = StringIO()
    merged.write(fo_pdf)
    return fo_pdf
Example #55
0
def cropNzoom(inputFile, pageNumber, zoomFactor):
    """Scale one page of a PDF and save it as a new one-page PDF.

    The output is written next to the input; its name is the input name
    without its extension followed by ``_<pageNumber>test.pdf``.  Despite
    the name of the function only scaling is applied to the page.

    Args:
        inputFile: path of the source PDF; it has to contain a '.'.
        pageNumber: number of the page to take, starting at 1.
        zoomFactor: factor passed to the page's ``scaleBy``.

    Returns:
        The path of the written file.
    """

    print("Cropping and scaling pdf")

    pageIndex = pageNumber - 1
    outputFile = inputFile[:inputFile.rindex('.')] + '_' + str(pageIndex +
                                                               1) + 'test.pdf'

    output = PdfFileWriter()
    # The input stays open until the output has been written.
    with open(inputFile, "rb") as source:
        input1 = PdfFileReader(source)

        page = input1.getPage(pageIndex)

        page.scaleBy(zoomFactor)
        output.addPage(page)

        # rfind() yields -1 for a path without a backslash, so the whole
        # path is shown instead of an exception being raised.
        print("Saving cropped pdf as: " + outputFile[outputFile.rfind('\\') + 1:])

        with open(outputFile, "wb") as outputStream:
            output.write(outputStream)

    return outputFile
Example #56
0
def main(output_file, input_files):
    """Concatenate all pages of the given PDF files into ``output_file``.

    Entries of ``input_files`` whose name does not end in ``.pdf`` are
    skipped and reported.  For every other file its name and page count
    are printed; the total page count is printed at the end.

    Args:
        output_file: path of the PDF to write.
        input_files: paths of the PDFs to join, in order.
    """
    print("concat all files:")

    output = PdfFileWriter()

    total_pages = 0
    # The inputs stay open until the output has been written.
    streams = []
    try:
        for f in input_files:
            # expect filename as "*.pdf"
            if f[-4:] != ".pdf":
                print("skipped file:  %s" % (f,))
                continue
            stream = open(f, 'rb')
            streams.append(stream)
            reader = PdfFileReader(stream)
            num_pages = reader.getNumPages()
            total_pages += num_pages
            print("%s -> %spages" % (f, num_pages))
            for i in xrange(0, num_pages):
                output.addPage(reader.getPage(i))

        with open(output_file, 'wb') as outputStream:
            output.write(outputStream)
            print(str(total_pages) + "pages written")
    finally:
        for stream in streams:
            stream.close()
Example #57
0
    def create(self, cr, uid, ids, datas, context=None):
        """Build one PDF from the printouts of the checked-out documents.

        For every ``plm.checkout`` record in ``ids`` the first page of the
        linked document's printout (stored base64-encoded) is appended to
        the output.  A document is taken only once, and documents without a
        printout are skipped.  The merged PDF is wrapped in
        ``external_pdf`` and rendered.

        Returns:
            The tuple ``(self.obj.pdf, 'pdf')``.
        """
        self.pool = pooler.get_pool(cr.dbname)
        checkoutType = self.pool.get('plm.checkout')
        output = PdfFileWriter()
        packed = set()  # ids of the documents already added
        for checkout in checkoutType.browse(cr, uid, ids):
            document = checkout.documentid
            if document.printout and document.id not in packed:
                input1 = PdfFileReader(
                    StringIO.StringIO(
                        base64.decodestring(document.printout)))
                output.addPage(input1.getPage(0))
                packed.add(document.id)

        pdf_string = StringIO.StringIO()
        try:
            output.write(pdf_string)
            self.obj = external_pdf(pdf_string.getvalue())
            self.obj.render()
        finally:
            pdf_string.close()
        return (self.obj.pdf, 'pdf')
Example #58
0
def crop_image(box, pdf_page, filename, count):
    print "BOX"
    print box
    with open(filename + "_data/" + pdf_page, "rb") as in_f:
        input1 = PdfFileReader(in_f)
        output = PdfFileWriter()

        page = input1.getPage(0)

        x0 = float(box[0])
        y0 = pdf_metadata.page_height - float(box[1])
        x1 = float(box[2])
        y1 = pdf_metadata.page_height - float(box[3])

        page.trimBox.lowerLeft = (x0, y1)
        page.trimBox.upperRight = (x1, y0)
        page.cropBox.lowerLeft = (x0, y1)
        page.cropBox.upperRight = (x1, y0)
        output.addPage(page)

        with open("OCR_DATASET/" + filename + "_me_" + str(count),
                  "wb") as out_f:
            output.write(out_f)
Example #59
0
def get_pdf_pagesize(fn, page=0):
    """Return ``(width, height)`` covered by the boxes of one PDF page.

    Args:
        fn: path of the PDF file.
        page: zero-based index of the page to inspect.

    The ``/TrimBox``, ``/CropBox``, ``/MediaBox`` and ``/ArtBox`` entries
    that exist on the page are merged into one extent. The extent starts out
    as the single point ``(0, 0)``, so the result always includes the origin;
    if none of the boxes is present the result is ``(0.0, 0.0)``.
    """
    x0 = x1 = y0 = y1 = 0.0
    # Keep the file open while the boxes are read: the original closed it
    # first, and any read failure that caused was hidden by a bare except.
    with open(fn, "rb") as f:
        p = PdfFileReader(f).getPage(page)
        for k in ('/TrimBox', '/CropBox', '/MediaBox', '/ArtBox'):
            try:
                _dim = p[k]
            except KeyError:
                # This box is not defined on the page.
                continue
            _x0, _y0, _x1, _y1 = map(float, _dim)
            x0 = min(x0, _x0)
            y0 = min(y0, _y0)
            x1 = max(x1, _x1)
            y1 = max(y1, _y1)
    return (x1 - x0, y1 - y0)
Example #60
0
def MergePDF(filepath,outfile):
    output=PdfFileWriter()
    outputPages=0
    pdf_fileName=getFileName(filepath)
    print '总的',pdf_fileName
    for each in pdf_fileName:
        if '.DS_Store' in each:
            continue
        # print '看看',os.path.dirname(each),'+', os.path.splitext(each.replace(os.path.dirname(each),''))
        # 
        print '单的',each
        # 读取源pdf文件
        input = PdfFileReader(file(each, "rb"))

        # print 'input:',input
        # 如果pdf文件已经加密,必须首先解密才能使用pyPdf
        if input.isEncrypted == True:
            print 'input.isEncrypted',input.isEncrypted 
            input.decrypt("map")

        # 获得源pdf文件中页面总数
        pageCount = input.getNumPages()
        outputPages += pageCount
        print pageCount

        # 分别将page添加到输出output中
        for iPage in range(0, pageCount):
            output.addPage(input.getPage(iPage))


    print "All Pages Number:"+str(outputPages)
    # 最后写pdf文件
    outputStream=file(filepath+outfile,"wb")
    output.write(outputStream)
    outputStream.close()
    print "finished"