Beispiel #1
0
def renderToPdf(envLL, filename, sizex, sizey):
    """Render every Mapnik layer to its own PDF and merge them into one page.

    For each name in MAPNIK_LAYERS the style ``<name>.xml`` is loaded, zoomed
    to ``envLL`` (converted with LLToMerc) and rendered to
    ``<basename>_<name>.pdf``.  The first layer's page becomes the base page,
    every later layer is merged onto it, and the result is written to
    ``filename``.

    Arguments:
    envLL -- bounding box handed to LLToMerc
    filename -- path of the merged output PDF
    sizex, sizey -- page size passed to cairo.PDFSurface and mapnik.Map

    Raises ValueError when MAPNIK_LAYERS is empty.
    """
    basefilename = os.path.splitext(filename)[0]
    # The projected envelope is the same for every layer: compute it once.
    envMerc = LLToMerc(envLL)
    mergedpdf = None
    page = None
    # The per-layer files must stay open until the merged document has been
    # written, because the reader resolves page content from them on demand.
    layerfiles = []
    try:
        for mapname in MAPNIK_LAYERS:
            print('Rendering %s' % mapname)
            # Render layer PDF (cairo opens the target file by name itself).
            localfilename = basefilename + '_' + mapname + '.pdf'
            surface = cairo.PDFSurface(localfilename, sizex, sizey)
            layermap = mapnik.Map(sizex, sizey)
            mapnik.load_map(layermap, mapname + ".xml")
            layermap.zoom_to_box(envMerc)
            mapnik.render(layermap, surface)
            surface.finish()
            # Merge with master.
            layerfile = open(localfilename, "rb")
            layerfiles.append(layerfile)
            layerpage = PdfFileReader(layerfile).getPage(0)
            if mergedpdf is None:
                mergedpdf = PdfFileWriter()
                page = layerpage
                mergedpdf.addPage(page)
            else:
                page.mergePage(layerpage)
        if mergedpdf is None:
            raise ValueError("MAPNIK_LAYERS is empty, nothing to render")
        with open(filename, 'wb') as output:
            mergedpdf.write(output)
    finally:
        for layerfile in layerfiles:
            layerfile.close()
Beispiel #2
0
def add(request):
    """
    Upload a document.

    On a valid POST the uploaded document is attached to ``request.user``,
    its title and author are read from the PDF metadata on a best-effort
    basis (placeholders are used when they cannot be read or are missing),
    the document is saved and the user is redirected to its edit page.
    Otherwise the (bound or empty) form is rendered with ``add.html``.
    """
    if request.method == "POST":
        form = AddDocumentForm(request.POST, request.FILES)
        if form.is_valid():
            document = form.save(commit=False)
            document.user = request.user

            title = author = None
            try:
                from pyPdf import PdfFileReader
                info = PdfFileReader(document.file).getDocumentInfo()
                if info is not None:
                    title = info.title
                    author = info.author
            except Exception:
                # Metadata extraction is optional: an unreadable or non-PDF
                # upload must not prevent the document from being stored.
                title = author = None

            document.title = title or "( Insert title )"
            document.author = author or "( Insert author )"

            document.save()
            return HttpResponseRedirect('/documents/edit/' + str(document.id))
    else:
        form = AddDocumentForm()

    context = {
        'form': form,
    }
    return render_to_response('add.html', context,
                              context_instance=RequestContext(request))
Beispiel #3
0
    def test_read_pdf(self):
        """Open the sample PDF and exercise metadata, page and text access."""
        fLOG(__file__, self._testMethodName, OutputPrint=__name__ == "__main__")
        here = os.path.split(__file__)[0]
        pdffile = os.path.join(here, "data", "1305.0445.pdf")
        assert os.path.exists(pdffile)

        with open(pdffile, "rb") as stream:
            reader = PdfFileReader(stream)

            # document level information
            title = reader.getDocumentInfo().title
            traw = reader.getDocumentInfo().title_raw
            npage = reader.getNumPages()
            fLOG("title", title, "*", traw)
            fLOG("nb pages", npage)

            # first page: contents, keys, annotations, items
            first = reader.getPage(0)
            fLOG("cont", first.getContents())
            for key in first:
                fLOG("obj", key, "*", key.title())
            for annot in first.raw_get("/Annots"):
                fLOG("annot", annot, dir(annot))
            for item in first.items():
                fLOG("item", item)

            # extracted text must contain whitespace and a known sentence
            text = first.extractText()
            fLOG("text---", text)
            assert " " in text
            assert "\n" in text
            if "algorithms: their inability" not in text:
                raise Exception(text)
Beispiel #4
0
  def setMetadata(self, metadata):
    """Returns a document with new metadata.

    The pages of the current document are copied into a new PDF whose
    information dictionary is filled from ``metadata``; the new PDF is
    returned as bytes.

    Keyword arguments:
    metadata -- expected an dictionary with metadata. A "ModificationDate"
                entry is stored as "ModDate" and a list of "Keywords" is
                joined with spaces. The dictionary itself is not modified.
    """
    # TODO: date as "D:20090401124817-04'00'" ASN.1 for ModDate and CreationDate
    # Work on a copy so the caller's dictionary is left untouched.
    metadata = dict(metadata)
    modification_date = metadata.pop("ModificationDate", None)
    if modification_date:
      metadata['ModDate'] = modification_date
    keywords = metadata.get('Keywords')
    if isinstance(keywords, list):
      metadata['Keywords'] = ' '.join(keywords)

    args = {}
    for key, value in metadata.items():
      # Only the first letter is upper-cased: str.capitalize() would also
      # lower-case the rest and turn "ModDate" into "Moddate".
      name = key[:1].upper() + key[1:]
      args[NameObject('/' + name)] = createStringObject(value)

    # The source file has to stay open until the output has been written.
    with open(self.document.getUrl(), "rb") as input_file:
      input_pdf = PdfFileReader(input_file)
      output_pdf = PdfFileWriter()
      output_pdf._info.getObject().update(args)

      for page_num in range(input_pdf.getNumPages()):
        output_pdf.addPage(input_pdf.getPage(page_num))

      output_stream = io.BytesIO()
      output_pdf.write(output_stream)
    return output_stream.getvalue()
Beispiel #5
0
    def rewrite(self, context, font={'name': 'Times-Roman', 'size': 11}):
        """Draw text on the first page of ``self.path`` and save the result.

        :param context: iterable of dicts with the keys ``x``, ``y`` and
            ``value``; each value is drawn at the given position
        :param font: dict with the keys ``name`` and ``size``
            (the shared default dict is only read, never modified)
        :return: True once ``self.destination`` has been written
        """
        packet = StringIO.StringIO()
        # create a new PDF with Reportlab
        can = canvas.Canvas(packet, pagesize=letter)
        can.setFont(font['name'], font['size'])
        for item in context:
            can.drawString(item['x'], item['y'], item['value'])
        can.save()

        # move to the beginning of the StringIO buffer
        packet.seek(0)
        new_pdf = PdfFileReader(packet)

        # The existing PDF must stay open until the output has been written;
        # the context managers close both files even when merging fails.
        with open(self.path, "rb") as source:
            existing_pdf = PdfFileReader(source)
            # merge the new file with the existing
            page = existing_pdf.getPage(0)
            page.mergePage(new_pdf.getPage(0))
            output = PdfFileWriter()
            output.addPage(page)
            # finally, write "output" to a real file
            with open(self.destination, "wb") as outputStream:
                output.write(outputStream)

        return True
    def _getPDFText(self, filename, d):
        """Extract the text of every page of a PDF.

        Each entry of the document information dictionary is registered on
        ``d`` through ``addConceptKeyType`` (key without its leading slash).

        :param filename: path of the PDF file
        :param d: object exposing ``addConceptKeyType(key, value)``
        :return: list with one string per page, each suffixed with ". ",
            or None when the file could not be processed (the error is logged)
        """
        logger.debug(u"filename: %s" % filename)
        newparatextlist = list()

        try:
            with open(filename, u"rb") as pdfFile:
                pdfDoc = PdfFileReader(pdfFile)

                pdfDict = pdfDoc.getDocumentInfo()

                for x in pdfDict.keys():
                    d.addConceptKeyType(x[1:], pdfDict[x])

                for page in pdfDoc.pages:
                    text = page.extractText()
                    if not isinstance(text, str):
                        # Keep the ASCII folding: the original computed it
                        # but threw the result away.
                        text = unicodedata.normalize(u'NFKD', text).encode(u'ascii', u'ignore')

                    logger.debug(u"PDF : %s" % text)

                    newparatextlist.append(text + u". ")

            return newparatextlist

        except Exception as msg:
            logger.error(u"%s" % msg)
            return None
def createPDFHttpResponse(filepath, output_filename, user, access_time):
    """
    Creates a HttpResponse from a watermarked PDF file. Watermark contains the user who accessed the document
    and the time of access.

    :param filepath: Path to the file
    :param output_filename: File name sent to the user
    :param user: value rendered into the watermark text
    :param access_time: object whose ``isoformat(' ')`` is rendered into the watermark text
    :return: HttpResponse with the watermarked file content
    :raises IOError: when ``filepath`` cannot be opened

    """
    # Add access watermark
    overlay = StringIO()
    p = canvas.Canvas(overlay)
    p.drawString(0, 0, "Downloaded by %s at %s" % (user, access_time.isoformat(' ')))
    p.showPage()
    p.save()
    overlay.seek(0)
    watermark = PdfFileReader(overlay)

    response = HttpResponse(mimetype='application/pdf')
    response['Content-Disposition'] = 'inline; filename=%s' % output_filename.encode('utf-8')

    # Read the PDF to be accessed; it stays open until the response body has
    # been written and is closed afterwards, even on error.
    with open(filepath, 'rb') as source:
        attachment = PdfFileReader(source)
        output = PdfFileWriter()

        # Attach watermark to each page
        for page in attachment.pages:
            page.mergePage(watermark.getPage(0))
            output.addPage(page)

        output.write(response)
    return response
Beispiel #8
0
def getPDFContents(path):
    """Return the lower-cased text of a PDF with all whitespace collapsed.

    Encrypted files are reported on stdout and yield an empty string.  Any
    error while reading is printed; the text extracted up to that point is
    still returned.

    :param path: path of the PDF file
    :return: the extracted text as a single line
    """
    pages = []
    try:
        with open(path, "rb") as stream:
            pdf = PdfFileReader(stream)
            if pdf.isEncrypted:
                print("%s is encrypted!" % path)
            else:
                # get all pages and put them in a string
                for i in range(0, pdf.getNumPages()):
                    pages.append(pdf.getPage(i).extractText().lower() + " \n")
    except Exception as e:
        # Best effort: report the problem and keep what was read so far.
        print(e.args)
    content = "".join(pages)
    return u" ".join(content.replace(u"\xa0", u" ").strip().split())
Beispiel #9
0
def split_chapters(*t_args):
    """
    Split a large pdf into chunks (i.e. chapters)

    ``t_args[0]`` is the argument list; its first item is the path of a JSON
    configuration file providing the keys ``source``, ``outputdir``,
    ``first_chapter_index``, ``chapters_ends``, ``firstpage``,
    ``chapter_fmt`` and ``chapter_prefix``.  Chapter ``i`` covers the pages
    from the previous chapter end (``firstpage`` for the first chapter) up
    to, but not including, ``chapters_ends[i]``.
    """
    if len(t_args) == 0:
        return
    args = t_args[0]
    if len(args) < 1:
        print("usage: utils_pdf split_chapters configfile")
        return
    from pyPdf import PdfFileWriter, PdfFileReader
    with open(args[0]) as config_file:
        P = json.load(config_file)

    # The source stays open while chapters are written and is closed after.
    with open(P["source"], "rb") as source_file:
        source = PdfFileReader(source_file)
        i0 = P["first_chapter_index"]
        ends = P["chapters_ends"]
        # The output directory is only needed when there is a chapter to write.
        if ends and not os.path.exists(P["outputdir"]):
            os.mkdir(P["outputdir"])
        for i, end in enumerate(ends):
            fmt = P["chapter_fmt"] % (i0 + i, )
            fn_out = "%s/%s%s" % (P["outputdir"], P["chapter_prefix"], fmt)
            j0 = P["firstpage"] if i == 0 else ends[i - 1]
            output = PdfFileWriter()
            for j in range(j0, end):
                output.addPage(source.getPage(j))
            with open(fn_out, "wb") as outputStream:
                output.write(outputStream)
            print("wrote %s" % (fn_out,))
Beispiel #10
0
def parse_file(pdfFile,nameFile):
  """Split a PDF per recipient name and e-mail each part.

  The pages are scanned in order.  When a page contains one of the names
  returned by get_names(nameFile) (case-insensitive), the pages collected so
  far are sent with send_email and a new part is started; pages before the
  first match are ignored.  The last part is sent once all pages are read.
  """
  # The PDF stays open until every part has been sent.
  with open(pdfFile, "rb") as pdfStream:
    pdfReader = PdfFileReader(pdfStream)

    # read the names and emails from csv file
    names = get_names(nameFile)

    # create an instance in SMTP server
    smtp = smtplib.SMTP('localhost')
    try:
      pdfWriter = None   # part currently being collected, None before the first match
      prevName = ""
      for i in range(pdfReader.getNumPages()):
        page = pdfReader.getPage(i)
        pageStr = page.extractText().lower()      # extract the pdf text
        for name in names.keys():
          if pageStr.find(name.lower()) != -1:
            if pdfWriter is not None:   # send the current pdf
              send_email(smtp, pdfWriter, prevName, names)

            pdfWriter = PdfFileWriter()   # start a new part with the current page
            prevName = name               # remember whose part this is
            break
        if pdfWriter is not None:
          pdfWriter.addPage(page)

      # send the last file
      if pdfWriter is not None:
        send_email(smtp, pdfWriter, prevName, names)
    finally:
      # quit the smtp server, also when sending failed
      smtp.quit()
    def add_omr_marks(self, pdf_data, is_latest_document):
        """Overlay OMR marks on the even-indexed (recto) pages of a PDF.

        :param pdf_data: raw content of the PDF to mark
        :param is_latest_document: when true, the page at index
            ``total_pages // 2`` is passed to ``_compute_marks`` as the
            latest page
        :return: raw content of the marked PDF
        """
        # Documentation
        # http://meteorite.unm.edu/site_media/pdf/reportlab-userguide.pdf
        # https://pythonhosted.org/PyPDF2/PdfFileReader.html
        # https://stackoverflow.com/a/17538003
        # https://gist.github.com/kzim44/5023021
        # https://www.blog.pythonlibrary.org/2013/07/16/
        #   pypdf-how-to-write-a-pdf-to-memory/
        self.ensure_one()

        pdf_buffer = StringIO.StringIO()
        pdf_buffer.write(pdf_data)

        existing_pdf = PdfFileReader(pdf_buffer)
        output = PdfFileWriter()
        total_pages = existing_pdf.getNumPages()

        # print latest omr mark on latest pair page (recto)
        # NOTE(review): total_pages // 2 is odd for e.g. 2 or 3 pages, in
        # which case no page is ever flagged as latest - confirm the intent.
        latest_omr_page = total_pages // 2

        for page_number in range(total_pages):
            page = existing_pdf.getPage(page_number)
            # only print omr marks on pair pages (recto)
            # (value comparison: "is 0" only worked through int caching)
            if page_number % 2 == 0:
                is_latest_page = is_latest_document and \
                    page_number == latest_omr_page
                marks = self._compute_marks(is_latest_page)
                omr_layer = self._build_omr_layer(marks)
                page.mergePage(omr_layer)
            output.addPage(page)

        out_buffer = StringIO.StringIO()
        output.write(out_buffer)

        return out_buffer.getvalue()
Beispiel #12
0
def joinpdf(folder=TMPFOLDER,startpage=INDEX,outputname='freecad.pdf'):
    """creates one pdf file from several others, following order from startpage

    The first <ul>...</ul> block of ``<folder>/<startpage>.html`` provides
    the page order (its href targets); the start page itself is inserted in
    second position.  For every target accepted by exists(), the matching
    ``<folder>/<name>.pdf`` is appended to ``OUTPUTPATH/<outputname>``.
    Files that cannot be read are reported and skipped.
    """
    if VERBOSE: print ("Building table of contents...")
    with open(folder+os.sep+startpage+'.html') as f:
        html = f.read()
    html = html.replace("\n"," ")
    html = html.replace("> <","><")
    html = re.findall("<ul.*/ul>",html)[0]
    pages = re.findall('href="(.*?)"',html)
    pages.insert(1,startpage+".html")
    result = PdfFileWriter()
    # Input files must stay open until the merged document is written.
    handles = []
    try:
        for p in pages:
            if not exists(p[:-5]):
                continue
            if VERBOSE: print ('Appending',p)
            handle = None
            try:
                handle = open(folder+os.sep+p[:-5]+'.pdf','rb')
                inputfile = PdfFileReader(handle)
            except Exception:
                if handle is not None:
                    handle.close()
                print ('Unable to append',p)
                continue
            handles.append(handle)
            for i in range(inputfile.getNumPages()):
                result.addPage(inputfile.getPage(i))
        with open(OUTPUTPATH + os.sep + outputname,'wb') as outputfile:
            result.write(outputfile)
    finally:
        for handle in handles:
            handle.close()
    if VERBOSE: print ('Successfully created',OUTPUTPATH,os.sep,outputname)
Beispiel #13
0
    def save(self, to):
        """Render every page class onto the origin PDF's first page and save.

        For each class in ``self.pages`` an overlay is produced with
        ``page_class(self.instance).save()`` and merged onto a fresh copy of
        the first page of the origin PDF.

        :param to: target path, or a writable stream (which is closed after
            a successful write)
        :raises RuntimeError: when no origin is defined or it cannot be opened
        """
        import copy

        origin = self.get_origin()

        if not origin:
            raise RuntimeError("Please implement get_origin method or origin attribute")

        try:
            origin_file = open(origin, "rb")
        except IOError:
            raise RuntimeError(u"Failed to open origin file")

        # The origin stays open until the output has been written.
        try:
            existing_pdf = PdfFileReader(origin_file)
            output = PdfFileWriter()

            for page_class in self.pages:
                new_page = page_class(self.instance).save()

                # getPage(0) hands back the same page object every time, so
                # merge into a copy; otherwise every output page would carry
                # the overlays of all pages.
                base_page = copy.copy(existing_pdf.getPage(0))
                base_page.mergePage(new_page)
                output.addPage(base_page)

            if isinstance(to, basestring):
                with open(to, "wb") as outputStream:
                    output.write(outputStream)
            else:
                output.write(to)
                to.close()
        finally:
            origin_file.close()
Beispiel #14
0
    def __call__(self, data, attachments=[], pages=None):
        """Fill the form with ``data`` and return the assembled PdfFileWriter.

        Every field that has a template is rendered (failures are logged and
        the field skipped), the rendered values go through ``exec_pdftk``,
        the watermarks are merged in, the selected ``pages`` (all of them
        when empty) are copied, and one page per attachment is appended.
        """
        self.rendered = {}
        for name, field_ctx in self.fields.items():
            if "template" in field_ctx:
                self.context = field_ctx
                render_args = self.template_args(data)
                tpl = self.context["template"]

                try:
                    value = tpl.render(**render_args)
                except Exception as err:
                    logger.error("%s: %s %s", name, tpl, err)
                    continue

                # A filter may already have stored this field: keep its value
                if name not in self.rendered:
                    self.rendered[name] = value

        filled = PdfFileReader(self.exec_pdftk(self.rendered))
        for index, mark in self.watermarks:
            filled.getPage(index).mergePage(mark)

        if not pages:
            pages = xrange(filled.getNumPages())

        output = PdfFileWriter()
        for index in pages:
            output.addPage(filled.getPage(index))

        for extra in attachments:
            blank = output.addBlankPage()
            blank.mergePage(extra.pdf())

        return output
class cleanpdf:
    """Copy a PDF to ``<name>5.pdf`` with rewritten document information.

    Instantiating the class does all the work: the input is opened, every
    entry of the new document's information dictionary is overwritten with a
    fixed test string, and all pages are copied into the new file.
    """

    def __init__(self,pathFile):
        """Open ``pathFile`` and immediately write the modified copy."""
        self.pathFile = pathFile
        self.inputFile = open(self.pathFile,"rb")
        try:
            self.pdfInput = PdfFileReader(self.inputFile)
            self.pyPdfOutput = PdfFileWriter()
            self.dataToUpdate = self.pyPdfOutput._info.getObject()
            self.__modifyData()
            self.__copyPDF()
        except Exception:
            # Do not leak the input handle when the copy fails.
            self.inputFile.close()
            raise
        # On success the input stays open: ``pdfInput`` remains usable
        # through the instance attributes.

    def __modifyData(self):
        """Overwrite every existing information entry with the test string."""
        for data in self.dataToUpdate:
            self.dataToUpdate[data] = createStringObject(('<h1 onmouseover=alert(1)>').encode('ascii'))

    def __copyPDF(self):
        """Copy all input pages and write the result to the new file."""
        for page in range(0,self.pdfInput.getNumPages()):
            self.pyPdfOutput.addPage(self.pdfInput.getPage(page))
        # The context manager flushes and closes the output file.
        with open(self.__changeName(),"wb") as outputFile:
            self.pyPdfOutput.write(outputFile)

    def __changeName(self):
        """Return the output path: the input path with its extension replaced by "5.pdf"."""
        import os
        # splitext also copes with paths that have no extension, or a dot
        # only in a directory name.
        return os.path.splitext(self.pathFile)[0] + "5.pdf"
Beispiel #16
0
 def test_cat(self):
     """Make sure files are properly concatenated."""
     check_call([STAPLER, 'cat', ONEPAGE_PDF, FIVEPAGE_PDF,
                 self.outputfile])
     self.assertTrue(os.path.isfile(self.outputfile))
     # one page + five pages; the file is closed again after counting
     with open(self.outputfile, 'rb') as stream:
         pdf = PdfFileReader(stream)
         self.assertEqual(pdf.getNumPages(), 6)
Beispiel #17
0
def split_pset():
    """Split ``pset<N>_answers.pdf`` into one PDF per question.

    ``options.pset`` selects the problem set and ``options.probs`` is a
    comma separated list of page specifications, one per question, which
    get_pages turns into 1-based page numbers.  Question ``k`` is written to
    ``pset<N>/latex/pset<N>-<k>_answer.pdf``.
    """
    if (not options.pset or not options.probs):
        print_err_and_die("You must enter both arguements! run with -h for help")

    path = "pset%s/latex/"%options.pset
    filename = "%spset%s_answers.pdf"%(path, options.pset)
    try:
        source = open(filename, "rb")
    except IOError:
        print_err_and_die("Error! File, %s was not found." % filename)
        return

    # The answers file stays open while the questions are written.
    try:
        inp = PdfFileReader(source)

        ##loop over user input and break up pdf
        for questionNum, prob in enumerate(options.probs.split(","), 1):
            print("Processing question %s" % questionNum)

            out = PdfFileWriter()
            pages = get_pages(prob.strip(), inp.getNumPages())

            for page in pages:
                print("page num " + str(page))
                out.addPage(inp.getPage(int(page)-1))

            with open("%spset%s-%s_answer.pdf"%(path, options.pset, questionNum), "wb") as outStream:
                out.write(outStream)
    finally:
        source.close()

    print("Done!")
Beispiel #18
0
def select(filesandranges, outputfilename, verbose):
    """Write the selected pages of several PDFs into one new file.

    :param filesandranges: list of dicts with the keys ``name`` (path of a
        PDF) and ``pages`` (1-based page numbers to take, in order)
    :param outputfilename: path of the file to create; must not exist yet
    :param verbose: print the request before processing

    Exits with status 2 when an input is missing or unreadable or the output
    already exists, and with status 3 when a page number is out of range.
    """
    if verbose: print (str(filesandranges)+"\noutput: "+str(outputfilename))

    for entry in filesandranges:
        if not os.path.exists(entry['name']):
            halp()
            print ("error: "+entry['name']+" does not exist... exiting nao")
            sys.exit(2) # pdf file is no pdf file...
    if os.path.exists(outputfilename):
        halp()
        print ("error: "+str(outputfilename)+" does already exist... exiting nao")
        sys.exit(2) # pdf file is no pdf file...

    output = PdfFileWriter()
    # Inputs stay open until the output has been written.
    handles = []
    try:
        try:
            for pdf in filesandranges:
                handle = open(pdf["name"], "rb")
                handles.append(handle)
                fiel = PdfFileReader(handle)
                for pagenr in pdf["pages"]:
                    if 1 <= pagenr <= fiel.getNumPages():
                        output.addPage(fiel.getPage(pagenr-1))
                    else:
                        print("one or more pages are not in the chosen PDF")
                        halp()
                        sys.exit(3) #wrong pages or ranges
        except Exception:
            # "except Exception" lets the SystemExit of sys.exit(3) through;
            # a bare except used to turn it into exit status 2.
            halp()
            sys.exit(2) # pdf file is no pdf file...h
        if (not os.path.exists(outputfilename)):
            with open(outputfilename, "wb") as outputStream:
                output.write(outputStream)
        else:
            print ("file exists, discontinuing operation")
    finally:
        for handle in handles:
            handle.close()
Beispiel #19
0
def delete(filesandranges, outputfilename, verbose):
    """Write several PDFs into one new file, leaving out the listed pages.

    :param filesandranges: list of dicts with the keys ``name`` (path of a
        PDF) and ``pages`` (1-based page numbers to drop)
    :param outputfilename: path of the file to create; must not exist yet
    :param verbose: unused

    Exits with status 2 when an input is missing or unreadable or the output
    already exists.
    """
    for entry in filesandranges:
        if not os.path.exists(entry['name']):
            halp()
            print ("error: "+entry['name']+" does not exist... exiting nao")
            sys.exit(2) # pdf file is no pdf file...
    if os.path.exists(outputfilename):
        halp()
        print ("error: "+str(outputfilename)+" does already exist... exiting nao")
        sys.exit(2) # pdf file is no pdf file...

    output = PdfFileWriter()
    # Inputs stay open until the output has been written.
    handles = []
    try:
        try:
            for pdf in filesandranges:
                print (pdf["name"])
                handle = open(pdf["name"], "rb")
                handles.append(handle)
                fiel = PdfFileReader(handle)

                for pagenr in range(1,fiel.getNumPages()+1):
                    if (pagenr not in pdf["pages"]):
                        output.addPage(fiel.getPage(pagenr-1))
        except Exception:
            # Only real errors are handled here; SystemExit is not swallowed.
            halp()
            sys.exit(2) # pdf file is no pdf file...
        if (not os.path.exists(outputfilename)):
            with open(outputfilename, "wb") as outputStream:
                output.write(outputStream)
        else:
            print ("file exists, discontinuing operation")
    finally:
        for handle in handles:
            handle.close()
def getNPersonal(paper):
    """Count the occurrences of " we " and " i " in a paper's PDF.

    The PDF is the first entry of ``paper.links`` whose ``title`` is 'pdf'.
    The count is printed and returned; a count of exactly 1 is reported as 0.
    Returns -1 (after printing "Error reading file") when there is no PDF
    link or the file cannot be downloaded or parsed.
    """
    pdfURL = None
    for link in paper.links:
        try:
            if link.title == 'pdf':
                pdfURL = link['href']
                break
        except AttributeError:
            continue
    if pdfURL is None:
        print("Error reading file")
        return -1

    try:
        remote = urlopen(Request(pdfURL))
        try:
            rFile = remote.read()
        finally:
            remote.close()
        pdfFile = PdfFileReader(StringIO(rFile))

        thisNPersonal = 0
        for page in range(0, pdfFile.getNumPages()):
            pageStr = pdfFile.getPage(page).extractText().lower()
            thisNPersonal += pageStr.count(' we ')
            thisNPersonal += pageStr.count(' i ')
    except Exception:
        print("Error reading file")
        return -1

    # a single hit is treated as noise
    thisNPersonal = 0 if thisNPersonal == 1 else thisNPersonal
    print(thisNPersonal)
    return thisNPersonal
Beispiel #21
0
def process_file(f):
    """Splits the file into parts if necessary,
    then adds it to the global queue.

    ``f`` is a file name relative to ``path_to_watch``.  Files that do not
    end in ".pdf" or cannot be read are logged and skipped.  Documents with
    more than ``10 + real_leeway`` pages are handed to split_file, all
    others are appended to ``file_queue``.
    """
    global file_queue
    filename = path_to_watch + "/" + f
    # Non-pdfs are not supported
    if not filename.endswith(".pdf"):
        log("Not a valid PDF file.")
        return

    fp = None
    try:
        try:
            fp = open(filename, 'rb')
            pdf_f = PdfFileReader(fp)
        except IOError as e:
            log("ERROR: Unable to process file "+filename)
            log(str(e))
            return
        except Exception as e:
            # was "except e:", which raised a NameError instead of logging
            log("ERROR: Unable to read PDF File")
            log(str(e))
            return

        if pdf_f.getNumPages() > (10 + real_leeway):
            split_file(pdf_f, filename)
        else:
            file_queue.append(filename)
    finally:
        # close the file on every path, including the early returns
        if fp is not None:
            fp.close()
Beispiel #22
0
def pdf(coursesid,examsid):
    '''Creates a blank PDF of this exam.

    A one page overlay containing the text "Hello" is merged onto the first
    page of the template PDF and the result is returned as an
    application/pdf response.  ``coursesid`` and ``examsid`` are not used yet.
    '''
    # TODO: Obviously fix this up to generate actual PDFs; this is just a proof of concept
    from reportlab.pdfgen import canvas
    from reportlab.lib.pagesizes import letter
    from pyPdf import PdfFileWriter, PdfFileReader
    from io import BytesIO

    overlay = BytesIO()

    p = canvas.Canvas(overlay, pagesize=letter)
    p.drawString(100, 100, 'Hello')
    p.save()

    overlay.seek(0)
    new_pdf = PdfFileReader(overlay)

    # The template stays open until the merged page has been written.
    with open('/home/treece/src/web/bubbleck/res/Template.pdf', 'rb') as template:
        existing_pdf = PdfFileReader(template)
        out = PdfFileWriter()
        page = existing_pdf.getPage(0)
        page.mergePage(new_pdf.getPage(0))
        out.addPage(page)
        result = BytesIO()
        # write() fills the buffer and returns None: the response body has
        # to come from the buffer, not from the return value.
        out.write(result)

    response = make_response(result.getvalue())
    response.headers['Content-Disposition'] = 'inline; filename="sakulaci.pdf"'
    response.mimetype = 'application/pdf'

    return response
Beispiel #23
0
	def choose_file(self,widget,data=None):
		"""Let the user pick a PDF and show its text in ``textbuffer``.

		The text of all pages of the selected file is extracted, whitespace
		(including non-breaking spaces) is collapsed to single spaces and
		the result is put into the global ``textbuffer``.  Cancelling the
		dialog leaves the buffer untouched.
		"""
		global textbuffer
		dialog = gtk.FileChooserDialog("Open..",
			None,
			gtk.FILE_CHOOSER_ACTION_OPEN,
			(gtk.STOCK_CANCEL, gtk.RESPONSE_CANCEL,
			gtk.STOCK_OPEN, gtk.RESPONSE_OK))
		# destroy the dialog even when reading the PDF fails
		try:
			dialog.set_default_response(gtk.RESPONSE_OK)
			pdf_filter = gtk.FileFilter()
			pdf_filter.set_name("PDF files")
			pdf_filter.add_pattern("*.pdf")
			dialog.add_filter(pdf_filter)
			response = dialog.run()

			if response == gtk.RESPONSE_OK:
				filename = dialog.get_filename()
				print("%s selected" % filename)
				from pyPdf import PdfFileReader
				# read the file the user selected (was a hard-coded name)
				with open(filename, "rb") as stream:
					pdf = PdfFileReader(stream)
					# Extract text from every page
					texts = [pdf.getPage(i).extractText()
						for i in range(0, pdf.getNumPages())]
				# Collapse whitespace; pages are separated by a real newline
				# so that they do not run into each other
				content = " ".join("\n".join(texts).replace(u"\xa0", " ").split())
				textbuffer.set_text(content)

			elif response == gtk.RESPONSE_CANCEL:
				print('Closed, no files selected')
		finally:
			dialog.destroy()
Beispiel #24
0
 def output(self):
     """Export the pages listed in the line edit to a PDF chosen by the user.

     Nothing happens when the save dialog is cancelled.  The page
     specification is parsed before the target file is opened, so an
     invalid specification does not leave a truncated file behind.
     """
     # get the output filename using the file dialog
     (out_filename, selected_filter) = \
         QFileDialog.getSaveFileName(parent = self,
                                     caption = self.tr(u'Export'),
                                     dir = '',
                                     filter = self.tr('pdf (*.pdf)'))
     if not out_filename:
         # dialog cancelled
         return

     # extract input
     pages_string = self.pages_line_edit.text()

     # file IO: both files are closed even when an error occurs
     with open(self.in_filename, 'rb') as in_file:
         in_reader = PdfFileReader(in_file)
         out_writer = PdfFileWriter()

         # Get the indices of pages  to extract
         pages = pages_parser(in_reader.getNumPages()).parse(pages_string)

         # append pages to output writer
         for page_index in pages:
             out_writer.addPage(in_reader.getPage(page_index))

         # write to file
         with open(out_filename, 'wb') as out_file:
             out_writer.write(out_file)
Beispiel #25
0
def showpdf(request):
    """Return the first page of an extracted PDF with the signature image on it.

    GET parameters:
    f -- file name below MEDIA_ROOT/pdffiles/extracted
    o -- 'l' places the signature at x=529, anything else at x=70

    Responds with status 400 when ``f`` is missing or points outside the
    extracted directory, and with status 404 when the file cannot be opened.
    """
    sign = os.path.join(settings.MEDIA_ROOT, "signature.png")
    mimetypes.init()

    requested = request.GET.get('f')
    if not requested:
        return HttpResponse(status=400)

    # ``f`` comes straight from the query string: make sure it cannot
    # escape the directory holding the extracted files.
    base_dir = os.path.abspath(
        os.path.join(settings.MEDIA_ROOT, 'pdffiles', 'extracted'))
    pdf_path = os.path.abspath(os.path.join(base_dir, requested))
    if not pdf_path.startswith(base_dir + os.sep):
        return HttpResponse(status=400)

    try:
        fr = open(pdf_path, "rb")
    except IOError:
        return HttpResponse(status=404)

    try:
        imgTemp = StringIO()
        imgDoc = canvas.Canvas(imgTemp)
        if request.GET.get('o') == 'l':
            imgDoc.drawImage(sign, 529, 40, 290/2, 154/2)
        else:
            imgDoc.drawImage(sign, 70, 40, 290/2, 154/2)
        imgDoc.save()

        overlay = PdfFileReader(StringIO(imgTemp.getvalue())).getPage(0)
        page = PdfFileReader(fr).getPage(0)
        page.mergePage(overlay)

        pdf_out = PdfFileWriter()
        pdf_out.addPage(page)
        response = HttpResponse(mimetype='application/pdf')
        response['Content-Disposition'] = 'attachment; filename=%s' % os.path.basename(pdf_path)
        pdf_out.write(response)
    finally:
        fr.close()

    return response
Beispiel #26
0
def read_neb_enzyme_price_list():
    """Download the NEB price list PDF and extract the enzyme prices.

    Returns a list of ``(name, small_cost, large_cost, small_unit,
    large_unit)`` tuples sorted by name; the numeric fields are converted
    with int_comma.
    """
    # throws URLError, IOError
    price_list = urllib2.urlopen(NEB_PRICE_LIST_URL)
    try:
        file_buffer = StringIO(price_list.read())
    finally:
        # the download is fully buffered, release the connection right away
        price_list.close()

    reader = PdfFileReader(file_buffer)
    enzymes = []
    for p in range(reader.getNumPages()):
        # fi/fl misread hacks-- little nasty in here-- poor PDF read
        text = reader.getPage(p).extractText().replace(u'\u02dc','fi').replace(u'˚','fl')
        for match in NEB_PRICE_LINE_RE.finditer(text):
            # format of the groups will be: name prefix, lastletter(+supplement)+small_cost, supplement, large_cost, small_unit, large_unit
            name_prefix, transition, supplement, large_cost, small_unit, large_unit = match.groups()
            if supplement:
                # the name ends after the supplement, the small cost follows
                carryover = transition.index(supplement)+len(supplement)
                name = "%s%s" % (name_prefix, transition[:carryover])
                small_cost = int_comma(transition[carryover:])
            else:
                # the first character still belongs to the name
                name = "%s%s" % (name_prefix, transition[0])
                small_cost = int_comma(transition[1:])

            large_cost = int_comma(large_cost)
            small_unit = int_comma(small_unit)
            large_unit = int_comma(large_unit)

            enzymes.append((name, small_cost, large_cost, small_unit, large_unit))

    return sorted(enzymes, key=operator.itemgetter(0))
Beispiel #27
0
    def add_guides(self):
        """Copy ``sig.pdf`` to ``sigs.pdf`` with guides merged onto page one.

        The guides are generated with generate_longarm() when
        ``self.args.longarm`` is set, otherwise with generate_shortarm() on
        an A5 or A4 landscape sized page depending on ``self.args.a5``.
        """
        # Both files are closed when done, even if an error occurs; the
        # input stays open until the output has been written.
        with open('sig.pdf', 'rb') as source:
            pdf_in = PdfFileReader(source)
            pdf_out = PdfFileWriter()

            for i in range(pdf_in.getNumPages()):
                page = pdf_in.getPage(i)
                if i == 0:
                    # only the first page gets the guides
                    guides = StringIO()

                    if self.args.longarm:
                        create_pdf(
                            guides, a4lwidth_pt, a4lheight_pt, generate_longarm())
                    else:
                        if self.args.a5:
                            w, h = a5width_pt, a5height_pt
                        else:
                            w, h = a4lwidth_pt, a4lheight_pt
                        create_pdf(guides, w, h, generate_shortarm(
                            self.args.a5, bool(self.args.signature)))

                    pdf_guides = PdfFileReader(guides)
                    page.mergePage(pdf_guides.getPage(0))
                pdf_out.addPage(page)

            with open('sigs.pdf', 'wb') as target:
                pdf_out.write(target)
Beispiel #28
0
    def watermark( self, pdfStr, watermarkFile, spec ):
        """Put the first page of a watermark PDF behind the document's pages.

        :param pdfStr: stream holding the generated document
        :param watermarkFile: path of the PDF whose first page is the watermark
        :param spec: Mark.FIRST_PAGE marks only the first page,
            Mark.ALL_PAGES every page; any other value marks nothing
        :return: ``self.outputFile`` when it is set (the result is written
            there), otherwise the resulting PDF as a string
        """
        # The watermark file stays open until the output has been produced.
        watermarkStream = open( watermarkFile, "rb" )
        try:
            # Read the watermark- and document pdf file
            inputWatermark = PdfFileReader( watermarkStream )
            generatedPdf = PdfFileReader( pdfStr )
            outputPdf = PdfFileWriter()

            # Loop over source document pages and merge with the first page
            # of the watermark file.
            watermarkPage = inputWatermark.getPage(0)
            for index, page in enumerate( generatedPdf.pages ):
                if spec == Mark.ALL_PAGES or (spec == Mark.FIRST_PAGE and index == 0):
                    # copy the watermark page here, otherwise the watermark
                    # page gets merged over and over because p would only be
                    # a reference
                    p = copy.copy( watermarkPage )
                    p.mergePage( page )
                    outputPdf.addPage( p )
                else:
                    outputPdf.addPage( page )

            if self.outputFile:
                # Write to outputfile
                with open( self.outputFile, "wb" ) as outputStream:
                    outputPdf.write( outputStream )
                return self.outputFile

            stringIO = StringIO.StringIO()
            outputPdf.write( stringIO )
            return stringIO.getvalue()
        finally:
            watermarkStream.close()
Beispiel #29
0
def _render_donor_page(template_svg, base, donor_url):
    """Personalise one SVG template and export it to ``<base>.pdf`` with Inkscape."""
    import subprocess

    svg_path = '%s.svg' % base
    with open(template_svg) as template:
        lines = template.readlines()
    with open(svg_path, 'w') as svg:
        for line in lines:
            # same effect as sed "s|/France/|/<donor>/|": first match per
            # line, but without handing the donor name to a shell
            svg.write(line.replace('/France/', '/%s/' % donor_url, 1))

    with open(os.devnull, 'w') as devnull:
        # argument lists (no shell) keep quotes or spaces in the donor name
        # from being interpreted as shell syntax
        subprocess.call(['inkscape', '--file=%s' % svg_path,
                         '--verb=za.co.widgetlabs.update',
                         '--verb=FileSave', '--verb=FileQuit'],
                        stderr=devnull)
        subprocess.call(['inkscape', '--file=%s' % svg_path,
                         '--export-pdf=%s.pdf' % base],
                        stderr=devnull)


def generate(donor):
    """Create the two page PDF ``output/<donor>.pdf`` for a donor.

    Both SVG templates (``page1_svg`` and ``page2_svg``) are personalised by
    replacing "/France/" with the URL-quoted donor name, exported to PDF
    with Inkscape and combined.  Nothing is done when the combined file
    already exists.  Inkscape's stderr is discarded for every call.
    """
    if not os.path.isdir('output'):
        os.makedirs('output')
    slug = donor.replace(' ','-').lower()
    donor_url = donor.replace(' ','%20')
    page1 = 'output/%s1' % slug
    page2 = 'output/%s2' % slug

    combined = 'output/%s.pdf' % slug
    if os.path.exists(combined): return

    _render_donor_page(page1_svg, page1, donor_url)
    _render_donor_page(page2_svg, page2, donor_url)

    # Merge pages; the inputs stay open until the output has been written
    with open('%s.pdf' % (page1), 'rb') as first:
        with open('%s.pdf' % (page2), 'rb') as second:
            output = PdfFileWriter()
            output.addPage(PdfFileReader(first).getPage(0))
            output.addPage(PdfFileReader(second).getPage(0))
            with open(combined, 'wb') as outputStream:
                output.write(outputStream)
    sleep(2)
Beispiel #30
0
def main():
    """Concatenate the PDF files given on the command line.

    The files are combined in the given order into a new file in the
    current directory named after the local time
    (``YYYY-MM-DD_hh-mm-ss.pdf``).  The usage text is printed when no file
    is given; a single file is refused.
    """
    # Parse command line
    pdf_files = sys.argv[1:]
    if not pdf_files:
        print(__usage__)
        sys.exit()

    # Make sure there is more than one pdf file
    if len(pdf_files) == 1:
        print("In the spirit of gnu tar, this script cowardly refuses to")
        print("combine one pdf file!")
        sys.exit()

    # Create unique name for output file
    output_file = time.strftime("%Y-%m-%d_%H-%M-%S.pdf")

    # Combine pdf files in order; the inputs stay open until the result has
    # been written and are closed afterwards, even on error.
    output = PdfFileWriter()
    handles = []
    try:
        for pdf in pdf_files:
            handle = open(pdf, "rb")
            handles.append(handle)
            reader = PdfFileReader(handle)
            for i in range(reader.getNumPages()):
                output.addPage(reader.getPage(i))

        # Write final pdf
        with open(output_file, "wb") as stream:
            output.write(stream)
    finally:
        for handle in handles:
            handle.close()
Beispiel #31
0
	"es-co" : "spa",	"es" : "spa",	"de-de" : "deu",	"fr-fr" : "fra",	"fr-ca" : "fra"
}
# dictionary for /Root/Lang 1 - except; 2 - a file have not /Root/Lang; 3 - /Root/Lang = ''; 4 - language
ans_list = dict()


# dir of folder and filter for pdf files
files = [f for f in os.listdir('trainPDF') if os.path.isfile(os.path.join('trainPDF', f))]
files = list(filter(lambda f: f.endswith(('.pdf','.PDF')), files))

f = open("Langs.txt", "w")

for filepdf in files:
	try:
		name = 'IMAGES/'+filepdf.replace('pdf','jpg')
		pdfFile = PdfFileReader(file('trainPDF/'+filepdf, 'rb'))
		catalog = pdfFile.trailer['/Root'].getObject()
		if catalog.has_key("/Lang"):
			value = 4 
			lang = catalog['/Lang'].getObject()
			if (lang == ''):
				value = 3
				f.write(filepdf+" "+lang+" value = "+str(value)+"\n")
				ans_list.update( {name : [value,'None']} )
                        else:
				lang = lang.lower()
				language = lan_lst.get(lang)
				f.write(filepdf+" "+lang+" => "+language+" value = "+str(value)+"\n")
				ans_list.update( {name : [value,language]} )
		else:
			value = 2
Beispiel #32
0
 def create_source_pdf(self, cr, uid, ids, data, report_xml, context=None):
     """Create the report for ``ids``, reusing or storing attachments.

     When ``report_xml.attachment`` is set, one report is produced per
     object: a stored attachment is reused when allowed, otherwise the
     report is created with create_single_pdf and saved as a new
     attachment.  PDF results are concatenated and returned as
     ``(content, 'pdf')``.  In every other case the report is created in
     one go for all ``ids``.  Returns False as soon as one single report
     could not be created.
     """
     if not context:
         context = {}
     registry = openerp.registry(cr.dbname)
     attach = report_xml.attachment
     if attach:
         objs = self.getObjects(cr, uid, ids, context)
         results = []
         for obj in objs:
             # NOTE(review): ``attach`` is evaluated as a Python expression;
             # it must only ever come from trusted report configuration.
             aname = eval(attach, {'object': obj, 'time': time})
             result = False
             # Reuse an existing attachment when the report allows it and the
             # caller did not disable it through the context.
             if report_xml.attachment_use and aname and context.get(
                     'attachment_use', True):
                 aids = registry['ir.attachment'].search(
                     cr, uid, [('datas_fname', '=', aname + '.pdf'),
                               ('res_model', '=', self.table),
                               ('res_id', '=', obj.id)])
                 if aids:
                     brow_rec = registry['ir.attachment'].browse(
                         cr, uid, aids[0])
                     # An attachment without content contributes nothing and
                     # the object is skipped.
                     if not brow_rec.datas:
                         continue
                     d = base64.decodestring(brow_rec.datas)
                     results.append((d, 'pdf'))
                     continue
             result = self.create_single_pdf(cr, uid, [obj.id], data,
                                             report_xml, context)
             if not result:
                 return False
             if aname:
                 try:
                     name = aname + '.' + result[1]
                     # Remove the default_type entry from the context: this
                     # is for instance used on the account.account_invoices
                     # and is thus not intended for the ir.attachment type
                     # field.
                     ctx = dict(context)
                     ctx.pop('default_type', None)
                     registry['ir.attachment'].create(
                         cr,
                         uid, {
                             'name': aname,
                             'datas': base64.encodestring(result[0]),
                             'datas_fname': name,
                             'res_model': self.table,
                             'res_id': obj.id,
                         },
                         context=ctx)
                 except Exception:
                     #TODO: should probably raise a proper osv_except instead, shouldn't we? see LP bug #325632
                     # Failing to store the attachment is only logged; the
                     # freshly created report is still returned.
                     _logger.error(
                         'Could not create saved report attachment',
                         exc_info=True)
             results.append(result)
         if results:
             # Only the format of the first result is checked before all
             # results are concatenated as PDF.
             if results[0][1] == 'pdf':
                 from pyPdf import PdfFileWriter, PdfFileReader
                 output = PdfFileWriter()
                 for r in results:
                     reader = PdfFileReader(cStringIO.StringIO(r[0]))
                     for page in range(reader.getNumPages()):
                         output.addPage(reader.getPage(page))
                 s = cStringIO.StringIO()
                 output.write(s)
                 return s.getvalue(), results[0][1]
     # No attachment expression, no results, or a non-PDF format: create the
     # report for all ids at once.
     return self.create_single_pdf(cr, uid, ids, data, report_xml, context)
Beispiel #33
0
#!/usr/bin/env python
import copy, sys
from pyPdf import PdfFileWriter, PdfFileReader
# Read a PDF from stdin and write a PDF to stdout in which every source page
# appears twice in a row: first with its media box ending at the horizontal
# midpoint, then (as a shallow copy) with its media box starting there.
input = PdfFileReader(sys.stdin)
output = PdfFileWriter()
for index in range(input.getNumPages()):
    first_half = input.getPage(index)
    # The copy is taken before either media box is touched.
    second_half = copy.copy(first_half)
    width, height = first_half.mediaBox.upperRight
    midpoint = (width / 2, height)
    first_half.mediaBox.upperRight = midpoint
    second_half.mediaBox.upperLeft = midpoint
    output.addPage(first_half)
    output.addPage(second_half)
output.write(sys.stdout)
        prog='crop',
        description='"%(prog)s" split pdfs',
    )

    p.add_argument(
        '-i', '--input',
        type=str,
        required=True,
        help='Input pdf',
    )

    return p

if __name__ == '__main__':

	# Split the PDF given with -i/--input into one single-page PDF per page.
	p = create_parsers()
	args = p.parse_args()

	input_filename = args.input
	# "name.pdf" -> ("name", ".pdf"); page i is written to "name<i>.pdf".
	output_filename, output_extension = os.path.splitext(input_filename)

	# Keep the source stream open for as long as pages are being fetched from
	# the reader, and make sure it is closed afterwards (it used to be opened
	# inline and never closed).
	with open(input_filename, "rb") as inputStream:
		inputpdf = PdfFileReader(inputStream)

		for i in xrange(inputpdf.numPages):
			output = PdfFileWriter()
			output.addPage(inputpdf.getPage(i))

			with open(output_filename + str(i) + output_extension, "wb") as outputStream:
				output.write(outputStream)
Beispiel #35
0
#!/usr/bin/python

import sys
from os import system, remove
from tempfile import mkstemp
import random

from pyPdf import PdfFileWriter, PdfFileReader

# Script: open the PDF named by the first command-line argument, shuffle its
# page numbers and start writing a new PDF to a temporary ".pdf" file.

# read input pdf and instantiate output pdf
output = PdfFileWriter()
input1 = PdfFileReader(file(sys.argv[1], "rb"))

# construct and shuffle page number list
pages = list(range(input1.getNumPages()))
random.shuffle(pages)

# display new sequence
print 'Reordering pages according to sequence:'
print pages

# add the first page of the shuffled sequence to the output pdf
# NOTE(review): only pages[0] is added, although the whole shuffled sequence
# is printed above -- confirm whether every page was meant to be added.
if len(pages) > 0:
    output.addPage(input1.getPage(pages[0]))

# write the output pdf to file

# mkstemp returns an already-open descriptor (fh) together with the path.
# NOTE(review): fh is not used or closed in the visible code; the path is
# opened a second time below -- confirm the descriptor is released later.
[fh, tmpfile] = mkstemp(suffix='.pdf')
print tmpfile

outputStream = file(tmpfile, 'wb')
Beispiel #36
0
def _valor_magic(info, etiqueta):
    """Return the value that follows ``etiqueta:`` in a libmagic description.

    ``etiqueta`` is inserted verbatim into a regular expression, so it must
    not contain regex metacharacters. The value runs up to the next comma.
    Returns '#' when the label does not occur in ``info``.
    """
    coincidencias = re.findall(etiqueta + r':.*', info)
    if not coincidencias:
        return '#'
    # Split on the first colon only, so a value that itself contains colons
    # (for example a time of day) is not cut short.
    return coincidencias[0].split(':', 1)[1].split(',')[0]


def _uri_namespace(core_xml, prefijo):
    """Return the URI declared as ``xmlns:<prefijo>`` in the raw XML text.

    Raises IndexError when the declaration is missing.
    """
    declaraciones = re.findall(r'xmlns:%s="https?:.*"' % prefijo, core_xml)
    return declaraciones[0].split('"')[1]


def _texto_xpath(doc, expresion, ns):
    """Return the transliterated text of the first XPath match, or '#'."""
    nodos = doc.xpath(expresion, namespaces=ns)
    if len(nodos) > 0:
        return unidecode.unidecode(unicode(nodos[0].text))
    return '#'


def _valor_etiqueta(app_xml, etiqueta):
    """Return the text between ``<etiqueta>`` and its closing tag, or '#'."""
    hallados = re.findall(r'<%s>.*</%s>' % (etiqueta, etiqueta), app_xml)
    if len(hallados) > 0:
        return hallados[0].split('>')[1].split('<')[0]
    return '#'


def analizar_file(fichero):
    """Extract document metadata from a PDF or Microsoft Office file.

    The file type is decided from the libmagic description of ``fichero``
    and, for legacy Office files, from the file name extension.

    Args:
        fichero: path of the file to analyse.

    Returns:
        A dict of metadata in which unknown values are the string '#'.
        An empty dict when a PDF or an Office zip container cannot be opened.
        None when the file matches none of the handled types.

    Raises:
        KeyError: an Office zip lacks docProps/core.xml or docProps/app.xml.
        IndexError: core.xml lacks one of the cp/dc/dcterms namespaces.

    Written for Python 2 (relies on the ``unicode`` builtin).
    """
    ext = fichero.split('.')[-1]
    extension = magic.from_file(fichero)

    if 'PDF' in extension:
        # Analyse the metadata of a PDF file.
        metadata_pdf = {}
        tipo_metadatos = [
            'Title', 'CreationDate', 'Author', 'Producer', 'Creator',
            'ModDate', 'Company', 'Comments', 'Keywords', 'SourceModified',
            'Subject'
        ]

        try:
            pdf_file = open(fichero, "rb")
        except (IOError, OSError):
            return metadata_pdf

        # The handle is closed once the metadata has been copied out of it.
        try:
            try:
                pdf_toread = PdfFileReader(pdf_file)
            except Exception:
                return metadata_pdf

            pdf_info = pdf_toread.getDocumentInfo()

            for i in tipo_metadatos:
                metadata_pdf[i] = '#'

            # A PDF without a document information dictionary yields None;
            # in that case only the '#' placeholders are reported.
            if pdf_info:
                for k, v in pdf_info.items():
                    # Keys look like "/Title"; drop the leading slash.
                    clave = unidecode.unidecode(unicode(k.split('/')[1]))
                    metadata_pdf[clave] = unidecode.unidecode(unicode(v))
        finally:
            pdf_file.close()

        metadata_pdf['Fichero'] = fichero
        metadata_pdf['Tipo'] = 'PDF'

        return metadata_pdf

    if ext == 'doc' or ext == 'ppt' or ext == 'xls':
        # Legacy Office format: not a zip container, so the metadata is
        # taken from the libmagic description obtained above.
        info = extension

        return {
            'Fichero': fichero,
            'Tipo': ext,
            'creator': _valor_magic(info, 'Author'),
            'lastModifiedBy': _valor_magic(info, 'Last Saved By'),
            'created': _valor_magic(info, 'Create Time/Date'),
            'modified': _valor_magic(info, 'Saved Time/Date'),
            'title': _valor_magic(info, 'Title'),
            'revision': _valor_magic(info, 'Revision Number'),
            'lastPrinted': _valor_magic(info, 'Last Printed'),
            'keywords': '#',
            'Application': _valor_magic(info, 'Creating Application'),
            'Paginas': _valor_magic(info, 'Pages'),
            'Palabras': _valor_magic(info, 'Words'),
            'Caracteres': _valor_magic(info, 'Characters'),
            'Lineas': '#',
            'Parrafos': '#',
            'Slides': '#',
            'PresentationFormat': '#'
        }

    if 'Word' in extension or 'Excel' in extension or 'PowerPoint' in extension:
        try:
            zf = zipfile.ZipFile(fichero)
        except Exception:
            return {}

        # Read both property files up front so the archive can be closed
        # before any parsing happens.
        try:
            core_xml = zf.read('docProps/core.xml')
            app_xml = zf.read('docProps/app.xml')
        finally:
            zf.close()

        # Namespace map for the XPath queries, taken from the declarations
        # found in core.xml itself.
        ns = {
            'dc': _uri_namespace(core_xml, 'dc'),
            'dcterms': _uri_namespace(core_xml, 'dcterms'),
            'cp': _uri_namespace(core_xml, 'cp'),
        }

        doc = lxml.etree.fromstring(core_xml)

        return {
            'Fichero': fichero,
            'Tipo': ext,
            # Metadata found in core.xml.
            'creator': _texto_xpath(doc, '//dc:creator', ns),
            'lastModifiedBy': _texto_xpath(doc, '//cp:lastModifiedBy', ns),
            'created': _texto_xpath(doc, '//dcterms:created', ns),
            'modified': _texto_xpath(doc, '//dcterms:modified', ns),
            'title': _texto_xpath(doc, '//dc:title', ns),
            'revision': _texto_xpath(doc, '//cp:revision', ns),
            'lastPrinted': _texto_xpath(doc, '//cp:lastPrinted', ns),
            'keywords': _texto_xpath(doc, '//cp:keywords', ns),
            # Metadata found in app.xml.
            'Application': _valor_etiqueta(app_xml, 'Application'),
            'Paginas': _valor_etiqueta(app_xml, 'Pages'),
            'Palabras': _valor_etiqueta(app_xml, 'Words'),
            'Caracteres': _valor_etiqueta(app_xml, 'Characters'),
            'Lineas': _valor_etiqueta(app_xml, 'Lines'),
            'Parrafos': _valor_etiqueta(app_xml, 'Paragraphs'),
            'Slides': _valor_etiqueta(app_xml, 'Slides'),
            'PresentationFormat': _valor_etiqueta(app_xml,
                                                  'PresentationFormat')
        }
Write a script "cover_the_emperor.py" that appends the chapter 8 practice
file named "The Emperor.pdf" to the end of the chapter 8 practice file
named "Emperor cover sheet.pdf" and outputs the full resulting PDF to the
file "The Covered Emperor.pdf" in the chapter 8 practice files Output
folder.

The script needs the practice PDF files to work with; they can be found in
the book materials.
'''

import os
from pyPdf import PdfFileReader, PdfFileWriter

path = "/Users/srg/practice_files"
inputFileName = os.path.join(path, "The Emperor.pdf")
coverFileName = os.path.join(path, "Emperor cover sheet.pdf")
outputFileName = os.path.join(path, "Output/The Covered Emperor.pdf")

# Both source files stay open until the combined document has been written,
# and are closed afterwards even if something fails on the way.
with open(inputFileName, "rb") as inputStream, \
        open(coverFileName, "rb") as coverStream:
    inputFile = PdfFileReader(inputStream)
    coverFile = PdfFileReader(coverStream)
    outputPDF = PdfFileWriter()

    # The cover sheet goes first ...
    for pageNum in range(0, coverFile.getNumPages()):
        outputPDF.addPage(coverFile.getPage(pageNum))

    # ... and "The Emperor" is appended to the end of it.
    for pageNum in range(0, inputFile.getNumPages()):
        page = inputFile.getPage(pageNum)
        outputPDF.addPage(page)

    with open(outputFileName, "wb") as outputFile:
        outputPDF.write(outputFile)
Beispiel #38
0
	def update_file_info(self, file):
		"""Fill the string attributes of ``file`` from the file's metadata.

		Every attribute is first reset to ''. For objects whose URI scheme is
		'file', the attributes matching the MIME type (mp3, image, video/audio,
		PDF) are then filled in on a best-effort basis: a value that cannot be
		read becomes a placeholder string instead of raising.

		Args:
			file: object offering add_string_attribute(name, value),
				get_uri_scheme(), get_uri() and is_mime_type(mime).

		Returns:
			None. For non-'file' URIs the method returns right after the
			defaults are set, without calling self.get_columns().
		"""
		# set defaults to blank
		for attribute in (
				'title', 'album', 'artist', 'tracknumber', 'genre', 'date',
				'bitrate', 'samplerate', 'length', 'exif_datetime_original',
				'exif_software', 'exif_flash', 'exif_pixeldimensions',
				'pixeldimensions'):
			file.add_string_attribute(attribute, '')

		if file.get_uri_scheme() != 'file':
			return

		# strip file:// to get absolute path
		filename = urllib.unquote(file.get_uri()[7:])

		# mp3 handling
		if file.is_mime_type('audio/mpeg'):
			id3_tags = ('title', 'album', 'artist', 'tracknumber', 'genre', 'date')
			# attempt to read ID3 tag
			try:
				audio = EasyID3(filename)
				# any single tag may be missing, so each one is tried on its own
				for tag in id3_tags:
					try:
						file.add_string_attribute(tag, audio[tag][0])
					except Exception:
						file.add_string_attribute(tag, "[n/a]")
			except Exception:
				# [SabreWolfy] some files have no ID3 tag and will throw this exception:
				for tag in id3_tags:
					file.add_string_attribute(tag, "[no ID3]")

			# try to read MP3 information (bitrate, length, samplerate)
			mpfile = None
			try:
				# binary mode: the stream is audio data, not text
				mpfile = open(filename, 'rb')
				mpinfo = MPEGInfo(mpfile)
				file.add_string_attribute('bitrate', str(mpinfo.bitrate/1000) + " Kbps")
				file.add_string_attribute('samplerate', str(mpinfo.sample_rate) + " Hz")
				# [SabreWolfy] added consistent formatting of times in format hh:mm:ss
				# [SabreWolfy] to allow for correct column sorting by length
				mp3length = "%02i:%02i:%02i" % ((int(mpinfo.length/3600)), (int(mpinfo.length/60%60)), (int(mpinfo.length%60)))
				file.add_string_attribute('length', mp3length)
			except Exception:
				file.add_string_attribute('bitrate', "[n/a]")
				file.add_string_attribute('length', "[n/a]")
				file.add_string_attribute('samplerate', "[n/a]")
			finally:
				# mpfile is still None when open() itself failed
				if mpfile is not None:
					try:
						mpfile.close()
					except Exception:
						pass

		# image handling
		if any(file.is_mime_type(mime) for mime in (
				'image/jpeg', 'image/png', 'image/gif', 'image/bmp')):
			# EXIF handling routines
			try:
				metadata = pyexiv2.ImageMetadata(filename)
				metadata.read()
				for attribute, exif_key in (
						('exif_datetime_original', 'Exif.Photo.DateTimeOriginal'),
						('exif_software', 'Exif.Image.Software'),
						('exif_flash', 'Exif.Photo.Flash')):
					try:
						file.add_string_attribute(attribute, str(metadata[exif_key].raw_value))
					except Exception:
						file.add_string_attribute(attribute, "")
				try:
					exif_pixelydimension = metadata['Exif.Photo.PixelYDimension']
					exif_pixelxdimension = metadata['Exif.Photo.PixelXDimension']
					# NOTE(review): this is written as Y 'x' X, whereas 'pixeldimensions'
					# below is size[0] 'x' size[1] -- confirm the intended order.
					file.add_string_attribute('exif_pixeldimensions', str(exif_pixelydimension.raw_value)+'x'+str(exif_pixelxdimension.raw_value))
				except Exception:
					file.add_string_attribute('exif_pixeldimensions', "")
			except Exception:
				# no exif data?
				for attribute in ('exif_datetime_original', 'exif_software',
						'exif_flash', 'exif_pixeldimensions'):
					file.add_string_attribute(attribute, "")
			# try read image info directly
			try:
				im = Image.open(filename)
				file.add_string_attribute('pixeldimensions', str(im.size[0])+'x'+str(im.size[1]))
			except Exception:
				file.add_string_attribute('pixeldimensions', "[image read error]")

		# video/flac handling
		if any(file.is_mime_type(mime) for mime in (
				'video/x-msvideo', 'video/mpeg', 'video/x-ms-wmv', 'video/mp4',
				'audio/x-flac', 'video/x-flv', 'video/x-matroska', 'audio/x-wav')):
			# (attribute name, function computing its value from the parsed info)
			video_fields = (
				('length', lambda info: "%02i:%02i:%02i" % ((int(info.length/3600)), (int(info.length/60%60)), (int(info.length%60)))),
				('pixeldimensions', lambda info: str(info.video[0].width) + 'x'+ str(info.video[0].height)),
				('bitrate', lambda info: str(round(info.audio[0].bitrate/1000))),
				('samplerate', lambda info: str(int(info.audio[0].samplerate))+' Hz'),
				('title', lambda info: info.title),
				('artist', lambda info: info.artist),
				('genre', lambda info: info.genre),
				('tracknumber', lambda info: info.trackno),
				('date', lambda info: info.userdate),
				('album', lambda info: info.album),
			)
			try:
				info = kaa.metadata.parse(filename)
				for attribute, read_value in video_fields:
					try:
						file.add_string_attribute(attribute, read_value(info))
					except Exception:
						file.add_string_attribute(attribute, '[n/a]')
			except Exception:
				# Same attribute names as above; the track number used to be
				# reported under 'track', an attribute nothing else sets.
				for attribute, _ in video_fields:
					file.add_string_attribute(attribute, 'error')

		# pdf handling
		if file.is_mime_type('application/pdf'):
			try:
				f = open(filename, "rb")
				# close the handle even when the reader rejects the file
				try:
					pdf = PdfFileReader(f)
					try:
						file.add_string_attribute('title', pdf.getDocumentInfo().title)
					except Exception:
						file.add_string_attribute('title', "[n/a]")
					try:
						file.add_string_attribute('artist', pdf.getDocumentInfo().author)
					except Exception:
						file.add_string_attribute('artist', "[n/a]")
				finally:
					f.close()
			except Exception:
				file.add_string_attribute('title', "[no info]")
				file.add_string_attribute('artist', "[no info]")

		self.get_columns()
##    print inch
##    can.drawString(0.3*inch, -inch, "Hello World")

    #change cage code
    cage = '55910'
    
    can.drawString(450*mult, start, "ECPVG2")
    can.drawString(450*mult, (start - 15*mult), "CAGE: " + cage)
    can.save()

    #move to the beginning of the StringIO buffer
    packet.seek(0)
    name = 'Dave'
    can.beginForm(name, lowerx=0, lowery=0, upperx=None, uppery=None)
    can.endForm()
    new_pdf = PdfFileReader(packet)
    
    # read your existing PDF
    fname = 'McMaster-Carr_Source_files\\' + i
    existing_pdf = PdfFileReader(file(fname, "rb"))
    output = PdfFileWriter()
    # add the "watermark" (which is the new pdf) on the existing page
    nump = existing_pdf.getNumPages()
    page = existing_pdf.getPage(0)
    for l in range(nump):
        output.addPage(existing_pdf.getPage(l))
    page.mergePage(new_pdf.getPage(0))
    # finally, write "output" to a real file
    outputStream = file(a[0]+"_"+b, "wb")
    output.write(outputStream)
    outputStream.close()
Beispiel #40
0
# 11.1 review exercises

import os
from pyPdf import PdfFileReader, PdfFileWriter

# Review-exercise script: print metadata of "The Whistling Gypsy.pdf", dump
# the text of every page to a .txt file, and prepare a copy of the PDF that
# omits the first page.
path = "C:/Real Python/refactor/chp12/practice_files"
input_file_name = os.path.join(path, "The Whistling Gypsy.pdf")
# NOTE(review): this handle is never closed in the visible code.
input_file = PdfFileReader(open(input_file_name, "rb"))

# Display meta-data about file
print("Title:", input_file.getDocumentInfo().title)
print("Author:", input_file.getDocumentInfo().author)
print("Number of pages:", input_file.getNumPages())

# Specify and open output text file
output_file_name = os.path.join(path, "Output/The Whistling Gypsy.txt")
with open(output_file_name, "w") as output_file:
    # Extract every page of text
    for page_num in range(0, input_file.getNumPages()):
        text = input_file.getPage(page_num).extractText()
        text = text.encode("utf-8")  # encode the extracted text as UTF-8 bytes
        output_file.write(text)

# Save file without cover page
output_PDF = PdfFileWriter()
# Start at index 1 so that page 0 (the cover) is left out.
for page_num in range(1, input_file.getNumPages()):
    output_PDF.addPage(input_file.getPage(page_num))

output_file_name = os.path.join(path,
                                "Output/The Whistling Gypsy un-covered.pdf")
with open(output_file_name, "wb") as output_file:
Beispiel #41
0
def create_source_pdf(self, cr, uid, ids, data, report_xml, context=None):
    """Render the report for ``ids``, storing and reusing attachments.

    When ``report_xml.attachment`` is set, each object is rendered on its own
    (or read back from a previously saved attachment), the result is saved as
    an ir.attachment, and all PDF results are concatenated into one document.
    For the 'Bill of Lading' and 'Master Bill of Lading' reports printed from
    stock.picking.out, the previous attachment is replaced and the new one is
    linked on the picking; the master report is rendered and attached once
    and shared by every picking.

    Args:
        cr, uid: database cursor and user id passed through to the ORM calls.
        ids: ids of the records to print.
        data: report data dict; gains an 'objects' key for the master report.
        report_xml: report definition (name, attachment, attachment_use).
        context: optional context dict; 'active_model' and 'attachment_use'
            are read from it.

    Returns:
        ``(content, 'pdf')`` with the merged document when per-object PDF
        results were collected, False when rendering a single object fails,
        otherwise the result of create_single_pdf for all ``ids``.
    """
    if not context:
        context={}
    pool = pooler.get_pool(cr.dbname)
    pool_attach = pool.get('ir.attachment')
    picking_obj = pool.get('stock.picking.out')

    # .get() rather than indexing: the context defaults to {} just above and
    # callers are not required to provide 'active_model'.
    is_picking = context.get('active_model') == 'stock.picking.out'
    is_master = report_xml.name == 'Master Bill of Lading'
    # True for the two bill-of-lading reports printed from a delivery order.
    myflag = is_picking and report_xml.name in ['Bill of Lading', 'Master Bill of Lading']

    attach = report_xml.attachment
    MBOL = []         # rendered master report, shared by all objects
    M_attach = False  # attachment created for the master report
    if attach:
        objs = self.getObjects(cr, uid, ids, context)
        results = []
        for obj in objs:
            # NOTE(review): eval() of the expression stored on the report
            # definition; it must only ever come from trusted configuration.
            aname = eval(attach, {'object':obj, 'time':time})
            result = False
            if not myflag:
                if report_xml.attachment_use and aname and context.get('attachment_use', True):
                    aids = pool_attach.search(cr, uid, [('datas_fname','=',aname+'.pdf'),('res_model','=',self.table),('res_id','=',obj.id)])
                    if aids:
                        # Reuse the stored PDF instead of rendering again.
                        brow_rec = pool_attach.browse(cr, uid, aids[0])
                        if not brow_rec.datas:
                            continue
                        d = base64.decodestring(brow_rec.datas)
                        results.append((d,'pdf'))
                        continue
            if is_picking and is_master:
                data.update({'objects' : objs})

            # The master report is rendered for the first object only.
            if not MBOL:
                result = self.create_single_pdf(cr, uid, [obj.id], data, report_xml, context)
            else:
                result = MBOL

            if is_picking and is_master:
                MBOL = result

            if not result:
                return False
            if aname:
                try:
                    name = aname+'.'+result[1]
                    if myflag:
                        if is_master:
                            att_id = picking_obj.browse(cr, uid, obj.id).attached_mbol_report_id.id
                            if att_id:
                                pool_attach.unlink(cr, uid, [att_id])
                            aname = 'Master BOL-'+ time.strftime('%Y-%m-%d %H:%M:%S')
                        else:
                            #unlink the previous attached BOL report
                            att_id = picking_obj.browse(cr, uid, obj.id).attached_report_id.id
                            if att_id:
                                pool_attach.unlink(cr, uid, [att_id])
                            aname = 'BOL-'+ time.strftime('%Y-%m-%d %H:%M:%S')
                    if not M_attach:
                        new_attach = pool_attach.create(cr, uid, {
                            'name': aname,
                            'datas': base64.encodestring(result[0]),
                            'datas_fname': name,
                            'res_model': self.table,
                            'res_id': obj.id,
                        }, context=context)
                    else:
                        new_attach = M_attach
                    if myflag:
                        # Create new attachment of BOL report
                        new_val = {'attached_report_id':new_attach}
                        if is_master:
                            M_attach = new_attach
                            new_val = {'attached_mbol_report_id':new_attach}
                        picking_obj.write(cr, uid, obj.id, new_val)
                except Exception:
                    #TODO: should probably raise a proper osv_except instead, shouldn't we? see LP bug #325632
                    logging.getLogger('report').error('Could not create saved report attachment', exc_info=True)
            if not MBOL:
                results.append(result)
        if results:
            if results[0][1]=='pdf':
                from pyPdf import PdfFileWriter, PdfFileReader
                # Concatenate the per-object PDFs into a single document.
                output = PdfFileWriter()
                for r in results:
                    reader = PdfFileReader(cStringIO.StringIO(r[0]))
                    for page in range(reader.getNumPages()):
                        output.addPage(reader.getPage(page))
                s = cStringIO.StringIO()
                output.write(s)
                return s.getvalue(), results[0][1]
    return self.create_single_pdf(cr, uid, ids, data, report_xml, context)
Beispiel #42
0
from pyPdf import PdfFileReader, PdfFileWriter
from pyPdf.generic import NameObject, createStringObject

# Copy every page of a user-chosen PDF into a new document whose identifying
# metadata entries are replaced by empty strings.
inpfn = raw_input('Enter PDF path : ')

fin = open(inpfn, 'rb')
pdf_in = PdfFileReader(fin)

writer = PdfFileWriter()

for page in range(pdf_in.getNumPages()):
    writer.addPage(pdf_in.getPage(page))

infoDict = writer._info.getObject()

# Carry the source metadata over first; a document without any metadata
# yields None here, hence the fallback to an empty dict.
info = pdf_in.documentInfo or {}
for key in info:
    infoDict.update({NameObject(key): createStringObject(info[key])})

# blank out the entries that identify the author, tool and dates
list_of_data_to_delete = [
    '/CreationDate', '/Author', '/Creator', '/ModDate', '/Producer', '/Title'
]
for item in list_of_data_to_delete:
    try:
        infoDict.update({NameObject(item): createStringObject(u'')})
    except Exception:
        # Report the entry that failed (the handler used to reference an
        # undefined name and raised NameError itself).
        print("can't delete : ", item)

fout = open('outputFile.pdf', 'wb')
Beispiel #43
0
import os
from pyPdf import PdfFileReader

path = "/Users/KevinKoshy/PycharmProjects/RealPythonEg"

input_file_name = os.path.join(path, "Pride and Prejudice.pdf")
input_file = PdfFileReader(file(input_file_name, "rb"))

print "Number of pages = ", input_file.getNumPages()
print "Title = ", input_file.getDocumentInfo().title
Beispiel #44
0
import os
from pyPdf import PdfFileReader

my_path = "D:/Training/Python-Learning/realpython-webster/Course1/Practice files_12"

input_file_name = os.path.join(my_path, "half and half.pdf")
input_file = PdfFileReader(file(input_file_name, "rb"))
page = input_file.getPage(0)
print page.mediaBox
Beispiel #45
0
from pyPdf import PdfFileReader
import re
from Question import Question

if __name__ == '__main__':
    # Extract the text of every page of the exam PDF, strip the recurring
    # boilerplate, and parse the numbered questions out of the result.
    # A PDF is binary data, so it is opened in "rb" mode (it used to be "r").
    pdffile = PdfFileReader(file("SY0-301.pdf", "rb"))
    page_texts = []
    for page in pdffile.pages:
        text = page.extractText().strip()
        text = text.replace('Explanation:', '')
        text = text.replace(
            'CompTIA SY0-301 Exam"Pass Any Exam. Any Time." - ' +
            'www.actualtests.com', '')
        text = text.replace('CompTIA SY0-301', '')
        text = text.replace('CompTIA Security+Version: 15.0', '')
        text = text.replace('  ', ' ')
        page_texts.append(' ' + text)
    # One join instead of repeated string concatenation in the loop.
    all_text = ''.join(page_texts)

    # qp finds each whole question, from "QUESTION NO: n" to its answer
    # letter(s); pp splits one question into number, body, choices, answer.
    qp = re.compile(r'QUESTION\sNO:\s\d+.*?Answer:\s\w,?\w?')
    pp = re.compile(r'QUESTION\sNO:\s(\d+)(.*?)(A[.].*?)Answer:\s(\w,?\w?)')
    q_list = re.findall(qp, all_text)

    questions = []
    for q in q_list:
        parts = re.match(pp, q)
        question = Question()
        question.number = int(parts.group(1))
        question.question = parts.group(2).strip()
        question.correct_answer = parts.group(4).strip()

        letters = ['A', 'B', 'C', 'D', 'E', 'F']
        text = text.replace(r, u" ")
    text = text.replace(u"\ufb01", u"fi")

    return text


# Build a Paper record from the title, author and keywords stored in the
# metadata of every PDF found in papers_dir.
files = os.listdir(papers_dir)
files = [x for x in files if x.endswith('pdf')]

titles = []
authors = []
keywords = []
all_papers = []

for i, f in enumerate(files):
    # os.path.join works whether or not papers_dir ends with a separator.
    # The stream is closed as soon as the metadata has been read; it used to
    # stay open for every paper.
    with open(os.path.join(papers_dir, f), 'rb') as pdf_stream:
        reader = PdfFileReader(pdf_stream)
        info = dict(reader.documentInfo)
        # Drop any non-ASCII characters from the metadata values.
        title = info['/Title'].encode('ascii', 'ignore')
        author = info['/Author'].encode('ascii', 'ignore')
        keyword = info['/Keywords'].encode('ascii', 'ignore')
    # titles += [info['/Title']]
    # authors += [info['/Author']]
    # keywords += [info['/Keywords']]
    all_papers.append(
        Paper(id_=random.randint(0, 999999999),
              title=title,
              authors=author,
              filename=f,
              keywords=keyword))
Beispiel #47
0
                # call_log(Request_id,status,call_log_bat)
                call_log_new(Request_id, Process_Type, Process_Head,
                             Process_Name, Process_Time, Current_Status,
                             Time_Stamp)

                pdf_list = [x.replace('\n', '') for x in pdfinput]

                bat_xlsx = listpath_new.replace(
                    'Internal Omni Request', 'Internal Process Sql Request')

                if os.path.isfile(bat_xlsx):
                    os.remove(bat_xlsx)

                for pdf in pdf_list:

                    input1 = PdfFileReader(open(pdf, "rb"))
                    input1.getPage(0).mediaBox
                    pxcel_files = list(input1.getPage(0).mediaBox)

                    if int(pxcel_files[2]) > 2015 or int(
                            pxcel_files[3]) > 2015:
                        x = float(1000 / float(pxcel_files[3]))
                        print x
                        # print input1.getPage(0).scale(0.5, 0.5)
                        print input1.getPage(0).scaleBy(.3)
                        output = PdfFileWriter()
                        print input1.getPage(0).mediaBox
                        file_folder = pdf.split('\\')
                        p_name = file_folder[-1]
                        omni_filesave = omni_savedir + '\\' + p_name
                        output.addPage(input1.getPage(0))
Beispiel #48
0
import sys

# Overlay the first page of every PDF named on the command line onto the
# first page of the first one and save the result as Combine.pdf.
#
# All of the work stays under the guard so that importing this module, or
# running it without a PDF as first argument, does nothing.
if __name__ == '__main__' and len(
        sys.argv) > 1 and sys.argv[1][-3:].upper() == 'PDF':
    from pyPdf import PdfFileWriter, PdfFileReader
    print(len(sys.argv))
    total = len(sys.argv)
    original = sys.argv[1]
    target = 'Combine.pdf'
    # Input handles stay open until the output has been written, because the
    # readers may still need them at that point.
    sources = []
    try:
        sources.append(open(original, "rb"))
        inp = PdfFileReader(sources[-1])
        page = inp.getPage(0)
        output = PdfFileWriter()
        # argv[1] already supplies the base page, so only argv[2:] is merged
        # onto it.
        for i in range(2, total):
            sources.append(open(sys.argv[i], "rb"))
            inp = PdfFileReader(sources[-1])
            page.mergePage(inp.getPage(0))

        output.addPage(page)
        with open(target, "wb") as outputStream:
            output.write(outputStream)
    finally:
        for source in sources:
            source.close()
    print("DONE !")
    def post(self):
        """Send the personalised certificate PDF for the submitted name.

        Reads the ``name`` request parameter, looks up the ``Priestessess``
        entity with that name, draws the upper-cased name and a two-line date
        text onto the first page of ``AmericanPriestessCert.pdf`` and returns
        the merged page as a PDF attachment.

        When no name is submitted, or when any step of the lookup or PDF
        generation fails, ``pdf.html`` is rendered with an error message
        instead.
        """
        name = self.request.get('name')
        sendmail('*****@*****.**', '*****@*****.**', name)

        # Guard clause: an empty name gets the prompt, nothing else runs.
        if not name:
            error = "Please enter you full name, as it appears on your certificate."
            self.render('pdf.html', error=error)
            return

        try:
            q = Priestessess.all()
            q.filter('name =', name)
            e = q.get()
            cert_name = e.name

            # NOTE(review): `date`, `dates`, `months` and `years` are not
            # defined in this method -- presumably module-level; confirm.
            d = [dates[x] for x in dates if x == date.split('-')[1]]
            m = [months[x] for x in months if x == date.split('-')[0]]
            y = [years[x] for x in years if x == date.split('-')[2]]

            date_str = "     In witness whereof we have placed our name on this,"
            date_str2 = "the {0} day of {1}, in the year Two Thousand {2}." \
            .format(str(d).split("'")[1], str(m).split("'")[1], str(y).split("'")[1])

            packet = StringIO.StringIO()
            # create a new PDF with Reportlab
            can = canvas.Canvas(packet, pagesize=letter)
            text = can.beginText()
            text2 = can.beginText()
            text.setTextOrigin(10.3 * cm, 10.8 * cm)
            text2.setTextOrigin(8.8 * cm, 7.7 * cm)
            text2.setFont("Tangerine_Bold", 20)
            text.setFont('VeraBd', 20)
            text.textLine(cert_name.upper())
            text2.textLine(date_str)
            text2.textLine(date_str2)
            can.drawText(text)
            can.drawText(text2)
            can.save()

            # move to the beginning of the StringIO buffer
            packet.seek(0)
            new_pdf = PdfFileReader(packet)
            outputstream = StringIO.StringIO()
            # The template stays open until the merged page has been written.
            with open("AmericanPriestessCert.pdf", "rb") as template:
                existing_pdf = PdfFileReader(template)
                output = PdfFileWriter()
                # add the "watermark" (which is the new pdf) on the existing page
                page = existing_pdf.getPage(0)
                page.mergePage(new_pdf.getPage(0))
                output.addPage(page)
                output.write(outputstream)

            self.response.headers['Content-Type'] = 'application/pdf'
            self.response.headers[
                'Content-Disposition'] = 'attachment; filename=AmericanPriestessCert.pdf'
            self.response.headers['Content-Transfer-Encoding'] = 'binary'
            self.response.out.write(outputstream.getvalue())

        except Exception:
            # Best-effort: the user gets the generic message, the cause goes
            # to the log.
            import logging
            logging.exception("certificate generation failed for %r", name)
            error = ("There was an error.\n Please either re-enter your name "
                     "exactly as you typed it in field one of the application "
                     "form, or check your email to ensure payment has been "
                     "successful.")
            self.render('pdf.html', error=error)
Beispiel #50
0
# 8.2 cover_the_emperor.py
# Add a cover sheet to a PDF; save the full output as a new PDF

import os
from pyPdf import PdfFileReader, PdfFileWriter

# Locate the cover sheet and the main document and open a reader on each
path = "C:/Real Python/Course materials/Chapter 8/Practice files"
inputFileName1 = os.path.join(path, "Emperor cover sheet.pdf")
inputFile1 = PdfFileReader(file(inputFileName1, "rb"))
inputFileName2 = os.path.join(path, "The Emperor.pdf")
inputFile2 = PdfFileReader(file(inputFileName2, "rb"))
outputPDF = PdfFileWriter()

# Copy every page into the output: cover sheet first, then "The Emperor.pdf"
for inputFile in (inputFile1, inputFile2):
    for pageNum in range(inputFile.getNumPages()):
        page = inputFile.getPage(pageNum)
        outputPDF.addPage(page)

# Save the combined document as a new PDF
outputFileName = os.path.join(path, "Output/The Covered Emperor.pdf")
outputFile = file(outputFileName, "wb")
outputPDF.write(outputFile)
outputFile.close()
Beispiel #51
0
    def do_update_file_info(self, file):
        """Read media metadata for *file* and store it as file attributes.

        The branch taken depends on ``file.is_mime_type``:

        * ``audio/mpeg``: ID3 tags via ``EasyID3`` plus bitrate, sample rate
          and length via ``MPEGInfo``.
        * JPEG/PNG/GIF/BMP: EXIF/XMP values via ``pyexiv2`` and the pixel
          size via ``PIL.Image``.
        * video, FLAC and WAV types: fields from ``kaa.metadata.parse``.
        * ``application/pdf``: title and author from the document info.

        Every lookup is best-effort: a value that cannot be read is left
        unset on the ``FileExtensionInfo`` object. The object is finally
        passed to ``self.set_file_attributes``.
        """
        info = FileExtensionInfo()

        def try_set(attribute, read_value):
            """Assign ``read_value()`` to ``info.<attribute>``; skip on error."""
            try:
                setattr(info, attribute, read_value())
            except Exception:
                pass

        # strip file:// to get absolute path
        filename = urllib.unquote(file.get_uri()[7:])

        # mp3 handling
        if file.is_mime_type('audio/mpeg'):
            # attempt to read ID3 tag
            try:
                audio = EasyID3(filename)
            except Exception:
                audio = None
            if audio is not None:
                # any single tag may be missing, so each one is read on its own
                for tag in ('title', 'album', 'artist', 'genre', 'date'):
                    try_set(tag, lambda tag=tag: audio[tag][0])
                try_set('tracknumber',
                        lambda: "{:0>2}".format(audio["tracknumber"][0]))

            # try to read MP3 information (bitrate, length, samplerate)
            try:
                # binary mode: the stream is MPEG data, not text
                with open(filename, 'rb') as mpfile:
                    mpinfo = MPEGInfo(mpfile)
                    info.bitrate = str(mpinfo.bitrate / 1000) + " Kbps"
                    info.samplerate = str(mpinfo.sample_rate) + " Hz"
                    # [SabreWolfy] consistent hh:mm:ss formatting so that the
                    # column sorts correctly by length
                    info.length = "%02i:%02i:%02i" % (
                        (int(mpinfo.length / 3600)),
                        (int(mpinfo.length / 60 % 60)),
                        (int(mpinfo.length % 60)))
            except Exception:
                pass

        # image handling
        elif any(file.is_mime_type(mime) for mime in (
                'image/jpeg', 'image/png', 'image/gif', 'image/bmp')):
            # EXIF handling routines
            try:
                metadata = pyexiv2.ImageMetadata(filename)
                metadata.read()
            except Exception:
                metadata = None
            if metadata is not None:
                for attribute, key in (
                        ('exif_datetime_original',
                         'Exif.Photo.DateTimeOriginal'),
                        ('exif_software', 'Exif.Image.Software'),
                        ('exif_flash', 'Exif.Photo.Flash'),
                        ('exif_rating', 'Xmp.xmp.Rating')):
                    try_set(attribute,
                            lambda key=key: str(metadata[key].raw_value))
            # try read image info directly
            try:
                im = PIL.Image.open(filename)
                info.pixeldimensions = str(im.size[0]) + 'x' + str(im.size[1])
            except Exception as e:
                print(e)

        # video/flac handling
        elif any(file.is_mime_type(mime) for mime in (
                'video/x-msvideo', 'video/mpeg', 'video/x-ms-wmv',
                'video/mp4', 'audio/x-flac', 'video/x-flv',
                'video/x-matroska', 'audio/x-wav')):
            try:
                metadata = kaa.metadata.parse(filename)
            except Exception:
                metadata = None
            if metadata is not None:
                try_set('length', lambda: "%02i:%02i:%02i" % (
                    (int(metadata.length / 3600)),
                    (int(metadata.length / 60 % 60)),
                    (int(metadata.length % 60))))
                try_set('pixeldimensions',
                        lambda: str(metadata.video[0].width) + 'x' + str(
                            metadata.video[0].height))
                try_set('bitrate',
                        lambda: str(round(metadata.audio[0].bitrate / 1000)))
                try_set('samplerate',
                        lambda: str(int(metadata.audio[0].samplerate)) + ' Hz')
                # info attribute -> kaa metadata attribute
                for attribute, source in (
                        ('title', 'title'),
                        ('artist', 'artist'),
                        ('genre', 'genre'),
                        ('tracknumber', 'trackno'),
                        ('date', 'userdate'),
                        ('album', 'album')):
                    try_set(attribute,
                            lambda source=source: getattr(metadata, source))

        # pdf handling
        elif file.is_mime_type('application/pdf'):
            try:
                with open(filename, "rb") as f:
                    pdf = PdfFileReader(f)
                    try_set('title', lambda: pdf.getDocumentInfo().title)
                    try_set('artist', lambda: pdf.getDocumentInfo().author)
            except Exception:
                pass

        self.set_file_attributes(file, info)
Beispiel #52
0
                print "Output file must be a PDF."

# Margins for the left, top, right and bottom side; all zero unless a
# -m/--margin option supplies four whitespace-separated numbers.
margin = dict.fromkeys(("l", "t", "r", "b"), 0)

for a in opts:
    if a[0] not in ('-m', '--margin'):
        continue
    if a[1] is None:
        print("Error")
        continue
    m_temp = a[1].strip("\"").split()
    for index, side in enumerate(("l", "t", "r", "b")):
        margin[side] = float(m_temp[index])

input1 = PdfFileReader(file(input_file, "rb"))

output = PdfFileWriter()
outputstream = file(output_file, "wb")

pages = input1.getNumPages()

# Upper corners of the first page's media box
first_page_box = input1.getPage(0).mediaBox
top_right = dict(x=first_page_box.getUpperRight_x(),
                 y=first_page_box.getUpperRight_y())
top_left = dict(x=first_page_box.getUpperLeft_x(),
                y=first_page_box.getUpperLeft_y())
bottom_right = {
Beispiel #53
0
# 8.2 review exercises

import os
import copy
from pyPdf import PdfFileReader, PdfFileWriter

# Open the encrypted source PDF and create the writer that collects the
# split pages.
path = "C:/Real Python/Course materials/Chapter 8/Practice files"
inputFileName = os.path.join(path, "Walrus.pdf")
inputFile = PdfFileReader(file(inputFileName, "rb"))
outputPDF = PdfFileWriter()

inputFile.decrypt("IamtheWalrus") # decrypt the input file

# Every input page is rotated and then added to the output twice, each copy
# with one corner of its media box moved to half of the original x extent.
for pageNum in range(0, inputFile.getNumPages()):
    # rotate pages (call everything pageLeft for now; will make a copy)
    pageLeft = inputFile.getPage(pageNum)
    pageLeft.rotateCounterClockwise(90)

    # NOTE(review): the shallow copy is taken before mediaBox is accessed for
    # the first time -- presumably this keeps the two pages from sharing one
    # box object; confirm against pyPdf before reordering these lines.
    pageRight = copy.copy(pageLeft) # split each page in half
    upperRight = pageLeft.mediaBox.upperRight # get original page corner

    # crop and add left-side page (upper-right corner moved to half the x value)
    pageLeft.mediaBox.upperRight = (upperRight[0]/2, upperRight[1])
    outputPDF.addPage(pageLeft)
    # crop and add right-side page (upper-left corner moved to half the x value)
    pageRight.mediaBox.upperLeft = (upperRight[0]/2, upperRight[1])
    outputPDF.addPage(pageRight)

# save new pages to an output file
outputFileName = os.path.join(path, "Output/Updated Walrus.pdf")
with file(outputFileName, "wb") as outputFile:
Beispiel #54
0
 def clean(self, *args, **kwargs):
     """Check that ``self.pdf`` can be opened by PdfFileReader.

     Raises ValidationError when PdfFileReader fails on it for any reason.
     """
     try:
         PdfFileReader(self.pdf)
     except Exception:
         # The message is Russian for "Only a PDF file is allowed".
         raise ValidationError(_('Разрешен только PDF файл'))
Beispiel #55
0
'''THIS IS A PYTHON 2 CODE'''

import pyPdf

from pyPdf import PdfFileReader
# Open the PDF in binary mode and hand the file object to PdfFileReader.
# The path is a raw string so that "\t" and "\f" are kept as literal
# backslash sequences instead of becoming tab and form-feed characters.
with open(r'path\to\file.pdf', 'rb') as pdf_file:
    reader = PdfFileReader(pdf_file)
    info = reader.getDocumentInfo()  # dictionary-like document metadata

    # Print every metadata entry while the file is still open.
    for meta_item in info:
        print("{}     Info: {}".format(meta_item, info[meta_item]))
Beispiel #56
0
# Stamp the web link and the current date & time onto every page of the
# generated PDF.

outputPDF = PdfFileWriter()

# Draw the two text items into an in-memory PDF with Reportlab
packet = StringIO.StringIO()
can = canvas.Canvas(packet, pagesize=letter)
can.setFont("Helvetica", 9)
oknow = time.strftime("%a, %d %b %Y %H:%M")
can.drawString(5, 2, url)
can.drawString(605, 2, oknow)
can.save()

# Rewind the buffer so it can be read back as a PDF
packet.seek(0)
new_pdf = PdfFileReader(packet)

# Read the existing PDF
existing_pdf = PdfFileReader(file(tem_pdf, "rb"))
pages = existing_pdf.getNumPages()
output = PdfFileWriter()

# Merge the stamp (page 0 of the new PDF) onto each existing page
for x in range(pages):
    page = existing_pdf.getPage(x)
    page.mergePage(new_pdf.getPage(0))
    output.addPage(page)

# Write the stamped document to its final location
outputStream = file(final_file, "wb")
output.write(outputStream)
outputStream.close()

print(final_file, 'is ready.')
Beispiel #57
0
import StringIO
from reportlab.pdfgen import canvas

# To register a specific font
# from reportlab.pdfbase import pdfmetrics
# from reportlab.pdfbase.ttfonts import TTFont
# pdfmetrics.registerFont(TTFont('Allura', 'Allura.ttf'))

# Create one PDF per non-empty line of list.csv: the line's text is drawn
# centred onto the first page of original.pdf and saved as "<text>.pdf".
with open('list.csv') as names:
    for line in names:
        # Drop only the line terminator, so a last line without a newline
        # keeps all of its characters and no newline reaches the page text.
        name = line.rstrip('\r\n')
        if not name:
            continue  # a blank line would only produce a file called ".pdf"

        packet = StringIO.StringIO()
        # create a new PDF with Reportlab
        can = canvas.Canvas(packet, (864, 608.9))
        # Float division: 100 / 256 is 0 under Python 2 integer division,
        # which would make the blue component vanish.
        can.setFillColorRGB(0, 0, 100 / 256.0)
        #can.setFont("Allura", 40)
        can.drawCentredString(432, 240, name)
        can.save()

        # move to the beginning of the StringIO buffer
        packet.seek(0)
        new_pdf = PdfFileReader(packet)
        # The template stays open until the merged page has been written.
        with open("original.pdf", "rb") as template:
            existing_pdf = PdfFileReader(template)
            output = PdfFileWriter()
            page = existing_pdf.getPage(0)
            page.mergePage(new_pdf.getPage(0))
            output.addPage(page)
            # finally, write "output" to a real file
            with open(name + ".pdf", "wb") as outputStream:
                output.write(outputStream)
from pyPdf import PdfFileWriter, PdfFileReader

# Print the document info, then the extracted text of page 0 and of the
# first five pages, each with the same "title = " prefix.
output = PdfFileWriter()
input1 = PdfFileReader(file("CONSTITUCION-Interiores.pdf", "rb"))

label = "title = %s"
print(label % (input1.getDocumentInfo()))
print(label % (input1.getPage(0).extractText()))
for i in range(5):
    page_text = input1.getPage(i).extractText()
    print(label % (page_text))
from sys import argv
from pyPdf import PdfFileReader
from os import path

# The PDF to inspect is the first command-line argument.
filename = argv[1]

# Open it in binary mode and read its page count.
# NOTE(review): the file object handed to PdfFileReader is never closed in
# the visible code.
document = PdfFileReader(file(filename, "rb"))
pages = document.getNumPages()

with open(filename+".info", 'w') as out:
    path = path.dirname(filename)
    if path:
        path = path + '/'
    out.write("""import json

def UpdateInfo():
    global FileName, FileList, PageCount
    global DocumentTitle
    global Pcurrent, Pnext, Tcurrent, Tnext, InitialPage
    global RTrunning, RTrestart, StartTime, PageEnterTime, CurrentTime

    with open('"""+path+"""json.txt', 'w') as io:
        json.dump(({"page_count": PageCount, "current_page": Pcurrent, "previous_page": Pnext, "start_time": StartTime, "pageenter_time": PageEnterTime, "current_time": CurrentTime, "notes": PageProps[Pcurrent]['notes']}), io)

PageProps = {
""")

    for i in range(1,pages + 1):
        if i < pages:
            out.write("    "+str(i)+": {\n        'transition': None,\n        'overview': True,\n        'notes': '',\n        'OnEnter': UpdateInfo\n    },\n")
        else:
Beispiel #60
0
    def export_to_file(self, file_out, only_selected=False):
        """Export to file

        Writes the pages listed in ``self.model`` into one PDF at
        ``file_out``, applying the rotation and crop stored in each row.

        file_out -- path of the PDF file to create
        only_selected -- when true, rows whose path is not in the icon
                         view's current selection are skipped

        Raises Exception when an input file is encrypted and cannot be
        decrypted with an empty password.
        """

        selection = self.iconview.get_selected_items()
        pdf_output = PdfFileWriter()
        pdf_input = []
        # Input handles stay open until the output has been written and are
        # closed afterwards, also when the export fails.
        opened_files = []
        try:
            for pdfdoc in self.pdfqueue:
                source = open(pdfdoc.copyname, 'rb')
                opened_files.append(source)
                pdfdoc_inp = PdfFileReader(source)
                if pdfdoc_inp.getIsEncrypted():
                    try:  # Workaround for lp:#355479
                        stat = pdfdoc_inp.decrypt('')
                    except Exception:
                        stat = 0
                    if (stat != 1):
                        errmsg = _(
                            'File %s is encrypted.\n'
                            'Support for encrypted files has not been implemented yet.\n'
                            'File export failed.') % pdfdoc.filename
                        raise Exception(errmsg)
                    #FIXME
                    #else
                    #   ask for password and decrypt file
                pdf_input.append(pdfdoc_inp)

            for row in self.model:

                if only_selected and row.path not in selection:
                    continue

                # add pages from input to output document
                # (row[2] and row[3] are 1-based file and page numbers)
                nfile = row[2]
                npage = row[3]
                current_page = copy(pdf_input[nfile - 1].getPage(npage - 1))
                angle = row[6]
                angle0 = current_page.get("/Rotate", 0)
                crop = [row[7], row[8], row[9], row[10]]
                if angle != 0:
                    current_page.rotateClockwise(angle)
                if crop != [0., 0., 0., 0.]:
                    # Number of quarter turns of the total rotation; floor
                    # division keeps this an integer for range() below.
                    rotate_times = (((angle + angle0) % 360 + 45) // 90) % 4
                    crop_init = crop
                    if rotate_times != 0:
                        # Permute the four crop values to follow the rotation.
                        perm = [0, 2, 1, 3]
                        for it in range(rotate_times):
                            perm.append(perm.pop(0))
                        perm.insert(1, perm.pop(2))
                        crop = [crop_init[perm[side]] for side in range(4)]
                    #(x1, y1) = current_page.cropBox.lowerLeft
                    #(x2, y2) = current_page.cropBox.upperRight
                    (x1,
                     y1) = [float(xy) for xy in current_page.mediaBox.lowerLeft]
                    (x2,
                     y2) = [float(xy) for xy in current_page.mediaBox.upperRight]
                    # Each crop value is applied as a fraction of the box
                    # width or height.
                    x1_new = int(x1 + (x2 - x1) * crop[0])
                    x2_new = int(x2 - (x2 - x1) * crop[1])
                    y1_new = int(y1 + (y2 - y1) * crop[3])
                    y2_new = int(y2 - (y2 - y1) * crop[2])
                    #current_page.cropBox.lowerLeft = (x1_new, y1_new)
                    #current_page.cropBox.upperRight = (x2_new, y2_new)
                    current_page.mediaBox.lowerLeft = (x1_new, y1_new)
                    current_page.mediaBox.upperRight = (x2_new, y2_new)

                pdf_output.addPage(current_page)

            # finally, write "output" to document-output.pdf
            with open(file_out, 'wb') as out_stream:
                pdf_output.write(out_stream)
        finally:
            for source in opened_files:
                source.close()