def endElement(self, tag):
    """Handle a closing tag and try to add a missing ``i18n:translate``.

    Pops the ``(tag, attrs, data)`` triple recorded for the element from
    ``self._history``.  When the element's text is translatable, is not
    enclosed in an i18n scope (``self._i18nlevel == 0``), has a severity,
    is not flagged with ``IGNORE_UNTRANSLATED`` and is not a Chameleon
    substitution, the template file is re-parsed, the matching elements are
    looked up and each one lacking ``i18n:translate`` is handed to
    ``replace`` together with a version carrying the attribute.  A message
    is logged when no element matches or when ``replace`` reports no match.

    The ``tag`` argument is immediately overwritten by the value popped
    from the history.  The i18n nesting level is decremented on every call
    unless it is already zero.
    """
    tag, attrs, data = self._history.pop()
    data = data.strip()

    # Short-circuit evaluation keeps the checks in their original order.
    unenclosed_translatable = (
        untranslated._translatable(data)
        and not untranslated._tal_replaced_content(tag, attrs)
        # not enclosed
        and self._i18nlevel == 0
        and tag not in ['script', 'style', 'html']
    )
    severity = ''
    if unenclosed_translatable:
        severity = untranslated._severity(tag, attrs) or ''

    # IGNORE_UNTRANSLATED marks literal content that does not need to be
    # translated, so such elements are left alone.
    if (severity
            and untranslated.IGNORE_UNTRANSLATED not in attrs.keys()
            and not untranslated.CHAMELEON_SUBST.match(data)):
        h = HTMLParser.HTMLParser()
        # The ``with`` block closes the file; no explicit close() is needed.
        with open(self._filename, 'r') as source_file:
            bs = BeautifulSoup.BeautifulSoup(source_file, 'html.parser')

        # 'selected' is left out of the attribute filter used for lookup.
        attr = {
            key: attrs.getValue(key)
            for key in attrs.keys()
            if key != 'selected'
        }
        values = bs.findAll(tag.lower(), attrs=attr)
        if not values:
            self.log(
                'i18n:translate missing for this:\n'
                '"""\n%s\n"""\nTag:<%s> Attrs:%s' % (
                    data.encode('utf8'), tag, attr),
                severity)

        for v in values:
            if v.has_attr('i18n:translate'):
                continue
            v.name = tag
            escaper = EntitySubstitution()
            substitute = copy(v)
            if v.string:
                substitute.string = escaper.substitute_html(v.string)
            # Try the element as parsed first, then the copy whose text has
            # HTML entities substituted.
            for node in [v, substitute]:
                pattern = h.unescape(str(node))
                node['i18n:translate'] = ""
                substring = h.unescape(str(node))
                # NOTE(review): ``replace`` is defined elsewhere; it is
                # assumed to return a truthy value when the pattern was
                # found in the file - confirm.
                match = replace(
                    self._filename,
                    str(pattern),
                    str(substring),
                    self._parser.getLineNumber())
                if match:
                    break
            else:
                # Neither variant matched; report the last pattern tried.
                self.log(
                    'i18n:translate missing for this:\n'
                    '"""\n%s\n"""\nPattern: %s' % (
                        data.encode('utf8'), str(pattern)),
                    severity)

    if self._i18nlevel != 0:
        self._i18nlevel -= 1
def make_opening_tag(self, tag_node, self_closing=False):
    """Build the opening-tag markup for ``tag_node``.

    Args:
        tag_node: object exposing ``name`` and an ``attrs`` mapping.
            List-valued attributes are joined with single spaces.
        self_closing: when true the tag ends with ``/>`` instead of ``>``.

    Returns:
        The tag as a unicode string, e.g. ``<a href="x">``.
    """
    components = [tag_node.name]
    # items() works on Python 2 and 3; iteritems() exists only on Python 2.
    for attr, value in tag_node.attrs.items():
        if isinstance(value, list):
            value = u" ".join(value)
        # substitute_html leaves the double quote alone, so escape it here;
        # otherwise a value containing '"' would end the attribute early.
        escaped = EntitySubstitution.substitute_html(value).replace(
            u'"', u'&quot;')
        components.append(u'%s="%s"' % (attr, escaped))
    closer = u"/>" if self_closing else u">"
    return u"<" + u" ".join(components) + closer
def save_sent_viz_file(x, name, scores, k, args):
    """Write an HTML page highlighting the selected words of each sentence.

    For every row of ``x`` the words chosen by ``get_selected_words`` are
    wrapped in ``<mark><strong>`` and the sentence is written as one
    ``<p>`` element to ``<args.outdir>/sent_viz_L2X<name>.html``.

    Args:
        x: iterable of word-id sequences; leading ``0`` ids are skipped.
        name: suffix used in the output file name.
        scores: per-row scores, indexed in parallel with ``x``.
        k: passed through to ``get_selected_words``.
        args: object with an ``outdir`` attribute holding both
            ``id_to_word.pkl`` and the generated HTML file.
    """
    import io  # local import so the block does not rely on file-level imports

    escaper = EntitySubstitution()
    # SECURITY: pickle.load can execute arbitrary code; only use this with
    # an id_to_word.pkl produced by a trusted run.
    with open(os.path.join(args.outdir, 'id_to_word.pkl'), 'rb') as f:
        id_to_word = pickle.load(f)

    out_path = os.path.join(args.outdir, 'sent_viz_L2X' + name + '.html')
    # io.open with an explicit encoding takes unicode text on Python 2 and 3,
    # instead of writing pre-encoded bytes to a text-mode file.
    with io.open(out_path, 'w', encoding='utf-8') as txt_file:
        txt_file.write(u"<!DOCTYPE html>\n<html>\n<body>\n")
        for row, x_single in enumerate(x):
            x_selected = get_selected_words(
                x_single, scores[row], id_to_word, k)
            # Index of the first non-zero id.  A row made only of zeros
            # falls back to its last position; an empty row yields an empty
            # paragraph rather than reusing an index from a previous row.
            start = next(
                (pos for pos, word_id in enumerate(x_single) if word_id != 0),
                max(len(x_single) - 1, 0))

            sent_viz = []
            # NOTE(review): assumes x_selected is aligned with x_single
            # (same length) - confirm against get_selected_words.
            for word_id, selected_id in zip(x_single[start:],
                                            x_selected[start:]):
                word = escaper.substitute_html(id_to_word[word_id])
                if selected_id != 0:
                    word = u"<mark><strong>" + word + u"</strong></mark>"
                sent_viz.append(word)
            txt_file.write(u"<p>" + u" ".join(sent_viz) + u"</p><br>\n")
        txt_file.write(u"</body>\n</html>\n")
def extract_content(self): soup = BeautifulSoup(self.html, from_encoding="utf-8") content = soup.find("div", {"id" : "contenu"}) #print dir(content) #print help(content) for x in dir(content): print "\t", x.title() ,type(getattr(content, x)) print content.parent #print type(content) if content is not None: #todo test all function to find the best #print content.string #print content.contents #print content.getText() #print content.get_text() #Get all child strings, concatenated using the given separator. #print content.getText() #print content.Text() #print content.Name() #print content.strings # Get all child strings, concatenated using the given separator. print "title", soup.title.string with open("wiki_content.html", "wb") as myfile: myfile.write(EntitySubstitution.substitute_html(unicode(content)).encode("UTF-8")) #myfile.write(EntitySubstitution.substitute_html(unicode(content)).encode("UTF-8")) #myfile.write(unicode(content.prettify(formatter="html")).encode("UTF-8")) """ for elem in content.contents: print type(elem) myfile.write(elem.encode("UTF-8")) """ print "content",type(content) #self.content = content.string self.content = content.contents print type(self.content) print len(self.content)
# -*- coding: utf-8 -*-
# Print the clipboard contents with HTML entities substituted.
from __future__ import print_function

import clipboard
from bs4.dammit import EntitySubstitution

text = clipboard.get()
escaped = EntitySubstitution.substitute_html(text)
print(escaped)
trndNo = 0 trend_list = "NA" else: try: results = twitter.trends.place(_id = loc_id_int) trend_list = "" for location in results: for trend in location["trends"]: trndNo += 1 print trend["name"] trend_list += trend["name"] trend_list += ", " trend_list = trend_list[:-2] printable = set(string.printable) trend_list = filter(lambda x: x in printable, trend_list) trend_list = esub.substitute_html(trend_list) except: trndNo = 0 trend_list = "NA" print "404 " htmlStr += "\']," continue htmlStr += "</br>Trends#: " + str(trndNo) + "</br>" htmlStr += "Trending: " + trend_list + "</br>" img1 = per[3] img2 = per[4] img3 = per[5] img4 = per[6] img5 = per[7]
# -*- coding: utf-8 -*-
# @viticci
# A simple HTML encoder for clipboard contents
#
# The coding declaration has to be on the first or second line to take
# effect, so it comes before the header comments.
import clipboard
from bs4.dammit import EntitySubstitution

text = clipboard.get()
# Single-argument call form: prints the same on Python 2 and Python 3.
print(EntitySubstitution.substitute_html(text))
def output_ready(self, substitute_html_entities=False):
    """Return this string wrapped in ``PREFIX`` and ``SUFFIX``.

    The string itself is passed through
    ``EntitySubstitution.substitute_html`` first when
    ``substitute_html_entities`` is true; otherwise it is used unchanged.
    """
    body = self
    if substitute_html_entities:
        body = EntitySubstitution.substitute_html(self)
    return self.PREFIX + body + self.SUFFIX
def custom_formatter(string):
    """Substitute HTML entities in ``string``, quote characters included.

    ``EntitySubstitution.substitute_html`` leaves ``"`` and ``'`` as they
    are, so both are replaced afterwards with ``&quot;`` and ``&apos;``.
    """
    escaped = EntitySubstitution.substitute_html(string)
    # The replacement targets must be the entity names: replacing '"' with
    # itself is a no-op, and a bare ''' opens an unterminated string.
    return escaped.replace('"', '&quot;').replace("'", '&apos;')
def substitute_html_entities(str):
    """Substitute HTML entities but keep curly quotes as plain ASCII quotes.

    ``EntitySubstitution.substitute_html`` turns the curly quote characters
    into ``&ldquo;``, ``&rdquo;`` and ``&rsquo;``.  Those entities are mapped
    back to ``"``, ``"`` and ``'`` afterwards.

    The parameter name shadows the built-in ``str``; it is kept so callers
    passing it by keyword keep working.
    """
    escaped = EntitySubstitution.substitute_html(str)
    # Match the entity names: after substitution the raw curly-quote
    # characters no longer occur in the text.
    for entity, plain in (("&ldquo;", '"'), ("&rdquo;", '"'), ("&rsquo;", "'")):
        escaped = escaped.replace(entity, plain)
    return escaped
def envioMail(): print "Comenzamos envioMail" import base64 from email.mime.multipart import MIMEMultipart from email.mime.text import MIMEText from email.mime.image import MIMEImage # Establecemos conexion con el servidor smtp de gmail mailServer = smtplib.SMTP('smtp.gmail.com',587) mailServer.ehlo() mailServer.starttls() mailServer.ehlo() password = base64.b64decode("Q29uc3RhbmNpYTIx") mailServer.login("*****@*****.**",password) # Construimos un mensaje Multipart, con un texto y una imagen adjunta # Establecemos la cuentadesde cuentaDesde = "*****@*****.**" from pymongo import MongoClient as Connection from pymongo import DESCENDING cadenaCon= 'mongodb://*****:*****@ds029635.mlab.com:29635/othesoluciones1' MONGODB_URI =cadenaCon MONGODB_URI = 'mongodb://*****:*****@ds029635.mlab.com:29635/othesoluciones1' db = Connection(MONGODB_URI).othesoluciones1 import datetime import numpy as np import pandas as pd fecha = (datetime.date.today()+datetime.timedelta(days=0)).strftime('%d/%m/%Y') fecha = datetime.datetime.strptime(fecha,'%d/%m/%Y') print "Fecha de hoy-->", fecha dfmm = pd.DataFrame() for doc in db.coleccion_notificaciones.find(): if ((datetime.datetime.strptime(doc['fdesde'],'%d/%m/%Y')<= fecha) and (fecha <= datetime.datetime.strptime(doc['fhasta'],'%d/%m/%Y'))): df_aux=pd.DataFrame([doc['email'],doc['municipio'], doc['fhasta']]) dfmm= dfmm.append(df_aux.T, ignore_index=True) print "****************************************************************" from lxml import etree import time doc=etree.parse("static/Municipios/madrid.xml") muni=doc.findall("municipio") print dfmm if (len(dfmm)>0): print "Existen notificaciones que enviar" #Obtenemos la lista de emails distintos for j in dfmm[0].unique(): # Construimos un mensaje Multipart, en el que vamos a incluir texto y una imagen adjunta # El cuerpo del texto del mensaje dependera del numero de suscripciones activas que tenga un usuario para el dia actual texto="" mensaje = MIMEMultipart() mensaje['From']=cuentaDesde 
cuentaPara=j mensaje['To']=cuentaPara for i in range(0, len(dfmm)): if (dfmm.ix[i,0]==j): for k in range(0,len(muni)): if (muni[k].attrib["value"][-5:]==dfmm.ix[i,1]): hoy = (datetime.date.today()+datetime.timedelta(days=0)).strftime('%d-%m-%Y') manana=(datetime.date.today()+datetime.timedelta(days=1)).strftime('%d-%m-%Y') pasadomanana=(datetime.date.today()+datetime.timedelta(days=2)).strftime('%d-%m-%Y') collection1 = db.PrediccionOTHE name2 = elimina_tildes(unicode(muni[k].text[:])) cursor1 = collection1.find_one({"Municipio": name2}) predHoy = cursor1["Alerta "+hoy] predManana= cursor1["Alerta "+manana] predPasadoManana=cursor1["Alerta "+pasadomanana] from bs4.dammit import EntitySubstitution unsubbed = unicode(muni[k].text[:]) esub = EntitySubstitution() subbed = esub.substitute_html(unsubbed) print "Activa hasta el: ", dfmm.ix[i,2] fhasta = str(dfmm.ix[i,2]).replace("/","-") texto = texto+str("<h3>"+subbed+":</h3><p> </p>") texto = texto+str("<p>El Nivel de Alerta de Gramíneas para el día " +hoy+" es: <b>"+str((predHoy))+"</b></p>") texto = texto+str("<p>El Nivel de Alerta de Gramíneas para el día " +manana+" es: <b>"+str((predManana))+"</b></p>") texto = texto+str("<p>El Nivel de Alerta de Gramíneas para el día " +pasadomanana+" es: <b>"+str((predPasadoManana))+"</b></p>") if (hoy!=fhasta): texto = texto+str("<p>Recibirá esta notificación hasta el: <b>"+fhasta+"</b></p>") else: texto = texto+str("<p>Hoy día <b>"+fhasta+"</b> es el último en el que recibirá esta notificación</p>") texto = texto+str("<hr>") #Establecemos el Asunto del Email mensaje['Subject']= hoy+". 
Servicio de Notificaciones" #Establecemos el texto comun de los emails html_inic = """\ <html> <head></head> <body> <p>Buenos días,</p> <p>Estas son las notificaciones que ha solicitado:</p><br></br>""" html_fin="""\ <br></br> <p>Deseamos que pase un gran día.</p> <p>Para más información puede consultar nuestra web: http://gramineas-madrid.herokuapp.com/</p> <p>Reciba un cordial saludo por parte del equipo de Othe Soluciones</p> <img src="cid:logo" alt="Othe Soluciones" height="52" width="52"></img> </html>""" #Y lo juntamos en una cadena html=str(html_inic+texto+html_fin) #Montamos todo el cuerpo del mensaje mensaje.attach(MIMEText(html,'html')) # Adjuntamos la imagen file = open("static/style/logo.jpg", "rb") contenido = MIMEImage(file.read()) contenido.add_header('Content-ID', '<logo>') mensaje.attach(contenido) print "Envio mail a: ", cuentaPara # Enviamos el correo, con los campos from y to. mailServer.sendmail(cuentaDesde, cuentaPara, mensaje.as_string()) # Cierre de la conexion mailServer.close() print "Fin de envioMail con emails enviados" else: # Cierre de la conexion mailServer.close() print "Fin de envioMail no habia emails que enviar"
def _html_entities(self, string):
    """Return ``string`` with HTML entities substituted.

    A string that already contains ``&`` is returned untouched; any other
    string goes through ``EntitySubstitution.substitute_html``.
    """
    has_ampersand = '&' in string
    return string if has_ampersand else EntitySubstitution.substitute_html(string)
def uppercase_and_substitute_html_entities(string):
    """Substitute HTML entities in ``string`` and drop every newline.

    Despite its name, this function does not change the letter case.
    """
    return EntitySubstitution.substitute_html(string).replace('\n', '')
def encode_url(string):
    """Return ``string`` with HTML entities substituted.

    The work is delegated to ``EntitySubstitution.substitute_html``; no
    URL percent-encoding is performed.
    """
    return EntitySubstitution().substitute_html(string)
def substitute_html_entities(str):
    """Substitute HTML entities, leaving curly quotes as ASCII quotes.

    ``EntitySubstitution.substitute_html`` converts the curly quote
    characters to ``&ldquo;``, ``&rdquo;`` and ``&rsquo;``; these entities
    are then replaced by ``"``, ``"`` and ``'``.

    The parameter name shadows the built-in ``str``; it is left unchanged
    so that keyword callers are not broken.
    """
    quote_entities = (
        ("&ldquo;", '"'),
        ("&rdquo;", '"'),
        ("&rsquo;", "'"),
    )
    result = EntitySubstitution.substitute_html(str)
    # Replace the entity names, since the raw curly-quote characters are
    # gone once substitute_html has run.
    for entity, plain in quote_entities:
        result = result.replace(entity, plain)
    return result