Python EntitySubstitution.substitute_htmlの例、bs4.dammit.EntitySubstitution.substitute_html Pythonの例

コード例 #1

0

ファイルを表示

    def endElement(self, tag):
        """Handle a closing tag: report or patch untranslated text content.

        The ``tag`` argument is immediately replaced by the entry popped from
        ``self._history``, which also supplies the element's attributes and
        its accumulated character data.

        When the data is translatable, is not TAL-replaced, and we are not
        inside an i18n scope (``self._i18nlevel == 0``), the source file is
        re-parsed and an ``i18n:translate=""`` attribute is added to matching
        elements through the ``replace`` helper. If no matching element or no
        textual match is found, a message is logged instead.

        Finally the i18n nesting level is decremented if it is non-zero.
        """
        # The popped entry shadows the ``tag`` parameter.
        tag, attrs, data = self._history.pop()
        data = data.strip()

        if untranslated._translatable(
                data) and not untranslated._tal_replaced_content(tag, attrs):
            # Not enclosed in an i18n scope; script/style/html are never
            # considered for translation.
            if (self._i18nlevel
                    == 0) and tag not in ['script', 'style', 'html']:
                severity = untranslated._severity(tag, attrs) or ''
                if severity:
                    if untranslated.IGNORE_UNTRANSLATED in attrs.keys():
                        # Ignore untranslated data. This is necessary for
                        # including literal content, that does not need to be
                        # translated.
                        pass
                    elif not untranslated.CHAMELEON_SUBST.match(data):
                        h = HTMLParser.HTMLParser()
                        # Re-parse the whole source file to locate the element.
                        with open(self._filename, 'r') as source_file:
                            bs = BeautifulSoup.BeautifulSoup(
                                source_file, 'html.parser')
                            # NOTE(review): redundant, the ``with`` block
                            # already closes the file on exit.
                            source_file.close()
                        # Search attributes: everything except 'selected'.
                        attr = {}
                        for key in attrs.keys():
                            if key not in ['selected']:
                                attr[key] = attrs.getValue(key)
                        values = bs.findAll(tag.lower(), attrs=attr)
                        if not values:
                            self.log(
                                'i18n:translate missing for this:\n'
                                '"""\n%s\n"""\nTag:<%s> Attrs:%s' %
                                (data.encode('utf8'), tag, attr), severity)
                        for v in values:
                            if not v.has_attr('i18n:translate'):
                                # Search used the lower-cased name; restore the
                                # tag name as popped from the history.
                                v.name = tag
                                escaper = EntitySubstitution()
                                # Second candidate: same node with its text
                                # entity-escaped.
                                substitute = copy(v)
                                if v.string:
                                    substitute.string = escaper.substitute_html(
                                        v.string)
                                # Try the raw node first, then the escaped one.
                                for i in [v, substitute]:
                                    # Markup before the attribute is added ...
                                    pattern = h.unescape(str(i))
                                    i['i18n:translate'] = ""
                                    # ... and after it is added.
                                    substring = h.unescape(str(i))
                                    # ``replace`` is defined outside this view;
                                    # presumably it rewrites the file near the
                                    # given line and returns a truthy value on
                                    # success -- TODO confirm.
                                    match = replace(
                                        self._filename, str(pattern),
                                        str(substring),
                                        self._parser.getLineNumber())
                                    if match:
                                        break
                                if not match:
                                    self.log(
                                        'i18n:translate missing for this:\n'
                                        '"""\n%s\n"""\nPattern: %s' %
                                        (data.encode('utf8'), str(pattern)),
                                        severity)
        # Leaving an element inside an i18n scope: go up one nesting level.
        if self._i18nlevel != 0:
            self._i18nlevel -= 1

コード例 #2

0

ファイルを表示

ファイル: beautiful_print.py プロジェクト: dimlev/beautiful_print

	def make_opening_tag(self, tag_node, self_closing=False):
		"""Render the opening tag of ``tag_node`` as a unicode string.

		List-valued attributes are joined with single spaces, and every
		attribute value is passed through
		``EntitySubstitution.substitute_html``. With ``self_closing`` the tag
		ends in ``/>`` instead of ``>``.
		"""
		pieces = [tag_node.name]
		for key, raw in tag_node.attrs.iteritems():
			# Attribute values stored as lists are flattened to one string.
			text = u" ".join(raw) if isinstance(raw, list) else raw
			pieces.append(u'%s="%s"' % (key, EntitySubstitution.substitute_html(text)))
		if self_closing:
			closer = u"/>"
		else:
			closer = u">"
		return u"<" + u" ".join(pieces) + closer

コード例 #3

0

ファイルを表示

def save_sent_viz_file(x, name, scores, k, args):
	"""Write an HTML file that highlights the selected words of each sample.

	Args:
		x: iterable of samples; each sample is a sequence of word ids in
			which leading zeros are skipped before rendering.
		name: suffix inserted into the output file name
			(``sent_viz_L2X<name>.html``).
		scores: per-sample scores, indexed like ``x`` and forwarded to
			``get_selected_words``.
		k: forwarded to ``get_selected_words``.
		args: object exposing ``outdir``, the directory holding
			``id_to_word.pkl`` and receiving the HTML output.

	Words whose entry in the ``get_selected_words`` result is non-zero are
	wrapped in ``<mark><strong>``; every word is HTML-entity-escaped.
	"""
	escaper = EntitySubstitution()

	# NOTE: pickle must only be loaded from a trusted location.
	with open(os.path.join(args.outdir, 'id_to_word.pkl'),'rb') as f:
		id_to_word = pickle.load(f)

	with open(os.path.join(args.outdir, 'sent_viz_L2X'+name+'.html'), 'w') as txt_file:
		txt_file.write(u"<!DOCTYPE html>\n<html>\n<body>\n".encode("utf-8"))

		for i, x_single in enumerate(x):
			x_selected = get_selected_words(x_single,
				scores[i], id_to_word, k)

			# Index of the first non-zero id. Initialised so that an empty
			# sample neither raises NameError nor reuses the offset left
			# over from the previous sample.
			s_i = 0
			for s_i, s in enumerate(x_single):
				if s != 0:
					break

			# Slice once instead of re-slicing for every word.
			selected_tail = x_selected[s_i:]

			sent_viz = list()
			for wp, wi in enumerate(x_single[s_i:]):
				placeholder = escaper.substitute_html(id_to_word[wi])
				if selected_tail[wp] != 0:
					placeholder = u"<mark><strong>" + placeholder + u"</strong></mark>"
				sent_viz.append(placeholder)

			txt_file.write((u"<p>" + u" ".join(sent_viz) + u"</p><br>\n").encode("utf-8"))
		txt_file.write(u"</body>\n</html>\n".encode("utf-8"))

コード例 #4

0

ファイルを表示

ファイル: gs_wiki.py プロジェクト: peondusud/py.Scriptz

	def extract_content(self):
		soup = BeautifulSoup(self.html, from_encoding="utf-8")
		content = soup.find("div", {"id" : "contenu"})
		#print dir(content)
		#print help(content)
		for x in dir(content):
			print "\t", x.title() ,type(getattr(content, x))
		print content.parent
		#print type(content)
		if content is not None:
			#todo test all function to find the best
			#print content.string
			#print content.contents
			#print content.getText()
			#print content.get_text() #Get all child strings, concatenated using the given separator.
			#print content.getText()
			#print content.Text()
			#print content.Name()
			#print content.strings # Get all child strings, concatenated using the given separator.
			
			print "title", soup.title.string
			
			with open("wiki_content.html", "wb") as myfile:
				
				myfile.write(EntitySubstitution.substitute_html(unicode(content)).encode("UTF-8"))
				#myfile.write(EntitySubstitution.substitute_html(unicode(content)).encode("UTF-8"))
				#myfile.write(unicode(content.prettify(formatter="html")).encode("UTF-8"))
				"""
				for elem in content.contents:
					print type(elem)
					myfile.write(elem.encode("UTF-8"))
				"""
			print "content",type(content)
			#self.content = content.string
			self.content = content.contents
			print type(self.content)
			print len(self.content)

コード例 #5

0

ファイルを表示

ファイル: Encode-HTML.py プロジェクト: cclauss/Pythonista-4

# -*- coding: utf-8 -*-
"""Print the clipboard text with HTML entities substituted."""
from __future__ import print_function

import clipboard
from bs4.dammit import EntitySubstitution

# Read the clipboard once, then emit its entity-escaped form.
text = clipboard.get()
escaped = EntitySubstitution.substitute_html(text)
print(escaped)

コード例 #6

0

ファイルを表示

ファイル: twitter-trends.py プロジェクト: bc564/cm1-1

			trndNo = 0
			trend_list = "NA"
		else:
			try:
				results = twitter.trends.place(_id = loc_id_int)
				trend_list = ""
				for location in results:
						for trend in location["trends"]:
			 				trndNo += 1
			 				print trend["name"]
			 				trend_list +=  trend["name"]
			 				trend_list += ", "
		 		trend_list = trend_list[:-2]
		 		printable = set(string.printable)
				trend_list = filter(lambda x: x in printable, trend_list)
				trend_list = esub.substitute_html(trend_list)
			except:
				trndNo = 0
				trend_list = "NA"
				print "404 "
				htmlStr += "\'],"
				continue

		htmlStr += "</br>Trends#: " + str(trndNo) + "</br>"
		htmlStr += "Trending: " + trend_list + "</br>"

		img1 = per[3]
		img2 = per[4]
		img3 = per[5]
		img4 = per[6]
		img5 = per[7]

コード例 #7

0

ファイルを表示

ファイル: Encode HTML.py プロジェクト: c0ns0le/Pythonista

# -*- coding: utf-8 -*-
# @viticci
# A simple HTML encoder for clipboard contents
#
# The coding declaration must be on line 1 or 2 to take effect, and the
# print function form runs on both Python 2 and Python 3.
from __future__ import print_function

import clipboard
from bs4.dammit import EntitySubstitution

text = clipboard.get()
print(EntitySubstitution.substitute_html(text))

コード例 #8

0

ファイルを表示

 def output_ready(self, substitute_html_entities=False):
     """Return this string wrapped in ``PREFIX`` and ``SUFFIX``.

     When ``substitute_html_entities`` is true the text is first passed
     through ``EntitySubstitution.substitute_html``.
     """
     body = self
     if substitute_html_entities:
         body = EntitySubstitution.substitute_html(self)
     return self.PREFIX + body + self.SUFFIX

コード例 #9

0

ファイルを表示

ファイル: element.py プロジェクト: biddyweb/rentfilter

 def output_ready(self, substitute_html_entities=False):
     """Build the output form: ``PREFIX`` + text + ``SUFFIX``.

     The text is run through ``EntitySubstitution.substitute_html`` only
     when ``substitute_html_entities`` is true; otherwise it is used as is.
     """
     rendered = (EntitySubstitution.substitute_html(self)
                 if substitute_html_entities else self)
     return self.PREFIX + rendered + self.SUFFIX

コード例 #10

0

ファイルを表示

ファイル: 2.py プロジェクト: ramirosacruz/Wattpad-Free-Downloader

def custom_formatter(string):
    """Entity-substitute ``string``, then also escape both quote characters.

    After ``EntitySubstitution.substitute_html``, double quotes become
    ``&quot;`` and single quotes become ``&apos;``.
    """
    escaped = EntitySubstitution.substitute_html(string)
    for quote_char, entity in (('"', '&quot;'), ("'", '&apos;')):
        escaped = escaped.replace(quote_char, entity)
    return escaped

コード例 #11

0

ファイルを表示

ファイル: GFGscraper.py プロジェクト: amukho14/Attempts

def substitute_html_entities(str):
    """Entity-substitute ``str`` but turn curly-quote entities into plain quotes.

    ``&ldquo;`` and ``&rdquo;`` become ``"`` and ``&rsquo;`` becomes ``'``.
    """
    text = EntitySubstitution.substitute_html(str)
    # Applied in order, after the substitution step.
    for entity, plain in (("&ldquo;", "\""), ("&rdquo;", "\""), ("&rsquo;", "'")):
        text = text.replace(entity, plain)
    return text

コード例 #12

0

ファイルを表示

ファイル: batch.py プロジェクト: othesoluciones/TFM

def envioMail():
    """Send the daily alert notification e-mails (Python 2 code).

    Steps, as visible in this function:

    1. Log in to Gmail's SMTP server over STARTTLS.
    2. Read ``coleccion_notificaciones`` from MongoDB and keep the documents
       whose ``fdesde``..``fhasta`` range (``dd/mm/YYYY``) contains today.
    3. For each distinct e-mail address, build one HTML message with the
       alert levels for today, tomorrow and the day after, read from the
       ``PrediccionOTHE`` collection for each subscribed municipality.
    4. Attach ``static/style/logo.jpg`` under the Content-ID ``<logo>`` and
       send the message.

    The SMTP connection is closed whether or not any mail was sent.

    NOTE(review): ``smtplib`` and ``elimina_tildes`` are not defined in this
    function; presumably they come from module level -- confirm.
    """
    print "Comenzamos envioMail"
    import base64
    from email.mime.multipart import MIMEMultipart
    from email.mime.text import MIMEText
    from email.mime.image import MIMEImage
    # Open the connection to Gmail's SMTP server
    mailServer = smtplib.SMTP('smtp.gmail.com',587)
    mailServer.ehlo()
    mailServer.starttls()
    mailServer.ehlo()
    # NOTE(review): base64 is an encoding, not encryption; this is a
    # hard-coded credential and should come from configuration instead.
    password = base64.b64decode("Q29uc3RhbmNpYTIx")
    mailServer.login("*****@*****.**",password)
    # We build a Multipart message, with a text part and an attached image
	# Set the sender account
    cuentaDesde = "*****@*****.**"



    from pymongo import MongoClient as Connection
    from pymongo import DESCENDING


    # NOTE(review): the connection string (with credentials) is hard-coded,
    # and MONGODB_URI is assigned twice with the same value.
    cadenaCon= 'mongodb://*****:*****@ds029635.mlab.com:29635/othesoluciones1'
    MONGODB_URI =cadenaCon
    MONGODB_URI = 'mongodb://*****:*****@ds029635.mlab.com:29635/othesoluciones1'


    db = Connection(MONGODB_URI).othesoluciones1

    import datetime
    import numpy as np
    import pandas as pd
    # Today's date, round-tripped through a string so that it becomes a
    # datetime at midnight, comparable with the parsed fdesde/fhasta values.
    fecha = (datetime.date.today()+datetime.timedelta(days=0)).strftime('%d/%m/%Y')
    fecha = datetime.datetime.strptime(fecha,'%d/%m/%Y')
    print "Fecha de hoy-->", fecha
    # One row per active notification: column 0 = email, 1 = municipio,
    # 2 = fhasta.
    dfmm = pd.DataFrame()
    for doc in db.coleccion_notificaciones.find():
        if ((datetime.datetime.strptime(doc['fdesde'],'%d/%m/%Y')<= fecha) and (fecha <= datetime.datetime.strptime(doc['fhasta'],'%d/%m/%Y'))):

            df_aux=pd.DataFrame([doc['email'],doc['municipio'], doc['fhasta']])

            # NOTE(review): DataFrame.append was removed in pandas 2.0.
            dfmm= dfmm.append(df_aux.T, ignore_index=True)
            

    print "****************************************************************"



    from lxml import etree
    import time
    # ``doc`` is reused here for the parsed municipalities XML.
    doc=etree.parse("static/Municipios/madrid.xml")
    muni=doc.findall("municipio")

    print dfmm
    if (len(dfmm)>0):
		print "Existen notificaciones que enviar"
		# Get the list of distinct e-mail addresses
		for j in dfmm[0].unique():
		    # We build a Multipart message that will hold text and an attached image
			# The body text depends on how many subscriptions the user has active for today
			texto=""
			mensaje = MIMEMultipart()
			mensaje['From']=cuentaDesde
			cuentaPara=j
			mensaje['To']=cuentaPara
			# NOTE(review): ``.ix`` was removed in pandas 1.0.
			for i in range(0, len(dfmm)):                 
				if (dfmm.ix[i,0]==j):
					for k in range(0,len(muni)):
						# A municipality matches when the last 5 characters of
						# its "value" attribute equal the stored municipio.
						if (muni[k].attrib["value"][-5:]==dfmm.ix[i,1]):
							hoy = (datetime.date.today()+datetime.timedelta(days=0)).strftime('%d-%m-%Y')
							manana=(datetime.date.today()+datetime.timedelta(days=1)).strftime('%d-%m-%Y')
							pasadomanana=(datetime.date.today()+datetime.timedelta(days=2)).strftime('%d-%m-%Y')
							collection1 = db.PrediccionOTHE
							name2 =  elimina_tildes(unicode(muni[k].text[:]))
							# NOTE(review): find_one returns None when nothing
							# matches; the lookups below would then raise.
							cursor1 = collection1.find_one({"Municipio": name2})
							predHoy = cursor1["Alerta "+hoy]
							predManana= cursor1["Alerta "+manana]
							predPasadoManana=cursor1["Alerta "+pasadomanana]
							from bs4.dammit import EntitySubstitution
							# Entity-escape the municipality name for the HTML body.
							unsubbed = unicode(muni[k].text[:])
							esub = EntitySubstitution()
							subbed = esub.substitute_html(unsubbed)
							print "Activa hasta el: ", dfmm.ix[i,2]
							fhasta = str(dfmm.ix[i,2]).replace("/","-")
							texto = texto+str("<h3>"+subbed+":</h3><p> </p>")
							texto = texto+str("<p>El Nivel de Alerta de Gram&iacute;neas para el d&iacute;a " +hoy+" es: <b>"+str((predHoy))+"</b></p>")
							texto = texto+str("<p>El Nivel de Alerta de Gram&iacute;neas para el d&iacute;a " +manana+" es: <b>"+str((predManana))+"</b></p>")
							texto = texto+str("<p>El Nivel de Alerta de Gram&iacute;neas para el d&iacute;a " +pasadomanana+" es: <b>"+str((predPasadoManana))+"</b></p>")
							# Different closing line on the last day of the subscription.
							if (hoy!=fhasta):
							   texto = texto+str("<p>Recibir&aacute; esta notificaci&oacute;n hasta el: <b>"+fhasta+"</b></p>")
							else:
							    texto = texto+str("<p>Hoy d&iacute;a <b>"+fhasta+"</b> es el &uacute;ltimo en el que recibir&aacute; esta notificaci&oacute;n</p>")
							texto = texto+str("<hr>")
							
			# Set the e-mail subject
			# NOTE(review): ``hoy`` is only assigned inside the innermost loop
			# above; if no municipality matched it is unbound (or left over
			# from a previous address) here.
			mensaje['Subject']= hoy+". Servicio de Notificaciones"
			# Set the text shared by all e-mails
			html_inic = """\
				<html>
					<head></head>
					<body>
					<p>Buenos d&iacute;as,</p>
					<p>Estas son las notificaciones que ha solicitado:</p><br></br>"""  
			html_fin="""\
			    <br></br>
				<p>Deseamos que pase un gran d&iacute;a.</p>
				<p>Para m&aacute;s informaci&oacute;n puede consultar nuestra web: http://gramineas-madrid.herokuapp.com/</p>
				<p>Reciba un cordial saludo por parte del equipo de Othe Soluciones</p>
				<img src="cid:logo" alt="Othe Soluciones" height="52" width="52"></img>
				</html>"""
			# And join everything into a single string
			html=str(html_inic+texto+html_fin)
			
			# Assemble the whole message body
			mensaje.attach(MIMEText(html,'html'))
			
			# Attach the image
			# NOTE(review): this file handle is never closed explicitly.
			file = open("static/style/logo.jpg", "rb")			
			contenido = MIMEImage(file.read())
			contenido.add_header('Content-ID', '<logo>')
			mensaje.attach(contenido)
			print "Envio mail a: ", cuentaPara
			# Send the mail, with the from and to fields.
			mailServer.sendmail(cuentaDesde, cuentaPara, mensaje.as_string())
		# Close the connection
		mailServer.close()
		print "Fin de envioMail con emails enviados"
    else:
	    # Close the connection
		mailServer.close()
		print "Fin de envioMail no habia emails que enviar"

コード例 #13

0

ファイルを表示

 def _html_entities(self, string):
     if '&' in string:
         return string
     else:
         return EntitySubstitution.substitute_html(string)

コード例 #14

0

ファイルを表示

ファイル: bueatifulSoapHTML_newLines.py プロジェクト: 79man/pythonStuff

def uppercase_and_substitute_html_entities(string):
    """Entity-substitute ``string`` and strip every newline from the result.

    Despite the name, no upper-casing is performed here.
    """
    return EntitySubstitution.substitute_html(string).replace('\n', '')

コード例 #15

0

ファイルを表示

def encode_url(string):
    """Return ``string`` with HTML entities substituted.

    A fresh ``EntitySubstitution`` instance is created for each call.
    """
    return EntitySubstitution().substitute_html(string)

コード例 #16

0

ファイルを表示

ファイル: GFGscraper.py プロジェクト: amukho14/GeeksForGeeks_Summarizer

def substitute_html_entities(str):
    """Entity-substitute ``str``, then map curly-quote entities to ASCII quotes."""
    substituted = EntitySubstitution.substitute_html(str)
    # Both double curly quotes collapse to a plain double quote.
    without_left = substituted.replace("&ldquo;", "\"")
    without_double = without_left.replace("&rdquo;", "\"")
    return without_double.replace("&rsquo;", "'")