def _decode_htmlescapes(self, s):
    """Unescape HTML code.

    Collapses double-escaped ampersands (``&amp;`` -> ``&``) and then
    round-trips the text through BeautifulSoup's ``html.parser`` backend.

    Args:
        s: Text that may contain HTML entities and/or markup.

    Returns:
        The string serialisation of the parsed document.
    """
    # Imported inside the method, so bs4 is only needed when it is called.
    # For badly formed HTML a different parser backend may work better;
    # see the BeautifulSoup documentation.
    from bs4 import BeautifulSoup

    # My sm2004 also escaped the '&' char inside escape sequences
    # (e.g. "&amp;lt;"), so strip that extra level of escaping first.
    s = s.replace("&amp;", "&")

    # NOTE: unescaped solitary '<' or '>' chars that were fine for minidom
    # can confuse BeautifulSoup; the substitutions that used to handle them
    # are currently disabled.
    return str(BeautifulSoup(s, "html.parser"))
def _decode_htmlescapes(self, s):
    """Unescape HTML code.

    The input first has its double-escaped ampersands reduced
    (``&amp;`` becomes ``&``); the result is then parsed with
    BeautifulSoup (``html.parser``) and serialised back to a string.

    Args:
        s: Source text, possibly containing HTML entities or tags.

    Returns:
        ``str()`` of the BeautifulSoup document built from the cleaned text.
    """
    # Function-local import: bs4 is only required once this method runs.
    # If the HTML is badly formed, another parser backend can be used
    # instead; see the BeautifulSoup documentation.
    from bs4 import BeautifulSoup

    # My sm2004 also escaped the '&' char within escaped sequences, which
    # yields things like "&amp;gt;". Undo that before parsing.
    cleaned = s.replace("&amp;", "&")

    # NOTE: lone '<' or '>' chars (accepted by minidom) may confuse
    # BeautifulSoup. The substitutions meant to deal with them are disabled.
    soup = BeautifulSoup(cleaned, "html.parser")
    return str(soup)
def _decode_htmlescapes(self, s):
    """Unescape HTML code.

    Reduces double-escaped ampersands (``&amp;`` -> ``&``), parses the
    text with BeautifulSoup's ``html.parser`` backend and returns the
    parsed document as text.

    Args:
        s: Text that may contain HTML entities and/or markup.

    Returns:
        The string serialisation of the parsed document.
    """
    # Imported inside the method, so bs4 is only needed when it is called.
    # For badly formed HTML a different parser backend may work better;
    # see the BeautifulSoup documentation.
    from bs4 import BeautifulSoup

    # My sm2004 also escaped the '&' char inside escape sequences
    # (e.g. "&amp;lt;"), so strip that extra level of escaping first.
    s = s.replace(u"&amp;", u"&")

    # NOTE: unescaped solitary '<' or '>' chars that were fine for minidom
    # can confuse BeautifulSoup. The earlier substitutions for them replaced
    # each char with itself (no effect), so they are omitted here.

    # bs4 always converts entities while parsing, so the BeautifulSoup 3
    # ``convertEntities=...HTML_ENTITIES`` argument is not passed; ``str``
    # replaces the Python 2-only ``unicode`` builtin.
    return str(BeautifulSoup(s, "html.parser"))