def _decode_htmlescapes(self, s):
    """Unescape HTML entities in ``s`` and return the result as unicode.

    The input is handled as plain text rather than markup: every literal
    ``<`` and ``>`` is turned into an entity before parsing, so
    BeautifulStoneSoup does not try to read it as a tag.

    :param s: text containing HTML character entities.
    :returns: ``unicode`` string with the entities converted to characters.
    """
    # Function-scope import: BeautifulSoup (3.x API) is only required when
    # this method is actually called. For badly formed HTML another parser
    # class such as MinimalSoup can be used; see the BeautifulSoup source.
    from BeautifulSoup import BeautifulStoneSoup as btflsoup

    # The author's "sm2004" source also escaped the '&' of already escaped
    # sequences (e.g. '&amp;lt;'); undo that extra level of escaping first.
    s = re.sub(u'&amp;', u'&', s)
    # Bare '<' or '>' characters, which minidom accepted, confuse
    # BeautifulSoup, so escape them here; the entity conversion below turns
    # them back into the literal characters.
    s = re.sub(u'>', u'&gt;', s)
    s = re.sub(u'<', u'&lt;', s)
    return unicode(btflsoup(s, convertEntities=btflsoup.HTML_ENTITIES))
def _decode_htmlescapes(self, s):
    """Unescape HTML entities in ``s`` and return the result as unicode.

    The input is handled as plain text rather than markup: every literal
    ``<`` and ``>`` is turned into an entity before parsing, so
    BeautifulStoneSoup does not try to read it as a tag.

    :param s: text containing HTML character entities.
    :returns: ``unicode`` string with the entities converted to characters.
    """
    # Function-scope import: BeautifulSoup (3.x API) is only required when
    # this method is actually called. For badly formed HTML another parser
    # class such as MinimalSoup can be used; see the BeautifulSoup source.
    from BeautifulSoup import BeautifulStoneSoup as btflsoup

    # The author's "sm2004" source also escaped the '&' of already escaped
    # sequences (e.g. '&amp;lt;'); undo that extra level of escaping first.
    s = re.sub(u'&amp;', u'&', s)
    # Bare '<' or '>' characters, which minidom accepted, confuse
    # BeautifulSoup, so escape them here; the entity conversion below turns
    # them back into the literal characters.
    s = re.sub(u'>', u'&gt;', s)
    s = re.sub(u'<', u'&lt;', s)
    return unicode(btflsoup(s, convertEntities=btflsoup.HTML_ENTITIES))