Example #1
0
    def _decode_htmlescapes(self, s):
        """Unescape HTML code.

        The text is normalised in three steps before parsing: doubly-escaped
        ampersands ("&amp;") are collapsed to "&", then every literal ">" and
        "<" is turned into an entity. The result is parsed with
        BeautifulStoneSoup, asking it to convert HTML entities.

        :param s: text containing HTML entity references.
        :returns: ``unicode()`` of the parsed soup.
        """
        # In case of badly formatted HTML you can import MinimalSoup etc.;
        # see the BeautifulSoup source code.
        from BeautifulSoup import BeautifulStoneSoup as btflsoup

        # Per the original author's note, sm2004 also escaped the & char
        # inside escaped sequences, so undo that extra level first. The
        # pattern has to be the literal "&amp;": substituting "&" with "&"
        # leaves the string unchanged.
        s = re.sub(u'&amp;', u'&', s)
        # Unescaped solitary < or > chars that were OK for minidom confuse
        # BeautifulSoup, so every one of them is replaced by its entity.
        s = re.sub(u'>', u'&gt;', s)
        s = re.sub(u'<', u'&lt;', s)

        return unicode(btflsoup(s, convertEntities=btflsoup.HTML_ENTITIES))
Example #2
0
    def _decode_htmlescapes(self, s):
        """Return ``s`` with its HTML escapes decoded via BeautifulStoneSoup.

        Before parsing, "&amp;" is collapsed to "&" and every literal ">" and
        "<" is rewritten as "&gt;" / "&lt;". The parsed soup is returned as
        ``unicode``.
        """
        # For badly formed HTML another parser class (MinimalSoup, ...) from
        # the BeautifulSoup module could be imported here instead.
        from BeautifulSoup import BeautifulStoneSoup as StoneSoup

        # Applied top to bottom, in this order:
        #   1. the author's sm2004 data also escaped the & inside escape
        #      sequences, so that extra level is removed;
        #   2./3. solitary > and < were fine for minidom but confuse
        #      BeautifulSoup, so they become entities.
        substitutions = (
            (u'&amp;', u'&'),
            (u'>', u'&gt;'),
            (u'<', u'&lt;'),
        )
        for pattern, replacement in substitutions:
            s = re.sub(pattern, replacement, s)

        soup = StoneSoup(s, convertEntities=StoneSoup.HTML_ENTITIES)
        return unicode(soup)