Example #1
0
    def __init__(self, url, content):
        """Build two BeautifulSoup trees from the same HTML content.

        ``_massaged_soup`` is parsed from a copy of ``content`` in which
        hexadecimal numeric character references (``&#x41;``) have been
        rewritten in decimal notation (``&#65;``).  ``_unmassaged_soup`` is
        parsed from ``content`` exactly as given.

        :param url: forwarded unchanged to ``Parser.__init__``.
        :param content: HTML markup as text; forwarded to ``Parser.__init__``
            and parsed twice here.
        """
        Parser.__init__(self, url, content)

        # Rewrite hexadecimal character references in decimal notation.
        # Only real hex digits are captured: a looser pattern would hand
        # malformed text such as '&#xZZ;' to int(..., 16) and raise
        # ValueError, so malformed references are deliberately left as-is.
        massaged_html = re.sub(
            r'&#x([0-9a-fA-F]+);',
            lambda m: '&#%d;' % int(m.group(1), 16),
            content,
        )

        # Tree built from the markup with decimal-normalised references.
        self._massaged_soup = BeautifulSoup(massaged_html, features='html')
        # Tree built from the original, unmodified markup.
        self._unmassaged_soup = BeautifulSoup(content, features='html')
Example #2
0
 def __init__(self, url, content):
     """Initialise the base ``Parser`` and parse ``content`` into a soup.

     :param url: forwarded unchanged to ``Parser.__init__``.
     :param content: HTML markup; forwarded to ``Parser.__init__`` and
         parsed with BeautifulSoup into ``self._soup``.
     """
     Parser.__init__(self, url, content)

     # Parse once up front and keep the resulting tree on the instance.
     parsed_tree = BeautifulSoup(content, features="html")
     self._soup = parsed_tree