def parse(self, data, encoding=None):
    """Clean up known-bad markup in *data*, then parse it as HTML.

    The whole stream is read, three literal substring substitutions are
    applied in order, and the result is handed to
    ``LxmlHtmlParser.parse`` wrapped in a fresh ``StringIO``.

    :param data: file-like object; its ``read()`` result must support
        ``replace`` with the literals below.
    :param encoding: forwarded unchanged to ``LxmlHtmlParser.parse``.
    :returns: whatever ``LxmlHtmlParser.parse`` returns.
    """
    # Workaround for malformed pages (the original author's note blames
    # Templeet-generated markup). Order matters: each substitution runs
    # on the output of the previous one.
    substitutions = (
        ('<<', '<'),       # collapse a doubled opening bracket
        ('cite>', 'i>'),   # any text ending in "cite>" becomes "i>"
        ('tt>', 'i>'),     # any text ending in "tt>" becomes "i>"
    )
    markup = data.read()
    for broken, fixed in substitutions:
        markup = markup.replace(broken, fixed)
    cleaned = StringIO(markup)
    return LxmlHtmlParser.parse(self, cleaned, encoding)
def __init__(self, browser, response, *args, **kwargs):
    """Initialise the page and parse the response body into ``self.doc``.

    All arguments are forwarded to the parent initializer first; the
    response content is then parsed with a fresh ``LxmlHtmlParser``.

    :param browser: forwarded to the parent initializer.
    :param response: object exposing ``content`` (the body to parse) and
        ``encoding`` (passed to the parser).
    :param args: extra positional arguments for the parent initializer.
    :param kwargs: extra keyword arguments for the parent initializer.
    """
    super(HTMLPage, self).__init__(browser, response, *args, **kwargs)

    # Parse explicitly with LxmlHtmlParser and store the result on the
    # instance, after the parent initializer has run.
    html_parser = LxmlHtmlParser()
    body_stream = StringIO(response.content)
    charset = response.encoding
    self.doc = html_parser.parse(body_stream, charset)