def parse(self, data, encoding=None):
    """Clean up known-bad markup in ``data`` and parse it as HTML.

    The whole stream is read, a fixed list of textual substitutions is
    applied in order, and the result is handed to
    ``LxmlHtmlParser.parse`` wrapped in a fresh ``StringIO``.

    :param data: file-like object; only its ``read()`` method is used
    :param encoding: passed through unchanged to ``LxmlHtmlParser.parse``
    :returns: whatever ``LxmlHtmlParser.parse`` returns
    """
    # Workarounds for broken markup (the original author blames Templeet):
    # a doubled '<' is collapsed, and anything ending in 'cite>' or 'tt>'
    # is turned into the matching 'i>' form. Order matters and is kept
    # exactly as the substitutions were originally chained.
    substitutions = (
        ('<<', '<'),
        ('cite>', 'i>'),
        ('tt>', 'i>'),
    )
    markup = data.read()
    for broken, fixed in substitutions:
        markup = markup.replace(broken, fixed)
    return LxmlHtmlParser.parse(self, StringIO(markup), encoding)
def get_image(self, id):
    """Download the image published on the unsee.cc page for ``id``.

    The page ``https://unsee.cc/<id>/`` is opened and parsed, the ``src``
    attributes of ``<img>`` elements whose value starts with ``/image/``
    are selected via XPath, and the first match is resolved against the
    site root and fetched.

    :param id: identifier interpolated into the page URL
    :returns: the result of ``self.readurl`` for the image URL
        (presumably the raw image data -- confirm against ``readurl``)
    :raises IndexError: if the page contains no matching image
    """
    doc = self.get_document(self.openurl('https://unsee.cc/%s/' % id))
    images = LxmlHtmlParser.select(doc, '//img/@src[starts-with(., "/image/")]', method='xpath')
    if not images:
        # Previously this surfaced as a bare "list index out of range" from
        # images[0]. The exception type is kept so existing handlers still
        # match, but the message now says which page had no image.
        raise IndexError('no image found on unsee.cc page for id %r' % (id,))
    url = urljoin('https://unsee.cc', images[0])
    return self.readurl(url)
def __init__(self, browser, response, *args, **kwargs):
    """Initialise the page and parse the response body into ``self.doc``.

    All arguments are forwarded to the parent initialiser first; the
    response content is then parsed with a new ``LxmlHtmlParser`` using
    the encoding reported by the response.

    :param browser: forwarded to the parent ``__init__``
    :param response: object exposing ``content`` and ``encoding``
    """
    super(HTMLPage, self).__init__(browser, response, *args, **kwargs)
    html_parser = LxmlHtmlParser()
    # The parser is given a file-like object, so wrap the body in one.
    body_stream = StringIO(response.content)
    self.doc = html_parser.parse(body_stream, response.encoding)