Esempio n. 1
0
 def parse(self, data, encoding=None):
     """Clean up known-bad markup in *data*, then parse it as HTML.

     The whole stream is read with ``data.read()`` and three plain
     substring substitutions are applied, in order, before the text is
     wrapped in a ``StringIO`` and handed to ``LxmlHtmlParser.parse``:

     * ``<<``    -> ``<``
     * ``cite>`` -> ``i>``
     * ``tt>``   -> ``i>``

     Because the last two patterns omit the leading ``<`` / ``</``, they
     match both the opening and the closing form of the tag.

     :param data: file-like object exposing ``read()``.
     :param encoding: forwarded unchanged to ``LxmlHtmlParser.parse``.
     :returns: whatever ``LxmlHtmlParser.parse`` returns.
     """
     # NOTE(review): presumably a workaround for malformed markup emitted
     # by the target site's template engine -- confirm before removing.
     substitutions = (('<<', '<'), ('cite>', 'i>'), ('tt>', 'i>'))

     text = data.read()
     for broken, fixed in substitutions:
         text = text.replace(broken, fixed)

     return LxmlHtmlParser.parse(self, StringIO(text), encoding)
Esempio n. 2
0
 def get_image(self, id):
     """Download the image shown on the unsee.cc page for *id*.

     Opens ``https://unsee.cc/<id>/``, selects the ``src`` attribute of
     every ``<img>`` whose ``src`` starts with ``/image/``, resolves the
     first match against ``https://unsee.cc`` and reads that URL.

     :param id: page identifier interpolated into the page URL.
     :returns: whatever ``self.readurl`` returns for the image URL.
     """
     page = self.openurl('https://unsee.cc/%s/' % id)
     tree = self.get_document(page)
     sources = LxmlHtmlParser.select(
         tree, '//img/@src[starts-with(., "/image/")]', method='xpath')

     # Only the first match is used. NOTE(review): if the selection is
     # empty this indexing fails (IndexError, assuming a list result).
     image_url = urljoin('https://unsee.cc', sources[0])
     return self.readurl(image_url)
Esempio n. 3
0
 def __init__(self, browser, response, *args, **kwargs):
     """Initialise the page and parse the response body into ``self.doc``.

     All arguments are forwarded to the parent initialiser. The body
     (``response.content``) is then wrapped in a ``StringIO`` and parsed
     by a new ``LxmlHtmlParser``, using ``response.encoding``.

     :param browser: forwarded to the parent initialiser.
     :param response: object providing ``content`` and ``encoding``.
     """
     super(HTMLPage, self).__init__(browser, response, *args, **kwargs)

     html_parser = LxmlHtmlParser()
     body = StringIO(response.content)
     self.doc = html_parser.parse(body, response.encoding)