Ejemplo n.º 1
0
 def _parse(self, text):
     """Parse ``text`` into a document tree bound to the local ``doc``.

     ``text`` is first decoded as UTF-8 and handed to ``fromstring``
     (defined outside this block; presumably ``lxml.html.fromstring`` --
     confirm against the file's imports).  If decoding or parsing raises
     any ``Exception``, the error is logged with its traceback and the
     undecoded ``text`` is parsed with lxml's BeautifulSoup-backed parser
     instead.
     """
     try:
         doc = fromstring(unicode(text, 'utf-8'))
     except Exception as err:
         logger.exception(err)
         # Imported only on the fallback path, so the soup parser is not
         # loaded unless the primary parse has actually failed.
         from lxml.html import soupparser
         doc = soupparser.fromstring(text)
Ejemplo n.º 2
0
 def __read_document(self, page):
     """Read ``page`` fully, close it, and parse the content into ``doc``.

     ``page`` must provide ``read()`` and ``close()``.  It is closed even
     when ``read()`` raises, so the handle is never leaked; the read error
     then propagates to the caller.

     The content is decoded as UTF-8 and handed to ``fromstring`` (defined
     outside this block; presumably ``lxml.html.fromstring`` -- confirm
     against the file's imports).  If decoding or parsing raises any
     ``Exception``, the error is logged with its traceback and the raw,
     undecoded content is parsed with lxml's BeautifulSoup-backed parser
     instead.
     """
     try:
         text = page.read()
     finally:
         # Always release the page, including when read() fails.
         page.close()
     try:
         t = unicode(text, 'utf-8')
         doc = fromstring(t)
     except Exception as e:
         logger.exception(e)
         # Imported only on the fallback path, so the soup parser is not
         # loaded unless the primary parse has actually failed.
         from lxml.html.soupparser import fromstring as fromstring_soup
         doc = fromstring_soup(text)