def _parse(self, text):
    """Parse ``text`` into a document tree, falling back to soupparser.

    The input is first decoded as UTF-8 and handed to the module-level
    ``fromstring`` (presumably ``lxml.html.fromstring`` -- confirm against
    the file's imports).  If decoding or parsing raises anything derived
    from ``Exception``, the error is reported through ``logger.exception``
    and the raw, undecoded ``text`` is parsed with
    ``lxml.html.soupparser.fromstring`` instead.

    :param text: markup to parse; it is passed to ``unicode(text, 'utf-8')``,
        so a byte string is expected.

    NOTE(review): the visible code binds ``doc`` but never returns or
    stores it -- confirm whether a trailing ``return doc`` was lost.
    """
    try:
        doc = fromstring(unicode(text, 'utf-8'))
    except Exception as err:
        logger.exception(err)
        # Imported lazily so the soupparser module is only loaded when the
        # primary parse has actually failed.
        from lxml.html.soupparser import fromstring as soup_fromstring
        doc = soup_fromstring(text)
def __read_document(self, page):
    """Read ``page`` to the end, close it, and parse the content.

    The content is decoded as UTF-8 and handed to the module-level
    ``fromstring`` (presumably ``lxml.html.fromstring`` -- confirm against
    the file's imports).  If decoding or parsing raises anything derived
    from ``Exception``, the error is reported through ``logger.exception``
    and the raw, undecoded content is parsed with
    ``lxml.html.soupparser.fromstring`` instead.

    :param page: object providing ``read()`` and ``close()``.
    :raises: whatever ``page.read()`` raises; ``page`` is still closed
        before the error propagates.

    NOTE(review): the visible code binds ``doc`` but never returns or
    stores it -- confirm whether a trailing ``return doc`` was lost.
    """
    try:
        text = page.read()
    finally:
        # Close even when read() fails so the underlying handle is not
        # leaked; on success this runs at the same point as before.
        page.close()

    try:
        t = unicode(text, 'utf-8')
        doc = fromstring(t)
    except Exception as e:
        logger.exception(e)
        # Imported lazily so the soupparser module is only loaded when the
        # primary parse has actually failed.
        from lxml.html.soupparser import fromstring as fromstring_soup
        doc = fromstring_soup(text)