def fromstring(self, html):
    """Parse ``html`` with lxml, cache the tree on ``self.doc`` and return it.

    The markup is first passed through ``encodeValue`` (helper defined
    elsewhere). If lxml fails to parse that value, it is encoded to UTF-8
    bytes, with unencodable characters replaced, and parsed once more.
    An error raised by the second attempt propagates to the caller.
    """
    html = encodeValue(html)
    try:
        self.doc = lxml.html.fromstring(html)
    except Exception:
        # Best-effort retry on bytes. Catch Exception rather than using a
        # bare ``except`` so KeyboardInterrupt/SystemExit are not swallowed.
        html = html.encode('utf-8', 'replace')
        self.doc = lxml.html.fromstring(html)
    return self.doc
def fromstring(self, html):
    """Parse ``html`` with lxml, cache the tree on ``self.doc`` and return it.

    The markup is first passed through ``encodeValue`` (helper defined
    elsewhere). If lxml fails to parse that value, it is encoded to ASCII
    bytes and parsed once more. An error raised by the second attempt
    propagates to the caller.
    """
    html = encodeValue(html)
    try:
        self.doc = lxml.html.fromstring(html)
    except Exception:
        # Best-effort retry on bytes. Catch Exception rather than using a
        # bare ``except`` so KeyboardInterrupt/SystemExit are not swallowed.
        # NOTE: the "replace" handler turns every non-ASCII character into
        # "?", so this fallback is lossy for non-ASCII documents.
        html = html.encode("ascii", "replace")
        self.doc = lxml.html.fromstring(html)
    return self.doc
def fromstring(self, html):
    """Parse ``html`` into an lxml tree, honouring a declared encoding.

    ``get_encodings_from_content`` is asked for the encodings found in the
    markup and the first one, if any, is used. With an encoding, the markup
    is converted via ``smart_str`` and parsed by an ``HTMLParser`` configured
    for that encoding. Without one, the markup goes through ``encodeValue``
    and the default parser. The tree is stored on ``self.doc`` and returned.
    """
    declared = get_encodings_from_content(html)
    charset = declared[0] if declared else None

    if charset:
        # Encoding known: convert the input and tell lxml how to decode it.
        converted = smart_str(html, encoding=charset)
        charset_parser = lxml.html.HTMLParser(encoding=charset)
        self.doc = lxml.html.fromstring(converted, parser=charset_parser)
        return self.doc

    # No usable encoding declaration: fall back to the generic path.
    self.doc = lxml.html.fromstring(encodeValue(html))
    return self.doc
def fromstring(self, html):
    """Parse ``html`` with ``lxml.html.fromstring`` and return the tree.

    The markup is passed through ``encodeValue`` (helper defined elsewhere)
    before parsing. The resulting tree is also stored on ``self.doc``.
    """
    prepared = encodeValue(html)
    self.doc = lxml.html.fromstring(prepared)
    return self.doc
def fromstring(self, html):
    """Parse ``html`` with ``soupparser.fromstring`` and return the tree.

    The markup is passed through ``encodeValue`` (helper defined elsewhere)
    before parsing. The resulting tree is also stored on ``self.doc``.
    """
    prepared = encodeValue(html)
    self.doc = soupparser.fromstring(prepared)
    return self.doc
def fromstring(self, html):
    """Parse ``html`` with lxml's ``soupparser`` and return the tree.

    The markup is passed through ``encodeValue`` (helper defined elsewhere)
    before parsing. The resulting tree is also stored on ``self.doc``.
    """
    # Imported at call time rather than at module level.
    # NOTE(review): presumably so the soupparser dependency is only needed
    # when this parser is actually used; confirm.
    from lxml.html import soupparser

    prepared = encodeValue(html)
    self.doc = soupparser.fromstring(prepared)
    return self.doc