def _load_doc(self, url, html):
    """Parse already-fetched markup into a DOM.

    Args:
        url: Address the markup came from; forwarded to the parser and
            included in the error log if parsing fails.
        html: Markup text handed to ``page_parser.parse_unicode``.

    Returns:
        Whatever ``page_parser.parse_unicode`` returns, or ``None`` when
        the parser raises.
    """
    try:
        dom = page_parser.parse_unicode(html, url, notify=logging.info)
    except Exception:
        # Best-effort parse: report the failure and let the caller decide.
        # ``except Exception`` (not a bare ``except``) lets KeyboardInterrupt
        # and SystemExit propagate. The ``%s`` placeholder is required: the
        # logging module %-formats the message with the extra arguments, and
        # without it the record fails to format and the URL is lost.
        logging.exception("raw doc can not be parsed: %s", url)
        return None
    return dom
def load_dom(url, encoding=None):
    """Load the body for ``url`` and parse it into a DOM.

    Args:
        url: Address passed to ``load_body`` and then to the parser.
        encoding: Forwarded unchanged to ``load_body``; defaults to ``None``.

    Returns:
        The result of ``page_parser.parse_unicode(body, url)``, or ``None``
        when ``load_body`` returns ``None``.
    """
    body = load_body(url, encoding)
    # Identity check: ``None`` is a singleton, and ``is`` cannot be fooled
    # by a body object that overrides ``__eq__``.
    if body is None:
        return None
    return page_parser.parse_unicode(body, url)