def from_url(url, xpath=None):
    """Fetch *url* and build a Document from the cleaned response body.

    The URL is opened with ``urllib2.urlopen``, the whole body is read, and
    the raw content is passed through ``html.clean`` before being wrapped in
    a ``Document``.

    Args:
        url: Location to fetch; handed directly to ``urllib2.urlopen``.
        xpath: Optional value forwarded unchanged to ``html.clean``
            (presumably an XPath expression narrowing the content to
            keep -- confirm against ``html.clean``).

    Returns:
        A ``Document`` constructed from the cleaned text.

    Raises:
        Whatever ``urllib2.urlopen``, ``read`` or ``html.clean`` raise;
        nothing is caught here.
    """
    response = urllib2.urlopen(url)
    try:
        content = response.read()
    finally:
        # Close explicitly so the connection is released even when read()
        # fails, instead of waiting for garbage collection.
        response.close()
    text = html.clean(content, xpath)
    return Document(text)
def from_html(content, xpath=None):
    """Build a Document from HTML *content* that is already in memory.

    Args:
        content: Raw markup handed to ``html.clean``.
        xpath: Optional value forwarded unchanged to ``html.clean``
            (presumably an XPath expression -- confirm against
            ``html.clean``).

    Returns:
        A ``Document`` wrapping whatever ``html.clean`` returns.
    """
    return Document(html.clean(content, xpath))