def get_summary(url):
    """Fetch *url* and return its readable article markup and short title.

    Args:
        url: Location passed straight to ``urllib.request.urlopen``.

    Returns:
        A ``(readable_article, readable_title)`` tuple holding the results of
        ``doc.summary()`` and ``doc.short_title()`` respectively.

    Raises:
        urllib.error.URLError: Propagated from ``urlopen`` when the fetch
            fails.
    """
    # Close the response deterministically instead of relying on garbage
    # collection to release the underlying connection.
    with urllib.request.urlopen(url) as response:
        html = response.read()

    doc = Document(html)
    # NOTE(review): presumably pre-computes the requested fields; Document is
    # defined outside this block -- confirm against its library version.
    doc.parse(["summary", "short_title"])
    readable_article = doc.summary()
    readable_title = doc.short_title()
    return readable_article, readable_title
def scrape(URL):
    """Return the text of the article found at URL.

    Some whitespace changes will usually occur.

    Args:
        URL: Location passed straight to ``urllib.request.urlopen``.

    Returns:
        The result of ``get_text()`` on the ``doc.summary()`` markup parsed
        with BeautifulSoup's ``'html.parser'``.

    Raises:
        urllib.error.URLError: Propagated from ``urlopen`` when the fetch
            fails.
    """
    # Close the response deterministically instead of relying on garbage
    # collection to release the underlying connection.
    with urllib.request.urlopen(URL) as response:
        html = response.read()

    doc = Document(html)
    # NOTE(review): presumably pre-computes the requested fields; Document is
    # defined outside this block -- confirm against its library version.
    doc.parse(["summary", "short_title"])
    readable_article = doc.summary()

    # Strip the markup from the summary, leaving only its text content.
    soup = BeautifulSoup(readable_article, 'html.parser')
    return soup.get_text()