예제 #1
0
def get_summary(url):
    """Fetch the page at *url* and return its extracted article and title.

    Args:
        url: Address handed to ``urllib.request.urlopen``.

    Returns:
        A ``(readable_article, readable_title)`` tuple: the results of
        ``Document.summary()`` and ``Document.short_title()`` respectively.

    Raises:
        urllib.error.URLError: Propagated from ``urlopen`` when the page
            cannot be fetched.
    """
    # Use the response as a context manager so the underlying connection is
    # closed as soon as the body is read, rather than whenever the object
    # happens to be garbage-collected.
    with urllib.request.urlopen(url) as response:
        html = response.read()

    doc = Document(html)
    # NOTE(review): presumably prepares the two extractions used below;
    # confirm against the Document implementation this file imports.
    doc.parse(["summary", "short_title"])
    readable_article = doc.summary()
    readable_title = doc.short_title()
    return readable_article, readable_title
예제 #2
0
def scrape(URL):
    """Return the text of the article found at URL.

    Some whitespace changes will usually occur.

    Args:
        URL: Address handed to ``urllib.request.urlopen``.

    Returns:
        The string produced by ``BeautifulSoup(...).get_text()`` on the
        markup returned by ``Document.summary()``.

    Raises:
        urllib.error.URLError: Propagated from ``urlopen`` when the page
            cannot be fetched.
    """
    # Use the response as a context manager so the underlying connection is
    # closed as soon as the body is read, rather than whenever the object
    # happens to be garbage-collected.
    with urllib.request.urlopen(URL) as response:
        html = response.read()

    doc = Document(html)
    # NOTE(review): presumably prepares the extraction used below; confirm
    # against the Document implementation this file imports.
    doc.parse(["summary", "short_title"])
    readable_article = doc.summary()

    # Parse the extracted markup and keep only its text content.
    soup = BeautifulSoup(readable_article, 'html.parser')
    return soup.get_text()