コード例 #1
0
def get_summary(url):
    """Fetch ``url`` and return its readable article and short title.

    The page is downloaded with ``urllib.request.urlopen`` and the raw
    response bytes are handed to ``Document`` (defined outside this
    block; presumably a readability-style extractor -- confirm).

    Args:
        url: Address of the page, passed straight to ``urlopen``.

    Returns:
        A ``(readable_article, readable_title)`` tuple holding the
        results of ``doc.summary()`` and ``doc.short_title()``.

    Raises:
        Nothing is caught here, so any error raised by ``urlopen``
        (e.g. ``urllib.error.URLError``) propagates to the caller.
    """
    # Use the response as a context manager so the underlying connection
    # is closed deterministically, even if read() raises.
    with urllib.request.urlopen(url) as response:
        html = response.read()

    doc = Document(html)
    # NOTE(review): parse() is called before the accessors below, with the
    # names of the fields that are read afterwards -- confirm it is
    # required by the Document implementation in use.
    doc.parse(["summary", "short_title"])
    readable_article = doc.summary()
    readable_title = doc.short_title()
    return readable_article, readable_title
コード例 #2
0
ファイル: scraper.py プロジェクト: za419/reddit-news
def scrape(URL):
    """Return the text of the article found at URL.

    The page is downloaded with ``urllib.request.urlopen``, reduced to
    its readable part through ``Document`` (defined outside this block;
    presumably a readability-style extractor -- confirm), and the
    resulting markup is flattened to plain text with BeautifulSoup.
    Some whitespace changes will usually occur.

    Args:
        URL: Address of the page, passed straight to ``urlopen``.

    Returns:
        The value of ``get_text()`` on the parsed summary markup.

    Raises:
        Nothing is caught here, so any error raised by ``urlopen``
        (e.g. ``urllib.error.URLError``) propagates to the caller.
    """
    # Use the response as a context manager so the underlying connection
    # is closed deterministically, even if read() raises.
    with urllib.request.urlopen(URL) as response:
        html = response.read()

    doc = Document(html)
    # NOTE(review): "short_title" is requested here although only the
    # summary is used below -- kept as in the original; confirm whether
    # it can be dropped.
    doc.parse(["summary", "short_title"])
    readable_article = doc.summary()

    # Strip the markup from the summary, leaving only its text content.
    soup = BeautifulSoup(readable_article, 'html.parser')
    text = soup.get_text()
    return text