Example #1
0
 def hot_websites(self):
     """Return web counts for the currently trending Google search terms.

     Each trending phrase from newspaper.hot() is lowercased and split
     into words; the scrapper then counts occurrences over a 7-day window.
     """
     terms = [word
              for phrase in newspaper.hot()
              for word in phrase.lower().split()]
     return self._scrapper.get_web_count(terms, 7)
Example #2
0
 def hot_topics(self):
     """Score Google's trending terms against recently scraped topics.

     Lowercases and word-splits every trending phrase, fetches topics
     from the last 7 days, and returns the scrapper's hot count.
     """
     words = [w
              for phrase in newspaper.hot()
              for w in phrase.lower().split()]
     recent = self._scrapper.get_recent_topics(7)
     return self._scrapper.get_hot_count(words, recent)
 def get_trending():
     """Collect trending news from two sources.

     Fetches top US headlines from NewsAPI and combines them with
     newspaper3k's popular source URLs and hot search terms.

     Returns a dict with keys 'articles', 'popular_urls', 'hot_topics'.
     """
     # NOTE(review): the NewsAPI key is hard-coded here — it should be
     # moved to configuration / an environment variable and rotated.
     endpoint = ('https://newsapi.org/v2/top-headlines?'
                 'country=us&'
                 'apiKey=bcaaf0d008994d818672b2c1141be98b')
     response = requests.get(endpoint)
     trending = {
         'articles': json.loads(response.text).get('articles'),
         'popular_urls': newspaper.popular_urls(),
         'hot_topics': newspaper.hot(),
     }
     return trending
Example #4
0
 def test_hot_trending(self):
     """Smoke test: fetching Google's trending terms must not raise."""
     newspaper.hot()
# Demo of newspaper's module-level helpers; each call hits the network.
import newspaper
# hot() returns a list of the top trending terms on Google using a public API.
print(newspaper.hot())
# popular_urls() returns a list of popular news source URLs.
print(newspaper.popular_urls())
# Lists the languages newspaper supports; the return value is discarded here.
newspaper.languages()
Example #6
0
 def test_hot_trending(self):
     """Ensure the Google-trending fetch completes without an exception."""
     newspaper.hot()
Example #7
0
def trending():
    """Return a (terms, urls) pair: Google's hot search terms and the
    first ten popular news-source URLs."""
    return newspaper.hot(), newspaper.popular_urls()[:10]
Example #8
0
def trends():
    """Return the list of currently trending Google search terms."""
    # The original routed the result through two redundant locals
    # (np3k -> trend); a direct return is equivalent and clearer.
    return newspaper.hot()
import newspaper

# Build a source object for De Volkskrant (Dutch-language news site).
# build() crawls and categorizes the site, so this hits the network.
volkskrant = newspaper.build('https://www.volkskrant.nl/', language='nl')

# Number of articles discovered on the source.
print(volkskrant.size())

# Category pages found on the site.
for category in volkskrant.category_urls():
    print(category)

# URLs of the individual articles that were discovered.
for article in volkskrant.articles:
    print(article.url)

print('\n')

# Module-level helpers: trending Google terms and popular source URLs.
print(newspaper.hot(), end='\n\n')
print(newspaper.popular_urls(), end='\n\n')

print(newspaper.languages())

# NOTE(review): the commented-out lines below sketch the single-article
# workflow (download -> parse -> nlp); kept as-is for reference.
# url = 'https://www.volkskrant.nl/nieuws-achtergrond/eerste-dode-in-nederland-maar-wat-is-eigenlijk-de-kans-om-aan-corona-te-overlijden~bf716564/'
# article = newspaper.Article(url)
# article.download()

# article.parse()
# print(article.authors)
# article.publish_date
# article.text
# article.top_image

# article.nlp()
# article.keywords
Example #10
0
def get_hot_trends(self, *args, **kwargs):
    """Return Google's currently trending terms; all arguments are ignored."""
    return newspaper.hot()
Example #11
0
    # NOTE(review): fragment — the enclosing def and the initialization of
    # `genres` and `output` are outside this excerpt; code left untouched.
    for genre in genres:
        # All word tokens in the Brown corpus for this genre.
        wds=nltk.corpus.brown.words(categories=genre)
        # Lengths of alphabetic tokens only (punctuation excluded).
        wd_len=[ len(wd) for wd in wds if wd.isalpha()]
        # Average characters per word.
        cpw=1.0*sum(wd_len)/len(wd_len)
        num_wds=len(wds)
        num_sents=len(nltk.corpus.brown.sents(categories=genre))
        # Average words per sentence.
        wps=1.0*num_wds/num_sents
        # Matches the Automated Readability Index formula:
        # 4.71*chars_per_word + 0.5*words_per_sentence - 21.43.
        ari=4.71*cpw+0.5*wps-21.43
        output=output + [ [genre,ari] ]
    return output

# 30 Use the Porter Stemmer to normalize some tokenized text, calling the stemmer on each word.
# Do the same thing with the Lancaster Stemmer and see if you observe any differences.

import newspaper
# Trending search terms and popular source URLs (network calls; results unused below).
hot_topics=newspaper.hot()
news_sources=newspaper.popular_urls()
# Build the CNN source and pull one article to obtain raw text to stem.
cnn_paper = newspaper.build('http://cnn.com')
my_art=cnn_paper.articles[25]
my_art.download()
my_art.parse()
my_art.nlp()
raw=my_art.text
tokens=nltk.word_tokenize(raw)
# Stem every token with the Porter and Lancaster stemmers, lowercasing results.
port_stem = [ nltk.PorterStemmer().stem(wd).lower() for wd in tokens ]
lanc_stem = [ nltk.LancasterStemmer().stem(wd).lower() for wd in tokens ]
# Set difference shows where the two stemmers disagree.
port_set=set(port_stem)
lanc_set=set(lanc_stem)
in_port_not_lanc = port_set-lanc_set
in_lanc_not_port = lanc_set-port_set
Example #12
0
 def hot():
     """Return Google's currently trending search terms."""
     return newspaper.hot()
 def Hot(self):
     """Instance wrapper around newspaper.hot(); instance state is unused."""
     return newspaper.hot()
def hot_news():
    """Return the list of currently trending Google topics."""
    # The original assigned to a temporary (`hot_topic`) only to return it;
    # a direct return is equivalent.
    return newspaper.hot()