def hot_websites(self):
    """Count web occurrences of the words making up Google's hot topics.

    Splits each trending topic into lowercase words, then asks the
    scrapper for a 7-day web count over those words.
    """
    terms = [word
             for topic in newspaper.hot()
             for word in topic.lower().split()]
    return self._scrapper.get_web_count(terms, 7)
def hot_topics(self):
    """Score Google's hot-topic words against the last 7 days of scraped topics.

    Splits each trending topic into lowercase words and hands them,
    together with the recent-topic list, to the scrapper's hot-count.
    """
    terms = [word
             for topic in newspaper.hot()
             for word in topic.lower().split()]
    recent = self._scrapper.get_recent_topics(7)
    return self._scrapper.get_hot_count(terms, recent)
def get_trending():
    """Fetch trending news data from NewsAPI and newspaper3k.

    Returns:
        dict with keys:
          'articles'     — NewsAPI US top-headline articles (may be None on API error),
          'popular_urls' — newspaper3k's list of popular news-source URLs,
          'hot_topics'   — newspaper3k's Google trending terms.
    """
    # NOTE(security): the API key is hard-coded in source; it should be
    # moved to configuration / an environment variable and rotated.
    url = ('https://newsapi.org/v2/top-headlines?'
           'country=us&'
           'apiKey=bcaaf0d008994d818672b2c1141be98b')
    response = requests.get(url)
    # response.json() replaces json.loads(response.text) — same result,
    # but lets requests handle the character decoding.
    return {
        'articles': response.json().get('articles'),
        'popular_urls': newspaper.popular_urls(),
        'hot_topics': newspaper.hot(),
    }
def test_hot_trending(self):
    """Smoke test: fetching Google trending terms should not raise."""
    _ = newspaper.hot()
import newspaper

# Top trending terms on Google (fetched via a public API).
print(newspaper.hot())

# Popular news-source URLs known to newspaper3k.
print(newspaper.popular_urls())

# Displays the table of languages newspaper3k supports.
newspaper.languages()
def trending():
    """Return a pair: (Google trending terms, first ten popular news-source URLs)."""
    terms = newspaper.hot()
    urls = newspaper.popular_urls()[:10]
    return terms, urls
def trends():
    """Return the current Google trending terms via newspaper3k."""
    # The original bound the result to two pointless intermediate
    # variables (np3k, trend); return it directly instead.
    return newspaper.hot()
import newspaper

# Build a newspaper3k source for de Volkskrant (Dutch-language site).
volkskrant = newspaper.build('https://www.volkskrant.nl/', language='nl')
print(volkskrant.size())

# All category URLs discovered on the site.
for category in volkskrant.category_urls():
    print(category)

# URLs of the articles found.
for article in volkskrant.articles:
    print(article.url)
print('\n')

# Site-independent newspaper3k helpers.
print(newspaper.hot(), end='\n\n')
print(newspaper.popular_urls(), end='\n\n')
print(newspaper.languages())

# Single-article workflow (example, not executed):
#   article = newspaper.Article(url)
#   article.download(); article.parse(); article.nlp()
#   then read article.authors, article.publish_date, article.text,
#   article.top_image, article.keywords
def get_hot_trends(self, *args, **kwargs):
    """Return Google's trending terms via newspaper3k.

    Any positional or keyword arguments are accepted for interface
    compatibility and ignored.
    """
    return newspaper.hot()
# NOTE(review): this loop and the `return` below are the tail of a function
# whose `def` line falls outside this chunk; `genres` and `output` are
# presumably defined there — confirm against the full file.
for genre in genres:
    # All tokens for this Brown-corpus genre.
    wds = nltk.corpus.brown.words(categories=genre)
    # Lengths of alphabetic tokens only (punctuation/numbers excluded).
    wd_len = [len(wd) for wd in wds if wd.isalpha()]
    # Average characters per word.
    cpw = 1.0 * sum(wd_len) / len(wd_len)
    num_wds = len(wds)
    num_sents = len(nltk.corpus.brown.sents(categories=genre))
    # Average words per sentence.
    wps = 1.0 * num_wds / num_sents
    # Automated Readability Index formula.
    ari = 4.71 * cpw + 0.5 * wps - 21.43
    output = output + [[genre, ari]]
return output

# 30 Use the Porter Stemmer to normalize some tokenized text, calling the stemmer on each word.
# Do the same thing with the Lancaster Stemmer and see if you observe any differences.
import newspaper

hot_topics = newspaper.hot()
news_sources = newspaper.popular_urls()
# Download, parse, and run NLP on one CNN article to get raw text.
cnn_paper = newspaper.build('http://cnn.com')
my_art = cnn_paper.articles[25]
my_art.download()
my_art.parse()
my_art.nlp()
raw = my_art.text
tokens = nltk.word_tokenize(raw)
# Stem every token with both algorithms, lowercasing the results.
port_stem = [nltk.PorterStemmer().stem(wd).lower() for wd in tokens]
lanc_stem = [nltk.LancasterStemmer().stem(wd).lower() for wd in tokens]
port_set = set(port_stem)
lanc_set = set(lanc_stem)
# Stems produced by one stemmer but not the other.
in_port_not_lanc = port_set - lanc_set
in_lanc_not_port = lanc_set - port_set
def hot():
    """Thin wrapper: the current Google trending terms from newspaper3k."""
    return newspaper.hot()
def Hot(self):
    """Return Google's trending terms via newspaper3k.

    NOTE(review): the PascalCase name is non-PEP8 but is kept — it is the
    public interface callers use.
    """
    return newspaper.hot()
def hot_news():
    """Return the list of currently trending topics from newspaper3k."""
    # Return directly; the intermediate variable added nothing.
    return newspaper.hot()