def get_news_search():
    get_everything_filtered = ''
    if request.method == 'GET':
        newsapi = NewsApiClient(api_key=NEWS_API_KEY)
        request_object = {'language': 'en', 'page_size': 30, 'sort_by': 'publishedAt'}
        keys = ['author', 'description', 'title', 'url', 'urlToImage', 'publishedAt', 'source']
        error_message = ''
        get_everything = ''
        try:
            # Required params: keyword, from, to
            if 'keyword' in request.args:
                request_object['q'] = request.args['keyword']
            else:
                error_message = 'Provide keyword'
            if 'from' in request.args:
                request_object['from_param'] = request.args['from']
            else:
                error_message = 'Provide from_date'
            if 'to' in request.args:
                request_object['to'] = request.args['to']
            else:
                error_message = 'Provide to_date'
            if error_message != '':
                get_everything_filtered = {'error': error_message}

            # Optional params
            if 'sources' in request.args and request.args['sources'] != 'all':
                request_object['sources'] = request.args['sources']

            if 'sources' in request_object:
                get_everything = newsapi.get_everything(
                    sources=request_object['sources'],
                    language=request_object['language'],
                    q=request_object['q'],
                    from_param=request_object['from_param'],
                    to=request_object['to'],
                    page_size=request_object['page_size'],
                    sort_by=request_object['sort_by'])
            else:
                get_everything = newsapi.get_everything(
                    language=request_object['language'],
                    q=request_object['q'],
                    from_param=request_object['from_param'],
                    to=request_object['to'],
                    page_size=request_object['page_size'],
                    sort_by=request_object['sort_by'])
            get_everything_filtered = transform_data(get_everything['articles'], keys)
        except Exception as e:
            if error_message == '':
                error_message = e.args[0]
            get_everything_filtered = {'error': error_message}
    return json.dumps({"data": get_everything_filtered})
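# The route above relies on a transform_data() helper defined elsewhere in the original
# project. A minimal sketch of what such a helper could look like (this body is an
# assumption, not the project's actual implementation): keep only the whitelisted keys
# from each article dict.
def transform_data(articles, keys):
    """Return a list of article dicts reduced to the given keys."""
    return [{key: article.get(key) for key in keys} for article in articles]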
def make_api_calls(name):
    api = NewsApiClient(api_key=NEWS_API)
    # Search for the exact phrase, e.g. '+"New York"' for name == "New_York"
    query = '+"{}"'.format(name.replace("_", " "))
    articles_en_1 = api.get_everything(q=query, language='en', page_size=20, page=1)
    articles_en_2 = api.get_everything(q=query, language='en', page_size=20, page=2)
    articles_de_1 = api.get_everything(q=query, language='de', page_size=20, page=1)
    articles_de_2 = api.get_everything(q=query, language='de', page_size=20, page=2)
    return {
        "en": extract_articles(articles_en_1) + extract_articles(articles_en_2),
        "de": extract_articles(articles_de_1) + extract_articles(articles_de_2),
    }
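# make_api_calls() concatenates the output of an extract_articles() helper that is not shown
# here. A minimal sketch under the assumption that it simply unwraps the 'articles' list from
# the NewsAPI response (the real helper may filter or reshape fields differently):
def extract_articles(response):
    """Pull the list of article dicts out of a get_everything() response."""
    return response.get('articles', [])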
def info():
    api = NewsApiClient(api_key='0c121bea533546759ad2551c94d28118')
    api.get_sources()
    api.get_everything(sources='bbc-news', q='women AND safety')

    # List of different API keys
    apiKey_list = [
        '20d469827dbb4eb384d22490ea5df888', '75d16a33351a44969f3a5ac41eb7cf20',
        '6496b9cb73c34054a8b58a3dee86c672', '5e6b9203fe4247369e70351f0ab2b1b3',
        '3907a8165aec4be89b2e12f3a5ad541a', '79bbb20ec53e4d1b85c2caca76402488',
        '388eff313e1a4d399d55ebb19d4db4cd', 'a0936894b7904a03a4c35ca6627ebc33',
        'c15a4b03480c4081bd3d184bc8559f23', 'c05beec776fa4b1fbcc46bdad8efa951',
        'be98dcb51dd64998ad08a6dd2c5f9e80', '376c9dfc704748279df3e6f30a751a1e',
        '46660f56bd6e45f986fea91dc87b1fc1', '5dbf8944da394e4ca003b7fea5b736c5',
        '31ba8f79f57d41c8b03d3334760154b3', '723db6ceb2e8465daffa882be629d6fb',
        '1a88fcc99b0b41de902fcdbc45bd4a97', '06d039549c914c78a46d2c0c137b7f7c',
        '1b47a4f26fc949c4ad280f9bfb81cd5d',
    ]

    # Fetch the latest news for each keyword with one of the API keys
    a, b, c = fetch_keyword_news(
        ["women", "women empowerment", "menstrual cycle", "periods",
         "girl power", "women in business", "health"],
        datetime.now(pytz.timezone('US/Eastern')).date(),
        apiKey_list[4])
    print(a, b, c)
    arr = [a[0:3], b[0:3], c[0:3]]
    print(arr)
    # session['final'] = final
    return render_template('info.html', final=arr)
def processget():
    newsapi = NewsApiClient(api_key="6c70eb3b2c36455ca87436fecbd77761")
    if request.method == 'GET':
        query = request.args.get('query')
        sources = request.args.get('sources')
        from_param = request.args.get('from_param')
        to = request.args.get('to')
        if query and sources != 'all' and from_param and to:
            try:
                output1 = newsapi.get_everything(q=query, sources=sources,
                                                 from_param=from_param, to=to,
                                                 language='en', sort_by='publishedAt',
                                                 page_size=30)
                return jsonify({'output': output1})
            except NewsAPIException as err:
                print(err.args)
                return jsonify({'erroroutput': err.args})
        elif query and sources == 'all' and from_param and to:
            try:
                output1 = newsapi.get_everything(q=query, from_param=from_param,
                                                 sources=None, to=to,
                                                 language='en', sort_by='publishedAt',
                                                 page_size=30)
                return jsonify({'output': output1})
            except NewsAPIException as err:
                return jsonify({'erroroutput': err.args})
def search():
    # Read the search form
    form = request.form
    search_input_keyword = form['search_input_keyword']
    search_input_from = form['search_input_from']
    search_input_to = form['search_input_to']
    search_select_source = form['search_select_source']

    # Init API client
    newsapi = NewsApiClient(api_key='cf6de82e0a034204b073376d55103619')

    # Get articles, with or without a source filter
    if 'all' in search_select_source:
        all_articles = newsapi.get_everything(q=search_input_keyword,
                                              from_param=search_input_from,
                                              to=search_input_to,
                                              language='en',
                                              sort_by='publishedAt',
                                              page_size=30)
    else:
        all_articles = newsapi.get_everything(q=search_input_keyword,
                                              sources=search_select_source,
                                              from_param=search_input_from,
                                              to=search_input_to,
                                              language='en',
                                              sort_by='publishedAt',
                                              page_size=30)
    return all_articles
async def _news_(self, ctx, arg1):
    newsapi = NewsApiClient(os.getenv('NEWS_API_KEY'))

    # One article each from three sources for the search term
    top_headlines = newsapi.get_everything(q=f'{arg1}', sources='newsweek')
    article1 = top_headlines['articles'][0]
    headline1 = article1['title']
    content1 = article1['content']
    url1 = article1['url']

    top_headlines = newsapi.get_everything(q=f'{arg1}', sources='the-hindu')
    article2 = top_headlines['articles'][0]
    headline2 = article2['title']
    content2 = article2['content']
    url2 = article2['url']

    top_headlines = newsapi.get_everything(q=f'{arg1}', sources='the-times-of-india')
    article3 = top_headlines['articles'][0]
    headline3 = article3['title']
    content3 = article3['content']
    url3 = article3['url']

    # content[:-13] drops NewsAPI's trailing "[+N chars]" marker
    embed = discord.Embed(title="Search results",
                          description=f"{ctx.author.mention}, here's what I got!",
                          color=ctx.author.color)
    embed.add_field(name='\u200b', value=f'**[{headline1}]({url1})**' + f"\n {content1[:-13]}", inline=False)
    embed.add_field(name='\u200b', value=f'**[{headline2}]({url2})**' + f"\n {content2[:-13]}", inline=False)
    embed.add_field(name='\u200b', value=f'**[{headline3}]({url3})**' + f"\n {content3[:-13]}", inline=False)
    await ctx.send(embed=embed)
def create_labeled_data():
    news_api = NewsApiClient(api_key='063f02817dbb49528058d7372964f645')
    x = 1
    while x <= 10:
        b_headlines = news_api.get_everything(sources='buzzfeed', from_param=monthago_str,
                                              to=now_str, language='en', sort_by='relevancy',
                                              page_size=100, page=x)['articles']
        r_headlines = news_api.get_everything(sources='reuters', from_param=monthago_str,
                                              to=now_str, language='en', sort_by='relevancy',
                                              page_size=100, page=x)['articles']
        a_headlines = news_api.get_everything(sources='associated-press', from_param=monthago_str,
                                              to=now_str, language='en', sort_by='relevancy',
                                              page_size=100, page=x)['articles']

        # BuzzFeed titles are labelled clickbait; drop any None titles first
        b_titles = [article['title'] for article in b_headlines]
        b_titles = list(filter(None.__ne__, b_titles))
        b_titles = [(title, 'bait') for title in b_titles]
        bait_headlines.extend(b_titles)

        # Reuters and AP titles are labelled non-clickbait
        r_titles = [(article['title'], 'not_bait') for article in r_headlines]
        legit_headlines.extend(r_titles)
        a_titles = [(article['title'], 'not_bait') for article in a_headlines]
        legit_headlines.extend(a_titles)
        x += 1

    all_headlines = bait_headlines + legit_headlines
    random.shuffle(all_headlines)
    pickle.dump(all_headlines, open('headlines.p', 'wb'))
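# A minimal sketch of how the pickled dataset written above could be read back for training
# (the file name 'headlines.p' comes from create_labeled_data(); the 80/20 split shown here
# is only an illustrative assumption):
import pickle

with open('headlines.p', 'rb') as f:
    labeled_headlines = pickle.load(f)   # list of (title, 'bait' / 'not_bait') tuples
split = int(0.8 * len(labeled_headlines))
train_set, test_set = labeled_headlines[:split], labeled_headlines[split:]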
def headline_extraction(day, month, term):
    """Iterate through an entire month's worth of dates and extract the articles
    containing specified keywords for each given day."""
    # Spare API keys, tried in turn if a request fails:
    #   6876de1e74b64e4e862fc0b7943133be
    #   1f66071454d94bf49a229003cc0a66d0
    #   b87aacedc4db4880b592647e573fffd3
    num_articles = 100
    start = f"2020-{month}-{day}"
    end = f"2020-{month}-{day}"
    try:
        news = NewsApiClient(api_key="b87aacedc4db4880b592647e573fffd3")
        data = news.get_everything(q=term, sources=news_sites(), from_param=start, to=end,
                                   language="en", page_size=num_articles, sort_by='relevancy')
        print(f"Analyzing month {month} / day {day}: Found " + str(data["totalResults"]))
        news_df = df_setup(data)
        save_df(news_df, term)
    except:
        try:
            news = NewsApiClient(api_key="1f66071454d94bf49a229003cc0a66d0")
            data = news.get_everything(q=term, sources=news_sites(), from_param=start, to=end,
                                       language="en", page_size=num_articles, sort_by='relevancy')
            print(f"Analyzing month {month} / day {day}: Found " + str(data["totalResults"]))
            news_df = df_setup(data)
            save_df(news_df, term)
        except:
            try:
                news = NewsApiClient(api_key="6876de1e74b64e4e862fc0b7943133be")
                data = news.get_everything(q=term, sources=news_sites(), from_param=start, to=end,
                                           language="en", page_size=num_articles, sort_by='relevancy')
                print(f"Analyzing month {month} / day {day}: Found " + str(data["totalResults"]))
                news_df = df_setup(data)
                save_df(news_df, term)
            except:
                print(f"skipped {start}/{end}")
def pull_one_day(the_day, key):
    # Init
    newsapi = NewsApiClient(api_key=key)
    ps = 100

    # Big list to hold results; each entry is a list of: title, description, content, publishedAt
    results = []

    # Run first request to determine size of response
    the_day_articles = newsapi.get_everything(q='crude AND oil', from_param=the_day, to=the_day,
                                              language='en', page_size=ps, sort_by='relevancy')

    # How many pages, and how many articles on the last page
    pages = int(the_day_articles['totalResults'] / ps + 1)
    last_page_no = the_day_articles['totalResults'] % ps
    print('Number of articles: ' + str(the_day_articles['totalResults']))
    print('Number of pages: ' + str(pages))
    print('Number of articles on last page: ' + str(last_page_no))

    # Crawl each page (can only crawl 10 pages x 100 articles with free subscription)
    for i in range(1, pages + 1):
        print('Processing page number ' + str(i))
        this_page = newsapi.get_everything(q='crude AND oil', from_param=the_day, to=the_day,
                                           language='en', page_size=ps, sort_by='relevancy',
                                           page=i)

        # Process each article into a row
        x = ps
        if i == pages:
            x = last_page_no
        for j in range(0, x):
            row = []
            for k in ['title', 'description', 'content', 'publishedAt']:
                row.append(this_page['articles'][j][k])
            results.append(row)

    # Confirm results were loaded correctly
    print(str(len(results)) + ' articles loaded')
    return results
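# A minimal sketch of how pull_one_day() could be driven and its rows saved; the date,
# API key and output file name here are placeholders, not values from the original code:
import csv

rows = pull_one_day('2021-03-01', 'YOUR_NEWSAPI_KEY')
with open('crude_oil_2021-03-01.csv', 'w', newline='', encoding='utf-8') as f:
    writer = csv.writer(f)
    writer.writerow(['title', 'description', 'content', 'publishedAt'])
    writer.writerows(rows)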
def new(message):
    try:
        text = message.text
        if text == "Получить новости":  # "Get news"
            mark = types.ReplyKeyboardMarkup()
            # Buttons: "Bot info", "Quotes", "Forex", "Portfolio"
            mark.row('Информация о боте', 'Котировки', 'Форекс', 'Портфель')
            t = 0
            newsapi = NewsApiClient(api_key='cfa09a3adc7f47eab3d72f016392e170')
            n = newsapi.get_everything(q=slov, language='en', sort_by='publishedAt',
                                       page_size=4, page=1)
            while t < len(n['articles']):
                bot.send_message(message.chat.id, n['articles'][t]['url'])
                t += 1
            bot.send_message(message.chat.id, "Новости получены", reply_markup=mark)  # "News received"
        elif text == "Назад":  # "Back"
            mar = types.ReplyKeyboardMarkup()
            mar.row('Информация о боте', 'Котировки', 'Форекс', 'Портфель')
            bot.send_message(message.chat.id, "Выберите действие:", reply_markup=mar)  # "Choose an action:"
    except Exception as e:
        m = types.ReplyKeyboardMarkup()
        m.row('Информация о боте', 'Котировки', 'Форекс', 'Портфель')
        bot.send_message(message.chat.id, 'Что-то пошло не так...', reply_markup=m)  # "Something went wrong..."
async def process_articles(search_query: str, language: str = "en", API_KEY: str = None):
    # TODO: flesh out swagger defs
    # TODO: sanitize inputs
    # TODO: error handling
    if API_KEY:
        # Start a news client with the caller-provided API key
        newsapi_p = NewsApiClient(api_key=API_KEY)
        all_articles = newsapi_p.get_everything(q=search_query, language=language,
                                                sort_by='relevancy', page_size=100, page=1)
    else:
        # Fall back to the module-level client
        all_articles = newsapi.get_everything(q=search_query, language=language,
                                              sort_by='relevancy', page_size=100, page=1)

    # De-duplicate, then score relevance against the query
    dup_assnd_arts, _ = dedup_svc.assign_duplicates(all_articles['articles'])
    gen = rel_clf.determine_relevance_batch(search_query, dup_assnd_arts)
    resp_articles = [i for i in gen]
    return {
        "query": search_query,
        "results": len(resp_articles),
        "articles": resp_articles,
    }
def fetchNews():
    # Init
    newsapi = NewsApiClient(api_key='80979f75db4c46198e1fb95d6238d0b1')
    query = "Phoenix"
    all_articles = newsapi.get_everything(q=query,
                                          from_param='2020-07-15',
                                          to='2020-07-16',
                                          language='en',
                                          sort_by='relevancy')
    js = json.dumps(all_articles)
    f = open("outputFile.txt", "a")
    stop_words = set(stopwords.words('english'))
    for val in json.loads(js)["articles"]:
        descrip = val["description"]
        # Strip punctuation, lower-case, and drop stop words before writing each word out
        table = str.maketrans('', '', string.punctuation)
        s = re.split(r'\W+', descrip)
        stripped = [w.translate(table) for w in s]
        words = [word.lower() for word in stripped]
        for word in words:
            if word not in stop_words:
                if word != "get":
                    if word != query.lower():
                        f.write("(news)" + word + "\n")
def news(message):
    try:
        text = message.text
        if text == "Назад":  # "Back"
            mar = types.ReplyKeyboardMarkup()
            # Buttons: "Bot info", "Quotes", "Forex", "Portfolio"
            mar.row('Информация о боте', 'Котировки', 'Форекс', 'Портфель')
            bot.send_message(message.chat.id, "Выберите действие:", reply_markup=mar)  # "Choose an action:"
        elif text == "Добавить акцию":  # "Add a stock"
            markup = types.ReplyKeyboardMarkup()
            markup.row("Получить новости", "Назад")  # "Get news", "Back"
            items.append(ticket)
            print(items)
            mes = bot.send_message(message.chat.id, "Акция добавлена", reply_markup=markup)  # "Stock added"
            bot.register_next_step_handler(mes, news_next)
        elif text == "Получить новости":  # "Get news"
            mark = types.ReplyKeyboardMarkup()
            mark.row("Добавить акцию", "Назад")
            t = 0
            newsapi = NewsApiClient(api_key='cfa09a3adc7f47eab3d72f016392e170')
            n = newsapi.get_everything(q=name, language='en', sort_by='publishedAt',
                                       page_size=4, page=1)
            while t < len(n['articles']):
                bot.send_message(message.chat.id, n['articles'][t]['url'])
                t += 1
            msg = bot.send_message(message.chat.id, "Новости получены", reply_markup=mark)  # "News received"
            bot.register_next_step_handler(msg, news_next)
    except Exception as e:
        m = types.ReplyKeyboardMarkup()
        m.row('Информация о боте', 'Котировки', 'Форекс', 'Портфель')
        bot.send_message(message.chat.id, 'Что-то пошло не так...', reply_markup=m)  # "Something went wrong..."
def index():
    today = date.today()
    this_day = today.strftime("%Y-%m-%d")

    # Init
    NEWS_KEY = os.environ.get("NEWS_KEY")
    newsapi = NewsApiClient(api_key=NEWS_KEY)

    # /v2/top-headlines
    # top_headlines = newsapi.get_top_headlines(q='music',
    #                                           # sources='bbc-news,the-verge',
    #                                           # category='business',
    #                                           # category='entertainment',
    #                                           language='en',
    #                                           country='us')

    # /v2/everything
    all_articles = newsapi.get_everything(
        q='music',
        # sources='bbc-news,the-verge',
        # domains='bbc.co.uk,techcrunch.com',
        # domains='billboard.com,digitalmusicnews.com,complex.com',
        # from_param='2017-12-01',
        from_param=this_day,
        # to='2017-12-12',
        language='en',
        # sort_by='relevancy',
        sort_by='popularity',
        # page=2
    )
    print("-------------------------------------------------")
    print(all_articles)
    print("-------------------------------------------------")
    return all_articles
def get_urls(category):
    """Open the file with the list of topics. Read each topic and call the News API to
    request articles ranked by relevance to that topic. Write the resulting set of URLs
    to a new file (one file per topic). With 20 topics and 2 categories, 40 files are created.

    :param category:
    """
    news_api = NewsApiClient(api_key=config.get_newsapi_key())
    topics_list_file_path = os.path.join(config.get_app_root(), 'topics', category)
    with open(topics_list_file_path, 'r') as topics_file:
        for topic in topics_file:
            # rstrip() with no argument strips only trailing whitespace (e.g. the newline),
            # so a topic like "Brexit" keeps its final "t"
            topic = topic.rstrip()
            topic_file_name = format_file_name(topic)

            # Create a new file to save the result set
            urls_file_path = os.path.join(config.get_app_root(), 'urls',
                                          category.split('.')[0], topic_file_name)

            # Write the result set of URLs to the new file
            with open(urls_file_path, 'w') as urls_file:
                all_articles = news_api.get_everything(q=topic, sort_by='relevancy', page_size=100)
                for article in all_articles['articles']:
                    urls_file.write(article['url'])
                    urls_file.write('\n')
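# get_urls() calls a format_file_name() helper defined elsewhere in the original project.
# A minimal sketch of one plausible implementation (an assumption, not the project's actual
# code; the '.txt' suffix is also assumed): lower-case the topic and replace unsafe
# characters so it can be used as a file name.
import re

def format_file_name(topic):
    """Turn a topic string into a filesystem-safe file name."""
    return re.sub(r'[^a-z0-9]+', '_', topic.lower()).strip('_') + '.txt'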
def collect_news_data(dates, query, sources, ticker):
    """News collection via NewsAPI.org.

    Args:
        dates ([array]): array of dates (one request per date)
        query (string): query string
        sources ([array]): list of sources to collect news from
        ticker (string): ticker symbol used to name the save location
    """
    newsapi = NewsApiClient(api_key='XXX')
    for date in dates:
        articles = newsapi.get_everything(q=query, language='en', from_param=date, to=date,
                                          sources=sources, sort_by="relevancy", page_size=100)
        if not os.path.exists("../data/news/{}".format(ticker)):
            os.makedirs("../data/news/{}".format(ticker))
        with open("../data/news/{}/{}.json".format(ticker, date), "w") as f:
            f.write(json.dumps(articles, indent=4))
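# A minimal sketch of driving collect_news_data() over a short date range; the dates,
# query, sources and ticker below are placeholder values for illustration only:
from datetime import date, timedelta

start = date(2021, 3, 1)
dates = [(start + timedelta(days=i)).isoformat() for i in range(7)]   # one week of daily requests
collect_news_data(dates, query='Tesla', sources='bbc-news,reuters', ticker='TSLA')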
def newsFetcher(txt, top=False):
    newsapi = NewsApiClient(api_key='932e630a539a47308e1cef5d6eb05ed6')

    # Top-headlines mode: return {title: url} for BBC News and The Verge
    if top:
        data = newsapi.get_top_headlines(language='en', sources='bbc-news,the-verge')
        headlines = dict()
        for i in data['articles']:
            if i['title'] not in headlines.keys():
                headlines[i['title']] = i['url']
        return headlines

    # Otherwise build a query from the first few noun phrases of the input text
    blob = TextBlob(txt)
    b = blob.noun_phrases
    s = ''
    x = 0
    for i in b:
        s = s + i + " "
        if x > 5:
            break
        x += 1

    data = newsapi.get_everything(q=s, language='en', page_size=100, sort_by='relevancy')
    headlines = dict()
    for i in data['articles']:
        if i['title'] not in headlines.keys():
            headlines[i['title']] = i['url']
    return headlines
def newsapi_get_urls(search_words, agency_urls):
    if len(search_words) == 0 or agency_urls is None:
        return None
    print("Searching agencies for pages containing:", search_words)

    # This is my API key; each user must request their own
    # API key from https://newsapi.org/account
    api = NewsApiClient(api_key='6f174feb5d05447d920d538d45718afa')
    api_urls = []

    # Iterate over agencies and search words to pull more URLs
    # Limited to 1,000 requests/day - likely to be exceeded
    for agency in agency_urls:
        domain = agency_urls[agency].replace("http://", "")
        print(agency, domain)
        for word in search_words:
            # Get articles containing the query word (limited to 20 URLs per request)
            try:
                articles = api.get_everything(q=word, language='en',
                                              sources=agency, domains=domain)
            except:
                print("--->Unable to pull news from:", agency, "for", word)
                continue
            # Pull the URL from these articles (limited to 20)
            d = articles['articles']
            for i in range(len(d)):
                url = d[i]['url']
                api_urls.append([agency, word, url])

    df_urls = pd.DataFrame(api_urls, columns=['agency', 'word', 'url'])
    n_total = len(df_urls)
    # Remove duplicates
    df_urls = df_urls.drop_duplicates('url')
    n_unique = len(df_urls)
    print("\nFound a total of", n_total, "URLs, of which", n_unique, "were unique.")
    return df_urls
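# A minimal sketch of calling newsapi_get_urls(); the agency IDs and domains below are
# example values (NewsAPI source IDs such as 'bbc-news' and 'reuters'), not part of the
# original code:
agency_urls = {
    'bbc-news': 'http://www.bbc.co.uk',
    'reuters': 'http://www.reuters.com',
}
df = newsapi_get_urls(['election', 'economy'], agency_urls)
if df is not None:
    print(df.head())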
def hello():
    # Init
    newsapi = NewsApiClient(api_key='966611677d694731833118dddf7bd7fc')

    # /v2/everything
    forecast = newsapi.get_everything(q='forecast',
                                      sources='bbc-news,the-verge',
                                      domains='bbc.co.uk,techcrunch.com',
                                      from_param='2021-03-10',
                                      to='2021-12-11',
                                      language='en',
                                      sort_by='relevancy',
                                      page=1)
    # Find out what data type we are working with
    print(type(forecast))

    # Serialize the response dict to JSON and back so the route returns plain data
    json_format = json.dumps(forecast)
    jsnn = json.loads(json_format)
    print('type of output: ', type(jsnn))
    return jsnn  # return the response data
def index(request):
    newsapi = NewsApiClient(api_key=settings.API_KEY)

    # /v2/top-headlines
    top_headlines = newsapi.get_top_headlines(sources='bbc-news,the-verge,techcrunch')
    pprint(top_headlines)

    # /v2/everything
    all_articles = newsapi.get_everything(sources='bbc-news,the-verge,techcrunch')
    # pprint(all_articles)

    # /v2/sources
    sources = newsapi.get_sources()
    # pprint(sources)
    # pprint(dir(newsapi))

    articles = top_headlines['articles']
    desc = []
    news = []
    img = []
    url = []
    for i in range(len(articles)):
        item = articles[i]
        news.append(item['title'])
        desc.append(item['description'])
        img.append(item['urlToImage'])
        url.append(item['url'])
    mylist = zip(news, desc, img, url)
    return render(request, 'index.html', context={"mylist": mylist})
def news_api(request, token):
    """Return featured news articles for the user's field of study."""
    # Look up the user's field of study by the user's token
    sql = "SELECT field_of_study FROM `" + USER_TABLENAME + "` WHERE token='{}'".format(token)
    cursor = connection.cursor()
    cursor.execute(sql)
    field_of_study = cursor.fetchall()[0][0]

    newsapi = NewsApiClient(api_key=NEWS_API_KEY)
    all_articles = newsapi.get_everything(q=field_of_study, sources='bbc-news',
                                          language='en', sort_by='relevancy')

    title_list = []
    url_list = []
    url_to_image_list = []
    feed = all_articles["articles"]

    # Fall back to top headlines when the field-specific search returns too few articles
    if len(all_articles["articles"]) < 10:
        top_headlines = newsapi.get_top_headlines(sources='bbc-news')
        feed = top_headlines["articles"]

    for key in feed:
        title_list.append(key["title"])
        url_list.append(key["url"])
        url_to_image_list.append(key["urlToImage"])

    context = {
        'url': url_list,
        'imageUrl': url_to_image_list,
        'title': title_list
    }
    return Response(json.dumps(context), 200)
def search(request):
    if request.method == "GET":
        count = 0
        var = request.GET.get('u')
        newsapi = NewsApiClient(api_key="e3af2b7c171c4a8a9101eb0c4bae5280")
        topheadlines = newsapi.get_everything(q=var, language='en')
        articles = topheadlines['articles']

        desc = []
        news = []
        img = []
        link = []
        for i in range(len(articles)):
            myarticles = articles[i]
            content = myarticles['description']
            # Guard against articles whose description is None
            if content and var in content:
                count += 1
                news.append(myarticles['title'])
                desc.append(myarticles['description'])
                img.append(myarticles['urlToImage'])
                link.append(myarticles['url'])
        print(desc)

        mylist = zip(news, desc, img, link)
        if count == 0:
            return HttpResponse("No results found")
        else:
            return render(request, 'newsblog/newsource.html', context={"mylist": mylist})
def getArticles():
    # Don't worry, I update my API key every time I publish this on GitHub
    news = NewsApiClient(api_key='405cbb818b64428ab934b95bfec1426d')
    # NewsAPI source IDs must be hyphenated, e.g. 'associated-press'
    newsSources = ('bbc-news,the-verge,abc-news,ary-news,associated-press,wired,'
                   'aftenposten,bild,blasting-news,bloomberg,business-insider,'
                   'engadget,google-news')
    toDate = datetime.datetime.now().date()
    fromDate = toDate - datetime.timedelta(weeks=4)   # collect the last four weeks of news

    # Collect news contents
    all_news = []
    for i in range(1, 11):
        all_news.append(
            news.get_everything(sources=newsSources,
                                from_param=format(fromDate),
                                to=format(toDate),
                                language='en',
                                page_size=100,
                                page=i))

    # Create list of articles
    rawArticles = []
    for page in all_news:
        rawArticles = rawArticles + page['articles']
    # dict comprehension syntax - similar to usage in F#, or the foreach library in R
    rawArticles = {i: rawArticles[i] for i in range(len(rawArticles))}
    return rawArticles
def get_news_results(api_key: str, city: str, state: str, country: str,
                     article_limit: int = 5) -> str:
    # Serve from the in-memory cache when this location has already been looked up
    key = (city, state, country)
    if key in NEWSPAPER_RESULTS:
        return NEWSPAPER_RESULTS[key]

    news_api = NewsApiClient(api_key=api_key)
    articles = news_api.get_everything(q=f'coronavirus+{city}+{state}')['articles'][:article_limit]

    # Render the articles as an HTML list of links
    article_template = '<li><a href="{url}" target="_blank">{title}</a></li>'
    articles_html_parts = [
        article_template.format(url=article['url'], title=article['title'])
        for article in articles
    ]
    articles_html = '\n'.join(articles_html_parts)
    news_results = f"""
    <ul>
        {articles_html}
    </ul>
    """
    NEWSPAPER_RESULTS[key] = news_results
    return news_results
def newsByCompanyName(companies, N):
    '''
    Get news articles for the given companies over the given number of days.

    param: companies (dict) of company names as keys and position as values
    param: N (int) number of days of news to search for
    '''
    from_date = (datetime.now() - timedelta(days=N)).strftime("%Y-%m-%d")
    newsClient = NewsApiClient(api_key='551a38056949460ea29156d24df9735f')
    result = dict()
    for company in companies:
        # TODO: enhance search by searching for keywords related to the company, not just the name
        wordsToSearchFor = company
        all_articles = newsClient.get_everything(q=wordsToSearchFor,
                                                 sources='bbc-news,cnbc',
                                                 from_param=from_date,
                                                 language='en',
                                                 sort_by='relevancy')
        if len(all_articles['articles']) > 0:
            # Only keep the title, link, timestamp and source
            keysToKeep = ['title', 'url', 'publishedAt', 'source']
            refinedArticles = []
            for article in all_articles['articles']:
                refinedArticle = {key: article[key] for key in keysToKeep}
                refinedArticles.append(refinedArticle)
            result[company] = refinedArticles
    return result
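# A minimal sketch of calling newsByCompanyName(); the companies dict below (name -> position
# size) is a placeholder for illustration only:
portfolio = {'Apple': 10, 'Microsoft': 5}
recent_news = newsByCompanyName(portfolio, N=7)   # last 7 days of articles per company
for company, articles in recent_news.items():
    print(company, len(articles), 'articles')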
def display_exam():
    Subject = request.form['Subject']
    # Default to 'America' when the form field is empty or only whitespace
    if str(Subject).isspace() or Subject == '':
        Subject = 'America'
    print(Subject)

    # Init
    newsapi = NewsApiClient(api_key='a9288315ab104f7aa9564ca6d08b8ce4')

    # /v2/top-headlines
    top_headlines = newsapi.get_top_headlines(q=str(Subject), language='en')

    # /v2/everything
    all_articles = newsapi.get_everything(q=str(Subject), language='en')

    # /v2/sources
    sources = newsapi.get_sources()
    # print(sources)
    print(type(all_articles))
    # print(top_headlines)
    # print(newsapi)
    return render_template('home.html', sources=sources,
                           all_articles=json.dumps(all_articles),
                           top_headlines=top_headlines, newsapi=newsapi)
class GoogleNews():
    def __init__(self):
        self.news_api = NewsApiClient(api_key='cb7a4ae15a98429890aeedb9a7b460a0')
        self.key_word = [
            'Latest', 'World', 'U.S.', 'Business', 'Technology',
            'Entertainment', 'Sports', 'Science', 'Health'
        ]
        self.t = time.time()
        self.point_time = time.strftime('%Y-%m-%d', time.localtime(self.t))
        self.google_crawler = 1

    def googleNews(self):
        if self.google_crawler == 1:
            # Fetch the top stories from the Google News RSS feed
            news_url = "https://news.google.com/news/rss"
            ssl._create_default_https_context = ssl._create_unverified_context
            Client = urllib.request.urlopen(news_url)
            xml_page = Client.read()
            Client.close()
            soup_page = soup(xml_page, "xml")
            news_list = soup_page.findAll("item")
            return news_list
        elif self.google_crawler == 2:
            # Return google-news articles for the given date range and each category keyword
            today = self.point_time
            yesterday = time.strftime('%Y-%m-%d', time.localtime(self.t - 86400))
            url_list = []
            for kw in self.key_word:
                all_articles = self.news_api.get_everything(
                    q=kw,
                    sources='google-news',
                    domains='news.google.com',
                    from_param=yesterday,
                    to=today,
                    language='en',
                    sort_by='relevancy',
                    page_size=100,
                )
                articles = all_articles['articles']
                for i in range(len(articles)):
                    url_list.append(articles[i]['url'])
            return url_list
        else:
            # Return google-news top headlines
            top_headlines = self.news_api.get_top_headlines(
                sources='google-news',
                language='en',
                page_size=100,
            )
            articles = top_headlines['articles']
            url_list = []
            for i in range(len(articles)):
                url_list.append(articles[i]['url'])
            return url_list
class BlogContentTemplate:
    def __init__(self):
        self.newsapi = NewsApiClient(api_key=settings.NEWS_API_KEY)

    def get_content(self, context, tag="", page=1):
        result = []
        try:
            all_articles = self.newsapi.get_everything(q=context, page=page)
        except ConnectionError:
            pass
        except ReadTimeout:
            pass
        except NewsAPIException:
            pass
        else:
            if all_articles['status'] == 'ok':
                articles = all_articles['articles']
                for item in articles:
                    title = item['title']
                    content = item['content']
                    user = item['author']
                    image = item['urlToImage']
                    tags = tag
                    url = item['url']
                    date = item['publishedAt']
                    new_content = Content(title, content, user, image, url, tags, date)
                    result.append(new_content)
        return result
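# BlogContentTemplate.get_content() instantiates a Content class defined elsewhere in the
# original project. A minimal sketch of a compatible container, matching the positional
# arguments used above (this definition is an assumption, not the project's actual class):
class Content:
    def __init__(self, title, content, user, image, url, tags, date):
        self.title = title
        self.content = content
        self.user = user
        self.image = image
        self.url = url
        self.tags = tags
        self.date = date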
def get_news(search_term, considered_sources):
    newsapi = NewsApiClient(api_key=config.NEWS_API_KEY)
    d = {
        'id': [],
        'name': [],
        'author': [],
        'publication_bias': [],
        'title': [],
        'url': [],
        'description': [],
        'publishedAt': [],
        'content': [],
        'fulltext': []
    }
    for source in considered_sources:
        newspkg = newsapi.get_everything(q=search_term, sources=source)
        articlepkg = newspkg['articles']
        print("Gathering Articles from " + source)
        for a in articlepkg:
            sourcepkg = a['source']
            d['id'].append(sourcepkg['id'])
            d['name'].append(sourcepkg['name'])
            d['author'].append(a['author'])
            d['publication_bias'].append(get_bias(sourcepkg['id']))
            d['title'].append(a['title'])
            d['url'].append(a['url'])
            d['description'].append(a['description'])
            d['publishedAt'].append(a['publishedAt'])
            d['content'].append(a['content'])
            d['fulltext'].append(download_fulltext(a['url']))
    df = pd.DataFrame.from_dict(d)
    return df
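# A minimal sketch of calling get_news() and persisting the resulting DataFrame; the
# search term, source IDs and output path are placeholders for illustration only:
sources = ['bbc-news', 'fox-news', 'cnn']
articles_df = get_news('climate change', sources)
articles_df.to_csv('climate_change_articles.csv', index=False)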
def sensex(request):
    newsapi = NewsApiClient(api_key="92bfa5b740c3487390408cb5721efd7f")
    topheadlines = newsapi.get_everything(q="sensex", language='en', page=1,
                                          qintitle='sensex')
    articles = topheadlines['articles']

    desc = []
    news = []
    img = []
    post = []
    for i in range(len(articles)):
        myarticles = articles[i]
        img.append(myarticles['urlToImage'])
        news.append(myarticles['title'])
        desc.append(myarticles['content'])
        post.append(myarticles['publishedAt'])

    mylist = zip(news, desc, img, post)
    print(topheadlines)
    return render(request, 'sensex.html', context={"mylist": mylist})