def craft_newsletter():
    '''
    Craft the newsletter. Returns JSON.

    :return: the newsletter json
    '''
    a = Articles(API_KEY=os.environ["NEWSAPI_KEY"])
    top_results = a.get_by_top(source="google-news")
    breaking = requests.get("https://librenews.io/api").json()["latest"]
    period = "AM"
    greeting = "It's 5:30 ZULU time."
    if datetime.datetime.now(tz=None).time() > datetime.time(12):
        period = "PM"
        greeting = "It's 17:30 ZULU time."
    name = period + " - " + datetime.date.today().strftime("%A, %d %B %Y")
    for story in top_results["articles"]:
        story["source"] = urlparse(story["url"]).netloc
    return {
        "top_stories": top_results["articles"][:3],
        "breaking": [story for story in breaking
                     if story["channel"] == "Breaking News"][:5],
        "name": name,
        "greeting": greeting
    }
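# A quick smoke test of craft_newsletter() above. This is a hypothetical
# usage sketch: it assumes NEWSAPI_KEY is exported and that the names the
# function relies on (os, requests, datetime, urlparse, Articles) are
# already imported at module level.
newsletter = craft_newsletter()
print(newsletter["name"], "-", newsletter["greeting"])
for story in newsletter["top_stories"]:
    print(story["source"], ":", story["title"])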
def news():
    with open('X.pkl', 'rb') as f:
        X = pickle.load(f)
    with open('y.pkl', 'rb') as f:
        y = pickle.load(f)

    # Generate the training and testing datasets
    count_vectorizer = CountVectorizer()
    X = count_vectorizer.fit_transform(X)  # Fit the data
    from sklearn.model_selection import train_test_split
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42)  # hold out 20% for evaluation

    # Logistic regression classifier
    clf = LogisticRegression()
    clf.fit(X_train, y_train)

    apikey = 'c9c0b7a1fc944a02bdadda8c09dace91'
    a = Articles(API_KEY=apikey)
    data = a.get(source="abc-news-au", sort_by='top')
    data = pd.DataFrame.from_dict(data)
    data = pd.concat(
        [data.drop(['articles'], axis=1), data['articles'].apply(pd.Series)],
        axis=1)
    description = data['description']

    def pre(x):
        # Clean one description, vectorize it, and predict its class.
        data1 = str(x)
        data1 = remove_new_lines(data1)
        data1 = remove_stop_words(data1)
        data1 = strip(data1)
        data1 = remove_weird(data1)
        data1 = np.array(data1).reshape(-1)
        vect = count_vectorizer.transform(data1)
        my_prediction1 = clf.predict(vect)
        return my_prediction1

    pred0 = pre(description[0])
    pred1 = pre(description[1])
    pred2 = pre(description[2])
    pred3 = pre(description[3])
    pred4 = pre(description[4])
    return render_template('news.html',
                           des0=description[0], des1=description[1],
                           des2=description[2], des3=description[3],
                           des4=description[4],
                           pred0=pred0, pred1=pred1, pred2=pred2,
                           pred3=pred3, pred4=pred4)
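# The five pre()/description pairs above are near-identical; a minimal
# sketch of the same work as a loop, to run inside the same view (assumes
# the view's description and pre are in scope; list names are hypothetical).
preds = [pre(description[i]) for i in range(5)]
descs = [description[i] for i in range(5)]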
def __init__(self, *args):
    super(ReporterModule, self).__init__(*args)
    self.API_KEY = self.get_configuration("newsapi.org_key")
    self.threshold = int(self.get_configuration("news_limit"))
    if self.API_KEY:
        self.articles = Articles(self.API_KEY)
        self.sources = Sources(self.API_KEY)
    else:
        print(_("error.news.configuration"))
        return  # __init__ may not return a value other than None
    self.sources_url = {}
    self.sources.information()
def listOfArticles():
    sourceList = listOfSources()
    articleList = []
    a = Articles(API_KEY="40e40820d389493abb369f099605fec3")
    for source in sourceList:
        try:
            b = a.get_by_latest(source=source)
            articleList.extend(b['articles'])
        except Exception:
            # Not every source supports the 'latest' sort; skip failures.
            pass
    return articleList
def __init__(self, *args):
    super(ReporterModule, self).__init__(*args)
    self.API_KEY = self.get_configuration("newsapi.org_key")
    self.threshold = int(self.get_configuration("news_limit"))
    if self.API_KEY:
        self.articles = Articles(self.API_KEY)
        self.sources = Sources(self.API_KEY)
    else:
        print("Kindly look back at the documentation to configure the news "
              "module properly, especially the API keys.")
        return  # __init__ may not return a value other than None
    self.sources_url = {}
    self.sources.information()
def get_news(sources):
    NEWS_API_KEY = '1bae2e39f2b540f3a15dbbcb269eba9b'
    articles = Articles(API_KEY=NEWS_API_KEY)
    info = articles.get(source=sources)
    news_objects = []
    length_of_articles = len(info['articles'])
    for i in range(length_of_articles):
        headline = info['articles'][i]['title']
        body = info['articles'][i]['description']
        url_web = info['articles'][i]['url']
        image = info['articles'][i]['urlToImage']
        time = info['articles'][i]['publishedAt']  # publication timestamp (unused below)
        news_objects.append(Template.GenericElement(
            title=headline, subtitle=body, item_url=url_web, image_url=image,
            buttons=[Template.ButtonWeb(title='Open in web', url=url_web)]))
    return news_objects
class Scraper:
    # example code
    # -----------------------
    # x = Scraper(api_key='xyz')
    # print(x.scrape_all_articles(language='en'))

    articles = None
    sources = None
    api_key = None

    def __init__(self, api_key) -> None:
        super().__init__()
        self.api_key = api_key
        self.articles = Articles(API_KEY=self.api_key)
        self.sources = Sources(API_KEY=self.api_key)

    def scrape_articles_for_sources(self, sources):
        '''
        Accepts the list of source names and returns all articles downloaded
        from the given sources

        :param sources: List of source id's
        :return: List of article json objects, containing: 'author', 'title',
            'description', 'url', 'urlToImage', 'publishedAt'
        '''
        articles = []
        for source in sources:
            try:
                # list of json objects:
                # author, title, description, url, urlToImage, publishedAt
                articles_for_source = self.articles.get(source=source).articles
            except BaseException:
                # if the server does not respond
                continue
            for article in articles_for_source:
                articles.append(article)
        return articles

    def scrape_sources(self, categories=[], language=None):
        '''
        Gets the newsapi sources associated with the given category (optional)
        and language (optional)

        :param categories: List of categories (optional)
        :param language: Language (optional)
        :return: List of source id's
        '''
        sources_dict = []
        for category in categories:
            sources_dict += self.sources.get(category, language).sources
        sources = set([source['id'] for source in sources_dict])
        return sources

    def scrape_all_articles(self, categories=[], language=None):
        '''
        Scrapes and returns all articles for the given category and language
        (parameters are optional)

        :param categories: list of categories (optional)
        :param language: language (optional)
        :return: List of article json objects, containing: 'author', 'title',
            'description', 'url', 'urlToImage', 'publishedAt'
        '''
        return self.scrape_articles_for_sources(
            self.scrape_sources(categories, language))
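# Usage mirroring the example comment at the top of the Scraper class
# (the key and category values are placeholders).
scraper = Scraper(api_key='xyz')
tech_articles = scraper.scrape_all_articles(categories=['technology'],
                                            language='en')
print(len(tech_articles), 'articles scraped')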
class ReporterModule(BaseModule):
    AFFIRMATIVE = ["YES", "YEAH", "SURE", "YAH", "YA"]
    NEGATIVE = ["NO", "NEGATIVE", "NAH", "NA", "NOPE"]

    def __init__(self, *args):
        super(ReporterModule, self).__init__(*args)
        self.API_KEY = self.get_configuration("newsapi.org_key")
        self.threshold = int(self.get_configuration("news_limit"))
        if self.API_KEY:
            self.articles = Articles(self.API_KEY)
            self.sources = Sources(self.API_KEY)
        else:
            print("Kindly look back at the documentation to configure the "
                  "news module properly, especially the API keys.")
            return  # __init__ may not return a value other than None
        self.sources_url = {}
        self.sources.information()

    def get_all_categories(self):
        return list(self.sources.all_categories())

    def get_by_category(self, category):
        srcs = self.sources.get_by_category(category).sources
        self.sources_url = {}
        for src in srcs:
            self.sources_url[src['name']] = src['url']
        return self.sources_url

    def get_sort_bys_of_source(self, source_name):
        return self.sources.search(source_name)[0]['sortBysAvailable']

    def all_sources(self):
        self.sources_url = self.sources.all_names()
        return self.sources_url

    def get_news(self):
        self.assistant.say(
            "Would you prefer any specific category? If yes, then what "
            "would it be?")
        category_status = self.assistant.listen().decipher()
        if category_status.upper() in self.NEGATIVE:
            category = False
        else:
            categories = self.get_all_categories()
            category = self.search(categories, category_status)
        self.assistant.say(
            "Any preference you would like to have about the source of "
            "your news? Like CNN, Time magazine, or maybe The Hindu?")
        source_status = self.assistant.listen().decipher()
        if source_status.upper() in self.NEGATIVE:
            source = False
        else:
            if category:
                sources_available = self.get_by_category(category)
                response = "Out of all the sources as follows"
                for source_name, source_url in sources_available.items():
                    response += " %s," % source_name
                response += " which one would you like to pick?"
                self.assistant.say(response)
                source_command = self.assistant.listen().decipher()
                source = self.search(list(sources_available), source_command)
            else:
                self.assistant.say(
                    "So would you want me to list all the sources, around "
                    "70, which to be honest would be a hefty task? If not, "
                    "then just let me know your source name and I will let "
                    "you know if it's available or not.")
                all_sources_status = self.assistant.listen().decipher()
                sources_available = self.all_sources()
                if all_sources_status.upper() in self.AFFIRMATIVE:
                    response = ("Good job, lazy ass, so here are all the "
                                "available sources as follows ")
                    sources_available_list = list(sources_available)
                    for source_name in sources_available_list:
                        response += " %s," % source_name
                    response += " which one would you like to pick?"
                    self.assistant.say(response)
                    source_command = self.assistant.listen().decipher()
                    all_sources_status = source_command
                source_found = self.search(list(sources_available),
                                           all_sources_status)
                source = source_found
        if source:
            sort_bys_available = self.get_sort_bys_of_source(source)
            if len(sort_bys_available) == 1:
                sort_by = sort_bys_available[0]
            else:
                if len(sort_bys_available) == 2:
                    response = "And what kind of news sort would you like? " \
                               "%s or %s?" % (sort_bys_available[0],
                                              sort_bys_available[1])
                else:
                    response = "And what kind of news sort would you like? " \
                               "%s or %s, or maybe %s?" % (
                                   sort_bys_available[0],
                                   sort_bys_available[1],
                                   sort_bys_available[2])
                self.assistant.say(response)
                sort_by_command = self.assistant.listen().decipher()
                sort_by = self.search(sort_bys_available, sort_by_command)
        else:
            self.assistant.say("And what kind of news sort would you like? "
                               "Latest, or maybe the top ones shown on the "
                               "front page?")
            sort_status_command = self.assistant.listen().decipher()
            sort_by = self.search(['top', 'popular', 'latest'],
                                  sort_status_command)
        if not source:
            if sort_by.lower() == "top":
                source = "google-news"
            elif sort_by.lower() == "latest":
                source = "the-telegraph"
            else:
                source = "time"
        response = self.get_response(source, sort_by)
        return response

    def handle(self):
        source = self.get_configuration("news_source")
        response = self.get_response(source)
        return response

    def get_response(self, source, sort_by=None, threshold=5):
        if self.threshold:
            threshold = self.threshold
        source = source.lower().replace(" ", "-")
        articles = self.articles.get(source, sort_by=sort_by).articles
        articles = articles[:threshold]
        response = "So the %s news from the %s news source is as follows: " % (
            sort_by, source)
        for article in articles:
            if article['title']:
                response += "%s, " % article['title']
            if article['description']:
                response += "%s, " % article['description']
            if article['author']:
                response += "was reported by %s." % article['author']
            response += " And in other news: "
        return response

    @staticmethod
    def search(dataset, query):
        values = [0 for _ in range(0, len(dataset))]
        search = query.lower().split()
        upper_threshold = len(search)
        for index, data in enumerate(dataset):
            search_array = data.split()
            for index2, text in enumerate(search_array):
                if index2 >= upper_threshold:
                    break
                threshold = len(search[index2])
                for i in range(0, len(text)):
                    if i >= threshold - 1:
                        break
                    if text[i] == search[index2][i]:
                        values[index] += 1
        max_value = max(values)
        max_index = values.index(max_value)
        return dataset[max_index]
""" news.py: Scrapes news sites (urls) specified in util.SOURCES for articles. """ import json import newspaper import nltk import datetime from newsapi.articles import Articles config = newspaper.Config() config.memoize_articles = False with open('api_keys.json') as keyfile: NEWSAPI_KEY = json.load(keyfile)['key'] news_container = Articles(API_KEY=NEWSAPI_KEY) def get_news(): # TODO: Generalize for an input of different sources or something bbc_news = news_container.get_by_top(source="bbc-news") wsj = news_container.get_by_top(source="the-wall-street-journal") natgeo = news_container.get_by_top(source="national-geographic") reuters = news_container.get_by_top(source="reuters") nyt = news_container.get_by_top(source="the-new-york-times") techcrunch = news_container.get_by_top(source="techcrunch") espn = news_container.get_by_top(source="espn") independent = news_container.get_by_top(source="independent") polygon = news_container.get_by_top(source="polygon") time_mag = news_container.get_by_top(source="time") huffpost = news_container.get_by_top(source="the-huffington-post")
def __init__(self, API_KEY):
    self.articles = Articles(API_KEY)
    self.sources = Sources(API_KEY)
    self.sources.information()
import json
import os
import re

import requests
from newsapi.articles import Articles
from newsapi.sources import Sources

API_KEY = "f044f5b63a7c4139858611a1ae6dc5f0"
s = Sources(API_KEY=API_KEY)
a = Articles(API_KEY=API_KEY)

# print(s.information().all_categories())
# print(s.get_by_category("general"))


def get_country_news(country):
    country_id = country + "&"
    url = ('https://newsapi.org/v2/top-headlines?'
           'country=' + country_id + 'apiKey=' + API_KEY)
    response = requests.get(url)
    response = response.json()
    path = os.path.join(os.getcwd(), "posts")
    path = os.path.join(path, "regional_news")
    path = os.path.join(path, country)
    for main_key in response.items():
        if main_key[0] == "articles":
            # Assumed continuation (the source snippet ends here): return
            # the article list for the requested country.
            return main_key[1]
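# Hypothetical invocation of get_country_news, using the two-letter country
# code that the newsapi top-headlines endpoint expects.
us_articles = get_country_news("us")
print(len(us_articles), "articles for the US")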
import newsapi
import pandas as pd
from tkinter import *
import tkinter.messagebox

apikey = '*****'
from newsapi.articles import Articles

a = Articles(API_KEY=apikey)


def nw():
    ans = tkinter.messagebox.askquestion("Action", "Want to Update?")
    if ans == "yes":
        print("Updated!")
        tkinter.messagebox.showinfo("Updated", "Updated!")


root = Tk()
tf = Frame(root)
label = Label(tf, text="News Update")
label.pack(side=TOP)


def gr():
    print(var.get())
    data = a.get(source=var.get(), sort_by='top')
    data = pd.DataFrame.from_dict(data)
    data = pd.concat(
        [data.drop(['articles'], axis=1), data['articles'].apply(pd.Series)],
        axis=1)
def __init__(self, api_key=keys.news['api_key']):
    self.api_key = api_key
    self.article = Articles(self.api_key)
    self.source = Sources(self.api_key)
    self.base_url = keys.news['base_everything_url']
    self.logger = logutils.get_logger('News Data Ingestion')
import newsapi
import requests
import json
import os
from newsapi.articles import Articles
from newsapi.sources import Sources

a = Articles(API_KEY="537b165a4f314fedae8cb39788d4d713")
s = Sources(API_KEY="537b165a4f314fedae8cb39788d4d713")

res = a.get(source="daily-mail")['articles']
bbc = a.get(source="bbc-news")['articles']
telegraph = a.get(source="the-telegraph")['articles']
guardian = a.get(source="the-guardian-uk")['articles']
independent = a.get(source="independent")['articles']
sports = a.get(source="the-sport-bible")['articles']

# results = s.get_by_country("gb").sources
# # s.get_by_category("politics")
# resultsString = ''.join(str(e) for e in results)
# filename = 'news_stream.py'
# with open(filename, 'a') as file:
#     for result in independent:
#         print(result['title'])
#         # If you want other things from the tweet object you can specify it here
#         file.write(result['title'] + os.linesep)
from newsapi.articles import Articles
import requests
import json

url = 'https://hooks.slack.com/services/T8N4B1741/B8NPU0hjjjFJR/Bm3J7dDdYFDmKcYtTCwggxsj'
a = Articles(API_KEY="25ead91356d64bf38bff3fc87bcd5d8e")
payload = {
    "attachments": [{
        "title": "The Further Adventures of Slackbot",
        "author_icon": "http://a.slack-edge.com/7f18https://a.slack-edge.com/bfaba/img/api/homepage_custom_integrations-2x.png",
        "image_url": "http://i.imgur.com/OJkaVOI.jpg?1"
    }, {
        "title": "Headline",
        "text": "After @episod pushed exciting changes to a devious new branch back in Issue 1, Slackbot notifies @don about an unexpected deploy..."
    }, {
        "fallback": "Read More About it.",
        "title": "Read More About it.",
        "callback_id": "comic_1234_xyz",
        "color": "#3AA3E3",
        "attachment_type": "default",
        "actions": [
            # Assumed completion: the source snippet is cut off inside this
            # list; a single interactive button in Slack's legacy format.
            {"name": "read", "text": "Read", "type": "button", "value": "read"}
        ]
    }]
}
def __init__(self, api_key) -> None:
    super().__init__()
    self.api_key = api_key
    self.articles = Articles(API_KEY=self.api_key)
    self.sources = Sources(API_KEY=self.api_key)
from newsapi.articles import Articles
from newsapi.sources import Sources
import secrets

a = Articles(API_KEY=secrets.newapi_api)
s = Sources(API_KEY=secrets.newapi_api)

print(s.information())
from newsapi.articles import Articles
# import pymongo
# from pymongo import MongoClient
from cassandra.cluster import Cluster
from cassandra.query import BatchStatement, SimpleStatement
import json
import pprint
import sys

# Key to make requests through the API
api = Articles(API_KEY="3e40232f1ea246cb85c76b46bc7543d3")

# The sources selected
sources = ["abc-news-au", "al-jazeera-english", "ars-technica",
           "associated-press", "bbc-news", "bbc-sport", "bild", "bloomberg",
           "breitbart-news", "business-insider", "business-insider-uk",
           "buzzfeed", "cnbc", "cnn", "daily-mail", "engadget",
           "entertainment-weekly", "espn", "espn-cric-info",
           "financial-times", "focus", "football-italia", "fortune",
           "four-four-two", "fox-sports", "google-news", "gruenderszene",
           "hacker-news", "ign", "independent", "mashable", "metro",
           "mirror", "mtv-news", "mtv-news-uk", "national-geographic",
           "new-scientist", "newsweek", "new-york-magazine", "nfl-news",
           "polygon", "recode", "reddit-r-all", "reuters", "spiegel-online",
           "t3n", "talksport", "techcrunch", "techradar", "the-economist",
           "the-guardian-au", "the-guardian-uk", "the-hindu",
           "the-huffington-post", "the-lad-bible", "the-new-york-times",
           "the-sport-bible", "the-telegraph", "the-times-of-india",
           "the-verge", "the-wall-street-journal", "the-washington-post",
           "time", "usa-today", "wired-de"]

'''["the-wall-street-journal", "al-jazeera-english", "bbc-news", "bloomberg",
"business-insider", "cnbc", "cnn", "daily-mail", "engadget", "espn",
"financial-times", "fortune", "fox-sports", "mtv-news", "new-scientist",
"new-york-magazine", "nfl-news", "reuters", "talksport", "techcrunch",
"the-economist", "the-guardian-uk", "the-hindu", "the-new-york-times",
"the-sport-bible", "the-times-of-india", "the-verge",
"the-wall-street-journal", "time", "usa-today"]'''

# wsj = api.get(source="the-wall-street-journal", sort_by="top")
# wsj_arts_json = json.dumps(wsj.articles, ensure_ascii=False)
# type(wsj_arts_json)
import newsapi
import numpy
import pandas as pd
from newsapi.articles import Articles

apikey = '455e01c84ca44ff387187f10f202bed3'
a = Articles(API_KEY=apikey)
data = a.get(source="the-new-york-times", sort_by='top')
# print(data)  # raw news data

# -----------------------------------------------------------
data = pd.DataFrame.from_dict(data)
data = pd.concat(
    [data.drop(['articles'], axis=1), data['articles'].apply(pd.Series)],
    axis=1)
# data.head()

# drop unused columns; display only title and description
news_df = data.drop(columns=[
    'status', 'source', 'sortBy', 'author', 'url', 'urlToImage', 'publishedAt'
])
# print(news_df)
print("---------------------------------------------------------------------")
print("---------------------------------------------------------------------")
import rq
from rq import get_current_job, Queue
from rq.job import Job
import simplify
import json
import requests
import time
import base64
import calendar
import io
from newsapi.articles import Articles

utility_referral = ''
# simplify.public_key = "sbpb_Njc3ZDkyMmYtYTE0OS00MTRjLWE5YmUtZjQ3MTI5ZWUzNmE3"
# simplify.private_key = "3KzZq8dCCUhQMh1dTCU6jPrwdG0O4wwwizAP82LcfpN5YFFQL0ODSXAOkNtXTToq"
# ALPHAVANTAGE_API_KEY = "OYL0XNT0O85E76PM"

newsapi = Articles(API_KEY='3c0850b9cd1041989ae33dd295793c51')
job_id = ''

# queue = rq.Queue(connection=Redis.from_url('redis://'))
# queue.delete(delete_jobs=True)
# queue = rq.Queue('tasks', connection=Redis.from_url('redis://'))


@app.route('/')
@app.route('/index')
# @login_required
def index():
    return render_template('index.html')


# #################### BEGIN SPRINT 1 CODE ##########################
# -*- coding: utf-8 -*-
"""
About: Python wrapper for the New York Times Archive API
https://developer.nytimes.com/article_search_v2.json
"""
from newsapi.articles import Articles
import sys
import json
import requests

key = '522497f7b4b940b7946eeed6909ed817'
params = {}
api = Articles(API_KEY=key)


class APIKeyException(Exception):
    def __init__(self, message):
        self.message = message


class InvalidQueryException(Exception):
    def __init__(self, message):
        self.message = message


class ArchiveAPI(object):
    def __init__(self, key=None):
        self.key = key
        self.root = 'http://api.nytimes.com/svc/archive/v1/{}/{}.json?api-key={}'
        if not self.key:
            # Assumed completion (the source snippet ends here): refuse to
            # proceed without a key, using the APIKeyException defined above.
            raise APIKeyException('Warning: API key required.')
from flask import Flask, jsonify, render_template, request, session, flash, redirect, abort
from newsapi.articles import Articles
from newsapi.sources import Sources
# from flask.ext.socketio import SocketIO, emit
from sqlalchemy.orm import sessionmaker
import os
from tabledef import *

engine = create_engine('sqlite:///database.db', echo=True)
app = Flask(__name__)
a = Articles(API_KEY="867af1dffb80450b9770b4bcc10c8e14")
s = Sources(API_KEY="867af1dffb80450b9770b4bcc10c8e14")

"""app.config['SECRET_KEY'] = 'secret!'
socketio = SocketIO(app)

@socketio.on('my event')  # Decorator to catch an event called "my event":
def test_message(message):  # test_message() is the event callback function.
    emit('my response', {'data': 'got it!'})  # Trigger a new event called "my response"
"""
# that can be caught by another callback later in the program.


@app.route("/")
def home():
    if session.get('logged_in'):
        return render_template("Welcome.html")
    else:
        return render_template("login.html")


@app.route('/login', methods=["POST"])
def do_admin_login():
    POST_USERNAME = str(request.form['username'])
    # Assumed continuation: the snippet is truncated here; the password would
    # presumably be read the same way from request.form['password'].
class News:
    def __init__(self, api_key=keys.news['api_key']):
        self.api_key = api_key
        self.article = Articles(self.api_key)
        self.source = Sources(self.api_key)
        self.base_url = keys.news['base_everything_url']

    def get_data(self, query, from_date=None, to_date=None, page_size=100,
                 sort_by='publishedAt', language='en', **kwargs):
        key_value_params = {
            'apiKey': self.api_key,
            'q': query,
            'from': from_date,
            'to': to_date,
            'sortBy': sort_by,
            'pageSize': page_size,
            'language': language
        }
        url = self._data_config(self.base_url,
                                query_separator='?',
                                key_value_params=key_value_params)
        response = requests.get(
            url,
            headers={
                'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) '
                              'AppleWebKit/537.36 (KHTML, like Gecko) '
                              'Chrome/39.0.2171.95 Safari/537.36'
            })
        return self._parse_data(response.json())

    @staticmethod
    def _data_config(base_url, **kwargs):
        query_separator = None
        key_value_params = None
        join_sep = '&'
        url = base_url
        if 'query_separator' in kwargs.keys():
            query_separator = kwargs['query_separator']
        if 'key_value_params' in kwargs.keys():
            key_value_params = kwargs['key_value_params']
        if query_separator is not None:
            url = base_url + str(query_separator)
        if key_value_params is not None:
            for key in key_value_params.keys():
                if key_value_params[key] is not None:
                    url = url + str(key) + '=' + str(
                        key_value_params[key]) + join_sep
        return url[:-1]

    @staticmethod
    def _parse_data(news_response_json):
        article_list = list([])
        if news_response_json['status'] == 'ok':
            article_list = news_response_json['articles']
            for article in article_list:
                try:
                    article['source'] = article['source']['name']
                except Exception:
                    pass
        return article_list

    def get_articles(self, source_id, selection_type="popular"):
        if selection_type == 'latest':
            return self.article.get_by_latest(source_id)
        elif selection_type == 'top':
            return self.article.get_by_top(source_id)
        else:
            return self.article.get_by_popular(source_id)
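# A usage sketch for the News class above. It assumes keys.news is
# configured and requests is imported; the query and date values are
# placeholders.
news = News()
articles = news.get_data('bitcoin', from_date='2018-01-01',
                         to_date='2018-01-31', page_size=20)
for article in articles[:3]:
    print(article['source'], '-', article['title'])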
class NewsDataIngestion(DataIngestionInterface):
    def __init__(self, api_key=keys.news['api_key']):
        self.api_key = api_key
        self.article = Articles(self.api_key)
        self.source = Sources(self.api_key)
        self.base_url = keys.news['base_everything_url']
        self.logger = logutils.get_logger('News Data Ingestion')

    def get_data(self, query, from_date=None, to_date=None, page_size=100,
                 sort_by='publishedAt', language='en', **kwargs):
        key_value_params = {
            'apiKey': self.api_key,
            'q': query,
            'from': from_date,
            'to': to_date,
            'sortBy': sort_by,
            'pageSize': page_size,
            'language': language
        }
        url = self.data_config(self.base_url,
                               query_separator='?',
                               key_value_params=key_value_params)
        response = requests.get(url)
        return response.json()

    def data_config(self, base_url, **kwargs):
        query_separator = None
        key_value_params = None
        join_sep = '&'
        url = base_url
        if 'query_separator' in kwargs.keys():
            query_separator = kwargs['query_separator']
        if 'key_value_params' in kwargs.keys():
            key_value_params = kwargs['key_value_params']
        if query_separator is not None:
            url = base_url + str(query_separator)
        if key_value_params is not None:
            for key in key_value_params.keys():
                if key_value_params[key] is not None:
                    url = url + str(key) + '=' + str(
                        key_value_params[key]) + join_sep
        return url[:-1]

    def store_data(self, data_list, connection_object):
        connection_object.set_collection(constants.NEWS_COLLECTION_NAME)
        for data_dict in data_list:
            try:
                if newsutils.check_duplicate_document(data_dict) is False:
                    connection_object.insert_document(data_dict)
            except Exception:
                self.logger.error('Error while inserting data')
        connection_object.close_connection()

    def parse_data(self, news_response_json, product='default'):
        article_list = list([])
        if news_response_json['status'] == 'ok':
            article_list = news_response_json['articles']
            for article in article_list:
                try:
                    article['source'] = article['source']['name']
                    article['product'] = product
                    article['human_date'] = \
                        newsutils.convert_string_timestamp_to_python_date(
                            article['publishedAt'])
                except Exception:
                    self.logger.error("error while parsing data")
        return article_list

    def get_articles(self, source_id, selection_type="popular"):
        if selection_type == 'latest':
            return self.article.get_by_latest(source_id)
        elif selection_type == 'top':
            return self.article.get_by_top(source_id)
        else:
            return self.article.get_by_popular(source_id)

    def data_ingestion_pipeline(self, query, product='default'):
        news_json = self.get_data(query)
        self.logger.info("News data fetched for product %s" % product)
        parsed_news_list = self.parse_data(news_json, product=product)
        self.logger.info("News data parsed for product %s" % product)
        mongo_connector = dbutils.get_mongodb_connection()
        self.store_data(parsed_news_list, mongo_connector)
        self.logger.info("News data stored for product %s" % product)
def __init__(self, api_key=keys.news['api_key']):
    self.api_key = api_key
    self.article = Articles(self.api_key)
    self.source = Sources(self.api_key)
    self.base_url = keys.news['base_everything_url']
from newsapi.articles import Articles
from newsapi.sources import Sources

key = '96af62a035db45bda517a9ca62a25ac3'
a, s = Articles(API_KEY=key), Sources(API_KEY=key)

s.all()  # get all sources offered by newsapi
a.get(source='the-new-york-times')
s.get(category='technology', language='en', country='US')

from newsapi import NewsAPI

key = '96af62a035db45bda517a9ca62a25ac3'
params = {}
api = NewsAPI(key)
sources = api.sources(params)
articles = api.articles(sources[0]['id'], params)

# ############### NY Times API #############################################
import sys, csv, json
reload(sys)
sys.setdefaultencoding('utf8')  # Python 2 only

"""
About: Python wrapper for the New York Times Archive API
https://developer.nytimes.com/article_search_v2.json
"""


class APIKeyException(Exception):
    def __init__(self, message):
        # Assumed completion: matches the identical class in the other
        # NYT Archive API snippets in this listing.
        self.message = message
class ReporterModule(BaseModule):
    AFFIRMATIVE = ["YES", "YEAH", "SURE", "YAH", "YA"]
    NEGATIVE = ["NO", "NEGATIVE", "NAH", "NA", "NOPE"]

    def __init__(self, *args):
        super(ReporterModule, self).__init__(*args)
        self.API_KEY = self.get_configuration("newsapi.org_key")
        self.threshold = int(self.get_configuration("news_limit"))
        if self.API_KEY:
            self.articles = Articles(self.API_KEY)
            self.sources = Sources(self.API_KEY)
        else:
            print(_("error.news.configuration"))
            return  # __init__ may not return a value other than None
        self.sources_url = {}
        self.sources.information()

    def get_all_categories(self):
        return list(self.sources.all_categories())

    def get_by_category(self, category):
        srcs = self.sources.get_by_category(category).sources
        self.sources_url = {}
        for src in srcs:
            self.sources_url[src['name']] = src['url']
        return self.sources_url

    def get_sort_bys_of_source(self, source_name):
        return self.sources.search(source_name)[0]['sortBysAvailable']

    def all_sources(self):
        self.sources_url = self.sources.all_names()
        return self.sources_url

    def get_news(self):
        self.assistant.say(_("news.category.ask"))
        category_status = self.assistant.listen().decipher()
        if category_status.upper() in self.NEGATIVE:
            category = False
        else:
            categories = self.get_all_categories()
            category = self.search(categories, category_status)
        self.assistant.say(_("news.sources.ask"))
        source_status = self.assistant.listen().decipher()
        if source_status.upper() in self.NEGATIVE:
            source = False
        else:
            if category:
                sources_available = self.get_by_category(category)
                response = _("news.sources.list")
                for source_name, source_url in sources_available.items():
                    response += " %s," % source_name
                response += _("news.sources.select")
                self.assistant.say(response)
                source_command = self.assistant.listen().decipher()
                source = self.search(list(sources_available), source_command)
            else:
                self.assistant.say(_("news.sources.all.ask"))
                all_sources_status = self.assistant.listen().decipher()
                sources_available = self.all_sources()
                if all_sources_status.upper() in self.AFFIRMATIVE:
                    response = _("news.sources.all")
                    sources_available_list = list(sources_available)
                    for source_name in sources_available_list:
                        response += " %s," % source_name
                    response += _("news.sources.select")
                    self.assistant.say(response)
                    source_command = self.assistant.listen().decipher()
                    all_sources_status = source_command
                source_found = self.search(list(sources_available),
                                           all_sources_status)
                source = source_found
        if source:
            sort_bys_available = self.get_sort_bys_of_source(source)
            if len(sort_bys_available) == 1:
                sort_by = sort_bys_available[0]
            else:
                if len(sort_bys_available) == 2:
                    response = _("news.sort.two_options").format(
                        sort_bys_available[0], sort_bys_available[1])
                else:
                    response = _("news.sort.three_options").format(
                        sort_bys_available[0],
                        sort_bys_available[1],
                        sort_bys_available[2],
                    )
                self.assistant.say(response)
                sort_by_command = self.assistant.listen().decipher()
                sort_by = self.search(sort_bys_available, sort_by_command)
        else:
            self.assistant.say(_("news.sort.described_options"))
            sort_status_command = self.assistant.listen().decipher()
            sort_by = self.search(['top', 'popular', 'latest'],
                                  sort_status_command)
        if not source:
            if sort_by.lower() == "top":
                source = "google-news"
            elif sort_by.lower() == "latest":
                source = "the-telegraph"
            else:
                source = "time"
        response = self.get_response(source, sort_by)
        return response

    def handle(self):
        source = self.get_configuration("news_source")
        response = self.get_response(source)
        return response

    def get_response(self, source, sort_by=None, threshold=5):
        if self.threshold:
            threshold = self.threshold
        source = source.lower().replace(" ", "-")
        articles = self.articles.get(source, sort_by=sort_by).articles
        articles = articles[:threshold]
        response = _("news.report").format(sort_by, source)
        for article in articles:
            if article['title']:
                response += "%s, " % article['title']
            if article['description']:
                response += "%s, " % article['description']
            if article['author']:
                response += _("news.report.by").format(article['author'])
            response += _("news.report.continue")
        return response

    @staticmethod
    def search(dataset, query):
        values = [0 for _ in range(0, len(dataset))]
        search = query.lower().split()
        upper_threshold = len(search)
        for index, data in enumerate(dataset):
            search_array = data.split()
            for index2, text in enumerate(search_array):
                if index2 >= upper_threshold:
                    break
                threshold = len(search[index2])
                for i in range(0, len(text)):
                    if i >= threshold - 1:
                        break
                    if text[i] == search[index2][i]:
                        values[index] += 1
        max_value = max(values)
        max_index = values.index(max_value)
        return dataset[max_index]
import pandas as pd
from nltk.classify import NaiveBayesClassifier
from nltk.corpus import subjectivity
from nltk.sentiment import SentimentAnalyzer
from nltk.sentiment.util import *
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import unicodedata
import math
import h5py
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.python.framework import ops
from newsapi.articles import Articles
from newsapi.sources import Sources

key = '96af62a035db45bda517a9ca62a25ac3'
a = Articles(API_KEY=key)
s = Sources(API_KEY=key)


class APIKeyException(Exception):
    def __init__(self, message):
        self.message = message


class InvalidQueryException(Exception):
    def __init__(self, message):
        self.message = message


class ArchiveAPI(object):
    def __init__(self, key=None):
        # Assumed completion: the snippet breaks off here; storing the key
        # matches the identical ArchiveAPI class elsewhere in this listing.
        self.key = key
# -*- coding: utf-8 -*-
"""
Created on Mon Apr 03 10:57:46 2017

@author: Sarang
"""
from newsapi.articles import Articles
from newsapi.sources import Sources

f = open("API_KEY.txt")
api_key = f.read()
a = Articles(api_key)
s = Sources(api_key)
print(a)
# print(s.get(category='technology', language='en', country='uk'))

import requests
r = requests.get(
    'https://newsapi.org/v1/articles?source=the-next-web&sortBy=latest&apiKey=153cffe401b84aa8ab8f19d01a354747'
)
print(r.text)