import aylien_news_api
from aylien_news_api.rest import ApiException
from django.http import JsonResponse


def save_article(url):
    """Look up the story matching the given permalink URL and save it to the database."""
    apikey = "087e404043fb6f7df4c4ed55e72f3f7f"
    appid = "dc8dc66d"
    configuration = aylien_news_api.Configuration()
    configuration.api_key['X-AYLIEN-NewsAPI-Application-ID'] = appid
    configuration.api_key['X-AYLIEN-NewsAPI-Application-Key'] = apikey
    client = aylien_news_api.ApiClient(configuration)
    api_instance = aylien_news_api.DefaultApi(client)
    try:
        print(url)  # debug: log the requested URL
        api_response = api_instance.list_stories(
            published_at_end='NOW',
            links_permalink=[url],
        )
        if len(api_response.stories) == 0:
            return JsonResponse({"msg": "No article found for this url"}, status=404)
        if len(api_response.stories) > 1:
            # print(api_response.stories)
            return JsonResponse(
                {
                    "msg": "More than one article found for this url. Articles found: "
                           + str(len(api_response.stories))
                },
                status=404)
        story = api_response.stories[0]
        save_from_story(story)
        return JsonResponse({"msg": "Success"}, status=200)
    except ApiException:
        return JsonResponse({"msg": "Internal API error"}, status=500)
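Both save_article above and generate_articles below call a save_from_story helper that is not part of this excerpt. A minimal sketch of what such a helper could look like, assuming a hypothetical Django Article model with title, source, url, published_at and body fields (the app name, model and field names are assumptions for illustration, not the project's actual code):

# Hypothetical sketch only: the real save_from_story and its model live elsewhere.
from news.models import Article  # assumed app/model name


def save_from_story(story):
    """Persist one Aylien story as an Article row, skipping URLs already stored."""
    url = story.links.permalink
    if Article.objects.filter(url=url).exists():
        return
    Article.objects.create(
        title=story.title,
        source=story.source.name,
        url=url,
        published_at=story.published_at,
        body=story.body,
    )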
def generate_articles():
    """
    Function to generate article models to put into the database.

    Args: none
    Returns: none
    """
    apikey = "087e404043fb6f7df4c4ed55e72f3f7f"
    appid = "dc8dc66d"
    configuration = aylien_news_api.Configuration()
    configuration.api_key['X-AYLIEN-NewsAPI-Application-ID'] = appid
    configuration.api_key['X-AYLIEN-NewsAPI-Application-Key'] = apikey
    client = aylien_news_api.ApiClient(configuration)
    api_instance = aylien_news_api.DefaultApi(client)

    # 1. Fetch articles
    eligible_publishers = [
        "theguardian.com", "nytimes.com", "ft.com", "bloomberg.com", "reuters.com",
        "apnews.com", "thetimes.co.uk", "washingtonpost.com", "afp.com",
        "abcnews.go.com", "time.com", "wsj.com", "economist.com", "politico.com",
        "bbc.com", "pbs.com", "thehill.com", "usatoday.com", "npr.org",
        "cbsnews.com", "axios.com", "huffpost.com", "newyorker.com",
        "nationalreview.com", "slate.com", "theatlantic.com", "theweek.com",
        "vanityfair.com", "msnbc.com", "cnn.com", "theamericanconservative.com",
        "vox.com", "mic.com", "independent.co.uk", "thesun.co.uk", "metro.co.uk",
        "dailymail.co.uk", "telegraph.co.uk", "latimes.com", "cnet.com",
        "engadget.com", "theverge.com", "vice.com", "hollywoodreporter.com",
        "newsweek.com", "forbes.com", "sciencemag.org", "rte.com", "natgeo.com",
        "wanderlust.co.uk", "skysports.com", "espn.com", "theathletic.co.uk",
        "phys.org", "physicsworld.com", "sky.com", "techradar.com",
        "entertainmentdaily.co.uk", "digitalspy.com", "inews.co.uk", "ign.com",
        "france24.com", "dw.com", "euronews.com", "thelocal.it", "elpais.com",
        "cbc.ca", "globalnews.ca", "nationalpost.com", "msn.com", "nbcnews.com",
        "abc.net.au", "scmp.com", "seattletimes.com", "independent.ie",
        "standard.co.uk", "wired.co.uk", "fortune.com", "techcrunch.com",
        "usnews.com"
    ]

    for publisher in eligible_publishers:
        api_response = fetch_articles(api_instance, publisher)
        if api_response:
            stories = api_response.stories
            # TBD error handling
            # 2. Get the full text for every article
            # 2.1 Analyse sentiment
            for story in stories:
                save_from_story(story)
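generate_articles delegates the per-publisher query to a fetch_articles helper that is not shown in this excerpt. A minimal sketch, assuming it filters list_stories by source domain over a recent window (the time window, page size and error handling are assumptions, not the project's actual query):

from aylien_news_api.rest import ApiException


def fetch_articles(api_instance, publisher):
    """Fetch recent stories for one publisher domain; return None if the call fails."""
    try:
        # Hypothetical query parameters -- only the source_domain filter is
        # implied by the caller above.
        return api_instance.list_stories(
            source_domain=[publisher],
            published_at_start='NOW-1DAY',
            published_at_end='NOW',
            per_page=25,
        )
    except ApiException:
        return None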
def config(self, Aylien_ID, Aylien_key):
    """Configure the Aylien News API client with the supplied credentials."""
    configuration = aylien_news_api.Configuration()
    configuration.api_key['X-AYLIEN-NewsAPI-Application-ID'] = Aylien_ID
    configuration.api_key['X-AYLIEN-NewsAPI-Application-Key'] = Aylien_key
    client = aylien_news_api.ApiClient(configuration)
    self.api_instance = aylien_news_api.DefaultApi(client)
import csv
import datetime

import dateutil.parser
import pandas as pd
import aylien_news_api


def updateData():
    ############################ Update aylien_news.csv ############################
    df = pd.read_csv("data/aylien_news.csv")
    df['date'] = pd.to_datetime(df.date, format='%Y/%m/%d')
    df.sort_values(by='date', inplace=True, ascending=False)

    configuration = aylien_news_api.Configuration()
    configuration.api_key['X-AYLIEN-NewsAPI-Application-ID'] = '97c45927'
    configuration.api_key['X-AYLIEN-NewsAPI-Application-Key'] = '8e5a95f7bafa835d340a1be85a90588a'
    configuration.host = "https://api.aylien.com/news"
    api_instance = aylien_news_api.DefaultApi(aylien_news_api.ApiClient(configuration))

    # Fetch stories published since the most recent date already in the CSV,
    # stepping through the remaining range in five-day windows.
    date = list(df["date"])[0]
    e = str(datetime.date.today())
    s = str((date + datetime.timedelta(days=1)).date())
    from_date = s
    to_date = str((datetime.datetime.strptime(str(from_date), "%Y-%m-%d")
                   + datetime.timedelta(days=5)).date())
    rows = []
    crimes = {'burglary': 0, 'robbery': 0, 'murder': 0, 'kidnapping': 0, 'rape': 0}
    total_news_count = 0
    while dateutil.parser.parse(from_date).date() < dateutil.parser.parse(e).date():
        if dateutil.parser.parse(to_date).date() > dateutil.parser.parse(e).date():
            to_date = e
        for crime in crimes.keys():
            opts = {
                'title': crime,
                'sort_by': 'social_shares_count.facebook',
                'language': ['en'],
                'published_at_start': from_date + 'T00:00:00Z',
                'published_at_end': to_date + 'T00:00:00Z',
                'per_page': 100,
                'source_locations_country': ['IN'],
                'source_locations_state': ['Delhi'],
                'source_locations_city': ['Delhi'],
            }
            headlines = api_instance.list_stories_with_http_info(**opts)
            for i in headlines[0].stories:
                date = i.published_at.date()
                source = i.source.name
                title = i.title
                url = i.links.permalink
                content = i.body
                rows.append([date, source, title, crime, url, content])
                total_news_count += 1
                crimes[crime] += 1
        from_date = str((datetime.datetime.strptime(str(to_date), "%Y-%m-%d")
                         + datetime.timedelta(days=1)).date())
        to_date = str((datetime.datetime.strptime(str(from_date), "%Y-%m-%d")
                       + datetime.timedelta(days=5)).date())
    with open('data/aylien_news.csv', 'a') as f:
        writer = csv.writer(f)
        writer.writerows(rows)

    ############################ Update news_data.csv ############################
    # Concatenate all new article bodies and look for known Delhi locations in them.
    contents = ""
    for i in range(len(rows)):
        contents = contents + rows[i][5] + " "
    locations = findLocations(contents)  # helper defined elsewhere in the project
    with open('data/loc.txt', 'r') as f:
        s = f.read()
    loc = s.split('\n')
    delhi_locs = []
    for i in loc:
        delhi_locs.append(i.lower())
    all_locations = list(set(delhi_locs).intersection(locations))

    if len(all_locations) > 0:
        # Load the existing per-location records (tab-separated columns).
        df1 = pd.read_csv("data/news_data.csv")
        d = {}
        for i in range(len(df1)):
            lst = list(df1.iloc[i])
            location = lst[0]
            dates_lst = lst[1].split("\t")
            dates = []
            for j in dates_lst:
                dates.append(str(j))
            srcs = lst[2].split("\t")
            headlines = lst[3].split("\t")
            types = lst[4].split("\t")
            articles = lst[5].split("\t")
            ages_lst = lst[6].split("\t")
            ages = []
            for i in ages_lst:
                ages.append(int(i))
            businessmans_lst = lst[7].split("\t")
            businessmans = []
            for i in businessmans_lst:
                businessmans.append(int(i))
            urls = lst[8].split("\t")
            d[location] = [dates, headlines, types, articles, ages, businessmans, urls, srcs]

        business = ['businessman', 'jeweller', 'jeweler', 'shop owner', 'property dealer']
        c = 0
        for i in range(len(rows)):
            article = rows[i][-1]  # row layout: [date, source, title, crime, url, content]
            date = rows[i][0]
            headline = rows[i][2]
            crime_type = rows[i][3]
            news_url = rows[i][-2]
            if str(news_url) == "nan":
                news_url = "na"
            news_src = rows[i][1]
            businessman = 0
            for j in business:
                if j in article:
                    businessman = 1
                    break
            age = findAge(article)  # helper defined elsewhere in the project
            for i in all_locations:
                if i in article.lower():
                    c += 1
                    flag = 1
                    for j in loc:
                        if i == j.lower() or i == j:
                            loc_name = j
                            break
                    lst = d.get(loc_name, [[], [], [], [], [], [], [], []])
                    lst[0].append(str(date))
                    lst[1].append(headline)
                    lst[2].append(crime_type)
                    lst[3].append(article)
                    lst[4].append(age)
                    lst[5].append(businessman)
                    lst[6].append(news_url)
                    lst[7].append(news_src)
                    d[loc_name] = lst

        # Compute a per-location percentile score relative to the busiest location.
        crime_counts = {}
        max_count = 0
        for i in d.keys():
            crime_counts[i] = len(d[i][0])
            if len(d[i][0]) > max_count:
                max_count = len(d[i][0])
        percentile = {}
        for i in crime_counts.keys():
            p = 100 - int((crime_counts[i] / max_count) * 100)
            percentile[i] = p

        # Rewrite news_data.csv with the merged records.
        rows = []
        header = ['location', 'date', 'source', 'headline', 'crime_type', 'article',
                  'age', 'businessman', 'url', 'percentile']
        rows.append(header)
        for i in d.keys():
            for j in loc:
                if i == j.lower() or i == j:
                    loc_name = j
                    break
            row = [loc_name]
            lst = d[i]
            row.append('\t'.join(lst[0]))
            row.append('\t'.join(lst[7]))
            row.append('\t'.join(lst[1]))
            row.append('\t'.join(lst[2]))
            row.append('\t'.join(lst[3]))
            row.append('\t'.join(repr(n) for n in lst[4]))
            row.append('\t'.join(repr(n) for n in lst[5]))
            row.append('\t'.join(lst[6]))
            row.append(percentile[i])
            rows.append(row)
        with open('data/news_data.csv', 'w') as data:
            writer = csv.writer(data)
            writer.writerows(rows)

        # Update area.csv with per-location crime, age-band and victim statistics.
        df1 = pd.read_csv("data/news_data.csv")
        df2 = pd.read_csv("data/area.csv")
        crime_locations = list(df1["location"])
        rows = [list(df2.columns)]
        for i in range(len(df2)):
            loc = df2.iloc[i, 0]
            if loc == "AIIMS":
                continue
            if loc not in crime_locations:
                rows.append(list(df2.iloc[i]))
            else:
                dt = list(df1[df1["location"] == loc].iloc[0])
                age_lst = dt[-4].split("\t")
                crimes = dt[4].split("\t")
                businessman_count = list(dt[-3].split("\t")).count('1')
                crime_count = len(age_lst)
                age_crimes = {"0-21": 0, "22-50": 0, "50+": 0}
                crime_type = {'burglary': 0, 'robbery': 0, 'murder': 0,
                              'kidnapping': 0, 'rape': 0}
                for j in age_lst:
                    age = int(j)
                    if age > 0 and age <= 21:
                        age_crimes["0-21"] += 1
                    elif age > 21 and age <= 50:
                        age_crimes["22-50"] += 1
                    elif age > 50:
                        age_crimes["50+"] += 1
                for j in crimes:
                    crime_type[j] += 1
                lst = list(df2.iloc[i])
                lst[1] = crime_count
                lst[2] = age_crimes["0-21"]
                lst[3] = age_crimes["22-50"]
                lst[4] = age_crimes["50+"]
                lst[5] = crime_type["murder"]
                lst[6] = crime_type["burglary"]
                lst[7] = crime_type["robbery"]
                lst[8] = crime_type["kidnapping"]
                lst[9] = crime_type["rape"]
                lst[10] = 100 - dt[-1]
                lst[-1] = businessman_count
                rows.append(lst)
        with open('data/area.csv', 'w') as f:
            writer = csv.writer(f)
            writer.writerows(rows)
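updateData leans on two text-mining helpers, findLocations and findAge, that are defined elsewhere in the project. As a rough illustration of the kind of logic findAge might use, here is a hedged sketch that pulls the first age out of phrases such as "35-year-old" or "aged 35" and falls back to 0 (the regex and the fallback value are assumptions, chosen only to be consistent with the age > 0 checks above):

import re


# Hypothetical sketch: the real findAge is defined elsewhere in the project.
def findAge(article):
    """Return the first age mentioned in the article text, or 0 if none is found."""
    match = re.search(r'\b(\d{1,2})[\s-]year[\s-]old\b|\baged\s+(\d{1,2})\b',
                      article, flags=re.IGNORECASE)
    if match:
        return int(match.group(1) or match.group(2))
    return 0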
import os

import aylien_news_api

configuration = aylien_news_api.Configuration()
configuration.api_key['X-AYLIEN-NewsAPI-Application-ID'] = os.environ.get(
    'NEWSAPI_APP_ID')
configuration.api_key['X-AYLIEN-NewsAPI-Application-Key'] = os.environ.get(
    'NEWSAPI_APP_KEY')
client = aylien_news_api.ApiClient(configuration)
api_instance = aylien_news_api.DefaultApi(client)


def get_stories():
    """ Returns a list of story objects """
    response = api_instance.list_stories(title='Donald Trump',
                                         published_at_start='NOW-6HOURS',
                                         per_page=100)
    return response.stories


stories = get_stories()
clustered_stories = {}
clusters = []
for story in stories:
    if len(story.clusters) > 0:
        cluster = story.clusters[0]
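The last snippet breaks off part-way through the clustering pass. One way the loop might be finished, assuming each entry in story.clusters is a cluster id and the intent is to bucket stories by their first cluster (this continuation is a guess, not the original code):

# Hypothetical continuation of the loop above: group stories by first cluster id.
for story in stories:
    if len(story.clusters) > 0:
        cluster = story.clusters[0]
        if cluster not in clustered_stories:
            clustered_stories[cluster] = []
            clusters.append(cluster)
        clustered_stories[cluster].append(story)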