def test_places_v1(self):
    """places() v1 should surface Virginia as the top-confidence hit for
    both a single-string input and a batched (list) input."""
    sample = "Lets all go to Virginia beach before it gets too cold to wander outside."

    # Single document: expect a flat list of place dicts.
    single = places(sample, version=1)
    self.assertTrue(isinstance(single, list))
    best_first = sorted(single, key=lambda hit: hit['confidence'], reverse=True)
    self.assertTrue('Virginia' in best_first[0]['text'])

    # Batched input: expect one result list per document.
    batch = places([sample] * 2, version=1)
    self.assertTrue(isinstance(batch, list))
    ranked = [
        sorted(doc, key=lambda hit: hit['confidence'], reverse=True)
        for doc in batch
    ]
    self.assertTrue('Virginia' in ranked[0][0]['text'])
def test_places(self):
    """Default-version places() should surface Virginia as the top hit for
    both a single-string input and a batched (list) input."""
    sample = "Lets all go to Virginia Beach before it gets too cold to wander outside."

    # Single document: expect a flat list of place dicts.
    single = places(sample)
    self.assertTrue(isinstance(single, list))
    best_first = sorted(single, key=lambda hit: hit['confidence'], reverse=True)
    self.assertTrue('Virginia' in best_first[0]['text'])

    # Batched input: expect one result list per document.
    batch = places([sample] * 2)
    self.assertTrue(isinstance(batch, list))
    ranked = [
        sorted(doc, key=lambda hit: hit['confidence'], reverse=True)
        for doc in batch
    ]
    self.assertTrue('Virginia' in ranked[0][0]['text'])
def test_places_v2(self):
    """places() (v2 path) should surface Virginia as the top hit for both a
    single-string input and a batched (list) input."""
    sample = (
        "Lets all go to Virginia beach before it gets too cold to wander outside."
    )

    # Single document: expect a flat list of place dicts.
    single = places(sample)
    self.assertTrue(isinstance(single, list))
    best_first = sorted(single, key=lambda hit: hit["confidence"], reverse=True)
    self.assertTrue("Virginia" in best_first[0]["text"])

    # Batched input: expect one result list per document.
    batch = places([sample] * 2)
    self.assertTrue(isinstance(batch, list))
    ranked = [
        sorted(doc, key=lambda hit: hit["confidence"], reverse=True)
        for doc in batch
    ]
    self.assertTrue("Virginia" in ranked[0][0]["text"])
def find_plot_country(book_page_name):
    """Look up a book's Wikipedia page and print a detected place name.

    Runs indico place-detection on the page summary first; if nothing is
    detected there, falls back to the "Plot" section.  Prints (does not
    return) the chosen place text.

    Fixes vs. original: the else-branch re-called indicoio.places() on the
    summary whose result was already in `places`; section("Plot") was
    fetched twice; the [1] index was unguarded and could raise IndexError.
    """
    page_results = wikipedia.page(book_page_name)
    page_summary = page_results.summary
    places = indicoio.places(page_summary)
    if not places:
        # Fetch the Plot section once, echo it, and fall back to it.
        page_plot = page_results.section("Plot")
        print(page_plot)
        places = indicoio.places(page_plot)
    # NOTE(review): index 1 (the *second* detected place) matches the
    # original code — presumably intentional, but confirm; index 0 would
    # be the usual "best hit" choice.
    if len(places) > 1:
        print(places[1]['text'])
def bing_search(query, name):
    """Search Bing News for `query` and geotag articles mentioning `name`.

    Calls the Bing v5 news-search endpoint (authenticated with BING_KEY),
    keeps only articles whose title contains `name`, and resolves a
    location for each via indico place-detection — preferring a place found
    in the title, falling back to the highest-confidence place found in the
    description.

    Returns a list of dicts with keys: title, location, description,
    date_pub, url.

    Fix vs. original: the query is now URL-encoded with quote_plus (the
    original interpolated it raw, so spaces/special characters produced a
    malformed request); the duplicated record-building code is factored
    into one helper.
    """
    from urllib.parse import quote_plus

    # base API url; count max at 100
    url = 'https://api.cognitive.microsoft.com/bing/v5.0/news/search'
    url += '?q={}&count=100'.format(quote_plus(query))
    headers = {'Ocp-Apim-Subscription-Key': BING_KEY}
    r = requests.get(url, headers=headers)
    resp = json.loads(r.text)

    def make_record(article, location):
        # Normalize one Bing article into the flat dict shape callers expect.
        return {
            'title': article['name'],
            'location': location,
            'description': article['description'],
            'date_pub': article['datePublished'],
            'url': article['url'],
        }

    news = []
    # Response structure is dict > list > dict; articles live under "value".
    for article in resp["value"]:
        if name not in article['name']:
            continue
        title_place = indicoio.places(article['name'])
        descrip_place = indicoio.places(article['description'])
        sorted_locations = sorted(
            descrip_place, key=lambda p: p['confidence'], reverse=True)
        # Prefer a place detected in the title; otherwise use the
        # highest-confidence place from the description.
        if title_place:
            news.append(make_record(article, title_place[0]['text']))
        elif sorted_locations:
            news.append(make_record(article, sorted_locations[0]['text']))
    return news
def find_plot_country(book_page_name):
    """Return the highest-confidence place name for a book's Wikipedia page.

    Prefers places detected in the "Plot" section; falls back to the page
    summary when the plot yields nothing.  Returns the error-name string on
    wikipedia lookup failures, or a not-found message when no place is
    detected at all.
    """
    try:
        page = wikipedia.page(book_page_name)
    except wikipedia.exceptions.DisambiguationError:
        return 'DisambiguationError'
    except wikipedia.exceptions.PageError:
        return 'PageError'

    detected = []
    plot_text = page.section('Plot')
    if plot_text is not None and plot_text != '':
        detected = indicoio.places(plot_text)
    if not detected:
        # No plot section (or no hits in it) — fall back to the summary.
        detected = indicoio.places(page.summary)

    # Rank candidates by confidence, highest first.
    ranked = sorted(
        ((hit['confidence'], hit['text']) for hit in detected),
        reverse=True,
    )
    if ranked:
        return ranked[0][1]
    return 'Plot location not found on Wikipedia'
def keywords(blob):
    """Extract notable terms from `blob`: high-confidence people and places
    (via the indico API) plus tf-idf keywords, deduplicated.

    Returns a list of unique strings (order not guaranteed, due to set()).

    Fixes vs. original: `list(set(source_list))` referenced an undefined
    name (`source_list` -> NameError), and the tf-idf keys were append()ed
    as a single list object, which would also make set() raise TypeError
    (lists are unhashable) — they are now extend()ed as strings.
    """
    things = []
    # NOTE(review): hard-coded API key committed to source — move to an
    # environment variable or config file.
    indicoio.config.api_key = 'ab83001ca5c484aa92fc18a5b2d6585c'

    # Keep only confidently-detected people and places.
    people = indicoio.people(blob)
    for person in people:
        if person['confidence'] > 0.5:
            things.append(person['text'])
    places = indicoio.places(blob)
    for place in places:
        if place['confidence'] > 0.5:
            things.append(place['text'])
    print(things)

    # Add tf-idf keywords computed over the stop-word-stripped text.
    blob = parse_stop_words(blob)
    tfdic = tf(blob)
    things.extend(tfidf(tfdic, idf(tfdic, blob)).keys())

    return list(set(things))
# print(michelle_news[0][3]['description'])
# positive = [tweet for tweet in tweet_data if tweet['sentiment']['pos'] > 0]
michelle_places = []
# michelle_news holds one article list per search phrase; scan them all and
# keep only articles that name "Michelle Obama" in the title AND for which
# the indico place API detects at least one location in that title.
for result_set in michelle_news:
    for article in result_set:
        if 'Michelle Obama' not in article['title']:
            continue
        # print(article['title'])
        detected = indicoio.places(article['title'])
        if not detected:
            continue
        michelle_places.append({
            'title': article['title'],
            'location': detected[0]['text'],
            'description': article['description'],
            'date_pub': article['date_pub'],
            'url': article['url'],
        })
print(len(michelle_places))
print(michelle_places)
# print(relevant[1]['description'])
# sort list to show greatest location with highest confidence to location with lowest
def execute(USERNAME, target, refresh):
    """Run the full indico analysis suite over a user's raw text and write a
    formatted report to the per-user output file.

    USERNAME/target select the raw-input and output paths via io_helper.
    NOTE(review): `refresh` is accepted but never used in this body.
    sys.stdout is temporarily redirected into the report file and restored
    before returning.
    """
    r_data = io_helper.read_raw(USERNAME, target)
    og = sys.stdout  # remember the real stdout so it can be restored at the end
    fpath = io_helper.out_path(USERNAME, target)

    def analysis(raw='', limit=5, text='', percent=True):
        # Pretty-print one payload.
        # print lines if input is a list of non-dicts
        # if input is list of dicts, merge dicts and resend to analysis
        global meta_dict  # NOTE(review): assumes a module-level meta_dict exists and is reset elsewhere — confirm
        if isinstance(raw, list):
            for item in raw:
                if not isinstance(item, dict):
                    print(item)
                else:
                    create_meta_dict(item)
            analysis(meta_dict, limit, text, percent)
        # if input is dict: print k, v pairs
        # optional args for return limit and description text
        if isinstance(raw, dict):
            print(text)
            ct = 0
            for v in sorted(raw, key=raw.get, reverse=True):
                ct += 1
                if ct > limit:
                    break
                if isinstance(raw[v], float):
                    # float values are rendered as percentages, optionally
                    # suffixed with '%'
                    if percent:
                        per = r'%'
                    else:
                        per = ''
                    print(" " + v, str(round(raw[v] * 100, 2)) + per)
                else:
                    print(v, raw[v])
            print()

    def create_meta_dict(item):
        # merge list of dicts into master dict: text -> confidence
        global meta_dict
        meta_dict[item['text']] = item['confidence']
        return meta_dict

    # Clean escape artifacts out of the raw text: drop a backslash (keeping
    # an escaped apostrophe) and turn '*' into a space.
    # NOTE(review): nesting reconstructed from flattened source — confirm.
    # Also note that at i == 0, r_data[i - 1] inspects the LAST character
    # (Python negative indexing).
    rClean = ''
    for i in range(len(r_data)):
        if r_data[i - 1] == '\\':
            rClean = rClean[:-1]
            if r_data[i] != "'":
                continue
        if r_data[i] == '*':
            rClean += ' '
        else:
            rClean += r_data[i]
    r_data = rClean
    del rClean

    indicoio.config.api_key = keycheck.get_key()

    # Big 5
    big5 = {
        'text': "Big 5 personality inventory matches: ",
        "payload": indicoio.personality(r_data)
    }

    # Meyers briggs: map indico persona labels to their MBTI type codes
    mbtiLabels = indicoio.personas(r_data)
    mbti_dict = {
        'architect': 'intj',
        'logician': 'intp',
        'commander': 'entj',
        'debater': 'entp',
        'advocate': 'infj',
        'mediator': 'infp',
        'protagonist': 'enfj',
        'campaigner': 'enfp',
        'logistician': 'istj',
        'defender': 'isfj',
        'executive': 'estj',
        'consul': 'esfj',
        'virtuoso': 'istp',
        'adventurer': 'isfp',
        'entrepreneur': 'estp',
        'entertainer': 'esfp'
    }

    def replace_mbti():
        # yield the MBTI code for each persona label
        for k, v in mbtiLabels.items():
            k = k.replace(k, mbti_dict[k])
            yield k

    k = (list(replace_mbti()))
    v = map(lambda x: x, mbtiLabels.values())
    payload = (dict(zip(k, v)))
    mbti = {
        'text': "Most likely personalilty styles: ",
        "payload": payload,
        'ct': 5,
        'percent': True
    }

    # Political
    pol = {
        'text': "Political alignments: ",
        "payload": indicoio.political(r_data, version=1)
    }
    # Sentiment
    sen = {
        'text': "Sentiment: ",
        "payload": {
            'Percent positive': indicoio.sentiment(r_data)
        },
        'ct': 3
    }
    # Emotion
    emo = {
        'text': "Predominant emotions:",
        "payload": indicoio.emotion(r_data),
        'ct': 5
    }
    # Keywords
    kw = {'text': "Keywords: ", "payload": indicoio.keywords(r_data), 'ct': 5}
    # Text tags
    tt = {
        'text': "Text tags: ",
        "payload": indicoio.text_tags(r_data),
        'ct': 10
    }
    # Place
    pla = {
        'text': "Key locations: ",
        'payload': indicoio.places(r_data, version=2),
        'ct': 3,
        'percent': True
    }

    def Karma(USERNAME):
        # Print the user's top-10 subreddits by accumulated submission karma.
        import praw
        import collections  # NOTE(review): imported but unused here
        kList = []
        user_agent = ("N2ITN")
        r = praw.Reddit(user_agent=user_agent)
        thing_limit = 100
        user = r.get_redditor(USERNAME)
        gen = user.get_submitted(limit=thing_limit)
        karma_by_subreddit = {}
        for thing in gen:
            subreddit = thing.subreddit.display_name
            karma_by_subreddit[subreddit] = (
                karma_by_subreddit.get(subreddit, 0) + thing.score)
        for w in sorted(karma_by_subreddit,
                        key=karma_by_subreddit.get,
                        reverse=True):
            kList.append(str(w) + ': ' + str(karma_by_subreddit[w]))
        kList.insert(0, 'Karma by Sub')
        print("\n\t".join(kList[:10]))

    def show(results):
        # Accepts bag of dicts, or single dict; recurses on lists.
        if not isinstance(results, dict):
            for X in results:
                show(X)
        else:
            # An empty place payload gets a friendly message instead of a table.
            if results == pla and pla['payload'] == []:
                print("Not enough information to infer place of origin")
                print()
            else:
                i = results
                analysis(raw=i.get('payload', ''),
                         limit=i.get('ct', 5),
                         text=i.get('text', ''),
                         percent=i.get('percent', True))

    # Redirect stdout into the report file, emit every section, restore stdout.
    with open(fpath, 'w') as outtie:
        sys.stdout = outtie
        print(target + USERNAME)
        print()
        show([kw, pla, big5, emo, sen, pol, mbti, tt])
        Karma(USERNAME)
        sys.stdout = og
        return
# with open('textfile.txt', 'r') as myfile:
#     data = myfile.read().replace('\n', '')
# print(data)
import os

import indicoio

# Resolve the directory this script lives in, so textfile.txt is found
# regardless of the current working directory.
__location__ = os.path.realpath(
    os.path.join(os.getcwd(), os.path.dirname(__file__)))

# reads from the file which contains the audio-to-speech content.
# Fix vs. original: the file handle was opened and never closed; a context
# manager now guarantees it is released.
with open(os.path.join(__location__, "textfile.txt")) as file_contents:
    text = file_contents.read()

# next, feed it into the ML API
# NOTE(review): hard-coded API key committed to source — move to an
# environment variable or config file.
indicoio.config.api_key = 'd08fbca96c4341957f0a8a0b21d08b5d'

print("Political Allegiance: ")
print(indicoio.political(text))
print("\n")

print("Key Words: ")
print(indicoio.keywords(text, version=2))
print("\n")

print("Important Persons: ")
print(indicoio.people(text))
print("\n")

print("Significant Locations: ")
print(indicoio.places(text))
print("\n")

print("Relevant Organizations: ")
print(indicoio.organizations(text))
def execute(USERNAME, target, refresh):
    """Run the full indico analysis suite over a user's raw text and write a
    formatted report to the per-user output file.

    USERNAME/target select the raw-input and output paths via io_helper.
    NOTE(review): `refresh` is accepted but never used in this body.
    sys.stdout is temporarily redirected into the report file and restored
    before returning.  (Near-duplicate of another execute() in this file;
    here the Karma step is commented out.)
    """
    r_data = io_helper.read_raw(USERNAME, target)
    og = sys.stdout  # remember the real stdout so it can be restored at the end
    fpath = io_helper.out_path(USERNAME, target)

    def analysis(raw='', limit=5, text='', percent=True):
        # Pretty-print one payload.
        # print lines if input is a list of non-dicts
        # if input is list of dicts, merge dicts and resend to analysis
        global meta_dict  # NOTE(review): assumes a module-level meta_dict exists and is reset elsewhere — confirm
        if isinstance(raw, list):
            for item in raw:
                if not isinstance(item, dict):
                    print(item)
                else:
                    create_meta_dict(item)
            analysis(meta_dict, limit, text, percent)
        # if input is dict: print k, v pairs
        # optional args for return limit and description text
        if isinstance(raw, dict):
            print(text)
            ct = 0
            for v in sorted(raw, key=raw.get, reverse=True):
                ct += 1
                if ct > limit:
                    break
                if isinstance(raw[v], float):
                    # float values are rendered as percentages, optionally
                    # suffixed with '%'
                    if percent:
                        per = r'%'
                    else:
                        per = ''
                    print(" " + v, str(round(raw[v] * 100, 2)) + per)
                else:
                    print(v, raw[v])
            print()

    def create_meta_dict(item):
        # merge list of dicts into master dict: text -> confidence
        global meta_dict
        meta_dict[item['text']] = item['confidence']
        return meta_dict

    # Clean escape artifacts out of the raw text: drop a backslash (keeping
    # an escaped apostrophe) and turn '*' into a space.
    # NOTE(review): nesting reconstructed from flattened source — confirm.
    # Also note that at i == 0, r_data[i - 1] inspects the LAST character
    # (Python negative indexing).
    rClean = ''
    for i in range(len(r_data)):
        if r_data[i - 1] == '\\':
            rClean = rClean[:-1]
            if r_data[i] != "'":
                continue
        if r_data[i] == '*':
            rClean += ' '
        else:
            rClean += r_data[i]
    r_data = rClean
    del rClean

    indicoio.config.api_key = keycheck.get_key()

    # Big 5
    big5 = {'text': "Big 5 personality inventory matches: ",
            "payload": indicoio.personality(r_data)}

    # Meyers briggs: map indico persona labels to their MBTI type codes
    mbtiLabels = indicoio.personas(r_data)
    mbti_dict = {
        'architect': 'intj',
        'logician': 'intp',
        'commander': 'entj',
        'debater': 'entp',
        'advocate': 'infj',
        'mediator': 'infp',
        'protagonist': 'enfj',
        'campaigner': 'enfp',
        'logistician': 'istj',
        'defender': 'isfj',
        'executive': 'estj',
        'consul': 'esfj',
        'virtuoso': 'istp',
        'adventurer': 'isfp',
        'entrepreneur': 'estp',
        'entertainer': 'esfp'
    }

    def replace_mbti():
        # yield the MBTI code for each persona label
        for k, v in mbtiLabels.items():
            k = k.replace(k, mbti_dict[k])
            yield k

    k = (list(replace_mbti()))
    v = map(lambda x: x, mbtiLabels.values())
    payload = (dict(zip(k, v)))
    mbti = {'text': "Most likely personalilty styles: ",
            "payload": payload, 'ct': 5, 'percent': True}

    # Political
    pol = {'text': "Political alignments: ",
           "payload": indicoio.political(r_data, version=1)}
    # Sentiment
    sen = {'text': "Sentiment: ",
           "payload": {'Percent positive': indicoio.sentiment(r_data)},
           'ct': 3}
    # Emotion
    emo = {'text': "Predominant emotions:",
           "payload": indicoio.emotion(r_data), 'ct': 5}
    # Keywords
    kw = {'text': "Keywords: ", "payload": indicoio.keywords(r_data), 'ct': 5}
    # Text tags
    tt = {'text': "Text tags: ",
          "payload": indicoio.text_tags(r_data), 'ct': 10}
    # Place
    pla = {
        'text': "Key locations: ",
        'payload': indicoio.places(r_data, version=2),
        'ct': 3,
        'percent': True
    }

    def Karma(USERNAME):
        # Print the user's top-10 subreddits by accumulated submission karma.
        # NOTE(review): unused below — the call site is commented out.
        import praw
        import collections  # NOTE(review): imported but unused here
        kList = []
        user_agent = ("N2ITN")
        r = praw.Reddit(user_agent=user_agent)
        thing_limit = 100
        user = r.get_redditor(USERNAME)
        gen = user.get_submitted(limit=thing_limit)
        karma_by_subreddit = {}
        for thing in gen:
            subreddit = thing.subreddit.display_name
            karma_by_subreddit[subreddit] = (karma_by_subreddit.get(subreddit, 0)
                                             + thing.score)
        for w in sorted(karma_by_subreddit,
                        key=karma_by_subreddit.get,
                        reverse=True):
            kList.append(str(w) + ': ' + str(karma_by_subreddit[w]))
        kList.insert(0, 'Karma by Sub')
        print("\n\t".join(kList[:10]))

    def show(results):
        # Accepts bag of dicts, or single dict; recurses on lists.
        if not isinstance(results, dict):
            for X in results:
                show(X)
        else:
            # An empty place payload gets a friendly message instead of a table.
            if results == pla and pla['payload'] == []:
                print("Not enough information to infer place of origin")
                print()
            else:
                i = results
                analysis(
                    raw=i.get('payload', ''),
                    limit=i.get('ct', 5),
                    text=i.get('text', ''),
                    percent=i.get('percent', True)
                )

    # Redirect stdout into the report file, emit every section, restore stdout.
    with open(fpath, 'w') as outtie:
        sys.stdout = outtie
        print(target + USERNAME)
        print()
        show([kw, pla, big5, emo, sen, pol, mbti, tt])
        # Karma(USERNAME)
        sys.stdout = og
        return