def test_political(self):
    response = political(TEST_DATA)
    self.assertTrue(isinstance(response, dict))
    self.assertEqual(POLITICAL_SET, set(response.keys()))

    test_string = "pro-choice"
    response = political(test_string, version=2)
    self.assertTrue(isinstance(response, dict))
    assert response['Libertarian'] > 0.25
def test_political(self):
    response = political(TEST_DATA)
    self.assertTrue(isinstance(response, dict))
    self.assertEqual(POLITICAL_SET, set(response.keys()))

    test_string = "pro-choice"
    response = political(test_string, version=1)
    self.assertTrue(isinstance(response, dict))
    assert response['Libertarian'] > 0.25
def test_political(self):
    political_set = set(['Libertarian', 'Liberal', 'Conservative', 'Green'])
    test_string = "Guns don't kill people, people kill people."

    response = political(test_string)
    self.assertTrue(isinstance(response, dict))
    self.assertEqual(political_set, set(response.keys()))

    test_string = "pro-choice"
    response = political(test_string)
    self.assertTrue(isinstance(response, dict))
    assert response['Libertarian'] > 0.25
def analysis(data):
    # `ind` (the indicoio client) and `sort` are assumed to be defined elsewhere in this module
    sentiment = ind.sentiment_hq(data)
    tags = sort(ind.text_tags(data))
    languages = sort(ind.language(data))
    politics = sort(ind.political(data))
    keywords = sort(ind.keywords(data))
    names = sort(ind.named_entities(data))

    print("Sentiment", sentiment)
    print("\n\n\nTags")
    for t in tags:
        print(t[0], float(t[1]) * 100)
    print("\n\n\nLanguages")
    for l in languages:
        print(l[0], float(l[1]) * 100)
    print("\n\n\nPolitical")
    for p in politics:
        print(p[0], float(p[1]) * 100)
    print("\n\nkeywords")
    for k in keywords:
        print(k[0], float(k[1]) * 100)
def getResult(strArray):
    sent = indicoio.sentiment(strArray)
    pers = indicoio.personality(strArray)
    poli = indicoio.political(strArray)
    keyw = indicoio.keywords(strArray)
    result = dict([("sentiment", sent), ("personality", pers),
                   ("political", poli), ("keywords", keyw)])
    return result
def analyze_tweets_politicaly(self):
    try:
        self.political_stats = Factor(
            indicoio.political(
                self.person.all_text_as_one().content).items(),
            'Political stats')
        self.plotter.add_factor(self.political_stats)
    except IndicoError:
        raise PersonAnalyzerException(
            'Error while fetching data from indicoio')
def main():
    configure()
    # wrap filter() in list() so the examples can be indexed and measured below
    examples = list(filter(lambda sen: sen != '', get_examples('input.txt')))

    # single examples
    sentiments = indicoio.sentiment_hq(examples)
    poli = indicoio.political(examples)
    for i in range(len(examples)):
        print('============')
        print('{}\n\n{}\n\n{}\n'.format(examples[i], sentiments[i], poli[i]))
        print('============')
def get_political(self):
    political_scores = [0, 0, 0, 0]
    political_dict = indicoio.political(self.tweet_text)
    for key, value in political_dict.items():
        if key == 'Libertarian':
            political_scores[0] += value
        elif key == 'Green':
            political_scores[1] += value
        elif key == 'Liberal':
            political_scores[2] += value
        elif key == 'Conservative':
            political_scores[3] += value
    return political_scores
def analyze_text(journal_num):
    f = open(folders[journal_num], 'r')
    line = f.readline()
    # dummy -> list of urls; get all urls from the saved file
    url_dummy = line.split(',')
    # get rid of useless html
    for i in range(len(url_dummy) - 1):
        url_dummy[i] = url_dummy[i][3:-1]
    # the last url has one more ' , so get rid of it too
    url_dummy[-1] = url_dummy[-1][3:-2]
    # do political analysis with the indicoio API and append it to the array
    for j in range(len(url_dummy)):
        analysis[journal_num].append(indicoio.political(url_dummy[j]))
    f.close()
def getOverallResult(self, strArray):
    result = indicoio.personality(strArray)
    extraversion = []
    openness = []
    agreeableness = []
    conscientiousness = []
    for things in result:
        extraversion.append(things["extraversion"])
        openness.append(things["openness"])
        agreeableness.append(things["agreeableness"])
        conscientiousness.append(things["conscientiousness"])

    result = indicoio.political(strArray)
    libertarian = []
    green = []
    liberal = []
    conservative = []
    for things in result:
        libertarian.append(things["Libertarian"])
        green.append(things["Green"])
        liberal.append(things["Liberal"])
        conservative.append(things["Conservative"])

    result = indicoio.sentiment(strArray)
    t = [
        result,
        libertarian,
        green,
        liberal,
        conservative,
        extraversion,
        openness,
        agreeableness,
        conscientiousness,
    ]
    return t
def gimme_the_goods(text, tag_count=3, persona_count=3):
    # Consume some of that api for analysis
    sentiment = indicoio.sentiment(text)
    # TODO figure out a better way to handle this bug
    political = indicoio.political(text[0:1100])
    personality = indicoio.personality(text)
    personas = indicoio.personas(text)
    tags = indicoio.text_tags(text, top_n=tag_count)

    # Sort the personas to grab top ones
    top_personas = dict(sorted(personas.items(),
                               key=operator.itemgetter(1),
                               reverse=True)[:persona_count])

    # Truncate the values to 3 decimals for cleanliness
    roundness = 3
    sentiment = truncate_values(sentiment, roundness)
    political = truncate_values(political, roundness)
    personality = truncate_values(personality, roundness)
    top_personas = truncate_values(top_personas, roundness)
    tags = truncate_values(tags, roundness)

    # Rearrange the personas a bit
    final_personas = []
    for key, value in top_personas.items():
        final_personas.append({
            'type': persona_mapping[key],
            'name': key,
            'value': value,
        })

    return_dict = {
        'sentiment': sentiment,
        'political': political,
        'personality': personality,
        'personas': final_personas,
        'tags': tags
    }
    return return_dict
def askInfo(self, request, dictOrString):
    if request == "mood":
        tempDict = indicoio.emotion(self.opinionString,
                                    api_key=config["indico_key"])
        if dictOrString == "dictionary":
            return tempDict
        else:
            maxVal = max(tempDict.values())
            for i in tempDict:
                if tempDict[i] == maxVal:
                    return i
    elif request == "party":
        tempDict = indicoio.political(self.opinionString,
                                      api_key=config["indico_key"])
        if dictOrString == "dictionary":
            return tempDict
        else:
            maxVal = max(tempDict.values())
            for i in tempDict:
                if tempDict[i] == maxVal:
                    return i
    else:
        warnings.warn("invalid request", UserWarning)
# WHACK 2016
import indicoio
from indicoio import political, sentiment, language, text_tags, keywords, fer, facial_features, image_features

indicoio.config.api_key = "f09f509655f721e3adac6df5b35abfed"
api_key_Lisa = "f09f509655f721e3adac6df5b35abfed"

result1 = political("Guns don't kill people. People kill people.")
result2 = sentiment("It's so cold outside!")
result3 = sentiment("I'm doing okay")
result4 = sentiment("indico is so easy to use!")
result5 = sentiment("this api isn't bad at recognizing double negatives either.")
result6 = sentiment("I'm doing okay")
result7 = sentiment("Best day ever!")

# print(result1)
print(result7)
def indicoPolitics(tweet):
    tag_dict = indicoio.political(tweet)
    return sorted(tag_dict.keys(), key=lambda x: tag_dict[x], reverse=True)[:1]
def indicoPoliticsNumber(tweet):
    tag_dict = indicoio.political(tweet)
    print(tag_dict)
    top = sorted(tag_dict.keys(), key=lambda x: tag_dict[x], reverse=True)[:1]
    print(tag_dict[top[0]])
    return tag_dict[top[0]]
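A minimal usage sketch for the two helpers above, assuming `indicoPolitics` and `indicoPoliticsNumber` are available as defined; the API key and sample tweet are placeholders, not from the original sources.

# hypothetical usage of the helpers defined above
import indicoio

indicoio.config.api_key = "YOUR_API_KEY"  # placeholder, not a real key

sample_tweet = "We need lower taxes and less regulation."  # made-up input
print(indicoPolitics(sample_tweet))        # e.g. ['Conservative'] -- top label only
print(indicoPoliticsNumber(sample_tweet))  # probability assigned to that top label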
    'liberal': [],
    'conservative': [],
    'libertarian': [],
    'green': []
}

for x in range(0, len(response_json["response"]["docs"])):
    article_to_analyze = response_json["response"]["docs"][x]
    data['dates'].append(article_to_analyze["pub_date"])
    snippet_to_analyze = article_to_analyze["lead_paragraph"]
    if snippet_to_analyze is None:
        snippet_to_analyze = response_json["response"]["docs"][x]["snippet"]
    print("Before leanings")
    leanings = indicoio.political(snippet_to_analyze)
    print("After leanings")
    data['liberal'].append(leanings['Liberal'])
    data['green'].append(leanings['Green'])
    data['conservative'].append(leanings['Conservative'])
    data['libertarian'].append(leanings['Libertarian'])
    print(data['liberal'][x])

My_Graph = TrendGraph(data)
print(My_Graph.getGraphImage())

# sample_data = {}
# sample_data['dates'] = ["Year 2010", "Year 2011", "Year 2012", "Year 2013"]
# sample_data['liberals'] = [0.25, 0.25, 0.25, 0.25]
# sample_data['conservatives'] = [0.25, 0.25, 0.25, 0.25]
def test_batch_political(self):
    response = political([TEST_DATA], version=2)
    self.assertTrue(isinstance(response, list))
import requests, indicoio, pickle

total_responses = []
for p in range(150):
    params = {
        "apikey": "PAXCY03JG4B4QZ0Z",
        "q": "*",
        "count": 100,
        "page": p,
        "view": "full"
    }
    resp = requests.get("https://api.fiscalnote.com/bills", params=params).json()
    for response in resp:
        try:
            political = indicoio.political(
                response["description"],
                api_key="df43365f5a827d884eb683b836fcb78a"), response["title"]
            # print response["title"]
            response["political"] = political
            total_responses += [response]
            if len(total_responses) % 100 == 0:
                print(len(total_responses))
        except:
            pass

print(len(total_responses))
pickle.dump(total_responses, open("fiscal_note_raw_data_full.txt", "w+"))
# with open('textfile.txt', 'r') as myfile:
#     data = myfile.read().replace('\n', '')
# print(data)

import os
import indicoio

# reads from the file which contains the audio-to-speech content
__location__ = os.path.realpath(
    os.path.join(os.getcwd(), os.path.dirname(__file__)))
file_contents = open(os.path.join(__location__, "textfile.txt"))
text = file_contents.read()

# next, feed it into the ML API
indicoio.config.api_key = 'd08fbca96c4341957f0a8a0b21d08b5d'

print("Political Allegiance: ")
print(indicoio.political(text))
print("\n")

print("Key Words: ")
print(indicoio.keywords(text, version=2))
print("\n")

print("Important Persons: ")
print(indicoio.people(text))
print("\n")

print("Significant Locations: ")
print(indicoio.places(text))
print("\n")

print("Relevant Organizations: ")
print(indicoio.organizations(text))
def _get_political(self):
    return indicoio.political(" ".join(self.text))
def politicalAnalysis(newsMessages):
    '''
    Uses indicoio API to perform political analysis on all posts for all pages
    in newsMessages. Creates an average for each page to allow easy
    visualization later on.

    Args:
        newsMessages: dictionary with page name keys and values that are a
            list of post messages (strings)

    Returns:
        dict:
            keys: page names
            values: dict of values with chance that the page is Libertarian,
                Green, Liberal, or Conservative, as defined by indicoio's
                political analysis API

    TODO: insert into sql table?
    '''
    # for debugging purposes; don't wanna make 500 calls 500 times now do we
    writeUpdates = False
    try:
        analysesFile = open('politicalAnalysis.json', 'r')
        analyses = json.loads(analysesFile.read())
        analysesFile.close()
        # clean-up from previous calls
        toDelete = []
        for s in analyses['average']:
            if s not in newsMessages:
                toDelete.append(s)
        for s in toDelete:
            writeUpdates = True
            del analyses['average'][s]
            del analyses['all'][s]
    except:
        writeUpdates = True
        print("Please wait while a whole ton of requests are made...")
        analyses = {'all': {}, 'average': {}}

    for company in newsMessages:
        # don't recalculate if we already did before...
        if company in analyses['average']:
            continue
        writeUpdates = True
        analyses['all'][company] = indicoio.political(newsMessages[company])
        # analyses['all'][company] is now a list of batch results, an analysis for each post
        libertarianSum = 0
        greenSum = 0
        liberalSum = 0
        conservativeSum = 0
        # so let's go get the average and classify this page
        for res in analyses['all'][company]:
            libertarianSum += res['Libertarian']
            greenSum += res['Green']
            liberalSum += res['Liberal']
            conservativeSum += res['Conservative']
        analyses['average'][company] = {
            'Libertarian': libertarianSum / len(analyses['all'][company]),
            'Green': greenSum / len(analyses['all'][company]),
            'Liberal': liberalSum / len(analyses['all'][company]),
            'Conservative': conservativeSum / len(analyses['all'][company])
        }

    # save if there were changes
    if writeUpdates:
        analysesFile = open('politicalAnalysis.json', 'w')
        analysesFile.write(json.dumps(analyses, indent=2))
        analysesFile.close()

    return analyses['average']
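A short, hypothetical usage sketch for politicalAnalysis above; the page names and posts are made up, the API key is a placeholder, and the json/indicoio imports mirror what the enclosing module already uses.

# hypothetical caller for politicalAnalysis defined above
import indicoio

indicoio.config.api_key = "YOUR_API_KEY"  # placeholder

newsMessages = {
    "ExamplePageA": ["We must expand social programs.", "Raise the minimum wage."],
    "ExamplePageB": ["Cut taxes and shrink the federal government."],
}

averages = politicalAnalysis(newsMessages)
for page, scores in averages.items():
    leaning = max(scores, key=scores.get)          # strongest of the four party scores
    print(page, leaning, round(scores[leaning], 3))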
import indicoio

indicoio.config.api_key = '5eced02755ca5225746b0a83c0a03104'

# Multiple examples for political
indicoio.political("I have a constitutional right to bear arms!")
indicoio.political("I have a constitutional right to free speech!")
indicoio.political("Through the constitution I do not have to quarter troops!")
indicoio.political("It is my right as a woman to vote")
def get_political():
    if request.method == 'POST':
        data = dict(request.form)['data_to_analyze']
        return json.dumps({
            'political': indicoio.political(data)
        })
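A hypothetical client-side call for the handler above, assuming it is mounted as a POST route (the URL, port, and route name are placeholders; only the form field name comes from the handler itself).

# hypothetical client for the endpoint above
import requests

resp = requests.post("http://localhost:5000/political",  # placeholder URL and route
                     data={"data_to_analyze": "I have a constitutional right to bear arms!"})
print(resp.json())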
passwd="", db="nytimes") cur = db.cursor() sentimentValues = [] politicalValues = [] personalityValues = [] emotionValues = [] start = 3474 itr = start for i in range(start, num + start): print("starting chunk " + str(itr) + " !") itr += 1 curText = allText[i][:1000] sentimentValues = indicoio.sentiment_hq(curText) politicalValues = indicoio.political(curText) personalityValues = indicoio.personality(curText) emotionValues = indicoio.emotion(curText) abst = repr(allText[i]).replace("'", '').replace('"', '') SQLline = 'INSERT INTO `article`(`abst`, `url`, `sent`, `serv`, `gree`, `libe`, `libt`, `agre`, `cons`, `extr`, `open`, `ange`, `fear`, `joyy`, `sadd`, `surp`) VALUES ("' + abst + '" ,"' + repr( articles[i]["url"]) + '",' + str(sentimentValues) + ',' + str( politicalValues["Conservative"] ) + ',' + str(politicalValues["Green"]) + ',' + str( politicalValues["Liberal"] ) + ',' + str(politicalValues["Libertarian"]) + ',' + str( personalityValues["agreeableness"] ) + ',' + str(personalityValues["conscientiousness"]) + ',' + str( personalityValues["extraversion"]) + ',' + str( personalityValues["openness"]) + ',' + str( emotionValues["anger"]) + ',' + str( emotionValues["fear"]) + ',' + str(
def test_batch_political_v2(self):
    test_data = [TEST_DATA, TEST_DATA]
    response = political(test_data, version=2)
    self.assertTrue(isinstance(response, list))
    self.assertEqual(POLITICAL_SET, set(response[0].keys()))
    self.assertEqual(response[0], response[1])
api = twitter.Api(consumer_key='...',
                  consumer_secret='...',
                  access_token_key='...',
                  access_token_secret='...')

indicoio.config.api_key = "ac9e3e9c5e603a59d6752fad66fdeb51"

politicianTwitters = ['LincolnChafee', 'HillaryClinton', 'lessig', 'MartinOMalley',
                      'BernieSanders', 'JimWebbUSA', 'JebBush', 'RealBenCarson',
                      'ChrisChristie', 'tedcruz', 'CarlyFiorina', 'gov_gilmore',
                      'GrahamBlog', 'GovMikeHuckabee', 'BobbyJindal', 'JohnKasich',
                      'GovernorPataki', 'RandPaul', 'marcorubio', 'RickSantorum',
                      'ScottWalker', 'realDonaldTrump']

output = open('politicianScores.txt', 'w')

libertarian = 'Libertarian'
green = 'Green'
liberal = 'Liberal'
conservative = 'Conservative'

for user in politicianTwitters:
    statuses = api.GetUserTimeline(screen_name=user, count=200)
    l = [s.text for s in statuses]
    count = len(l)
    scores = {libertarian: 0, green: 0, liberal: 0, conservative: 0}
    for entry in l:
        politicianScore = political(entry)
        scores[libertarian] += politicianScore[u'Libertarian']
        scores[green] += politicianScore[u'Green']
        scores[liberal] += politicianScore[u'Liberal']
        scores[conservative] += politicianScore[u'Conservative']
    scores[libertarian] /= count
    scores[green] /= count
    scores[liberal] /= count
    scores[conservative] /= count
    output.write(user + " " + libertarian + ": " + str(scores[libertarian]) +
                 green + ": " + str(scores[green]) +
                 liberal + ": " + str(scores[liberal]) +
                 conservative + ": " + str(scores[conservative]) + '\n')
def test_batch_political(self):
    test_data = ["Guns don't kill people, people kill people."]
    response = political(test_data, api_key=self.api_key)
    self.assertTrue(isinstance(response, list))
def bias(phrase):
    # biasList is assumed to be defined elsewhere in this module
    biasDict = indicoio.political(phrase)
    biasKeys = list(biasDict.keys())
    for x in range(0, 4):
        biasList[x] = biasDict[biasKeys[x]]
def test_political_v2(self):
    response = political(TEST_DATA, version=2)
    self.assertTrue(isinstance(response, dict))
    self.assertEqual(POLITICAL_SET, set(response.keys()))
clean_tweet = html.unescape(clean_tweet)
clean_tweet = " ".join(filter(lambda x: x[0] != "@", clean_tweet.split()))
clean_tweet = " ".join(filter(lambda x: x[:4] != "http", clean_tweet.split()))
tweets[index] = clean_tweet

print("There are " + str(len(tweets)) + " about to be printed!")
print("\n-\n".join(tweets))
exit()

# join the tweets into a big ol paragraph
combined_tweets = " ".join(tweets)

# get some sweet stats
sentiment = i.sentiment(combined_tweets)
personas = i.personas(combined_tweets)
political = i.political(combined_tweets)

# sorty sort
sorted_personas = sorted(personas.items(), key=operator.itemgetter(1), reverse=True)
sorted_political = sorted(political.items(), key=operator.itemgetter(1), reverse=True)

print()
print(sorted_personas[:3])
print(sorted_political[0])
print(sentiment)

# Show rate limit status for this application
data_str = data_bytes.decode('utf-8')
tweet_data = json.loads(data_str)
just_tweets = [tweet["tweet_text"] for tweet in tweet_data]

# # single example
# indicoio.political("I have a constitutional right to bear arms!")

max_send = 30000

# batch example
finished = False
i = 12
while not finished:
    up_bound = min((i + 1) * max_send, len(just_tweets))
    if up_bound == len(just_tweets):
        finished = True
    to_send = just_tweets[i * max_send:up_bound]
    stuff = indicoio.political(to_send)
    print(len(stuff))
    stuff2 = {"stuff": stuff}
    print("dumping", i)
    json.dump(stuff2, (open("testing5.json", 'a')))
    print("done dumping", i)
    i += 1
import indicoio

indicoio.config.api_key = '5eced02755ca5225746b0a83c0a03104'

# single example
indicoio.political("I have a constitutional right to bear arms!")
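The call above returns a dict of party probabilities. A minimal follow-up sketch (not part of the original snippet) showing how the strongest label could be pulled out, assuming the four keys seen in the other examples:

scores = indicoio.political("I have a constitutional right to bear arms!")
# expected shape: {'Libertarian': ..., 'Green': ..., 'Liberal': ..., 'Conservative': ...}
top_party = max(scores, key=scores.get)
print(top_party, round(scores[top_party], 3))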
def info(yt_url):
    video_info = {}
    url = "https://www.youtube.com/watch?v=" + yt_url
    yt = YouTube(url)
    video_info["timestamped"] = []

    # get the audio file
    a = yt.captions.get_by_language_code('en')
    caps = a.generate_srt_captions()
    caps = caps.split("\n\n")
    caps = [i.split("\n") for i in caps]

    text = ""
    for i in caps:
        for j in i[2:]:
            text += j
        line = " ".join(i[2:])
        line = re.sub(r"<[^<]+?>", '', line)
        try:
            video_info["timestamped"].append([
                i[1].split(" --> ")[0],
                i[1].split(" --> ")[1],
                line
            ])
        except:
            pass

    text = re.sub(r"<[^>]*>", " ", text)
    text = re.sub(r'\s+', ' ', text)
    text = re.sub(r"<[^<]+?>", '', text)
    text = text.replace("...", ".")
    text = text.replace("…", "")
    text = text.replace(".", ". ")
    text = re.sub(r'\s+', ' ', text)

    sentences = nltk.sent_tokenize(text)
    video_info["full_transcript"] = text

    stopwords = nltk.corpus.stopwords.words('english')
    word_frequencies = {}
    for word in nltk.word_tokenize(text):
        if word not in stopwords:
            if word not in word_frequencies.keys():
                word_frequencies[word] = 1
            else:
                word_frequencies[word] += 1

    maximum_frequency = max(word_frequencies.values())
    for word in word_frequencies.keys():
        word_frequencies[word] = (word_frequencies[word] / maximum_frequency)

    sentence_scores = {}
    for sent in sentences:
        for word in nltk.word_tokenize(sent.lower()):
            if word in word_frequencies.keys():
                if len(sent.split(' ')) < 30:
                    if sent not in sentence_scores.keys():
                        sentence_scores[sent] = word_frequencies[word]
                    else:
                        sentence_scores[sent] += word_frequencies[word]

    summary_sentences = heapq.nlargest(len(sentences), sentence_scores,
                                       key=sentence_scores.get)
    video_info["summary_variable"] = summary_sentences

    politicalValues = indicoio.political(text)
    personalityValues = indicoio.personality(text)
    emotionValues = indicoio.emotion(text)
    video_info["political"] = politicalValues
    video_info["personality"] = personalityValues
    video_info["emotion"] = emotionValues
    video_info["sentiment"] = indicoio.sentiment(text)
    video_info["url"] = url

    class MyEncoder(json.JSONEncoder):
        def default(self, obj):
            if isinstance(obj, numpy.integer):
                return int(obj)
            elif isinstance(obj, numpy.floating):
                return float(obj)
            elif isinstance(obj, numpy.ndarray):
                return obj.tolist()
            else:
                return super(MyEncoder, self).default(obj)

    return json.dumps(video_info, cls=MyEncoder)
def execute(USERNAME, target, refresh):
    r_data = io_helper.read_raw(USERNAME, target)
    og = sys.stdout
    fpath = io_helper.out_path(USERNAME, target)

    def analysis(raw='', limit=5, text='', percent=True):
        global meta_dict
        # print lines if input is a list of non-dicts
        # if input is list of dicts, merge dicts and resend to analysis
        if isinstance(raw, list):
            for item in raw:
                if not isinstance(item, dict):
                    print(item)
                else:
                    create_meta_dict(item)
            analysis(meta_dict, limit, text, percent)
        # if input is dict: print k, v pairs
        # optional args for return limit and description text
        if isinstance(raw, dict):
            print(text)
            ct = 0
            for v in sorted(raw, key=raw.get, reverse=True):
                ct += 1
                if ct > limit:
                    break
                if isinstance(raw[v], float):
                    if percent:
                        per = r'%'
                    else:
                        per = ''
                    print(" " + v, str(round(raw[v] * 100, 2)) + per)
                else:
                    print(v, raw[v])
            print()

    def create_meta_dict(item):
        # merge list of dicts into master dict
        global meta_dict
        meta_dict[item['text']] = item['confidence']
        return meta_dict

    rClean = ''
    for i in range(len(r_data)):
        if r_data[i - 1] == '\\':
            rClean = rClean[:-1]
            if r_data[i] != "'":
                continue
        if r_data[i] == '*':
            rClean += ' '
        else:
            rClean += r_data[i]

    r_data = rClean
    del rClean

    indicoio.config.api_key = keycheck.get_key()

    # Big 5
    big5 = {
        'text': "Big 5 personality inventory matches: ",
        "payload": indicoio.personality(r_data)
    }

    # Meyers briggs
    mbtiLabels = indicoio.personas(r_data)
    mbti_dict = {
        'architect': 'intj',
        'logician': 'intp',
        'commander': 'entj',
        'debater': 'entp',
        'advocate': 'infj',
        'mediator': 'infp',
        'protagonist': 'enfj',
        'campaigner': 'enfp',
        'logistician': 'istj',
        'defender': 'isfj',
        'executive': 'estj',
        'consul': 'esfj',
        'virtuoso': 'istp',
        'adventurer': 'isfp',
        'entrepreneur': 'estp',
        'entertainer': 'esfp'
    }

    def replace_mbti():
        for k, v in mbtiLabels.items():
            k = k.replace(k, mbti_dict[k])
            yield k

    k = (list(replace_mbti()))
    v = map(lambda x: x, mbtiLabels.values())
    payload = (dict(zip(k, v)))

    mbti = {
        'text': "Most likely personality styles: ",
        "payload": payload,
        'ct': 5,
        'percent': True
    }

    # Political
    pol = {
        'text': "Political alignments: ",
        "payload": indicoio.political(r_data, version=1)
    }
    # Sentiment
    sen = {
        'text': "Sentiment: ",
        "payload": {'Percent positive': indicoio.sentiment(r_data)},
        'ct': 3
    }
    # Emotion
    emo = {
        'text': "Predominant emotions:",
        "payload": indicoio.emotion(r_data),
        'ct': 5
    }
    # Keywords
    kw = {'text': "Keywords: ", "payload": indicoio.keywords(r_data), 'ct': 5}
    # Text tags
    tt = {
        'text': "Text tags: ",
        "payload": indicoio.text_tags(r_data),
        'ct': 10
    }
    # Place
    pla = {
        'text': "Key locations: ",
        'payload': indicoio.places(r_data, version=2),
        'ct': 3,
        'percent': True
    }

    def Karma(USERNAME):
        import praw
        import collections
        kList = []
        user_agent = ("N2ITN")
        r = praw.Reddit(user_agent=user_agent)
        thing_limit = 100
        user = r.get_redditor(USERNAME)
        gen = user.get_submitted(limit=thing_limit)
        karma_by_subreddit = {}
        for thing in gen:
            subreddit = thing.subreddit.display_name
            karma_by_subreddit[subreddit] = (
                karma_by_subreddit.get(subreddit, 0) + thing.score)
        for w in sorted(karma_by_subreddit, key=karma_by_subreddit.get, reverse=True):
            kList.append(str(w) + ': ' + str(karma_by_subreddit[w]))
        kList.insert(0, 'Karma by Sub')
        print("\n\t".join(kList[:10]))

    def show(results):
        # Accepts bag of dicts, or single dict
        if not isinstance(results, dict):
            for X in results:
                show(X)
        else:
            if results == pla and pla['payload'] == []:
                print("Not enough information to infer place of origin")
                print()
            else:
                i = results
                analysis(raw=i.get('payload', ''),
                         limit=i.get('ct', 5),
                         text=i.get('text', ''),
                         percent=i.get('percent', True))

    with open(fpath, 'w') as outtie:
        sys.stdout = outtie
        print(target + USERNAME)
        print()
        show([kw, pla, big5, emo, sen, pol, mbti, tt])
        Karma(USERNAME)
        sys.stdout = og
    return
def test_batch_political(self):
    response = political([TEST_DATA], version=1)
    self.assertTrue(isinstance(response, list))
# get rid of useless html
url_dummy[i] = url_dummy[i][3:-1]
print(url_dummy[i])
i = i + 1

# because the last url has one more ' , get rid of it
url_dummy[-1] = url_dummy[-1][3:-2]
print(len(url_dummy))

# do political analysis with the indicoio API and append it to the array
analysis = []
j = 0
# You could also use "for item in url_dummy" syntax here, like you did
# with "for textfiles in folder"
for j in range(len(url_dummy)):
    analysis.append(indicoio.political(url_dummy[j]))
    j = j + 1  # you shouldn't do this -- the for loop already increments j

# get the average of the analysis:
# add all the results of the urls and divide by the number of urls
sum_stats = [0, 0, 0, 0]  # sum of all stats gained from indicoio
for i in range(len(analysis)):
    sum_stats[0] = sum_stats[0] + analysis[i]["Libertarian"]
    sum_stats[1] = sum_stats[1] + analysis[i]["Green"]
    sum_stats[2] = sum_stats[2] + analysis[i]["Liberal"]
    sum_stats[3] = sum_stats[3] + analysis[i]["Conservative"]
    i = i + 1  # again, you shouldn't do this

print(sum_stats)
aver_stats = [0, 0, 0, 0]
clean_tweet = " ".join(filter(lambda x: x[0] != "@", clean_tweet.split())) clean_tweet = " ".join( filter(lambda x: x[:4] != "http", clean_tweet.split())) tweets[index] = clean_tweet print("There are " + str(len(tweets)) + " about to be printed!") print("\n-\n".join(tweets)) exit() # join the tweets into a big ol paragraph combined_tweets = " ".join(tweets) # get some sweet stats sentiment = i.sentiment(combined_tweets) personas = i.personas(combined_tweets) political = i.political(combined_tweets) # sorty sort sorted_personas = sorted(personas.items(), key=operator.itemgetter(1), reverse=True) sorted_political = sorted(political.items(), key=operator.itemgetter(1), reverse=True) print() print(sorted_personas[:3]) print(sorted_political[0]) print(sentiment) # Show rate limit status for this application
def get_sentiment(self, parsed, participants):
    '''
    Calculate sentiment values for each line of each participant.

    Inputs:
        parsed: (dict of lists) mapping line to speaker
        participants: (list) every speaker in debate

    Returns:
        (dict of lists) containing sentiment (pattern), sentiment (indico),
        political sentiment (indico)
    '''
    # Pre-allocating variables, etc.
    sentiments = {}
    senti_patt = {}
    senti_indi = {}
    poli_senti = {}
    senti = 0
    average_count = 0
    for participant in participants:
        senti_patt[participant] = 0
        senti_indi[participant] = 0
        poli_senti[participant] = 0

    # Running pattern sentiment on each line for each participant
    for participant in participants:
        senti_max = 0
        senti_min = 0
        just_senti = 0
        for line in parsed[participant]:
            just_senti = sentiment(line)
            senti += just_senti[0]
            average_count += 1  # Total line count, to use to average
            if just_senti[0] > senti_max:  # Finding max and min values
                senti_max = just_senti[0]
            if just_senti[0] < senti_min:
                senti_min = just_senti[0]
        # Writing average sentiment and max/min data to return
        senti_patt[participant] = [(senti / average_count + 1) / 2.0,
                                   (senti_max + 1) / 2.0,
                                   (senti_min + 1) / 2.0]

    # Running indico sentiment on each line for each participant
    for participant in participants:
        senti_max = 0
        senti_min = 0
        senti = 0
        average_count = 0
        it = 0  # Debug counter
        curr_senti = 0
        for line in parsed[participant]:
            print(it)
            try:
                curr_senti = indicoio.sentiment(line)
                senti += curr_senti
                average_count += 1
            except:
                pass
            it += 1
            # Finding max and min values
            if curr_senti > senti_max:
                senti_max = curr_senti
            if curr_senti < senti_min:
                senti_min = curr_senti
        # Writing average sentiment and max/min data to return
        senti_indi[participant] = [senti / average_count, senti_max, senti_min]

    for participant in participants:
        max_con = 0
        min_con = 0
        max_lib = 0
        min_lib = 0
        # Determining political sentiment for each participant
        conserv = 0
        lib = 0
        average_count = 0
        poli_get = {'Conservative': 0, 'Liberal': 0}
        for line in parsed[participant]:
            print(it)
            try:
                # Attempts to call poli sentiment function on each line
                poli_get = indicoio.political(line)
                conserv += poli_get['Conservative']  # Adds to each count
                lib += poli_get['Liberal']
                average_count += 1
            except:
                pass
            it += 1
            # Sets max and min values as it cycles through
            if max_con > poli_get['Conservative']:
                max_con = poli_get['Conservative']
            if min_con < poli_get['Conservative']:
                min_con = poli_get['Conservative']
            if max_lib > poli_get['Liberal']:
                max_lib = poli_get['Liberal']
            if min_lib < poli_get['Liberal']:
                min_lib = poli_get['Liberal']
        poli_senti[participant] = [conserv / average_count, max_con, min_con,
                                   lib / average_count, max_lib, min_lib]

    # Creating output dictionary with all data collected
    for participant in participants:
        sentiments[participant] = [senti_patt[participant],
                                   senti_indi[participant],
                                   poli_senti[participant]]
    return sentiments