def test_political(self):
        response = political(TEST_DATA)

        self.assertTrue(isinstance(response, dict))
        self.assertEqual(POLITICAL_SET, set(response.keys()))

        test_string = "pro-choice"
        response = political(test_string, version=2)

        self.assertTrue(isinstance(response, dict))
        assert response['Libertarian'] > 0.25
Example No. 2
    def test_political(self):
        response = political(TEST_DATA)

        self.assertTrue(isinstance(response, dict))
        self.assertEqual(POLITICAL_SET, set(response.keys()))

        test_string = "pro-choice"
        response = political(test_string, version=1)

        self.assertTrue(isinstance(response, dict))
        assert response['Libertarian'] > 0.25
Example No. 3
    def test_political(self):
        political_set = set(['Libertarian', 'Liberal', 'Conservative', 'Green'])
        test_string = "Guns don't kill people, people kill people."
        response = political(test_string)

        self.assertTrue(isinstance(response, dict))
        self.assertEqual(political_set, set(response.keys()))

        test_string = "pro-choice"
        response = political(test_string)

        self.assertTrue(isinstance(response, dict))
        assert response['Libertarian'] > 0.25
Example No. 4
def analysis(data):
    sentiment = ind.sentiment_hq(data)
    tags = sort(ind.text_tags(data))
    languages = sort(ind.language(data))
    politics = sort(ind.political(data))
    keywords = sort(ind.keywords(data))
    names = sort(ind.named_entities(data))

    print "Sentiment", sentiment

    print "\n\n\nTags"
    for t in tags:
        print t[0], float(t[1]) * 100

    print "\n\n\nLanguages"
    for l in languages:
        print l[0], float(l[1]) * 100

    print "\n\n\nPolitical"
    for p in politics:
        print p[0], float(p[1]) * 100
    
    print "\n\nkeywords"
    for k in keywords:
        print k[0], float(k[1]) * 100
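
The sort helper used above isn't shown in this snippet. A plausible stand-in, assuming it only orders a result dict's (label, score) pairs by descending score (an assumption, not the original helper):

    def sort(d):
        # order (label, score) pairs so the strongest signal prints first
        return sorted(d.items(), key=lambda kv: kv[1], reverse=True)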
Example No. 5
    def getResult(strArray):
        sent = indicoio.sentiment(strArray)
        pers = indicoio.personality(strArray)
        poli = indicoio.political(strArray)
        keyw = indicoio.keywords(strArray)

        result = dict([("sentiment", sent), ("personality", pers), ("political", 4098), ("keywords", keyw)])
        return result
Example No. 6
 def analyze_tweets_politicaly(self):
     try:
         self.political_stats = Factor(
             indicoio.political(
                 self.person.all_text_as_one().content).items(),
             'Political stats')
         self.plotter.add_factor(self.political_stats)
     except IndicoError:
         raise PersonAnalyzerException(
             'Error while fetching data from indicoio')
Example No. 7
def main():
    configure()
    examples = list(filter(lambda sen: sen != '', get_examples('input.txt')))  # materialize the filter so len() and indexing below work on Python 3
    # single examples
    sentiments = indicoio.sentiment_hq(examples)
    poli = indicoio.political(examples)
    for i in range(len(examples)):
        print('============')
        print('{}\n\n{}\n\n{}\n'.format(examples[i], sentiments[i], poli[i]))
        print('============')
Example No. 8
 def get_political(self):
     political_scores = [0, 0, 0, 0]
     political_dict = indicoio.political(self.tweet_text)
     for key, value in political_dict.iteritems():
         if key == 'Libertarian':
             political_scores[0] += value
         elif key == 'Green':
             political_scores[1] += value
         elif key == 'Liberal':
             political_scores[2] += value
         elif key == 'Conservative':
             political_scores[3] += value
     return political_scores
Example No. 9
def analyze_text(journal_num):
	f = open(folders[journal_num], 'r')

	line = f.readline()
	url_dummy = line.split(',')  # split the saved line into the list of URLs

	for i in range(len(url_dummy)-1):
		# strip the useless HTML wrapped around each URL
		url_dummy[i] = url_dummy[i][3:-1]
	url_dummy[-1] = url_dummy[-1][3:-2]  ## the last URL carries one extra quote, so strip that too

	## run political analysis on each URL via the indicoio API and append the results
	
	for j in range(len(url_dummy)):
		analysis[journal_num].append(indicoio.political(url_dummy[j]))
	f.close()
Example No. 10
    def getOverallResult(self, strArray):

        result = indicoio.personality(strArray)

        extraversion = []
        openness = []
        agreeableness = []
        conscientiousness = []

        for things in result:
            extraversion.append(things["extraversion"])
            openness.append(things["openness"])
            agreeableness.append(things["agreeableness"])
            conscientiousness.append(things["conscientiousness"])

        result = indicoio.political(strArray)

        libertarian = []
        green = []
        liberal = []
        conservative = []

        for things in result:
            libertarian.append(things["Libertarian"])
            green.append(things["Green"])
            liberal.append(things["Liberal"])
            conservative.append(things["Conservative"])

        result = indicoio.sentiment(strArray)

        t = [
            result,
            libertarian,
            green,
            liberal,
            conservative,
            extraversion,
            openness,
            agreeableness,
            conscientiousness,
        ]

        return t
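
The three collection loops above repeat one pattern: pull a single field out of every per-document result dict. A minimal generic sketch of that pattern (the collect name is illustrative, not part of the original):

    def collect(results, field):
        # gather one score field from each result dict in a batch response
        return [r[field] for r in results]

    libertarian = collect(result, 'Libertarian')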
Example No. 11
def gimme_the_goods(text, tag_count=3, persona_count=3):
        
    # Consume some of that api for analysis
    sentiment = indicoio.sentiment(text)
    # TODO figure out a better way to handle this bug
    political = indicoio.political(text[0:1100])
    personality = indicoio.personality(text)
    personas = indicoio.personas(text)
    tags = indicoio.text_tags(text, top_n=tag_count)

    # Sort the personas to grab top ones
    top_personas = dict(sorted(personas.items(),
                        key=operator.itemgetter(1),
                        reverse=True)[:persona_count])
    
    # Truncate the values to 3 decimals for cleanliness
    roundness = 3
    sentiment = truncate_values(sentiment, roundness)
    political = truncate_values(political, roundness)
    personality = truncate_values(personality, roundness)
    top_personas = truncate_values(top_personas, roundness)
    tags = truncate_values(tags, roundness)
    
    # Rearrange the personas a bit
    final_personas = []
    for key, value in top_personas.items():
        final_personas.append({
            'type': persona_mapping[key],
            'name': key,
            'value': value,
        })
    
    return_dict = {
        'sentiment': sentiment,
        'political': political,
        'personality': personality,
        'personas': final_personas,
        'tags': tags
    }

    return return_dict
Example No. 13
 def askInfo(self, request, dictOrString):
     if request == "mood":
         tempDict = indicoio.emotion(self.opinionString,
                                     api_key=config["indico_key"])
         if dictOrString == "dictionary":
             return tempDict
         else:
             maxVal = max(tempDict.values())
             for i in tempDict:
                 if tempDict[i] == maxVal:
                     return i
     elif request == "party":
         tempDict = indicoio.political(self.opinionString,
                                       api_key=config["indico_key"])
         if dictOrString == "dictionary":
             return tempDict
         else:
             maxVal = max(tempDict.values())
             for i in tempDict:
                 if tempDict[i] == maxVal:
                     return i
     else:
         warnings.warn("invalid request", UserWarning)
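
A note on the repeated max-value scans above: Python's built-in max with a key function returns the same top label in one line (ties resolved by iteration order), so each branch could be reduced to a sketch like:

    top_label = max(tempDict, key=tempDict.get)  # the key whose value is largest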
Example No. 14
#WHACK 2016

import indicoio
from indicoio import political, sentiment, language, text_tags, keywords, fer, facial_features, image_features

indicoio.config.api_key = "f09f509655f721e3adac6df5b35abfed"
api_key_Lisa = "f09f509655f721e3adac6df5b35abfed"

result1 = political("Guns don't kill people. People kill people.")
result2 = sentiment("It's so cold outside!")
result3 = sentiment("I'm doing okay")
result4 = sentiment("indico is so easy to use!")
result5 = sentiment(
    "this api isn't bad at recognizing double negatives either.")

result6 = sentiment("I'm doing okay")
result7 = sentiment("Best day ever!")

#print result1
print result7
Example No. 15
def indicoPolitics(tweet):
    tag_dict = indicoio.political(tweet)
    return sorted(tag_dict.keys(), key=lambda x: tag_dict[x], reverse=True)[:1]
Example No. 16
def indicoPoliticsNumber(tweet):
    tag_dict = indicoio.political(tweet)
    print(tag_dict)
    top = sorted(tag_dict.keys(), key=lambda x: tag_dict[x], reverse=True)[:1]
    print(tag_dict[top[0]])
    return tag_dict[top[0]]
Example No. 17
data = {
    'dates': [],
    'liberal': [],
    'conservative': [],
    'libertarian': [],
    'green': []
}

for x in range(0, len(response_json["response"]["docs"])):
    article_to_analyze = response_json["response"]["docs"][x]
    data['dates'].append(article_to_analyze["pub_date"])

    snippet_to_analyze = article_to_analyze["lead_paragraph"]
    if snippet_to_analyze is None:
        snippet_to_analyze = response_json["response"]["docs"][x]["snippet"]
    
    print "Before leanings"
    leanings = indicoio.political(snippet_to_analyze)
    print "After leanings"

    data['liberal'].append(leanings['Liberal'])
    data['green'].append(leanings['Green'])
    data['conservative'].append(leanings['Conservative'])
    data['libertarian'].append(leanings['Libertarian'])
    print data['liberal'][x]

My_Graph = TrendGraph(data)
print My_Graph.getGraphImage()

#sample_data = {}
#sample_data['dates']         =    ["Year 2010", "Year 2011", "Year 2012", "Year 2013"]
#sample_data['liberals']      =    [0.25, 0.25, 0.25, 0.25]
#sample_data['conservatives'] =    [0.25, 0.25, 0.25, 0.25]
Example No. 18
 def test_batch_political(self):
     response = political([TEST_DATA], version=2)
     self.assertTrue(isinstance(response, list))
Example No. 19
import requests, indicoio, pickle

total_responses = []

for p in range(150):
    params = {
        "apikey": "PAXCY03JG4B4QZ0Z",
        "q": "*",
        "count": 100,
        "page": p,
        "view": "full"
    }

    resp = requests.get("https://api.fiscalnote.com/bills",
                        params=params).json()
    for response in resp:
        try:
            political = indicoio.political(
                response["description"],
                api_key="df43365f5a827d884eb683b836fcb78a"), response["title"]
            #print response["title"]
            response["political"] = political
            total_responses += [response]
            if len(total_responses) % 100 == 0:
                print(len(total_responses))
        except:
            pass

print(len(total_responses))

pickle.dump(total_responses, open("fiscal_note_raw_data_full.txt", "wb"))  # pickle requires a binary-mode file on Python 3
Example No. 20
#with open('textfile.txt', 'r') as myfile:
#   data = myfile.read().replace('\n', '')
#print(data)
import os
import indicoio

# read the file that contains the speech-to-text content
__location__ = os.path.realpath(
    os.path.join(os.getcwd(), os.path.dirname(__file__)))
file_contents = open(os.path.join(__location__, "textfile.txt"))
text = file_contents.read()

# next, feed it into the ML API
indicoio.config.api_key = 'd08fbca96c4341957f0a8a0b21d08b5d'
print("Political Allegiance: ")
print(indicoio.political(text))
print("\n")
print("Key Words: ")
print(indicoio.keywords(text, version=2))
print("\n")
print("Important Persons: ")
print(indicoio.people(text))
print("\n")
print("Significant Locations: ")
print(indicoio.places(text))
print("\n")
print("Relevant Organizations: ")
print(indicoio.organizations(text))
Example No. 21
 def _get_political(self):
     return indicoio.political(" ".join(self.text))
Example No. 22
def politicalAnalysis(newsMessages):
	'''
	Uses the indicoio API to perform political analysis on all posts for all pages in newsMessages.
	Computes an average for each page to allow easy visualization later on.
	Args:
		newsMessages: dictionary mapping page names to lists of post messages (strings)
	Returns:
		dict:
			keys: page names
			values: dict giving the chance that the page is Libertarian, Green, Liberal, or Conservative,
			as defined by indicoio's political analysis API

	TODO: insert into an SQL table?
	'''

	# cached for debugging purposes; we don't want to make 500 API calls on every run
	writeUpdates = False
	try:
		analysesFile = open('politicalAnalysis.json', 'r')
		analyses = json.loads(analysesFile.read())
		analysesFile.close()
		# clean-up from previous calls
		toDelete = []
		for s in analyses['average']:
			if s not in newsMessages:
				toDelete.append(s)
		for s in toDelete:
			writeUpdates = True
			del analyses['average'][s]
			del analyses['all'][s]
	except:
		writeUpdates = True
		print("Please wait while a whole ton of requests are made...")
		analyses = {'all': {}, 'average': {}}
	
	for company in newsMessages:
		# don't recalculate if we already did before...
		if company in analyses['average']:
			continue
		writeUpdates = True
		analyses['all'][company] = indicoio.political(newsMessages[company])
		# analyses['all'][company] is now a list of batch results, an analysis for each post
		libertarianSum = 0
		greenSum = 0
		liberalSum = 0
		conservativeSum = 0
		# so let's go get the average and classify this page
		for res in analyses['all'][company]:
			libertarianSum += res['Libertarian']
			greenSum += res['Green']
			liberalSum += res['Liberal']
			conservativeSum += res['Conservative']
		analyses['average'][company] = {'Libertarian': libertarianSum/len(analyses['all'][company]),
										'Green': greenSum/len(analyses['all'][company]), 
										'Liberal': liberalSum/len(analyses['all'][company]),
										'Conservative': conservativeSum/len(analyses['all'][company])}
	# save if there were changes
	if writeUpdates:
		analysesFile = open('politicalAnalysis.json', 'w')
		analysesFile.write(json.dumps(analyses, indent=2))
		analysesFile.close()
	return analyses['average']
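
A minimal usage sketch for politicalAnalysis, with hypothetical page names and post strings; the input shape follows the docstring (page name -> list of post messages):

    newsMessages = {
        'PageA': ['First post text', 'Second post text'],
        'PageB': ['Another post'],
    }
    averages = politicalAnalysis(newsMessages)
    # averages['PageA'] -> {'Libertarian': ..., 'Green': ..., 'Liberal': ..., 'Conservative': ...}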
Example No. 23
import indicoio

indicoio.config.api_key = '5eced02755ca5225746b0a83c0a03104'

# Multiple examples for political
indicoio.political("I have a constitutional right to bear arms!")
indicoio.political("I have a constitutional right to fee speech!")
indicoio.political("Through the constitution I do not have to quarter troops!")
indicoio.political("It is my right as a women to vote")
Example No. 24
def get_political():
    if request.method == 'POST':
        data = dict(request.form)['data_to_analyze']
        return json.dumps({
            'political': indicoio.political(data)
        })
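
Assuming the handler above is bound to a Flask route (the URL below is hypothetical), it can be exercised with a plain POST; note that dict(request.form) may map each field to a list of values depending on the werkzeug version, in which case indicoio.political receives a batch:

    import requests

    resp = requests.post('http://localhost:5000/political',
                         data={'data_to_analyze': 'pro-choice'})
    print(resp.json()['political'])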
Example No. 26
                         passwd="",
                         db="nytimes")
    cur = db.cursor()

    sentimentValues = []
    politicalValues = []
    personalityValues = []
    emotionValues = []
    start = 3474
    itr = start
    for i in range(start, num + start):
        print("starting chunk " + str(itr) + " !")
        itr += 1
        curText = allText[i][:1000]
        sentimentValues = indicoio.sentiment_hq(curText)
        politicalValues = indicoio.political(curText)
        personalityValues = indicoio.personality(curText)
        emotionValues = indicoio.emotion(curText)
        abst = repr(allText[i]).replace("'", '').replace('"', '')
        SQLline = 'INSERT INTO `article`(`abst`, `url`, `sent`, `serv`, `gree`, `libe`, `libt`, `agre`, `cons`, `extr`, `open`, `ange`, `fear`, `joyy`, `sadd`, `surp`) VALUES ("' + abst + '"  ,"' + repr(
            articles[i]["url"]) + '",' + str(sentimentValues) + ',' + str(
                politicalValues["Conservative"]
            ) + ',' + str(politicalValues["Green"]) + ',' + str(
                politicalValues["Liberal"]
            ) + ',' + str(politicalValues["Libertarian"]) + ',' + str(
                personalityValues["agreeableness"]
            ) + ',' + str(personalityValues["conscientiousness"]) + ',' + str(
                personalityValues["extraversion"]) + ',' + str(
                    personalityValues["openness"]) + ',' + str(
                        emotionValues["anger"]) + ',' + str(
                            emotionValues["fear"]) + ',' + str(
 def test_batch_political_v2(self):
     test_data = [TEST_DATA, TEST_DATA]
     response = political(test_data, version=2)
     self.assertTrue(isinstance(response, list))
     self.assertEqual(POLITICAL_SET, set(response[0].keys()))
     self.assertEqual(response[0], response[1])
Example No. 28

api = twitter.Api(consumer_key='...',
                      consumer_secret='...',
                      access_token_key='...',
                      access_token_secret='...')

indicoio.config.api_key = "ac9e3e9c5e603a59d6752fad66fdeb51"
politicianTwitters = [
    'LincolnChafee', 'HillaryClinton', 'lessig', 'MartinOMalley', 'BernieSanders',
    'JimWebbUSA', 'JebBush', 'RealBenCarson', 'ChrisChristie', 'tedcruz',
    'CarlyFiorina', 'gov_gilmore', 'GrahamBlog', 'GovMikeHuckabee', 'BobbyJindal',
    'JohnKasich', 'GovernorPataki', 'RandPaul', 'marcorubio', 'RickSantorum',
    'ScottWalker', 'realDonaldTrump']
output = open('politicianScores.txt', 'w')
libertarian = 'Libertarian'
green = 'Green'
liberal = 'Liberal'
conservative = 'Conservative'
for user in politicianTwitters:
    statuses = api.GetUserTimeline(screen_name=user, count=200)
    l = [s.text for s in statuses]
    count = len(l)
    scores = {libertarian: 0, green: 0, liberal: 0, conservative: 0}
    for entry in l:
        politicianScore = political(entry)
        scores[libertarian] += politicianScore[u'Libertarian']
        scores[green] += politicianScore[u'Green']
        scores[liberal] += politicianScore[u'Liberal']
        scores[conservative] += politicianScore[u'Conservative']
    scores[libertarian] /= count
    scores[green] /= count
    scores[liberal] /= count
    scores[conservative] /= count
    output.write(user + " " + libertarian + ": " + str(scores[libertarian]) + " " + green + ": " + str(scores[green]) + " " + liberal + ": " + str(scores[liberal]) + " " + conservative + ": " + str(scores[conservative]) + '\n')  # inside the loop: one output line per politician
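
Each status above costs one API round trip, so a 200-tweet timeline means up to 200 requests per politician. The batch interface (a list argument, as in the batch tests on this page) would score a whole timeline in one call; a minimal sketch:

    batch_scores = political(l)  # one request; a list of per-tweet score dicts
    for politicianScore in batch_scores:
        scores[libertarian] += politicianScore[u'Libertarian']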
Example No. 29
 def test_batch_political(self):
     test_data = ["Guns don't kill people, people kill people."]
     response = political(test_data, api_key=self.api_key)
     self.assertTrue(isinstance(response, list))
Example No. 30
def bias(phrase):
    biasDict = indicoio.political(phrase)  # score the supplied phrase
    biasKeys = list(biasDict.keys())  # list() keeps the keys indexable on Python 3
    biasList = [biasDict[key] for key in biasKeys]
    return biasList
Example No. 31
 def test_political_v2(self):
     response = political(TEST_DATA, version=2)
     self.assertTrue(isinstance(response, dict))
     self.assertEqual(POLITICAL_SET, set(response.keys()))
Example No. 34
for sent in sentences:
    for word in nltk.word_tokenize(sent.lower()):
        if word in word_frequencies.keys():
            if len(sent.split(' ')) < 30:
                if sent not in sentence_scores.keys():
                    sentence_scores[sent] = word_frequencies[word]
                else:
                    sentence_scores[sent] += word_frequencies[word]

summary_sentences = heapq.nlargest(len(sentences),
                                   sentence_scores,
                                   key=sentence_scores.get)

video_info["summary_variable"] = summary_sentences

politicalValues = indicoio.political(text)
personalityValues = indicoio.personality(text)
emotionValues = indicoio.emotion(text)

video_info["political"] = politicalValues
video_info["personality"] = personalityValues
video_info["emotion"] = emotionValues
video_info["url"] = url


class MyEncoder(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, numpy.integer):
            return int(obj)
        elif isinstance(obj, numpy.floating):
            return float(obj)
Example No. 35
    clean_tweet = html.unescape(clean_tweet)
    clean_tweet = " ".join(filter(lambda x:x[0]!="@", clean_tweet.split()))
    clean_tweet = " ".join(filter(lambda x:x[:4]!="http", clean_tweet.split()))
    tweets[index] = clean_tweet
    
print("There are " + str(len(tweets)) + " about to be printed!")
print("\n-\n".join(tweets))
exit()    

# join the tweets into a big ol paragraph
combined_tweets = " ".join(tweets)

# get some sweet stats
sentiment = i.sentiment(combined_tweets)
personas = i.personas(combined_tweets)
political = i.political(combined_tweets)

# sorty sort
sorted_personas = sorted(personas.items(), 
                         key=operator.itemgetter(1), 
                         reverse=True)
sorted_political = sorted(political.items(),
                          key=operator.itemgetter(1),
                          reverse=True)

print()
print(sorted_personas[:3])
print(sorted_political[0])
print(sentiment)

# Show rate limit status for this application
Example No. 36
data_str = data_bytes.decode('utf-8')
tweet_data = json.loads(data_str)

just_tweets = [tweet["tweet_text"] for tweet in tweet_data]




# # single example
# indicoio.political("I have a constitutional right to bear arms!")

max_send = 30000

# batch example
finished = False
i = 12
while not finished:
	up_bound = min((i+1)*max_send, len(just_tweets))
	if up_bound == len(just_tweets):
		finished = True
	to_send = just_tweets[i*max_send:up_bound]
	stuff = indicoio.political(to_send)
	print(len(stuff))
	stuff2 = {"stuff": stuff}
	print ("dumping", i)
	json.dump(stuff2,(open("testing5.json", 'a')))
	print ("done dumping", i)
	i += 1
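
The loop above walks just_tweets in fixed windows of max_send items, resuming from batch offset i = 12. Ignoring the resume offset, the same chunking can be written with a stepped range; a minimal equivalent sketch:

    for start in range(0, len(just_tweets), max_send):
        batch = just_tweets[start:start + max_send]
        # indicoio.political(batch) would then be called once per chunk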


Example No. 37
import indicoio
indicoio.config.api_key = '5eced02755ca5225746b0a83c0a03104'

# single example
indicoio.political("I have a constitutional right to bear arms!")
Example No. 38
def info(yt_url):
    video_info = {}

    url = "https://www.youtube.com/watch?v=" + yt_url
    yt = YouTube(url)

    video_info["timestamped"] = []

    # get the audio file
    a = yt.captions.get_by_language_code('en')
    caps = a.generate_srt_captions()
    caps = caps.split("\n\n")
    caps = [i.split("\n") for i in caps]
    text = ""

    for i in caps:
        for j in i[2:]:
            text += j
            line = " ".join(i[2:])
            line = re.sub(r"<[^<]+?>", '', line)
            try:
                video_info["timestamped"].append([
                    i[1].split(" --> ")[0],
                    i[1].split(" --> ")[1],
                    line
                ])
            except:
                pass

    text = re.sub(r"<[^>]*>", " ", text)
    text = re.sub(r'\s+', ' ', text)
    text = re.sub(r"<[^<]+?>", '', text)
    text = text.replace("...", ".")
    text = text.replace("…", "")
    text = text.replace(".", ". ")
    text = re.sub(r'\s+', ' ', text)
    sentences = nltk.sent_tokenize(text)
    video_info["full_transcript"] = text
    stopwords = nltk.corpus.stopwords.words('english')

    word_frequencies = {}
    for word in nltk.word_tokenize(text):
        if word not in stopwords:
            if word not in word_frequencies.keys():
                word_frequencies[word] = 1
            else:
                word_frequencies[word] += 1

    maximum_frequency = max(word_frequencies.values())
    for word in word_frequencies.keys():
        word_frequencies[word] = (word_frequencies[word] / maximum_frequency)

    sentence_scores = {}
    for sent in sentences:
        for word in nltk.word_tokenize(sent.lower()):
            if word in word_frequencies.keys():
                if len(sent.split(' ')) < 30:
                    if sent not in sentence_scores.keys():
                        sentence_scores[sent] = word_frequencies[word]
                    else:
                        sentence_scores[sent] += word_frequencies[word]

    summary_sentences = heapq.nlargest(len(sentences), sentence_scores, key=sentence_scores.get)

    video_info["summary_variable"] = summary_sentences

    politicalValues = indicoio.political(text)
    personalityValues = indicoio.personality(text)
    emotionValues = indicoio.emotion(text)

    video_info["political"] = politicalValues
    video_info["personality"] = personalityValues
    video_info["emotion"] = emotionValues
    video_info["sentiment"] = indicoio.sentiment(text)
    video_info["url"] = url

    class MyEncoder(json.JSONEncoder):
        def default(self, obj):
            if isinstance(obj, numpy.integer):
                return int(obj)
            elif isinstance(obj, numpy.floating):
                return float(obj)
            elif isinstance(obj, numpy.ndarray):
                return obj.tolist()
            else:
                return super(MyEncoder, self).default(obj)

    return json.dumps(video_info, cls=MyEncoder)
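
A minimal call sketch for info above; the argument is only the video id (the watch URL is built inside the function), the id below is a placeholder, and the video needs English captions for get_by_language_code('en') to succeed:

    result_json = info('VIDEO_ID_HERE')  # hypothetical YouTube video id
    print(result_json[:200])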
Example No. 39
def execute(USERNAME, target, refresh):

    r_data = io_helper.read_raw(USERNAME, target)

    og = sys.stdout
    fpath = io_helper.out_path(USERNAME, target)

    def analysis(raw='', limit=5, text='', percent=True):
        global meta_dict
        # print lines if input is a list of non-dicts
        # if input is list of dicts, merge dicts and resend to analysis
        if isinstance(raw, list):
            for item in raw:
                if not isinstance(item, dict):
                    print(item)
                else:
                    create_meta_dict(item)
            analysis(meta_dict, limit, text, percent)

        # if input is dict: print k, v pairs
        # optional args for return limit and description text
        if isinstance(raw, dict):
            print(text)
            ct = 0
            for v in sorted(raw, key=raw.get, reverse=True):
                ct += 1
                if ct > limit: break
                if isinstance(raw[v], float):
                    if percent: per = r'%'
                    else: per = ''
                    print("    " + v, str(round(raw[v] * 100, 2)) + per)
                else:
                    print(v, raw[v])
            print()

    def create_meta_dict(item):
        # merge list of dicts into master dict
        global meta_dict
        meta_dict[item['text']] = item['confidence']
        return meta_dict

    rClean = ''
    for i in range(len(r_data)):
        if r_data[i - 1] == '\\':
            rClean = rClean[:-1]
            if r_data[i] != "'":
                continue

        if r_data[i] == '*':
            rClean += ' '
        else:
            rClean += r_data[i]

    r_data = rClean
    del rClean
    indicoio.config.api_key = keycheck.get_key()

    # Big 5
    big5 = {
        'text': "Big 5 personality inventory matches: ",
        "payload": indicoio.personality(r_data)
    }

    # Myers-Briggs
    mbtiLabels = indicoio.personas(r_data)
    mbti_dict = {
        'architect': 'intj',
        'logician': 'intp',
        'commander': 'entj',
        'debater': 'entp',
        'advocate': 'infj',
        'mediator': 'infp',
        'protagonist': 'enfj',
        'campaigner': 'enfp',
        'logistician': 'istj',
        'defender': 'isfj',
        'executive': 'estj',
        'consul': 'esfj',
        'virtuoso': 'istp',
        'adventurer': 'isfp',
        'entrepreneur': 'estp',
        'entertainer': 'esfp'
    }

    def replace_mbti():
        for k, v in mbtiLabels.items():
            k = k.replace(k, mbti_dict[k])
            yield k

    k = (list(replace_mbti()))
    v = map(lambda x: x, mbtiLabels.values())
    payload = (dict(zip(k, v)))

    mbti = {
        'text': "Most likely personalilty styles: ",
        "payload": payload,
        'ct': 5,
        'percent': True
    }

    # Political
    pol = {
        'text': "Political alignments: ",
        "payload": indicoio.political(r_data, version=1)
    }
    # Sentiment
    sen = {
        'text': "Sentiment: ",
        "payload": {
            'Percent positive': indicoio.sentiment(r_data)
        },
        'ct': 3
    }

    # Emotion
    emo = {
        'text': "Predominant emotions:",
        "payload": indicoio.emotion(r_data),
        'ct': 5
    }

    # Keywords
    kw = {'text': "Keywords: ", "payload": indicoio.keywords(r_data), 'ct': 5}
    # Text tags
    tt = {
        'text': "Text tags: ",
        "payload": indicoio.text_tags(r_data),
        'ct': 10
    }
    # Place
    pla = {
        'text': "Key locations: ",
        'payload': indicoio.places(r_data, version=2),
        'ct': 3,
        'percent': True
    }

    def Karma(USERNAME):
        import praw
        import collections
        kList = []
        user_agent = ("N2ITN")
        r = praw.Reddit(user_agent=user_agent)
        thing_limit = 100

        user = r.get_redditor(USERNAME)
        gen = user.get_submitted(limit=thing_limit)
        karma_by_subreddit = {}
        for thing in gen:
            subreddit = thing.subreddit.display_name
            karma_by_subreddit[subreddit] = (
                karma_by_subreddit.get(subreddit, 0) + thing.score)

        for w in sorted(karma_by_subreddit,
                        key=karma_by_subreddit.get,
                        reverse=True):
            kList.append(str(w) + ': ' + str(karma_by_subreddit[w]))
        kList.insert(0, 'Karma by Sub')

        print("\n\t".join(kList[:10]))

    def show(results):
        # Accepts bag of dicts, or single dict
        if not isinstance(results, dict):
            for X in results:
                show(X)
        else:
            if results == pla and pla['payload'] == []:
                print("Not enough information to infer place of origin")
                print()
            else:

                i = results
                analysis(raw=i.get('payload', ''),
                         limit=i.get('ct', 5),
                         text=i.get('text', ''),
                         percent=i.get('percent', True))

    with open(fpath, 'w') as outtie:
        sys.stdout = outtie
        print(target + USERNAME)
        print()
        show([kw, pla, big5, emo, sen, pol, mbti, tt])
        Karma(USERNAME)

        sys.stdout = og
    return
Example No. 40
 def test_batch_political(self):
     response = political([TEST_DATA], version=1)
     self.assertTrue(isinstance(response, list))
Example No. 41
		# get rid of useless html.
		url_dummy[i]=url_dummy[i][3:-1]
		print url_dummy[i]
		i=i+1

	## the last URL carries one extra quote, so strip that too
	url_dummy[-1]=url_dummy[-1][3:-2]
	print len(url_dummy)

	## run political analysis on each URL via the indicoio API and append the results
	analysis=[]
	j=0
	# You could also use "for item in url_dummy" syntax here, like you did
	# with "for textfiles in folder"
	for j in range(len(url_dummy)):
		analysis.append(indicoio.political(url_dummy[j]))
		j=j+1 # you shouldn't do this -- the for loop already increments j


	## get the average of the analysis
	## add all the results of the urls and divide with the number of urls

	sum_stats=[0,0,0,0]  #sum of all stats gained from indicoio
	for i in range(len(analysis)):
		sum_stats[0]=sum_stats[0]+analysis[i]["Libertarian"]
		sum_stats[1]=sum_stats[1]+analysis[i]["Green"]
		sum_stats[2]=sum_stats[2]+analysis[i]["Liberal"]
		sum_stats[3]=sum_stats[3]+analysis[i]["Conservative"]
		i=i+1 # again, you shouldn't do this
	print sum_stats
	aver_stats=[0,0,0,0]
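	## assumed completion, per the comment above: divide each summed score by the URL count
	for i in range(len(aver_stats)):
		aver_stats[i] = sum_stats[i] / float(len(analysis))
	print aver_stats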
Example No. 46
    def get_sentiment(self, parsed, participants):
        ''' Calculate sentiment values for each line of each participant.

            Inputs:
                parsed: (dict of lists) mapping line to speaker
                participants: (list) every speaker in debate

            Returns:
                (dict of lists) containing [sentiment (pattern), sentiment (indico),
                political sentiment (indico)]
        '''

        # Pre-allocating variables, etc
        sentiments = {}
        senti_patt = {}
        senti_indi = {}
        poli_senti = {}
        senti = 0
        average_count = 0

        for participant in participants:
            senti_patt[participant] = 0
            senti_indi[participant] = 0
            poli_senti[participant] = 0

        # Running pattern sentiment on each line for each participant
        for participant in participants:
            senti_max = 0
            senti_min = 0
            senti = 0          # reset the running sum per participant, or averages bleed across speakers
            average_count = 0  # reset the line count per participant for the same reason
            just_senti = 0
            for line in parsed[participant]:
                just_senti = sentiment(line)
                senti += just_senti[0]
                average_count += 1 # Total line count, to use to average
                if just_senti[0] > senti_max: #Finding max and min values
                    senti_max = just_senti[0]
                if just_senti[0] < senti_min:
                    senti_min = just_senti[0]

            # Writing average sentiment and max/min data to return
            senti_patt[participant] = [(senti/average_count+1)/2.0, (senti_max+1)/2.0, (senti_min+1)/2.0]

        # Running indico sentiment on each line for each participant
        for participant in participants:
            senti_max = 0
            senti_min = 0
            senti = 0
            average_count = 0
            it = 0 # Debug counter
            curr_senti = 0
            for line in parsed[participant]:
                print it
                try:
                    curr_senti = indicoio.sentiment(line)
                    senti += curr_senti
                    average_count += 1
                except:
                    pass
                it += 1

                # Finding max and min values
                if curr_senti > senti_max:
                    senti_max = curr_senti
                if curr_senti < senti_min:
                    senti_min = curr_senti

            senti_indi[participant] = [senti/average_count, senti_max, senti_min] #writing average sentiment and max/min data to return

        for participant in participants:
            max_con = 0
            min_con = 0
            max_lib = 0
            min_lib = 0

            # Determining political sentiment for each participant
            conserv = 0
            lib = 0
            average_count = 0
            poli_get = {'Conservative': 0, 'Liberal': 0}
            for line in parsed[participant]:
                print it
                try: # Attempts to call poli sentiment function on each line
                    poli_get = indicoio.political(line)
                    conserv += poli_get['Conservative'] # Adds to each count
                    lib += poli_get['Liberal']
                    average_count += 1
                except:
                    pass

                it += 1
                if max_con > poli_get['Conservative']: # Sets max and min values as it cycles through
                    max_con = poli_get['Conservative']
                if min_con < poli_get['Conservative']:
                    min_con = poli_get['Conservative']
                if max_lib > poli_get['Liberal']:
                    max_lib = poli_get['Liberal']
                if min_lib < poli_get['Liberal']:
                    min_lib = poli_get['Liberal']

            poli_senti[participant] = [conserv/average_count, max_con, min_con, lib/average_count, max_lib, min_lib]

        # Creating output dictionary with all data collected
        for participant in participants:
            sentiments[participant] = [senti_patt[participant], senti_indi[participant], poli_senti[participant]]

        return sentiments
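
One caveat on the averaging in get_sentiment: if every indicoio call for a participant fails, average_count stays 0 and the divisions raise ZeroDivisionError. A minimal defensive sketch, to place just before each average is computed:

    average_count = max(average_count, 1)  # treat an all-failure participant as zero rather than crashing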