Example #1
    def respond_from_waiting(self, message, tags):
        """Decide what state to go to from the "waiting" state.

        Parameters:
            message (str): The incoming message.
            tags (Mapping[str, int]): A count of the tags that apply to the message.

        Returns:
            str: The message to send to the user.
        """

        if 'greeting' in tags:
            return self.go_to_state('main_question')
        elif ('capital punishment' in tags or 'death penalty' in tags) and 'hello' not in tags:
            # Negative sentiment: pose the topic; positive: agree and finish.
            if indicoio.sentiment(message) < .5:
                return self.go_to_state('pose_topic')
            else:
                return self.finish('agree')
        else:
            return self.finish('confused')
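A minimal driver sketch; the bot instance and the tag counts are made up for illustration:

tags = {'death penalty': 1}  # tag -> count, as described in the docstring
reply = bot.respond_from_waiting("I think the death penalty is wrong.", tags)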
Example #2
	def geo_data_analysis(self, search_term):
		"""Finds the average positive/negative sentiment of tweets for each region.
		Params: search_term - string term used to search tweets
		Returns: list of four doubles (average polarity for West, South, Northeast, and Midwest)
		"""

		# A list of tweet texts from each region
		NE_text = self.geo_collect_tweets(search_term,42.781158,-71.398729,'250mi')
		S_text = self.geo_collect_tweets(search_term,33.000000,-84.000000,'500mi')
		MW_text = self.geo_collect_tweets(search_term,40.000000,-100.000000,'1000mi')
		W_text = self.geo_collect_tweets(search_term,35.000000,-120.000000,'250mi')
		
		#A list of sentiment values for the tweets from each region 
		NE_sentiment_values = sentiment(NE_text)
		S_sentiment_values = sentiment(S_text)
		MW_sentiment_values = sentiment(MW_text)
		W_sentiment_values = sentiment(W_text)

		#find the average sentiment value for each region
		NE_avg = sum(NE_sentiment_values)/len(NE_sentiment_values)
		S_avg = sum(S_sentiment_values)/len(S_sentiment_values)
		MW_avg = sum(MW_sentiment_values)/len(MW_sentiment_values)
		W_avg = sum(W_sentiment_values)/len(W_sentiment_values)

		return [W_avg,S_avg,NE_avg,MW_avg]
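A usage sketch, assuming the class above is instantiated as `analyzer` (hypothetical name); note the return order is West, South, Northeast, Midwest:

W_avg, S_avg, NE_avg, MW_avg = analyzer.geo_data_analysis('election')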
Example #3
def sentiment_from_text(text):
    text_sentiment = 0

    emojis = re.findall(emoji.get_emoji_regexp(), text)
    emojis_sentiment = sentiment_from_emojis(emojis)
    logger.debug('emoji sentiment: ' + str(emojis_sentiment))
    text_sentiment += emojis_sentiment

    text_translated = text
    text_emoji_free = remove_redundant_symbols(text)
    logger.debug('text without emoji: ' + text_emoji_free)

    if len(text_emoji_free) > 0:
        is_original = random.choices([True, False],
                                     weights=language_proportions)[0]
        try:
            if not is_original:
                text_translated = translator.translate(text_emoji_free)
                text_sentiment += indicoio.sentiment(text_translated)
            else:
                text_sentiment += indicoio.sentiment(text_emoji_free)
            if emojis_sentiment > 0:
                text_sentiment /= 2
        except Exception as e:
            logger.exception(e)
    logger.debug([text, text_translated, text_sentiment])

    if text_sentiment == 0:  # if failed to compute sentiment
        return 0.5

    return text_sentiment
Example #4
def sentiment(screen_name, language):
    # This will create a sentiment column and length column
    df = pd.read_csv('./Data/%s_prep.csv' % screen_name, encoding='utf8')
    indicoio.config.api_key = apikey['client_key']

    # Trial Run
    test = df.sample(50)
    test['length'] = test['full_text'].apply(len)
    test['sentiment'] = indicoio.sentiment(test['full_text'].tolist(),
                                           language=language)
    test['handle'] = screen_name

    # Real Run
    df['length'] = df['full_text'].apply(len)
    df['sentiment'] = indicoio.sentiment(df['full_text'].tolist(),
                                         language=language)
    df['handle'] = screen_name

    # Export both dataframes to csv
    test.to_csv('./Data/%s_test.csv' % screen_name,
                encoding='utf8',
                index=False)
    df.to_csv('./Data/%s_final.csv' % screen_name,
              encoding='utf8',
              index=False)
Example #5
def use_indico(train):
    '''
    Batch reviews in groups of 1000 and send them to the Indico API to get
    sentiment results. Return the list of results, as well as the test features
    and test targets, to be used in testing the results.
    Isolate the fetching of the sentiment results from Indico from the use of
    those results, so that if something goes wrong, we don't need to fetch again.
    No need to clean and vectorize training reviews, or train a random forest
    on them, because Indico has done all of that already. Just strip out html.
    '''
    Xtrain, Xtest, ytrain, ytest = test_train(train)
    print "Cleaning html from the test set of movie reviews..."
    clean_test_reviews = remove_html(Xtest)
    print "Running Indico queries..."
    print "This will take a while..."
    # process the reviews in batches of 1000, then finish with the leftovers, if any
    sentiment_lists = []
    for i in range(1000, len(Xtest), 1000):
        print "Processing reviews {0} to {1}...".format(i - 1000, i - 1)
        batch = clean_test_reviews[i - 1000:i]
        results = indicoio.sentiment(batch, split='sentence')
        sentiment_lists += results
    if len(sentiment_lists) < len(Xtest):
        print "Processing final reviews {0} to {1}...".format(
            len(sentiment_lists), len(Xtest))
        batch = clean_test_reviews[len(sentiment_lists):]
        results = indicoio.sentiment(batch, split='sentence')
        sentiment_lists += results
    print "{0} Indico sentiments returned".format(len(sentiment_lists))
    return sentiment_lists, Xtest, ytest
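The loop above fills complete batches of 1000 and then mops up the remainder in a second step; the same pattern can be written as a single loop. A sketch, not part of the original code:

def batches(items, size=1000):
    # Yield consecutive slices of at most `size` items, so the final
    # partial batch needs no special-case handling.
    for start in range(0, len(items), size):
        yield items[start:start + size]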
Example #7
    def test_posneg(self):
        test_string = "Worst song ever."
        response = sentiment(test_string)

        self.assertTrue(isinstance(response, float))
        self.assertTrue(response < 0.5)

        test_string = "Best song ever."
        response = sentiment(test_string)
        self.assertTrue(isinstance(response, float))
        self.assertTrue(response > 0.5)
Example #8
    def on_data(self, data):
        # Parse the payload once instead of re-parsing it for every field.
        payload = json.loads(data.strip())
        text = payload['text']

        point = payload['coordinates']
        if point:
            longitud = point['coordinates'][0]
            latitud = point['coordinates'][1]
        else:
            longitud = ''
            latitud = ''

        created_at = payload['created_at']
        place = payload['place']['name']
        hashtag = re.findall(r"#(\w+)", text)
        # Quick-and-dirty filter: only keep single-line tweets.
        if len(text.split('\n')) == 1:
            # Skip sentiment scoring for tweets that start with a link.
            if re.match(r'https?://', text):
                sent = 0
            else:
                sent = indicoio.sentiment(text.strip())
            varcsv = str(created_at + ';' + text.strip() + ';' +
                         ''.join(hashtag) + ';' + place + ';' + str(latitud) +
                         ';' + str(longitud) + ';' + str(sent)).strip() + '\n'

            print(varcsv)
            # Write the row out to a file for later processing.
            with open("output2.txt", "a") as text_file:
                text_file.write(varcsv)
            return True
Example #9
def getIndico(news_file):
    output_file_name = "indico_" + news_file
    news_data = pd.read_csv(os.path.join(input_dir, news_file),
                            names=["time", "headline"])
    news_data['indico_score'] = np.vectorize(lambda x: indicoio.sentiment(x))(
        news_data.headline)
    news_data.to_csv(os.path.join(output_dir, output_file_name), index=False)
Example #10
 def test_specify_version(self):
     test_data = ['Worst song ever', 'Best song ever']
     response = sentiment(test_data, version="1")
     self.assertIsInstance(response, list)
     self.assertEqual(len(response), 2)
     self.assertTrue(response[0] < .5)
     self.assertTrue(response[1] > .5)
Example #11
def sentiment_sliding(messages, window=1000, shift=20):
    allwords = []
    data = {}
    for m in messages:
        if "\\Sent" not in m.get("folders", tuple()):
            continue
        if not m.get("body") or not m["body"].get("content"):
            continue
        allwords.append(EmailReplyParser.parse_reply(m["body"]["content"]))

    allwords = " ".join(allwords)
    allwords = allwords.encode("ascii", "ignore")
    allwords = allwords.split()

    current_window = 0
    next_window = window
    print "number of words", len(allwords)
    while True:
        if len(allwords) < next_window:
            print "sliding-sentiment reached end at lengths:%s" % len(allwords)
            break
        print "sliding-sentiment start:%s end:%s" % (current_window, next_window)
        data[current_window] = " ".join(allwords[current_window:next_window])
        data[current_window] = indicoio.sentiment(data[current_window])
        print data[current_window]
        current_window += shift
        next_window += shift
    return data
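A usage sketch (with a hypothetical `messages` list); the keys of the returned dict are the word offsets at which each window starts, stepped by `shift`:

data = sentiment_sliding(messages, window=1000, shift=20)
# data maps each window's starting word offset to an indicoio sentiment score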
Example #12
def gradeMultiple(inputList):
    "Call this function when multiple lines need to be checked"

    # lists that will collect the positive and negative posts, in order of appearance in inputList
    posts = []
    pos_posts = []
    neg_posts = []
    xCount = 0
    yCount = 0

    # for loop that will get the marks of all elements in inputList (assume inputList is a list of strings)
    for i in inputList:
        if indicoio.sentiment(i) > 0.5:
            pos_posts.append(i)
        else:
            neg_posts.append(i)

    posts.append(pos_posts)
    posts.append(neg_posts)

    return_posts = []

    for x in posts:
        return_posts.append([])
        for y in x:
            yCount = yCount + 1
            y = (str(yCount) + ". " + y)
            return_posts[xCount].append(y)
        xCount = xCount + 1

    return return_posts
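A usage sketch with made-up posts; the return value is a pair of lists (positive posts first, then negative), each entry prefixed with a running number:

posts = gradeMultiple(["Loved the show!", "Terrible service."])
positive_posts, negative_posts = posts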
Example #13
def main():
    if len(sys.argv) != 3:
        return

    inname = sys.argv[1]
    outname = sys.argv[2]

    with open(inname, mode='r') as inFile:
        tweets = json.load(inFile)
        count = 0

        for tweet in tweets:
            if tweet['positiveness'] is None:
                try:
                    tweet['positiveness'] = float(
                        indicoio.sentiment(tweet['text'],
                                           language=tweet['lang']))
                except Exception:
                    tweet['positiveness'] = None

            count += 1

            if count % 100 == 0:
                print(count)
                with open(outname, 'w') as outfile:
                    json.dump(tweets, outfile)

        with open(outname, 'w') as outfile:
            json.dump(tweets, outfile)
Example #14
    def test_posneg(self):
        posneg_set = set(['Sentiment'])
        test_string = "Worst song ever."
        response = sentiment(test_string)

        self.assertTrue(isinstance(response, dict))
        self.assertEqual(posneg_set, set(response.keys()))
Example #15
def get_keywords(query, level):
    tweets = get_twitter_data(query)
    tweet_text = [tweet.text for tweet in tweets]

    num_tweets = len(tweets)

    n = 5  # can be changed

    keywords = indicoio.keywords(tweet_text, version=2, top_n=n)
    d2 = defaultdict(float)
    for d in keywords:
        for key in d:
            d2[key] += d[key]  # final is sum of probabilities

    # only keep edges with high connectivity
    c = 0.007  # can be changed
    minimum = c * num_tweets * math.sqrt(n) * level  # can be changed
    final = {}
    for key in d2:
        if d2[key] > minimum:
            final[key.lower()] = d2[key]

    sentiments = indicoio.sentiment(tweet_text)
    avg_sentiment = numpy.mean(sentiments)
    return (final, avg_sentiment)
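A usage sketch (the query and level are illustrative); the function returns the filtered keyword weights together with the mean tweet sentiment:

keywords, avg_sentiment = get_keywords('climate change', level=1)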
Example #16
def getSentiment(jsonInput, word):
	jsonStr = ""
	try:
		sentiments = indicoio.sentiment(jsonInput[0][0])
		keywords = indicoio.keywords(jsonInput[0][0])
		average = 0
		above_average = 0
		below_average = 0
		for sentiment in sentiments:
			average += sentiment
			if sentiment > 0.5:
				above_average = above_average + 1
			else:
				below_average = below_average + 1
		average = average / len(sentiments)
		above_average = float(above_average) / len(sentiments)
		below_average = float(below_average) / len(sentiments)
		most_frequent_words = getFrequentWords(jsonInput)
		jsonStr = "{\"results\":{\"above_average\":\""+str(above_average)+"\", \"word\":\""+word+"\",\"below_average\" :\""+str(below_average)+"\",\"average\":"+str(average)+"}, \"keywords\": \""+str(keywords)+"\", \"most_frequent_word\":\""
		for i in most_frequent_words[1:]:
			print(i.getKey())
			jsonStr += i.getKey() + ","
		jsonStr += "\"}"

		result = BuzzfeedSearch(json=jsonStr, name=word)
		result.save()
		serializer = BuzzfeedSerializer(result)
		content = JSONRenderer().render(serializer.data) 
		all_entries = BuzzfeedSearch.objects.all()

	except Exception,e:
		return jsonStr
Example #17
 def test_specify_version(self):
     test_data = ['Worst song ever', 'Best song ever']
     response = sentiment(test_data, api_key=self.api_key, version="1")
     self.assertIsInstance(response, list)
     self.assertEqual(len(response), 2)
     self.assertTrue(response[0] < .5)
     self.assertTrue(response[1] > .5)
Example #18
def get_buzz_comments(buzz_id, response_dict):
    buzz_url = 'http://www.buzzfeed.com/api/v1/comments/%s' % (buzz_id)
    r = requests.get(buzz_url)
    comments = r.json()['comments']
    total = r.json()['total_count']
    count = len(comments)
    page = 2  # start looping on the second page

    while count < total:
        buzz_url = 'http://www.buzzfeed.com/api/v1/comments/%s?p=%s' % (buzz_id, str(page))
        r = requests.get(buzz_url)
        comments.extend(r.json()['comments'])
        count = len(comments)
        page += 1

    for comment in comments:
        score = indicoio.sentiment(comment['blurb'])
        if (score < .4):
            response_dict['negative'] += 1
        elif (score < .6):
            response_dict['neutral'] += 1
        else:
            response_dict['positive'] += 1
        response_dict['total'] += 1

    return response_dict
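Note that the function only increments the counters, so the caller has to pass a pre-initialised dict; a minimal sketch (the buzz id is made up):

counts = {'negative': 0, 'neutral': 0, 'positive': 0, 'total': 0}
counts = get_buzz_comments('1234567', counts)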
Example #19
def convertData(x):
    data = x
    name = data['name']

    lang = indicoio.language(data['words'])

    # Ties count as English.
    if lang['English'] >= lang['Spanish']:
        language = 'english'
    else:
        language = 'spanish'

    sent = round(indicoio.sentiment(data['words']), 2)
    words = data['words'].split()
    numwords = len(words)
    totalL = 0
    for j in words:
        totalL += len(j)

    meanLW = round((totalL / numwords), 2)

    # Encoding total length
    # Values:
    # 0 : short  (<= 20)
    # 1 : medium (> 20, <= 40)
    # 2 : long   (> 40)
    if totalL > 40:
        totalL = 2
    elif totalL > 20:
        totalL = 1
    else:
        totalL = 0

    # Encoding numwords
    if numwords > 10:
        numwords = 2
    elif numwords >= 5:
        numwords = 1
    else:
        numwords = 0

    # Encoding sentiment
    # Values:
    # 1 : Positive
    # 2 : Negative
    # 0 : Neutral
    if sent > 0.6:
        sent = 1
    elif sent < 0.4:
        sent = 2
    else:
        sent = 0

    if language == 'english':
        language = 1
    else:
        language = 0
        sent = 0

    cad = [name, language, totalL, meanLW, sent, numwords]
    return cad
Example #20
def tweetCategory(getDF=False,insta=False):
    '''

    :return: (text_classAndSenti,text_list)
        text_classAndSenti (selected samples,2): (:,0)=1 indicates that this tweets 1) is related to food 2) is positive.
                                                (:,0)=0 otherwise
        text_list: original lists of tweets
    '''
    FOOD=["beer","cooking","general_food","vegan","vegetarian","wine","nutrition"]
    if not(insta):
        os.system('curl "https://boiling-fire-6168.firebaseio.com/twitter_data.json?print=pretty" > twitter_data.json')
        with open('twitter_data.json') as json_data:
            data = json.load(json_data)
    else:
        os.system('curl "https://boiling-fire-6168.firebaseio.com/twitter_data.json?print=pretty" > instagram_data.json')
        with open('instagram_data.json') as json_data:
            data = json.load(json_data)
    # JSON -> list of texts
    df = pd.DataFrame.from_dict(data)
    df = df.transpose()
    print(df.info())
    lat = df['coordinate_1']
    lng = df['coordinate_2']
    in_toronto = []
    for idx,x  in enumerate(lat):
        in_toronto = in_toronto + [geo_results.is_in_circle(geo_results.TORONTO.latitude, geo_results.TORONTO.longitude, geo_results.radius, lng[idx], lat[idx])]
    print in_toronto
    df['in_toronto'] = in_toronto
    df = df[df['in_toronto'] == 1]
    print df
    text_list = df['text'].values.tolist()

    # Get topics
    indicoio.config.api_key = 'dfd155c0984bed63c78aef5ce44763bf'
    topics = indicoio.text_tags(text_list,top_n = 5)

    def topIncluded(topics,cat):
        # test if at least one element in topics is in cat
        for i in topics:
            if i in cat:
                return True
        return False

    # get sentiment analysis
    text_classAndSenti = np.zeros((len(text_list),2))
    text_classAndSenti[:,1] = indicoio.sentiment(text_list)

    # put text into classes (Food is 1; otherwise, 0)
    for i,t in enumerate(topics):
        top_topics = t.keys()
        if topIncluded(top_topics,FOOD) and text_classAndSenti[i,1]>.5:
            text_classAndSenti[i,0] = 1
        else:
            text_classAndSenti[i,1] = 0 # clear sentiment info of non-food tweets

    if getDF:
        return text_classAndSenti,text_list,df
    else:
        return text_classAndSenti,text_list
Example #21
	def get_avg_sentiment(comment):
		comment_sentences = filter(lambda string: any(c.isalpha() for c in string), comment.split('. '))
		sentiments = [indicoio.sentiment(sentence) for sentence in comment_sentences]
		try:
			avg_sentiment = sum(sentiments) / len(sentiments)
		except ZeroDivisionError:
			return 0.5
		return avg_sentiment
Example #22
def indico_sentiment(filename):
    import indicoio
    from nltk import tokenize
    indicoio.config.api_key = 'my_key'
    with open(filename, "r") as myfile:
        text = myfile.read().replace('\n', ' ')
        sentences = tokenize.sent_tokenize(text)
        indico_sent = indicoio.sentiment(sentences)
    return indico_sent
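A usage sketch (the file name is hypothetical); because the function passes a list of sentences, indicoio returns one score per sentence:

scores = indico_sentiment('speech.txt')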
Example #23
def indi_sentimentR(review):
	total = 0
	count = 0
	for sentence in review:
		total += indicoio.sentiment(sentence)
		count += 1
	average = total/count
	return average
Example #24
def indico_sentiment(filename):
    import indicoio
    from nltk import tokenize
    indicoio.config.api_key = 'my_key'
    with open(filename, "r") as myfile:
        text = myfile.read().replace('\n', ' ')
        sentences = tokenize.sent_tokenize(text)
        indico_sent = indicoio.sentiment(sentences)
    return indico_sent
Example #25
    def getResult(strArray):
        sent = indicoio.sentiment(strArray)
        pers = indicoio.personality(strArray)
        poli = indicoio.political(strArray)
        keyw = indicoio.keywords(strArray)

        result = dict([("sentiment", sent), ("personality", pers), ("political", poli), ("keywords", keyw)])
        return result
Example #26
    def analyze(self, text):
        indicoio.config.api_key = '051003141c2626e19f0acf007730258f'
        value = indicoio.sentiment(text)
        if value == 0.5:
            return "neutral"
        if value > 0.5:
            return "pos"
        else:
            return "neg"
Example #27
    def analyse_sentiment(cls, text):
        try:
            result = indicoio.sentiment(text)
            sa_result = cls.map_sentiment(result)

            return sa_result
        except Exception as ex:
            print("Error occurred during analysing sentiments: " + str(ex))
            return None
Example #28
def test_key():
    with open(keyPath, 'r') as c:
        keycheck = c.read()
    try:
        import indicoio
        indicoio.config.api_key = keycheck
        indicoio.sentiment("I love writing code!")
        return True
    except Exception as e:
        print("Indico API key missing/invalid")
        print()
        print(
            'Redditor text can be collected with reddit_persona.go(USERNAME), but it will not be analyzed'
        )
        print()
        print('To enter your indico API key, use reddit_persona.new_key( )')
        print()
        return False
Example #29
def spanish_sentimiento(texto,positivos,neutrales,negativos):
	resultado = indicoio.sentiment(texto, language='spanish')
	if resultado >= 0.6:
		positivos += 1
	elif resultado >= 0.5:
		neutrales += 1
	else:
		negativos += 1
	return positivos,neutrales,negativos
Example #30
	def trending(self):
		'''trending sentiment'''

		trendArray = []

		for trend in Twitter().trends(cached=False):
			trendArray.append([trend,indicoio.sentiment(trend)])

		return trendArray
Example #31
	def sArray(self):
		'''calculate sentiment '''
		sentimentArray = []

		for tweet in self.t.search(self.topic, start=self.i, count=self.tweetCount):
			sentimentArray.append(indicoio.sentiment(tweet.text))
			self.i = tweet.id

		return sentimentArray
Example #32
	def fArray(self):
		'''full array including tweet and sentiment'''	
		fullArray = []

		for tweet in self.t.search(self.topic, start=self.i, count=self.tweetCount):
			fullArray.append([tweet.text,indicoio.sentiment(tweet.text)])
			self.i = tweet.id

		return fullArray
Example #33
def find_sentiment_values(company, startDate, endDate):
	"""
	Takes in the company name, start date, and end date. 
	Looks into the pickle file and runs sentiment analysis on each entry.

	Weights those sentiment values that places more emphasis on the 1st few entries
	Assigns a list of those results to a variable(final_sentiment_list), and returns that variable
	"""
	#since we're calling get_store_googleresults(), 
	#we don't need to run getting_date_tuples() because get_store_googleresults() 
	#already calls that function
	#daily_google_results gets the gigantic list generated by get_store_googleresults()
	#reminder that list represents list of lists with each entry representing 
	#a day's worth of google results
	daily_google_results = get_store_googleresults(company, startDate, endDate)
	
	# for debugging, so we can double check that the text matches with URL's results
	# print daily_google_results
	
	#list of multiple daily_value_list(s, plural)
	overall_value_list = [] 
	for day_results in daily_google_results:

		#For incrementing purposes, assigned here because it should reset for each day
		total_num_descriptions = len(day_results) - 1
		updating_value = 0 

		#sentiment values for one day, placed here because I want it to reset
		daily_value_list = [] 

		#Looping to store these sentiment values, 
		#using a weighting system that's dependent on total_num_descriptions
		#updating_value may be unnecessary, but helpful for us to store & see
		for individual_result in day_results:
			num = indicoio.sentiment(individual_result) * total_num_descriptions
			updating_value += 1
			total_num_descriptions -= 1
			daily_value_list.append(num)

		overall_value_list.append(daily_value_list)
	

	#final_sentiment_list is a list that has a sentiment value for each day-index
	final_sentiment_list = []

	#The loop for averaging all the values for one day. 
	for a_list in overall_value_list:
		summation = 0
		for weighted_sentiment in a_list:

			summation += weighted_sentiment
		final_average_for_day = summation / 45 
		#note: 45 represents the # of results if there were 9 of the 1st google result, 8 of the 2nd google result, etc.
		
		final_sentiment_list.append(final_average_for_day)
	return final_sentiment_list
Example #34
def daily_sentiments_function():
	#Changing the directory so that the sentiment pickle file is stored with all the google results 
	import os

	if company in ['El_Pollo_Loco', 'Chipotle']:
		year = '2015'
	else:
		year = '2010'
	print year
	path = "/home/anne/DataAnalysis/{}/{}_headlines".format(company, year)
	# Check current working directory.
	retval = os.getcwd()
	print "Current working directory %s" % retval
	# Now change the directory
	os.chdir( path )
	# Check current working directory.
	retval = os.getcwd()
	print "Directory changed successfully %s" % retval

	daily_sentiments_dictionary = {}
	for date in complete_list_of_dates:
		year = date[2]
		month = date[0]
		day = date[1]
		print month, day, year


		#loading the results for interpretation
		fin1 = open('{}_{}_{}_{}_headlines.pickle'.format(company, month, day, year))
		list_of_results = pickle.load(fin1)
		fin1.close()

		# For storing the sentiments, 
		sentiments = []
		# sentiment_list_length = len(sentiments)

		#looping and appending sentiment results to the list
		for result in list_of_results:
			sentiments.append(indicoio.sentiment(result))
		sentiment_list_length = len(sentiments)


		sentiment_summation = 0

		#finding the average sentiment for this particular day
		x = sentiment_list_length + 1
	denominator_for_averaging = x * (x + 1) / 2  # triangular sum, ordered to avoid integer truncation
		for i in range(sentiment_list_length):
			print x
			current_sentiment_weighted_value = x * sentiments[i]
			x -= 1
			print x
			sentiment_summation += current_sentiment_weighted_value
			sentiment_avg = sentiment_summation/denominator_for_averaging
			daily_sentiments_dictionary[month, day, year] = sentiment_avg
	return daily_sentiments_dictionary
Example #35
    def respond_from_waiting(self, message, tags):
        """Decide what state to go to from the "waiting" state.

        Parameters:
            message (str): The incoming message.
            tags (Mapping[str, int]): A count of the tags that apply to the message.

        Returns:
            str: The message to send to the user.
        """
        if 'greeting' in tags:
            return self.go_to_state('main_question')
        elif ('capital punishment' in tags or 'death penalty' in tags) and 'hello' not in tags:
            if indicoio.sentiment(message) >= .5:
                # TODO: 'pose_topic' is not implemented as a state yet
                return self.go_to_state('pose_topic')
            else:
                return self.finish('agree')
        else:
            # TODO: gibberish input is not handled correctly
            return self.finish('confused')
Example #36
    def get_text_analyze(self):
        if self._type == "bylib":
            text = T(self._text)
            print("Text analyze by lib : {}".format(text.polarity))

            # analyze by word, e.g. Good : 1.0
            # for w in text.words:
            #     print("{:<16}{:>2}".format(w, w.polarity))
        else:
            indicoio.config.api_key = '799b2dbda4132e1553a94467eb0e890f'
            print("Text analyze by api: {}".format(indicoio.sentiment(self._text, language='ru')))
Example #37
def indi_sentimentR(review):
	total = 0
	count = 0
	for sentence in review:
		total += indicoio.sentiment(sentence)
		count += 1
	try:
		average = total/count
		return average
	except ZeroDivisionError:
		print("Empty review.")
Example #38
def daily_sentiments_function():
    #Changing the directory so that the sentiment pickle file is stored with all the google results
    import os

    if company in ['El_Pollo_Loco', 'Chipotle']:
        year = '2015'
    else:
        year = '2010'
    print year
    path = "/home/anne/DataAnalysis/{}/{}_Google_Results".format(company, year)
    # Check current working directory.
    retval = os.getcwd()
    print "Current working directory %s" % retval
    # Now change the directory
    os.chdir(path)
    # Check current working directory.
    retval = os.getcwd()
    print "Directory changed successfully %s" % retval

    daily_sentiments_dictionary = {}
    for date in complete_list_of_dates:
        year = date[2]
        month = date[0]
        day = date[1]
        print month, day, year

        #loading the results for interpretation
        fin1 = open('{}_{}_{}googleresults.pickle'.format(month, day, year))
        list_of_results = pickle.load(fin1)
        fin1.close()

        # For storing the sentiments,
        sentiments = []
        # sentiment_list_length = len(sentiments)

        #looping and appending sentiment results to the list
        for result in list_of_results:
            sentiments.append(indicoio.sentiment(result))
        sentiment_list_length = len(sentiments)

        sentiment_summation = 0

        #finding the average sentiment for this particular day
        x = sentiment_list_length + 1
        denominator_for_averaging = x * (x + 1) / 2  # triangular sum, ordered to avoid integer truncation
        for i in range(sentiment_list_length):
            print x
            current_sentiment_weighted_value = x * sentiments[i]
            x -= 1
            print x
            sentiment_summation += current_sentiment_weighted_value
            sentiment_avg = sentiment_summation / denominator_for_averaging
            daily_sentiments_dictionary[month, day, year] = sentiment_avg
    return daily_sentiments_dictionary
Example #39
def sentiment_analysis_Spanish():
    fname = 'yoga.json'
    freq = 0
    sum_sent = 0
    with open(fname, 'r') as f:
        for line in f:
            tweet = json.loads(line)
            if tweet['lang'] == 'es':
                freq += 1
                sum_sent += indicoio.sentiment(tweet['text'])

    return sum_sent, freq
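The function returns the sentiment sum and the tweet count, so the caller computes the mean; a sketch:

total, n = sentiment_analysis_Spanish()
avg = total / n if n else None  # guard against zero Spanish tweets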
Example #40
def main():
	indicoio.config.api_key = '123273ff84fe220626891873d499ea07'
	indicoio.config.language = 'russian'

	# results:
	#0.94399955814
	#print indicoio.sentiment('хороший кот', language='russian')
	#0.777086528524
	#print indicoio.sentiment('постановление правительство', language='russian')
	print indicoio.sentiment('хороший', language='russian')
	print indicoio.sentiment('правительство', language='russian')
	print indicoio.sentiment('кот', language='russian')

	# Early return for testing: the sentiment_hq batch below is skipped.
	return

	res = indicoio.sentiment_hq([
		'хороший кот',
		'постановление правительство',
		'состоятельный оказаться',
		'коррупционный правонарушение',
		'конфликт интерес',
		'первое квартал'
	])

	for r in res:
		print r
Example #41
def getSentiment(politic):
    search_results = api.search(q=politic, count=100)

    result_tweets = []

    for result in search_results:
        result = result.text.encode('ascii', errors='ignore')
        result_tweets.append(result)

    sentiment = indicoio.sentiment(result_tweets)
    pos = int(100 * (sum(sentiment) / len(sentiment)))
    neg = int(100 - pos)
    return pos, neg
Example #42
 def SentimentOnText(self, data):
     #Values greater than 0.5 indicate positive sentiment, while values less than 0.5 indicate negative sentiment.
     posneg = indicoio.sentiment(data)
     if posneg < .20:
         return "Very Negative", posneg
     elif posneg >= .20 and posneg < .40:
         return "Negative", posneg
     elif posneg >= .40 and posneg < .60:
         return "Neutral", posneg
     elif posneg >= .60 and posneg < .80:
         return "Positive", posneg
     elif posneg >= .80:
         return "Very Positive", posneg
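A usage sketch, assuming an instance named `analyzer` (hypothetical); the method returns a (label, score) pair:

label, score = analyzer.SentimentOnText("The service was wonderful.")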
Example #43
    def post(self):

        data = json.loads(self.request.body)
        api = data.get('api')
        data = data.get('data')

        if api == 'sentiment':
            result = indicoio.sentiment(data)
        else:
            result = [aggregate_score(scores, api) for scores in indicoio.text_tags(data)]

        self.write(json.dumps(result))
        self.finish()
Example #44
def toDict(tweets, maxNum=MAXTERM):
    i = 0
    myDict = {}
    for tweet in tweets:
        if i < maxNum:
            if tweet['text'] in myDict:
                entry = myDict[tweet['text']]
                if 'coordinates' in tweet and tweet['coordinates'] is not None:
                    entry['location'].append(tweet['coordinates'])
                    i += 1
                elif tweet['user']['location'] != '':
                    entry['location'].append(tweet['user']['location'])
                    i += 1
            else:  # create new entry w/ sentiment + location
                if 'coordinates' in tweet and tweet['coordinates'] is not None:
                    myDict[tweet['text']] = {'sentiment': indicoio.sentiment(tweet['text']), 'location': [tweet['coordinates']]}
                    i += 1
                elif tweet['user']['location'] != '':
                    myDict[tweet['text']] = {'sentiment': indicoio.sentiment(tweet['text']), 'location': [tweet['user']['location']]}
                    i += 1
        else:
            break
    return myDict
Example #45
    def analyse_sentiments_batch(cls, texts):
        try:
            print("Performing batch sentiment analysis for [" + str(len(texts)) + "] entries...")
            result = indicoio.sentiment(texts)
            sa_result = cls.map_sentiments_batch(result)

            # check number of items
            if len(sa_result) != len(texts):
                raise Exception('The input and output list size do not match!')

            return sa_result
        except Exception as ex:
            print("Error occurred during analysing sentiments: " + str(ex))
            return None
Example #46
def review_to_sentiment(review):
    '''
    Function to split a review into parsed sentences and get sentiment
    This uses one function call for each review... good for testing, not
    good for production, since you have a limited number of API calls
    '''
    # 1. Use Indico to split the review into sentences, with sentiment
    results = indicoio.sentiment(review, split='sentence')
    # 2. Loop over each sentence
    sums = 0
    for item in results:
        sums += item['results']
    avg_sentiment = sums / len(results)
    return avg_sentiment
Example #47
    def print_links(host_name, url):
        global page_count
        page_count = 0
        try:
            # Get pretty html
            html = urlopen(url).read()
            soup = BeautifulSoup(html, 'html.parser')
            sentiment = indicoio.sentiment(soup.title.get_text())

            image_urls = []
            # Iterate through images
            for img in soup.find_all('img'):
                img_src = img.get('src')
                if img_src not in image_urls and img_src.startswith('http'):
                    page_count += 1
                    if page_count > page_limit:
                        break
                    image_urls.append(img_src)
                    response = api.tag_image_urls(img_src)
                    tags = response['results'][0]['result']['tag']['classes']
                    print url + " --- " + img_src
                    for tag in tags:
                        print tag
                        topic = json.loads(urlopen(str.format("https://api.projectoxford.ai/luis/v1/application?id=cca1f963-ab81-4771-a661-735d1544bd0f&subscription-key=dfefa88b64064940b18ba7603c7d9650&q={}",quote_plus(tag))).read())
                        if len(topic['intents']) > 0 and topic['intents'][0]['intent'] is not None:
                            print "*** Industry - " + topic['intents'][0]['intent'] + " ***** Sentiment Level - " + str(sentiment)
                            if topic['intents'][0]['intent'] == 'technology':
                                industry_id = 2
                            elif topic['intents'][0]['intent'] == 'agriculture':
                                industry_id = 1
                            elif topic['intents'][0]['intent'] == 'fashion':
                                industry_id = 3
                            elif topic['intents'][0]['intent'] == 'entertainment':
                                industry_id = 4
                            else:
                                industry_id = 4
                            insight = Insights(industry_id=industry_id, score=sentiment)
                            insight.save()

            # Iterate through links - Recursion
            for link in soup.find_all('a'):
                href = link.get('href')
                if href is not None and validators.url(href) and href not in visited_links:
                    visited_links.append(href)
                    if host_name in href:
                        print_links(host_name, href)

        except Exception as ex:
            pass
Example #48
def rate():
	msg = ""
	form = MyForm(csrf_enabled=False)
	print(form)
	if request.method == "POST":
		if len(request.form['text']) > 20:
			if sentiment(request.form['text']) == "positive":
				update_course(request.form['course_id'], int(request.form['difficulty']) - (0 + int(request.form['difficulty'])) / 5.0)
			else:
				update_course(request.form['course_id'], int(request.form['difficulty']) + (5 - int(request.form['difficulty'])) / 5.0)
		update_course(request.form['course_id'], int(request.form['difficulty']))
		msg = "Thank you for your submission!"
	else:
		msg = "Enter your submission:"
	return render_template('rate.html', form=form, Course=Course, msg=msg)
Example #49
def geo_data_analysis(search_term):
    """analyzes the sentiment of tweets and return the average value for each region 
    """
    map_pol = dict()

    #A list of tweet texts from each region
    NE_text = geo_collect_tweets(search_term,42.781158,-71.398729,'250mi')
    S_text = geo_collect_tweets(search_term,33.000000,-84.000000,'500mi')
    MW_text = geo_collect_tweets(search_term,40.000000,-100.000000,'1000mi')
    W_text = geo_collect_tweets(search_term,35.000000,-120.000000,'250mi')
   
    #A list of sentiment values for the tweets from each region 
    NE_sentiment_values = sentiment(NE_text)
    S_sentiment_values = sentiment(S_text)
    MW_sentiment_values = sentiment(MW_text)
    W_sentiment_values = sentiment(W_text)

    #find the average sentiment value for each region
    NE_avg = sum(NE_sentiment_values)/len(NE_sentiment_values)
    S_avg = sum(S_sentiment_values)/len(S_sentiment_values)
    MW_avg = sum(MW_sentiment_values)/len(MW_sentiment_values)
    W_avg = sum(W_sentiment_values)/len(W_sentiment_values)

    return [W_avg,S_avg,NE_avg,MW_avg]
Example #50
    def getOverallResult(self, strArray):

        result = indicoio.personality(strArray)

        extraversion = []
        openness = []
        agreeableness = []
        conscientiousness = []

        for things in result:
            extraversion.append(things["extraversion"])
            openness.append(things["openness"])
            agreeableness.append(things["agreeableness"])
            conscientiousness.append(things["conscientiousness"])

        result = indicoio.political(strArray)

        libertarian = []
        green = []
        liberal = []
        conservative = []

        for things in result:
            libertarian.append(things["Libertarian"])
            green.append(things["Green"])
            liberal.append(things["Liberal"])
            conservative.append(things["Conservative"])

        result = indicoio.sentiment(strArray)

        t = [
            result,
            libertarian,
            green,
            liberal,
            conservative,
            extraversion,
            openness,
            agreeableness,
            conscientiousness,
        ]

        return t
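A usage sketch (instance and input names are hypothetical); the result is a fixed-order list:

(sent, libertarian, green, liberal, conservative,
 extraversion, openness, agreeableness, conscientiousness) = obj.getOverallResult(texts)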