def respond_from_waiting(self, message, tags):
    """Decide what state to go to from the "waiting" state.

    Parameters:
        message (str): The incoming message.
        tags (Mapping[str, int]): A count of the tags that apply to the message.

    Returns:
        str: The message to send to the user.
    """
    if 'greeting' in tags:
        return self.go_to_state('main_question')
    # Parenthesized: `and` binds tighter than `or`, so the unparenthesized
    # original matched 'capital punishment' even when 'hello' was present.
    if (('capital punishment' in tags or 'death penalty' in tags)
            and 'hello' not in tags):
        # One API call instead of two for the same message (the original
        # scored the message once in `if` and again in `elif`).
        score = indicoio.sentiment(message)
        if score < .5:
            return self.go_to_state('pose_topic')
        # score >= .5 covers every remaining value, so no third branch.
        return self.finish('agree')
    # Unrecognized input falls through to the generic response.
    return self.finish('confused')
def geo_data_analysis(self, search_term):
    """Finds the average positive/negative sentiment of tweets for each region.

    Params: search_term - string term used to search tweets
    Returns: list of four doubles (average polarity for West, South,
    Northeast, and Midwest)
    """
    # (latitude, longitude, radius) describing each region's sampling circle.
    regions = {
        'NE': (42.781158, -71.398729, '250mi'),
        'S': (33.000000, -84.000000, '500mi'),
        'MW': (40.000000, -100.000000, '1000mi'),
        'W': (35.000000, -120.000000, '250mi'),
    }
    averages = {}
    for key, (lat, lon, radius) in regions.items():
        # Collect raw tweet text for the circle, score it, then average.
        texts = self.geo_collect_tweets(search_term, lat, lon, radius)
        scores = sentiment(texts)
        averages[key] = sum(scores) / len(scores)
    return [averages['W'], averages['S'], averages['NE'], averages['MW']]
def sentiment_from_text(text):
    """Blend emoji-based and API-based sentiment for a single message.

    Returns 0.5 (neutral) when no sentiment could be computed at all.
    """
    found_emojis = re.findall(emoji.get_emoji_regexp(), text)
    emoji_score = sentiment_from_emojis(found_emojis)
    logger.debug('emoji sentiment: ' + str(emoji_score))
    total = emoji_score
    translated = text
    stripped = remove_redundant_symbols(text)
    logger.debug('text without emoji: ' + stripped)
    if stripped:
        # Randomly choose between scoring the original text or a translation,
        # weighted by the configured language proportions.
        keep_original = random.choices([True, False], weights=language_proportions)[0]
        try:
            if keep_original:
                total += indicoio.sentiment(stripped)
            else:
                translated = translator.translate(stripped)
                total += indicoio.sentiment(translated)
            if emoji_score > 0:
                # Both the emoji and text sources contributed: average them.
                total /= 2
        except Exception as e:
            print(e)
    logger.debug([text, translated, total])
    if total == 0:
        # if failed to compute sentiment
        return 0.5
    return total
def sentiment(screen_name, language):
    """Add length/sentiment/handle columns to a user's prepped tweets and
    export a 50-row trial CSV plus the full CSV."""
    df = pd.read_csv('./Data/%s_prep.csv' % screen_name, encoding='utf8')
    indicoio.config.api_key = apikey['client_key']

    def annotate(frame):
        # Attach text length, API sentiment score, and the owning handle.
        frame['length'] = frame['full_text'].apply(len)
        frame['sentiment'] = indicoio.sentiment(frame['full_text'].tolist(), language=language)
        frame['handle'] = screen_name

    # Trial run on a small sample first, then the full dataset.
    test = df.sample(50)
    annotate(test)
    annotate(df)

    # Export both dataframes to csv
    test.to_csv('./Data/%s_test.csv' % screen_name, encoding='utf8', index=False)
    df.to_csv('./Data/%s_final.csv' % screen_name, encoding='utf8', index=False)
def use_indico(train): ''' Batch reviews in groups of 1000 and send them to the Indico API to get sentiment results. Return the list of results, as well as the test features and test targets, to be used in testing the results. Isolate the fetching of the sentiment results from Indico from the use of those results, so that if something goes wrong, we don't need to fetch again. No need to clean and vectorize training reviews, or train a random forest on them, because Indico has done all of that already. Just strip out html. ''' Xtrain, Xtest, ytrain, ytest = test_train(train) print "Cleaning html from the test set of movie reviews..." clean_test_reviews = remove_html(Xtest) print "Running Indico queries..." print "This will take a while..." # process the reviews in batches of 1000, then finish with the leftovers, if any sentiment_lists = [] for i in range(1000, len(Xtest), 1000): print "Processing reviews {0} to {1}...".format(i - 1000, i - 1) batch = clean_test_reviews[i - 1000:i] results = indicoio.sentiment(batch, split='sentence') sentiment_lists += results if len(sentiment_lists) < len(Xtest): print "Processing final reviews {0} to {1}...".format( len(sentiment_lists), len(Xtest)) batch = clean_test_reviews[len(sentiment_lists):] results = indicoio.sentiment(batch, split='sentence') sentiment_lists += results print "{0} Indico sentiments returned".format(len(sentiment_lists)) return sentiment_lists, Xtest, ytest
def use_indico(train):
    '''
    Batch reviews in groups of 1000 and send them to the Indico API to get
    sentiment results. Return the list of results, as well as the test
    features and test targets, to be used in testing the results.
    Isolate the fetching of the sentiment results from Indico from the use
    of those results, so that if something goes wrong, we don't need to
    fetch again. No need to clean and vectorize training reviews, or train
    a random forest on them, because Indico has done all of that already.
    Just strip out html.
    '''
    # NOTE(review): this is an exact duplicate of another use_indico
    # definition in this module; whichever is defined later wins at import
    # time -- one of the two should be deleted.
    Xtrain, Xtest, ytrain, ytest = test_train(train)
    print "Cleaning html from the test set of movie reviews..."
    clean_test_reviews = remove_html(Xtest)
    print "Running Indico queries..."
    print "This will take a while..."
    # process the reviews in batches of 1000, then finish with the leftovers, if any
    sentiment_lists = []
    for i in range(1000,len(Xtest),1000):
        print "Processing reviews {0} to {1}...".format(i-1000, i-1)
        batch = clean_test_reviews[i-1000:i]
        results = indicoio.sentiment(batch, split='sentence')
        sentiment_lists += results
    # Remaining reviews that did not fill a complete batch of 1000.
    if len(sentiment_lists)<len(Xtest):
        print "Processing final reviews {0} to {1}...".format(len(sentiment_lists),len(Xtest))
        batch = clean_test_reviews[len(sentiment_lists):]
        results = indicoio.sentiment(batch, split='sentence')
        sentiment_lists += results
    print "{0} Indico sentiments returned".format(len(sentiment_lists))
    return sentiment_lists, Xtest, ytest
def test_posneg(self):
    """Negative text scores below 0.5 and positive text above it."""
    for phrase, is_positive in (("Worst song ever.", False),
                                ("Best song ever.", True)):
        score = sentiment(phrase)
        self.assertTrue(isinstance(score, float))
        if is_positive:
            self.assertTrue(score > 0.5)
        else:
            self.assertTrue(score < 0.5)
def on_data(self, data):
    """Handle one streamed tweet: extract fields, score sentiment, and
    append a semicolon-separated row to output2.txt.

    Returns True so the stream stays open.
    """
    # Parse the payload once instead of re-parsing it for every field
    # (the original called json.loads(data.strip()) five times).
    tweet = json.loads(data.strip())
    text = tweet['text']
    point = tweet['coordinates']
    if point:
        longitud = point['coordinates'][0]
        latitud = point['coordinates'][1]
    else:
        longitud = ''
        latitud = ''
    created_at = tweet['created_at']
    # NOTE(review): this raises when 'place' is null -- confirm the stream
    # filter guarantees a place before relying on it.
    place = tweet['place']['name']
    hashtag = re.findall(r"#(\w+)", text)
    # Only single-line tweets are recorded, keeping one CSV row per tweet.
    if len(text.split('\n')) == 1:
        # NOTE(review): r'http*' matches "htt" followed by zero or more "p"s;
        # presumably "starts with http" was intended -- confirm before changing.
        if re.match(r'http*', text):
            sent = 0
        else:
            sent = indicoio.sentiment(text.strip())
        varcsv = str(created_at + ';' + text.strip() + ';' + ''.join(hashtag) + ';' + place + ';' + str(latitud) + ';' + str(longitud) + ';' + str(sent)).strip() + '\n'
        print(varcsv)
        # Append the row so it can be post-processed later.
        with open("output2.txt", "a") as text_file:
            text_file.write(varcsv)
    return True
def getIndico(news_file):
    """Score each headline's sentiment and write an annotated CSV copy."""
    output_file_name = "indico_" + news_file
    news_data = pd.read_csv(os.path.join(input_dir, news_file),
                            names=["time", "headline"])
    # One API call per headline, stored alongside the original columns.
    news_data['indico_score'] = news_data['headline'].apply(indicoio.sentiment)
    news_data.to_csv(os.path.join(output_dir, output_file_name), index=False)
def test_specify_version(self):
    """Version "1" accepts a batch and returns scores in input order."""
    scores = sentiment(['Worst song ever', 'Best song ever'], version="1")
    self.assertIsInstance(scores, list)
    self.assertEqual(len(scores), 2)
    negative_score, positive_score = scores
    self.assertTrue(negative_score < .5)
    self.assertTrue(positive_score > .5)
def sentiment_sliding(messages, window=1000, shift=20): allwords = [] data = {} for m in messages: if "\\Sent" not in m.get("folders", tuple()): continue if not m.get("body") or not m["body"].get("content"): continue allwords.append(EmailReplyParser.parse_reply(m["body"]["content"])) allwords = " ".join(allwords) allwords = allwords.encode("ascii", "ignore") allwords = allwords.split() current_window = 0 next_window = window print "number of words", len(allwords) while True: if len(allwords) < next_window: print "sliding-sentiment reached end at lengths:%s" % len(allwords) break print "sliding-sentiment start:%s end:%s" % (current_window, next_window) data[current_window] = " ".join(allwords[current_window:next_window]) data[current_window] = indicoio.sentiment(data[current_window]) print data[current_window] current_window += shift next_window += shift return data
def gradeMultiple(inputList):
    "Call this function when multiple lines need to be checked"
    # Bucket each post by whether its sentiment score is positive (> 0.5).
    pos_posts, neg_posts = [], []
    for post in inputList:
        (pos_posts if indicoio.sentiment(post) > 0.5 else neg_posts).append(post)
    # Number the posts; the counter runs continuously from the positive
    # bucket into the negative one.
    numbered = []
    counter = 0
    for bucket in (pos_posts, neg_posts):
        labelled = []
        for post in bucket:
            counter += 1
            labelled.append(str(counter) + ". " + post)
        numbered.append(labelled)
    return numbered
def main():
    """Fill in missing 'positiveness' scores for tweets in a JSON file.

    Usage: script <input.json> <output.json>.  Tweets whose positiveness
    is already set are left untouched; failures leave it None for a later
    run.
    """
    if len(sys.argv) != 3:
        return
    inname = sys.argv[1]
    outname = sys.argv[2]
    with open(inname, mode='r') as inFile:
        tweets = json.load(inFile)
    count = 0
    for tweet in tweets:
        if tweet['positiveness'] is None:
            try:
                tweet['positiveness'] = float(
                    indicoio.sentiment(tweet['text'], language=tweet['lang']))
            except Exception:
                # Narrowed from a bare except so Ctrl-C still interrupts;
                # API/network failures just leave the score unset.
                tweet['positiveness'] = None
        count += 1
        # Progress marker every 100 tweets.
        if count % 100 == 0:
            print(count)
    # The original dumped the output file twice back to back; once suffices.
    with open(outname, 'w') as outfile:
        json.dump(tweets, outfile)
def test_posneg(self):
    """The response is a dict keyed exactly by 'Sentiment'."""
    result = sentiment("Worst song ever.")
    self.assertTrue(isinstance(result, dict))
    self.assertEqual(set(['Sentiment']), set(result.keys()))
def get_keywords(query, level):
    """Aggregate keyword scores across tweets matching `query`.

    Returns (dict of lowercase keyword -> summed score above a
    connectivity threshold, mean sentiment over the tweets).
    """
    tweets = get_twitter_data(query)
    texts = [t.text for t in tweets]
    top_n = 5  # can be changed
    # Sum each keyword's probability over all tweets.
    totals = defaultdict(float)
    for per_tweet in indicoio.keywords(texts, version=2, top_n=top_n):
        for word, score in per_tweet.items():
            totals[word] += score
    # only keep edges with high connectivity
    threshold = 0.007 * len(tweets) * math.sqrt(top_n) * level  # can be changed
    final = {word.lower(): score
             for word, score in totals.items() if score > threshold}
    avg_sentiment = numpy.mean(indicoio.sentiment(texts))
    return (final, avg_sentiment)
def getSentiment(jsonInput, word):
    # Builds a JSON string summarising sentiment stats for the first text
    # batch in jsonInput, persists it as a BuzzfeedSearch row, and returns
    # the string on failure.
    # NOTE(review): the success path falls off the end and returns None;
    # only the except path returns jsonStr -- this looks inverted, confirm.
    jsonStr = ""
    try:
        # jsonInput[0][0] is presumably the list of texts to score -- TODO confirm
        sentiments = indicoio.sentiment(jsonInput[0][0])
        keywords = indicoio.keywords(jsonInput[0][0])
        average =0
        above_average = 0
        below_average =0
        # Count how many items fall on each side of the 0.5 neutral line.
        for sentiment in sentiments:
            average+= sentiment
            if (sentiment > 0.5) :
                above_average = above_average+1
            else:
                below_average=below_average+1
        average = average/len(sentiments)
        above_average = float(above_average)/len(sentiments)
        below_average= float(below_average)/len(sentiments)
        most_frequent_words =getFrequentWords(jsonInput)
        # NOTE(review): hand-built JSON; json.dumps on a dict would be safer
        # against quoting/escaping bugs.
        jsonStr = "{\"results\":{\"above_average\":\""+str(above_average)+"\", \"word\":\""+word+"\",\"below_average\" :\""+str(below_average)+"\",\"average\":"+str(average)+"}, \"keywords\": \""+str(keywords)+"\", \"most_frequent_word\":\""
        # Append all but the first frequent word, comma-separated.
        for i in most_frequent_words[1:len(most_frequent_words)]:
            print(i.getKey())
            jsonStr+=i.getKey()+","
        jsonStr+= "\"}"
        # Persist and serialize the result (serialized content is unused here).
        result = BuzzfeedSearch(json=jsonStr, name=word)
        result.save()
        serializer = BuzzfeedSerializer(result)
        content = JSONRenderer().render(serializer.data)
        all_entries = BuzzfeedSearch.objects.all()
    # Python 2 except syntax; any failure returns whatever jsonStr holds.
    except Exception,e:
        return jsonStr
def test_specify_version(self):
    """Batch call against API version "1" keeps the order of inputs."""
    scores = sentiment(['Worst song ever', 'Best song ever'],
                       api_key = self.api_key, version="1")
    self.assertIsInstance(scores, list)
    self.assertEqual(len(scores), 2)
    self.assertTrue(scores[0] < .5)
    self.assertTrue(scores[1] > .5)
def get_buzz_comments(buzz_id, response_dict):
    """Fetch every comment for a BuzzFeed post and tally sentiment buckets
    into response_dict (negative / neutral / positive / total)."""
    base = 'http://www.buzzfeed.com/api/v1/comments/%s'
    first = requests.get(base % buzz_id).json()
    comments = first['comments']
    total = first['total_count']
    page = 2  # start looping on the second page
    # Page through until every comment has been collected.
    while len(comments) < total:
        url = base % buzz_id + '?p=%s' % str(page)
        comments.extend(requests.get(url).json()['comments'])
        page += 1
    # Bucket each comment: < 0.4 negative, < 0.6 neutral, else positive.
    for comment in comments:
        score = indicoio.sentiment(comment['blurb'])
        if score < .4:
            bucket = 'negative'
        elif score < .6:
            bucket = 'neutral'
        else:
            bucket = 'positive'
        response_dict[bucket] += 1
        response_dict['total'] += 1
    return response_dict
def convertData(x):
    """Turn a raw {'name', 'words'} record into an encoded feature row.

    Returns [name, language, total_length_bucket, mean_word_length,
    sentiment_bucket, word_count_bucket].
    """
    data = x
    name = data['name']
    # Pick whichever of English/Spanish the language model favours.  The
    # original left `language` unbound on an exact tie; ties now fall to
    # Spanish.
    lang = indicoio.language(data['words'])
    language = 'english' if lang['English'] > lang['Spanish'] else 'spanish'
    sent = round(indicoio.sentiment(data['words']), 2)
    words = data['words'].split()
    numwords = len(words)
    totalL = sum(len(w) for w in words)
    meanLW = round(totalL / numwords, 2)
    # Encode total length: 0 short (<20), 1 medium, 2 long (>40).
    # Fixed: the original if-chain left totalL unencoded when it was
    # exactly 20; an elif chain buckets every value.
    if totalL < 20:
        totalL = 0
    elif totalL > 40:
        totalL = 2
    else:
        totalL = 1
    # Encode word count: 0 (<5), 1 (5-10), 2 (>10).
    if numwords < 5:
        numwords = 0
    elif numwords > 10:
        numwords = 2
    else:
        numwords = 1
    # Encode sentiment: 1 positive (>0.6), 2 negative (<0.4), 0 neutral.
    if sent > 0.6:
        sent = 1
    elif sent < 0.4:
        sent = 2
    else:
        sent = 0
    if language == 'english':
        language = 1
    else:
        # NOTE(review): the original zeroed the sentiment for Spanish text
        # (the Spanish score was evidently not trusted) -- confirm intent.
        language = 0
        sent = 0
    cad = [name, language, totalL, meanLW, sent, numwords]
    return cad
def tweetCategory(getDF=False,insta=False):
    '''
    :return: (text_classAndSenti,text_list)
    text_classAndSenti (selected samples,2): (:,0)=1 indicates that this tweets 1) is related to food 2) is positive. (:,0)=0 otherwise
    text_list: original lists of tweets
    '''
    FOOD=["beer","cooking","general_food","vegan","vegetarian","wine","nutrition"]
    # Pull the latest dump from Firebase into a local JSON file.
    # NOTE(review): the insta branch downloads the *twitter* endpoint into
    # instagram_data.json -- looks like a copy/paste slip, confirm.
    if not(insta):
        os.system('curl "https://boiling-fire-6168.firebaseio.com/twitter_data.json?print=pretty" > twitter_data.json')
        with open('twitter_data.json') as json_data:
            data = json.load(json_data)
    else:
        os.system('curl "https://boiling-fire-6168.firebaseio.com/twitter_data.json?print=pretty" > instagram_data.json')
        with open('instagram_data.json') as json_data:
            data = json.load(json_data)
    # JSON -> list of texts
    df = pd.DataFrame.from_dict(data)
    df = df.transpose()
    print(df.info())
    lat = df['coordinate_1']
    lng = df['coordinate_2']
    # Keep only tweets geolocated inside the Toronto circle.
    in_toronto = []
    for idx,x in enumerate(lat):
        in_toronto = in_toronto + [geo_results.is_in_circle(geo_results.TORONTO.latitude, geo_results.TORONTO.longitude, geo_results.radius, lng[idx], lat[idx])]
    print in_toronto
    df['in_toronto'] = in_toronto
    df = df[df['in_toronto'] == 1]
    print df
    text_list = df['text'].values.tolist()
    # Get topics
    indicoio.config.api_key = 'dfd155c0984bed63c78aef5ce44763bf'
    topics = indicoio.text_tags(text_list,top_n = 5)
    def topIncluded(topics,cat):
        # test if at least one element in topics is in cat
        for i in topics:
            if i in cat:
                return True
        return False
    # get sentiment analysis
    text_classAndSenti = np.zeros((len(text_list),2))
    text_classAndSenti[:,1] = indicoio.sentiment(text_list)
    # put text into classes (Food is 1; otherwise, 0)
    for i,t in enumerate(topics):
        top_topics = t.keys()
        if topIncluded(top_topics,FOOD) and text_classAndSenti[i,1]>.5:
            text_classAndSenti[i,0] = 1
        else:
            # clear sentiment info of non-food tweets
            text_classAndSenti[i,1] = 0
    if getDF:
        return text_classAndSenti,text_list,df
    else:
        return text_classAndSenti,text_list
def get_avg_sentiment(comment):
    """Average per-sentence sentiment for a comment.

    Returns 0.5 (neutral) when no sentence contains a letter.
    """
    # Keep only "sentences" that contain at least one alphabetic character.
    sentences = filter(lambda s: any(c.isalpha() for c in s), comment.split('. '))
    # The original comprehension shadowed `comment`; score each sentence.
    scores = [indicoio.sentiment(sentence) for sentence in sentences]
    try:
        return sum(scores) / len(scores)
    except ZeroDivisionError:
        # Narrowed from a bare except: only an empty sentence list is the
        # expected failure here.
        return 0.5
def indico_sentiment(filename):
    """Sentence-tokenize the file at `filename` and return per-sentence
    indico sentiment scores."""
    import indicoio
    indicoio.config.api_key = 'my_key'
    # Fixed: the original opened the undefined global `my_file` instead of
    # the `filename` parameter.
    with open(filename, "r") as myfile:
        text = myfile.read().replace('\n', ' ')
    sentences = tokenize.sent_tokenize(text)
    indico_sent = indicoio.sentiment(sentences)
    return indico_sent
def indi_sentimentR(review):
    """Mean indico sentiment over the sentences of a review.

    Raises ZeroDivisionError for an empty review (matches original).
    """
    scores = [indicoio.sentiment(sentence) for sentence in review]
    return sum(scores) / len(scores)
def indico_sentiment(filename):
    """Read the file at `filename`, split it into sentences, and return the
    list of indico sentiment scores, one per sentence."""
    import indicoio
    indicoio.config.api_key = 'my_key'
    # Fixed: `my_file` was an undefined global; the `filename` parameter is
    # what the caller actually supplies.
    with open(filename, "r") as myfile:
        text = myfile.read().replace('\n', ' ')
    sentences = tokenize.sent_tokenize(text)
    indico_sent = indicoio.sentiment(sentences)
    return indico_sent
def getResult(strArray):
    """Run the four indico analyses over `strArray` and bundle the results
    into one dict keyed by analysis name."""
    sent = indicoio.sentiment(strArray)
    pers = indicoio.personality(strArray)
    poli = indicoio.political(strArray)
    keyw = indicoio.keywords(strArray)
    # Fixed: "political" was hard-coded to 4098, silently discarding `poli`.
    result = dict([("sentiment", sent), ("personality", pers),
                   ("political", poli), ("keywords", keyw)])
    return result
def analyze(self, text):
    """Classify `text` as "pos", "neg", or "neutral" via indico sentiment."""
    # NOTE: API key is hard-coded here; consider moving it to config.
    indicoio.config.api_key = '051003141c2626e19f0acf007730258f'
    score = indicoio.sentiment(text)
    if score == 0.5:
        return "neutral"
    return "pos" if score > 0.5 else "neg"
def analyse_sentiment(cls, text):
    """Score `text` with indico and map it through cls.map_sentiment.

    Returns the mapped result, or None on any failure.
    """
    try:
        result = indicoio.sentiment(text)
        sa_result = cls.map_sentiment(result)
        return sa_result
    except Exception as ex:
        # Fixed: concatenating the exception object to a str raised a
        # TypeError that masked the real error.
        print("Error occured during analysing sentiments: " + str(ex))
        return None
def test_key():
    """Return True when the stored indico API key works, else warn the
    user and return False."""
    with open(keyPath, 'r') as c:
        keycheck = c.read()
    try:
        import indicoio
        indicoio.config.api_key = keycheck
        # A throwaway call proves the key is accepted by the service.
        indicoio.sentiment("I love writing code!")
        return True
    except Exception:
        print("Indico API key missing/invalid")
        print()
        print(
            'Redditor text can be collected with reddit_persona.go(USERNAME), but it will not be analyzed'
        )
        print()
        print('To enter your indico API key, use reddit_persona.new_key( )')
        print()
        return False
def spanish_sentimiento(texto, positivos, neutrales, negativos):
    """Classify one Spanish text and bump the matching counter.

    Returns the updated (positivos, neutrales, negativos) tuple.
    """
    # Fixed: the indicoio client takes `language=`, not `lang=` (every
    # other indicoio.sentiment call in this module uses `language=`).
    resultado = indicoio.sentiment(texto, language='spanish')
    if resultado >= 0.6:
        positivos += 1
    elif resultado >= 0.5:
        # Fixed: the original branch (`resultado < 0.6 and 0.5 >= resultado`)
        # labelled scores <= 0.5 as neutral and the 0.5-0.6 band as negative,
        # which is inverted around the 0.5 neutral midpoint.
        neutrales += 1
    else:
        negativos += 1
    return positivos, neutrales, negativos
def trending(self):
    '''trending sentiment'''
    # Pair each trending topic with its sentiment score.
    return [[topic, indicoio.sentiment(topic)]
            for topic in Twitter().trends(cached=False)]
def sArray(self):
    '''calculate sentiment '''
    scores = []
    # Walk the search results, remembering the last tweet id so the next
    # call can resume from where this one stopped.
    for tw in self.t.search(self.topic, start=self.i, count=self.tweetCount):
        scores.append(indicoio.sentiment(tw.text))
        self.i = tw.id
    return scores
def fArray(self):
    '''full array including tweet and sentiment'''
    pairs = []
    # Keep the raw text alongside its score; advance the resume cursor.
    for tw in self.t.search(self.topic, start=self.i, count=self.tweetCount):
        pairs.append([tw.text, indicoio.sentiment(tw.text)])
        self.i = tw.id
    return pairs
def find_sentiment_values(company, startDate, endDate):
    """
    Takes in the company name, start date, and end date.
    Looks into the pickle file and runs sentiment analysis on each entry.
    Weights those sentiment values that places more emphasis on the 1st few entries
    Assigns a list of those results to a variable(final_sentiment_list), and returns that variable
    """
    #since we're calling get_store_googleresults(),
    #we don't need to run getting_date_tuples() because get_store_googleresults()
    #already calls that function
    #daily_google_results gets the gigantic list generated by get_store_googleresults()
    #reminder that list represents list of lists with each entry representing
    #a day's worth of google results
    daily_google_results = get_store_googleresults(company, startDate, endDate)
    # for debugging, so we can double check that the text matches with URL's results
    # print daily_google_results
    #list of multiple daily_value_list(s, plural)
    overall_value_list = []
    for day_results in daily_google_results:
        #For incrementing purposes, assigned here because it should reset for each day
        # NOTE(review): this always measures the FIRST day's result count
        # (daily_google_results[0]), not the current day's -- confirm
        # whether len(day_results) - 1 was intended.
        total_num_descriptions = len(daily_google_results[0]) - 1
        updating_value = 0
        #sentiment values for one day, placed here because I want it to reset
        daily_value_list = []
        #Looping to store these sentiment values,
        #using a weighting system that's dependent on total_num_descriptions
        #updating_value may be unnecessary, but helpful for us to store & see
        for individual_result in day_results:
            # Earlier results get larger weights (weight decreases by 1 each step).
            num = indicoio.sentiment(individual_result) * total_num_descriptions
            updating_value += 1
            total_num_descriptions -= 1
            daily_value_list.append(num)
        overall_value_list.append(daily_value_list)
    #final_sentiment_is a list that has a sentiment value for each day-index
    final_sentiment_list = []
    #The loop for averaging all the values for one day.
    for a_list in overall_value_list:
        summation = 0
        for weighted_sentiment in a_list:
            summation += weighted_sentiment
        # NOTE(review): the fixed divisor 45 assumes exactly 9 weighted
        # results per day (9+8+...+1); days with a different count are
        # mis-averaged -- confirm.
        final_average_for_day = summation / 45 #note: 45 represents the # of results if there were 9 of the 1st google result, 8 of the 2nd google result, etc.
        final_sentiment_list.append(final_average_for_day)
    return final_sentiment_list
def daily_sentiments_function():
    # Builds {(month, day, year): weighted-average sentiment} from the
    # per-day headline pickle files.  Relies on the enclosing scope for
    # `company`, `complete_list_of_dates`, `pickle`, and `indicoio`.
    #Changing the directory so that the sentiment pickle file is stored with all the google results
    import os
    if company in ['El_Pollo_Loco', 'Chipotle']:
        year = '2015'
    else:
        year = '2010'
    print year
    path = "/home/anne/DataAnalysis/{}/{}_headlines".format(company, year)
    # Check current working directory.
    retval = os.getcwd()
    print "Current working directory %s" % retval
    # Now change the directory
    os.chdir( path )
    # Check current working directory.
    retval = os.getcwd()
    print "Directory changed successfully %s" % retval
    daily_sentiments_dictionary = {}
    for date in complete_list_of_dates:
        year = date[2]
        month = date[0]
        day = date[1]
        print month, day, year
        #loading the results for interpretation
        fin1 = open('{}_{}_{}_{}_headlines.pickle'.format(company, month, day, year))
        list_of_results = pickle.load(fin1)
        fin1.close()
        # For storing the sentiments,
        sentiments = []
        # sentiment_list_length = len(sentiments)
        #looping and appending sentiment results to the list
        for result in list_of_results:
            sentiments.append(indicoio.sentiment(result))
        sentiment_list_length = len(sentiments)
        sentiment_summation = 0
        #finding the average sentiment for this particular day
        # Weighted average: the earliest headline gets the largest weight.
        # NOTE(review): (x + 1) * (x/2) uses Python 2 integer division and
        # does not equal the exact sum of the weights applied below --
        # confirm the intended denominator.
        x = sentiment_list_length + 1
        denominator_for_averaging = (x + 1) * (x/2)
        for i in range(sentiment_list_length):
            print x
            current_sentiment_weighted_value = x * sentiments[i]
            x -= 1
            print x
            sentiment_summation += current_sentiment_weighted_value
        sentiment_avg = sentiment_summation/denominator_for_averaging
        daily_sentiments_dictionary[month, day, year] = sentiment_avg
    return daily_sentiments_dictionary
def respond_from_waiting(self, message, tags):
    """Decide what state to go to from the "waiting" state.

    Parameters:
        message (str): The incoming message.
        tags (Mapping[str, int]): A count of the tags that apply to the message.

    Returns:
        str: The message to send to the user.
    """
    if 'greeting' in tags:
        return self.go_to_state('main_question')
    # Parenthesized: `and` binds tighter than `or`, so the unparenthesized
    # original matched 'capital punishment' even when 'hello' was present.
    if (('capital punishment' in tags or 'death penalty' in tags)
            and 'hello' not in tags):
        # One API call instead of two for the same message.
        score = indicoio.sentiment(message)
        if score >= .5:
            # TODO: 'pose_topic' state is not implemented yet
            # ("cant pose topic yet--not a function!").
            return self.go_to_state('pose_topic')
        return self.finish('agree')
    # TODO: gibberish input currently falls through to 'confused'
    # ("gibberish doesnt work!!!!!!!").
    return self.finish('confused')
def get_text_analyze(self):
    """Print a sentiment score for self._text, computed either locally
    (self._type == "bylib") or via the indico API."""
    if self._type == "bylib":
        blob = T(self._text)
        # analyze by word eg : Good : 1.0
        print("Text analyze by lib : {}".format(blob.polarity))
        # for w in blob.words:
        #     print("{:<16}{:>2}".format(w, w.polarity))
    else:
        # NOTE: hard-coded API key.
        indicoio.config.api_key = '799b2dbda4132e1553a94467eb0e890f'
        score = indicoio.sentiment(self._text, language='ru')
        print("Text analyze by api: {}".format(score))
def indi_sentimentR(review):
    """Mean indico sentiment across the sentences of `review`.

    Prints "Empty review." and returns None for an empty review.
    """
    scores = [indicoio.sentiment(sentence) for sentence in review]
    try:
        return sum(scores) / len(scores)
    except ZeroDivisionError:
        print("Empty review.")
def daily_sentiments_function():
    # Builds {(month, day, year): weighted-average sentiment} from the
    # per-day Google-results pickle files.  Relies on the enclosing scope
    # for `company`, `complete_list_of_dates`, `pickle`, and `indicoio`.
    #Changing the directory so that the sentiment pickle file is stored with all the google results
    import os
    if company in ['El_Pollo_Loco', 'Chipotle']:
        year = '2015'
    else:
        year = '2010'
    print year
    path = "/home/anne/DataAnalysis/{}/{}_Google_Results".format(company, year)
    # Check current working directory.
    retval = os.getcwd()
    print "Current working directory %s" % retval
    # Now change the directory
    os.chdir(path)
    # Check current working directory.
    retval = os.getcwd()
    print "Directory changed successfully %s" % retval
    daily_sentiments_dictionary = {}
    for date in complete_list_of_dates:
        year = date[2]
        month = date[0]
        day = date[1]
        print month, day, year
        #loading the results for interpretation
        fin1 = open('{}_{}_{}googleresults.pickle'.format(month, day, year))
        list_of_results = pickle.load(fin1)
        fin1.close()
        # For storing the sentiments,
        sentiments = []
        # sentiment_list_length = len(sentiments)
        #looping and appending sentiment results to the list
        for result in list_of_results:
            sentiments.append(indicoio.sentiment(result))
        sentiment_list_length = len(sentiments)
        sentiment_summation = 0
        #finding the average sentiment for this particular day
        # Weighted average: the first result gets the largest weight.
        # NOTE(review): (x + 1) * (x / 2) uses Python 2 integer division and
        # does not equal the exact sum of the weights applied below --
        # confirm the intended denominator.
        x = sentiment_list_length + 1
        denominator_for_averaging = (x + 1) * (x / 2)
        for i in range(sentiment_list_length):
            print x
            current_sentiment_weighted_value = x * sentiments[i]
            x -= 1
            print x
            sentiment_summation += current_sentiment_weighted_value
        sentiment_avg = sentiment_summation / denominator_for_averaging
        daily_sentiments_dictionary[month, day, year] = sentiment_avg
    return daily_sentiments_dictionary
def sentiment_analysis_Spanish(fname='yoga.json'):
    """Sum indico sentiment over the Spanish tweets in a line-delimited
    JSON file.

    Parameters:
        fname (str): path to the tweet file; defaults to 'yoga.json' so
            existing callers keep their behaviour.

    Returns:
        tuple: (sum of sentiment scores, number of Spanish tweets).
    """
    freq = 0
    sum_sent = 0
    with open(fname, 'r') as f:
        for line in f:
            tweet = json.loads(line)
            # Only Spanish-language tweets contribute to the totals.
            if tweet['lang'] == 'es':
                freq += 1
                sum_sent += indicoio.sentiment(tweet['text'])
    return sum_sent, freq
def main(): indicoio.config.api_key = '123273ff84fe220626891873d499ea07' indicoio.config.language = 'russian' # results: #0.94399955814 #print indicoio.sentiment('хороший кот', language='russian') #0.777086528524 #print indicoio.sentiment('постановление правительство', language='russian') print indicoio.sentiment('хороший', language='russian') print indicoio.sentiment('правительство', language='russian') print indicoio.sentiment('кот', language='russian') return res = indicoio.sentiment_hq([ 'хороший кот', 'постановление правительство', 'состоятельный оказаться', 'коррупционный правонарушение', 'конфликт интерес', 'первое квартал' ]) for r in res: print r
def getSentiment(politic):
    """Search recent tweets about `politic` and return (pos%, neg%) ints."""
    results = api.search(q=politic, count=100)
    # ASCII-fold the tweet bodies before sending them to the API.
    texts = [r.text.encode('ascii', errors='ignore') for r in results]
    scores = indicoio.sentiment(texts)
    pos = int(100 * (sum(scores) / len(scores)))
    neg = int(100 - pos)
    return pos, neg
def SentimentOnText(self, data):
    """Label the sentiment of `data`.

    Values greater than 0.5 indicate positive sentiment, values less than
    0.5 negative.  Returns (label, raw_score).
    """
    score = indicoio.sentiment(data)
    # Walk the bucket boundaries from most negative to most positive.
    buckets = ((.20, "Very Negative"), (.40, "Negative"),
               (.60, "Neutral"), (.80, "Positive"))
    for upper, label in buckets:
        if score < upper:
            return label, score
    return "Very Positive", score
def post(self):
    """Dispatch a JSON request to either the sentiment API or aggregated
    text tags, and write back the JSON-encoded result."""
    payload = json.loads(self.request.body)
    api = payload.get('api')
    data = payload.get('data')
    if api == 'sentiment':
        result = indicoio.sentiment(data)
    else:
        # Any other api value is treated as a text-tags aggregation key.
        result = [aggregate_score(scores, api)
                  for scores in indicoio.text_tags(data)]
    self.write(json.dumps(result))
    self.finish()
def toDict(tweets, maxNum=MAXTERM):
    # Collect up to maxNum located tweets into
    # {text: {'sentiment': score, 'location': [...]}}.
    # A tweet counts toward maxNum only when it carries GPS coordinates or
    # a non-empty user-profile location; a repeated text just gains another
    # location entry instead of re-querying sentiment.
    # NOTE(review): the trailing else/break is read here as "stop once
    # maxNum located tweets are stored"; confirm against the original
    # indentation.
    i = 0
    myDict={}
    for tweet in tweets:
        if i<maxNum:
            if tweet['text'] in myDict:
                entry = myDict[tweet['text']]
                if 'coordinates' in tweet and tweet['coordinates']!=None:
                    entry['location'].append(tweet['coordinates'])
                    i+=1
                elif tweet['user']['location']!='':
                    entry['location'].append(tweet['user']['location'])
                    i+=1
            else:
                #create new entry w/ sentiment + location
                if 'coordinates' in tweet and tweet['coordinates']!=None:
                    myDict[tweet['text']]={'sentiment':indicoio.sentiment(tweet['text']),'location':[tweet['coordinates']]}
                    i+=1
                elif tweet['user']['location']!='':
                    myDict[tweet['text']]={'sentiment':indicoio.sentiment(tweet['text']),'location':[tweet['user']['location']]}
                    i+=1
        else:
            break
    return myDict
def analyse_sentiments_batch(cls, list):
    """Batch-score `list` with indico and map the results.

    Returns the mapped list, or None if anything fails.
    (The parameter name `list` shadows the builtin but is kept so
    keyword callers keep working.)
    """
    try:
        print("Performing batch sentiment analysis for [" + str(len(list)) + "] entries...")
        result = indicoio.sentiment(list)
        sa_result = cls.map_sentiments_batch(result)
        # check number of items
        if len(sa_result) != len(list):
            raise Exception('The input and output list size do not match!')
        return sa_result
    except Exception as ex:
        # Fixed: "str + exception" raised a TypeError that hid the real
        # error message.
        print("Error occured during analysing sentiments: " + str(ex))
        return None
def review_to_sentiment(review):
    '''
    Function to split a review into parsed sentences and get sentiment

    This uses one function call for each review... good for testing, not
    good for production, since you have a limited number of API calls
    '''
    # Indico splits the review into sentences and scores each one.
    sentence_scores = indicoio.sentiment(review, split='sentence')
    # Average the per-sentence scores into a single review score.
    total = sum(item['results'] for item in sentence_scores)
    return total / len(sentence_scores)
def print_links(host_name, url):
    # Crawl `url`: tag each image, classify each tag's industry via LUIS,
    # store an Insights row scored by the page title's sentiment, then
    # recurse into same-host links.  Depends on module globals:
    # page_count, page_limit, visited_links, api, Insights.
    global page_count
    page_count = 0
    try:
        # Get pretty html
        html = urlopen(url).read()
        soup = BeautifulSoup(html, 'html.parser')
        # One sentiment score per page, taken from the <title> text.
        sentiment = indicoio.sentiment(soup.title.get_text())
        image_urls = []
        # Iterate through images
        for img in soup.find_all('img'):
            img_src = img.get('src')
            if img_src not in image_urls and img_src.startswith('http'):
                page_count += 1
                if page_count > page_limit:
                    break
                image_urls.append(img_src)
                response = api.tag_image_urls(img_src)
                tags = response['results'][0]['result']['tag']['classes']
                print url + " --- " + img_src
                for tag in tags:
                    print tag
                    # NOTE(review): subscription key is embedded in the URL.
                    topic = json.loads(urlopen(str.format("https://api.projectoxford.ai/luis/v1/application?id=cca1f963-ab81-4771-a661-735d1544bd0f&subscription-key=dfefa88b64064940b18ba7603c7d9650&q={}",quote_plus(tag))).read())
                    if len(topic['intents']) > 0 and topic['intents'][0]['intent'] is not None:
                        print "*** Industry - " + topic['intents'][0]['intent'] + " ***** Sentiment Level - " + str(sentiment)
                        # Map the LUIS intent onto an industry id (default 4).
                        if topic['intents'][0]['intent'] == 'technology':
                            industry_id = 2
                        elif topic['intents'][0]['intent'] == 'agriculture':
                            industry_id = 1
                        elif topic['intents'][0]['intent'] == 'fashion':
                            industry_id = 3
                        elif topic['intents'][0]['intent'] == 'entertainment':
                            industry_id = 4
                        else:
                            industry_id = 4
                        insight = Insights(industry_id=industry_id, score=sentiment)
                        insight.save()
        # Iterate through links - Recursion
        for link in soup.find_all('a'):
            href = link.get('href')
            if href is not None and validators.url(href) and href not in visited_links:
                visited_links.append(href)
                if host_name in href:
                    print_links(host_name, href)
    # NOTE(review): the blanket except/pass hides every crawl failure;
    # at minimum this should log the exception.
    except Exception as ex:
        pass
def rate():
    # Flask view: accept a course rating, nudge the course difficulty up or
    # down based on the submission's sentiment, and re-render the form.
    msg = ""
    form = MyForm(csrf_enabled=False)
    print(form)
    if request.method == "POST":
        # Require a minimally substantive review before counting it.
        if len(request.form['text']) > 20:
            if sentiment(request.form['text']) == "positive":
                # Positive review pulls the difficulty down slightly.
                update_course(request.form['course_id'], int(request.form['difficulty']) - (0 + int(request.form['difficulty'])) / 5.0)
            else:
                # Negative review pushes the difficulty up toward 5.
                update_course(request.form['course_id'], int(request.form['difficulty']) + (5 - int(request.form['difficulty'])) / 5.0)
            # NOTE(review): this unconditional second update overwrites the
            # adjusted value with the raw difficulty -- looks unintended,
            # confirm before removing.
            update_course(request.form['course_id'], int(request.form['difficulty']))
            msg = "Thank you for your submission!"
        else:
            msg = "Enter your submission:"
    return render_template('rate.html', form=form, Course=Course, msg=msg)
def geo_data_analysis(search_term):
    """analyzes the sentiment of tweets and return the average value for each region
    """
    def region_average(lat, lon, radius):
        # Mean sentiment over every tweet collected inside one circle.
        values = sentiment(geo_collect_tweets(search_term, lat, lon, radius))
        return sum(values) / len(values)

    NE_avg = region_average(42.781158, -71.398729, '250mi')
    S_avg = region_average(33.000000, -84.000000, '500mi')
    MW_avg = region_average(40.000000, -100.000000, '1000mi')
    W_avg = region_average(35.000000, -120.000000, '250mi')
    return [W_avg, S_avg, NE_avg, MW_avg]
def getOverallResult(self, strArray):
    """Collect sentiment, political, and personality scores for a batch of
    strings into one list of per-metric columns."""
    personality = indicoio.personality(strArray)
    # One column per personality trait.
    extraversion = [p["extraversion"] for p in personality]
    openness = [p["openness"] for p in personality]
    agreeableness = [p["agreeableness"] for p in personality]
    conscientiousness = [p["conscientiousness"] for p in personality]

    political = indicoio.political(strArray)
    # One column per political leaning.
    libertarian = [p["Libertarian"] for p in political]
    green = [p["Green"] for p in political]
    liberal = [p["Liberal"] for p in political]
    conservative = [p["Conservative"] for p in political]

    sentiment_scores = indicoio.sentiment(strArray)
    return [
        sentiment_scores,
        libertarian,
        green,
        liberal,
        conservative,
        extraversion,
        openness,
        agreeableness,
        conscientiousness,
    ]