def main():
    """Run 10-fold cross-validation, then train on the full data set.

    For each fold a contiguous slice of ``clean_data_x`` / ``clean_data_y``
    is held out as the test set and the remainder is used for training via
    ``EmotionPredictor.train_and_test_trees``. The per-fold accuracy and the
    mean accuracy are printed. After the folds, the predictor is trained on
    all of the data with ``train_trees``.
    """
    emotion_predictor = EmotionPredictor()
    folds = 10
    num_data = len(clean_data_x)
    accuracy = []

    for fold in range(folds):
        # Integer boundaries computed from the cumulative position keep the
        # folds contiguous and make the last fold end exactly at num_data.
        # Truncating a float fold size per fold left gaps whenever num_data
        # was not a multiple of folds, so some samples were never tested.
        low = fold * num_data // folds
        high = (fold + 1) * num_data // folds
        print("FOR THIS RUN {} {}".format(low, high))

        train_x = np.concatenate(
            (clean_data_x[:low], clean_data_x[high:]), axis=0)
        train_y = np.concatenate(
            (clean_data_y[:low], clean_data_y[high:]), axis=0)
        test_x = clean_data_x[low:high]
        test_y = clean_data_y[low:high]

        accuracy.append(
            emotion_predictor.train_and_test_trees(
                train_x, train_y, test_x, test_y))
        print("Accuracy for this round " + str(accuracy[-1]) + "%")

    # Final fit on every sample once the cross-validation scores are known.
    emotion_predictor.train_trees(clean_data_x, clean_data_y)

    avg = sum(accuracy) / len(accuracy)
    print("Average accuracy " + str(avg) + "%")
def findFrequencies(self, tweets):
    """Classify ``tweets`` and tally the predicted emotions on the instance.

    Builds an Ekman multiclass ``EmotionPredictor``, stores the result of
    ``predict_classes(tweets)`` on the instance, and increments the
    instance's emotion-frequency counter once for each value in the
    result's ``'Emotion'`` column.

    Args:
        tweets: Passed unchanged to ``EmotionPredictor.predict_classes``.
    """
    model = EmotionPredictor(classification='ekman', setting='mc',
                             use_unison_model=True)
    self.__predictions = model.predict_classes(tweets)
    # Only the 'Emotion' value of each row is needed, so iterate that column
    # directly instead of materialising every row with iterrows().
    for emotion in self.__predictions['Emotion']:
        self.__emotionFrequency[emotion] += 1
def get_emotion_embeddings(input_file):
    """Return the Plutchik-model embeddings for the tweets listed in a file.

    Args:
        input_file: Path of a text file; each line is passed as one tweet.

    Returns:
        The value returned by ``EmotionPredictor.embedd`` for the list of
        lines read from ``input_file``.
    """
    model = EmotionPredictor(classification='plutchik', setting='mc',
                             use_unison_model=True)
    # The context manager closes the handle even if reading raises; the
    # previous bare open() never closed it.
    with open(input_file) as f:
        tweets = f.read().splitlines()
    return model.embedd(tweets)
def cross_validation(k, X, y, random_forest=False, use_confidence=False,
                     num_of_trees=1):
    """Run k-fold cross-validation and report accuracy and per-class metrics.

    ``X`` and ``y`` are split into ``k`` parts with ``np.array_split``. For
    each fold a fresh ``EmotionPredictor`` is fitted on the training parts
    and evaluated on the held-out part. Per-fold accuracy, the mean and
    standard deviation of the accuracies, and precision / recall / F1 per
    emotion value are printed. Finally a confusion matrix over all folds,
    divided by ``k``, is plotted and shown.

    Args:
        k: Number of folds.
        X: Samples, split with ``np.array_split``.
        y: Labels, split with ``np.array_split``.
        random_forest: Forwarded to ``EmotionPredictor``.
        use_confidence: Forwarded to ``EmotionPredictor``.
        num_of_trees: Forwarded to ``EmotionPredictor``.
    """
    accuracies = []
    y_pred = []
    y_true = []
    predictors = get_predictors()
    emotion_values = get_emotion_values()
    X_splits = np.array_split(X, k)
    y_splits = np.array_split(y, k)

    for fold in range(k):
        X_train, X_test, y_train, y_test = get_train_test_split(
            X_splits, y_splits, fold)
        emotion_predictor = EmotionPredictor(predictors, random_forest,
                                             use_confidence, num_of_trees)
        emotion_predictor.fit(emotion_values, X_train, y_train)
        predictions = emotion_predictor.predict(X_test)

        # Extend in place instead of rebuilding the accumulated lists on
        # every fold.
        y_pred.extend(predictions)
        y_true.extend(y_test)

        # Pair predictions with labels under their own names so the fold
        # index is never shadowed by the per-sample loop variable.
        correct = sum(
            1 for predicted, actual in zip(predictions, y_test)
            if predicted == actual)
        accuracy = float(correct * 100) / len(y_test)
        accuracies.append(accuracy)
        print("Accuracy for round {0} is {1:.2f}".format(fold + 1, accuracy))

    print(
        "Cross Validation accuracy has a mean of {0:.2f} and a std of {1:.2f}".
        format(np.mean(accuracies), np.std(accuracies)))

    print(" prec, rec, f1")
    for emotion_number in emotion_values:
        print("Emotion {0}: {1:.2f}, {2:.2f}, {3:.2f}".format(
            emotion_number,
            get_precision(y_true, y_pred, emotion_number),
            get_recall(y_true, y_pred, emotion_number),
            get_f1_score(y_true, y_pred, emotion_number)))

    plt.figure()
    # The matrix is accumulated over all k folds, then divided by k.
    cfm = confusion_matrix(y_true, y_pred) / k
    plot_confusion_matrix(cfm, classes=["1", "2", "3", "4", "5", "6"])
    plt.show()
self.cursor.execute(query) self.cnxn.commit() return self.cursor #Create the DB connection db = DB() # Pandas presentation options pd.options.display.max_colwidth = 150 # show whole tweet's content pd.options.display.width = 200 # don't break columns # pd.options.display.max_columns = 7 # maximal number of columns # Predictor for Ekman's emotions in multiclass setting. model = EmotionPredictor(classification='ekman', setting='mc', use_unison_model=True) def streaming(keyword): global db #consumer key, consumer secret, access token, access secret. consumer_key = "yzcqdp3QZmKQrrZfNNTmozUQb" consumer_secret = "3hVqVKHgP10SBhNSlcRAY5na6nBpQco5MqkhaK9ajUpHO1WIq0" access_token = "1085530428063150080-6zJrB79K09jQFgQ34NtmDZRHMIaoJM" access_secret = "ETRshaLITFSEKIhlfFwcpVF8daO1EDJHEAPAQ5bh1NxVZ" #Authenticate the app using Twitter API key and secret auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
import pandas as pd import csv from emotion_predictor import EmotionPredictor # Pandas presentation options pd.options.display.max_colwidth = 150 # show whole tweet's content pd.options.display.width = 200 # don't break columns # pd.options.display.max_columns = 7 # maximal number of columns # Predictor for Ekman's emotions in multiclass setting. model = EmotionPredictor(classification='ekman', setting='mc', use_unison_model=True) tweets = [] with open( '/home/didier/ASONAMW/PA/Sentiment/Billboard_Music_Awards_2016_Sentiment.txt', 'r') as csvfile: plots = csv.reader(csvfile, delimiter=';') for row in plots: tweets.append(row[1]) predictions = model.predict_classes(tweets) predictions.to_csv( "/home/didier/ASONAMW/PA/Sentiment/Billboard_Music_Awards_2016_Emotions.txt", sep=';', encoding='utf-8') tweets = [] with open( '/home/didier/ASONAMW/PA/Sentiment/Billboard_Music_Awards_2017_Sentiment.txt',
import pickle

from emotion_predictor import EmotionPredictor
from util import get_clean_data, get_predictors, get_emotion_values

# Train a 200-tree, confidence-weighted random-forest EmotionPredictor on the
# complete cleaned data set and persist the fitted object to disk.

# Load the inputs in the same order as before: data, predictors, emotion values.
X, y = get_clean_data()
predictors = get_predictors()
emotion_values = get_emotion_values()

emotion_predictor = EmotionPredictor(
    predictors,
    random_forest=True,
    use_confidence=True,
    num_of_trees=200,
)
emotion_predictor.fit(emotion_values, X, y)

# Serialise the fitted predictor with the highest pickle protocol available.
with open('emotion_predictor.pickle', 'wb') as f:
    pickle.dump(emotion_predictor, f, protocol=pickle.HIGHEST_PROTOCOL)
import argparse

from emotion_predictor import EmotionPredictor

# Command-line demo: print the predicted classes, class probabilities and
# embeddings for the tweets given with -tweets.

parser = argparse.ArgumentParser()
# Collect one or more tweets into a list. Without nargs the option produced a
# single str (or None when omitted), while the predictor methods are given a
# list of tweet strings at the other call sites visible in this code base.
parser.add_argument('-tweets', nargs='+', required=True,
                    help='one or more tweet texts to classify')
args = parser.parse_args()
tweets = args.tweets

# Predictor for Ekman's emotions in multiclass setting.
model = EmotionPredictor(classification='ekman', setting='mc',
                         use_unison_model=True)

predictions = model.predict_classes(tweets)
print(predictions, '\n')

probabilities = model.predict_probabilities(tweets)
print(probabilities, '\n')

embeddings = model.embedd(tweets)
print(embeddings, '\n')
default=False) app.config['DEBUG'] = env.bool('DEBUG', default=False) api = Api(app) # Pandas presentation options pd.options.display.max_colwidth = 150 # show whole tweet's content pd.options.display.width = 200 # don't break columns # pd.options.display.max_columns = 7 # maximal number of columns # http://www.vstechnologies.net/wp-content/uploads/2018/12/IEEEJV_82Emotion-Recognition-on-Twitter-Comparative-Study-and-Training-a-Unison-Model.pdf # Predictor for Ekman's emotions in multiclass setting. # classifications: ekman, plutchik, poms # setting: mc (multiclass) ml (multilabel) model = EmotionPredictor(classification='plutchik', setting='mc', use_unison_model=True) #probabilities = model.predict_probabilities(tweets) #print(probabilities, '\n') #embeddings = model.embed(tweets) #print(embeddings, '\n') def format_predictions(predictions): s = predictions.iloc[0].iloc[1:].sort_values(ascending=False) return list(zip(s, s.index)) @app.route('/', methods=["GET", "POST"]) def index_page():
dbs_name=dbs_name, document_id=document_id, rev=rev) response = requests.delete(url, auth=self.auth) return response.json() if __name__ == '__main__': my_couchdb = couchDb_utils('admin', 'password', 'localhost') # res = my_couchdb.insert_document('demo', {'_id': 'second_record', 'init_balance': 1500}) # res = my_couchdb.get_document('demo', 'second_record') # res = my_couchdb.delete_document('demo', 'second_record', "1-9528dce32655253d363029732a718a23") # print res with open('tinyTwitter.json', 'rb') as f: model = EmotionPredictor(classification='ekman', setting='mc') tiny_twitter = json.loads(f.read()) twitters = tiny_twitter['rows'] for twitter in twitters: text = twitter['doc']['text'] text = str(text) preprocess_tweet(text) # score = sentiment_score(text) # data = { # 'positiveness': score, # 'test_pos':score_new, # 'tweet': text # } # my_couchdb.insert_document('demo2', data)
# CSV path used only by the disabled loader described below.
s = 'output_final.csv'

# Single-word inputs fed to the predictor.
tweets = [
    'art',
    "social",
    "intelligence",
    "game",
    "planet",
    "career",
    "mistake",
    "ready",
    "like",
    "good",
    "plan",
    "deal",
    "me",
    "loyal",
    "know",
    "child",
]

# Disabled alternative input 1: read `s` with csv.reader (delimiter ','),
# append column 2 of every row to `tweets`, count the rows into
# `noOfTweets`, then print the first 30 tweets and the count.
#
# Disabled alternative input 2: a hand-written list of full sentences, e.g.
# "Watching the sopranos again from start to finish!",
# "Finding out i have to go to the dentist tomorrow",
# "I want to go outside and chalk but I have no chalk",
# "Stock market hit all time low on friday",
# "Stock market hit all time high on friday",
# "I believe you",
# "My mom wasn't mad",
# "Do people have no Respect for themselves or you know others peoples homes",
# "www.google.com I am happy".

# Predictor for Plutchik's emotions in multiclass setting.
model = EmotionPredictor(classification='plutchik', setting='mc')

result = model.predict_classes(tweets)
probabilities = model.predict_probabilities(tweets)
from emotion_predictor import EmotionPredictor

# Classify one hard-coded sentence with the Plutchik multiclass model and
# print the tweet text next to its predicted emotion.
model = EmotionPredictor(
    classification='plutchik',
    setting='mc',
    use_unison_model=True,
)
print("************")

predictions = model.predict_classes(["I like to play cricket"])
# Only one tweet was submitted, so the first row holds the whole result.
first_row = predictions.iloc[0]
print(first_row['Tweet'], '\t', first_row['Emotion'])
lemma_text.append(lemma2) else: wntag = None lemma2 = word lemma_text.append(lemma2) return (' '.join(lemma_text)) # Pandas presentation options pd.options.display.max_colwidth = 150 # show whole tweet's content pd.options.display.width = 200 # don't break columns # # Predictor for Ekman's emotions in multiclass setting. model = EmotionPredictor(classification='ekman', setting='mc', use_unison_model=True) cursor = cnx.cursor() cursor2 = cnx.cursor() cursor3 = cnx.cursor() select_text = ("SELECT id,title FROM goal ") cursor.execute(select_text) result = cursor.fetchall() dictlist = [] ids = [] i = 1 for items in result: dictlist.append(cleaner(items[1])) ids.append(items[0]) print(i) i += 1
import pandas as pd import json import os from emotion_predictor import EmotionPredictor if not os.path.exists('data'): os.mkdir('data') # Pandas presentation options pd.options.display.max_colwidth = 100 # show whole tweet's content pd.options.display.width = 200 # don't break columns # pd.options.display.max_columns = 7 # maximal number of columns # Predictor for Ekman's emotions in multiclass setting. model = EmotionPredictor(classification='ekman', setting='mc', use_unison_model=True) reviews = [] ratings = [] authors = [] hotels = [] #forMoreJsonFiles #pathOfJsonFiles path_to_json = 'json/' for file_name in [file for file in os.listdir(path_to_json) if file.endswith('.json')]: with open(path_to_json + file_name) as json_file: data= [] data=json.load(json_file) # read list inside dict
tweet = emoji.demojize(tweet) tweet = tweet.replace(":"," ") tweet = ' '.join(tweet.split()) return tweet def removeContent(df:str, colname:str, *args): sens ='df[{}]'.format("|".join(["df.{}.str.contains('"'{}'"')".format(colname,x) for x in list(args)])) df_other_list1 = list(eval(sens)[colname]) df_other_list2 = list(df[colname]) ret = list(set(df_other_list2) ^ set(df_other_list1)) result = df[df[colname].isin(ret)] return result tweets_table = pd.read_csv("clean#excitement.csv",usecols=["id","created_at","new_text"]) model = EmotionPredictor(classification='ekman', setting='mc', use_unison_model=True) predictions = model.predict_classes([process_emoji(tw) for tw in tweets_table.new_text.tolist()]) #print(predictions, '\n') tweets_table["emotion"]=predictions["Emotion"].tolist() result=removeContent(tweets_table.astype(str),"emotion", 'Disgust', 'Fear','Sadness') #print(result) print(result.shape[0]) result=result.drop(['emotion'],axis=1) result.to_csv("cs_excitement.csv", mode='a',encoding="utf-8",index=None)