Esempio n. 1
0
def main():
    """Run 10-fold cross-validation of the tree-based emotion predictor.

    The module-level ``clean_data_x`` / ``clean_data_y`` are cut into ten
    contiguous folds.  For each fold the predictor is trained on the other
    nine and tested on the held-out one; the per-fold accuracy and the
    average accuracy are printed.  After the validation rounds the predictor
    is trained once more on the complete data set.
    """
    emotion_predictor = EmotionPredictor()
    folds = 10
    num_data = len(clean_data_x)
    accuracy = []

    for fold in range(folds):
        # Derive BOTH bounds from the fold index using integer arithmetic.
        # Computing ``high`` as ``low + num_data / folds`` truncates twice and
        # leaves samples between consecutive folds (or at the tail) that are
        # never used for testing when num_data is not a multiple of folds.
        low = fold * num_data // folds
        high = (fold + 1) * num_data // folds
        print("FOR THIS RUN {} {}".format(low, high))

        # Everything outside [low, high) is training data.
        train_x = np.concatenate((clean_data_x[:low], clean_data_x[high:]),
                                 axis=0)
        train_y = np.concatenate((clean_data_y[:low], clean_data_y[high:]),
                                 axis=0)
        test_x = clean_data_x[low:high]
        test_y = clean_data_y[low:high]

        fold_accuracy = emotion_predictor.train_and_test_trees(
            train_x, train_y, test_x, test_y)
        accuracy.append(fold_accuracy)
        print("Accuracy for this round " + str(fold_accuracy) + "%")

    # Final model: fit on all available data.
    emotion_predictor.train_trees(clean_data_x, clean_data_y)
    avg = sum(accuracy) / len(accuracy)
    print("Average accuracy " + str(avg) + "%")
Esempio n. 2
0
 def findFrequencies(self, tweets):
     """Count how often each emotion is predicted for ``tweets``.

     Builds an Ekman multiclass predictor (unison model), stores its class
     predictions in ``self.__predictions`` and increments the matching
     counter in ``self.__emotionFrequency`` once per predicted row.

     :param tweets: the tweets handed to ``predict_classes``.
     """
     model = EmotionPredictor(classification='ekman',
                              setting='mc',
                              use_unison_model=True)
     self.__predictions = model.predict_classes(tweets)
     # Only the 'Emotion' column is needed, so walk it directly instead of
     # materialising every row through iterrows().
     for emotion in self.__predictions['Emotion']:
         self.__emotionFrequency[emotion] += 1
Esempio n. 3
0
def get_emotion_embeddings(input_file):
    """Return the model embeddings for the tweets stored in ``input_file``.

    :param input_file: path of a text file; every line is passed to the
        model as one tweet.
    :return: whatever ``EmotionPredictor.embedd`` returns for those lines.
    """
    model = EmotionPredictor(classification='plutchik',
                             setting='mc',
                             use_unison_model=True)
    # Context manager guarantees the handle is closed even if reading fails.
    with open(input_file) as f:
        tweets = f.read().splitlines()
    return model.embedd(tweets)
def cross_validation(k,
                     X,
                     y,
                     random_forest=False,
                     use_confidence=False,
                     num_of_trees=1):
    """Run k-fold cross-validation of ``EmotionPredictor`` and report metrics.

    ``X`` and ``y`` are split into ``k`` parts with ``np.array_split``.  For
    every fold a fresh predictor is fitted on the training part and evaluated
    on the held-out part.  Per-fold accuracy, mean/std accuracy and the
    per-emotion precision/recall/F1 over all folds are printed, and the
    fold-averaged confusion matrix is plotted.

    :param k: number of folds.
    :param X: samples.
    :param y: labels, aligned with ``X``.
    :param random_forest: forwarded to ``EmotionPredictor``.
    :param use_confidence: forwarded to ``EmotionPredictor``.
    :param num_of_trees: forwarded to ``EmotionPredictor``.
    """
    accuracies = []
    y_pred = []
    y_true = []
    predictors = get_predictors()
    emotion_values = get_emotion_values()

    X_splits = np.array_split(X, k)
    y_splits = np.array_split(y, k)

    for fold in range(k):
        X_train, X_test, y_train, y_test = get_train_test_split(
            X_splits, y_splits, fold)

        emotion_predictor = EmotionPredictor(predictors, random_forest,
                                             use_confidence, num_of_trees)
        emotion_predictor.fit(emotion_values, X_train, y_train)

        predictions = emotion_predictor.predict(X_test)
        # Accumulate in place; rebuilding the list with ``+`` on every fold
        # copies everything collected so far.
        y_pred.extend(predictions)
        y_true.extend(y_test)

        # ``idx`` is deliberately distinct from the fold counter so the
        # round number printed below can never be clobbered by this loop.
        correct = sum(1 for idx, prediction in enumerate(predictions)
                      if prediction == y_test[idx])

        accuracy = float(correct * 100) / len(y_test)
        accuracies.append(accuracy)
        print("Accuracy for round {0} is {1:.2f}".format(fold + 1, accuracy))

    print(
        "Cross Validation accuracy has a mean of {0:.2f} and a std of {1:.2f}".
        format(np.mean(accuracies), np.std(accuracies)))

    print("          prec, rec, f1")
    for emotion_number in emotion_values:
        print("Emotion {0}: {1:.2f}, {2:.2f}, {3:.2f}".format(
            emotion_number, get_precision(y_true, y_pred, emotion_number),
            get_recall(y_true, y_pred, emotion_number),
            get_f1_score(y_true, y_pred, emotion_number)))

    plt.figure()
    # The matrix is summed over all folds; divide by k to show the average.
    cfm = confusion_matrix(y_true, y_pred) / k
    plot_confusion_matrix(cfm, classes=["1", "2", "3", "4", "5", "6"])
    plt.show()
Esempio n. 5
0
        self.cursor.execute(query)
        self.cnxn.commit()
        return self.cursor


# Create the DB connection once at module import time; streaming() below
# refers to this same object through ``global db``.
db = DB()

# Pandas presentation options
pd.options.display.max_colwidth = 150  # allow up to 150 characters per cell so tweet text is not cut short
pd.options.display.width = 200  # wide console output so columns are not wrapped
# pd.options.display.max_columns = 7      # maximal number of columns

# Predictor for Ekman's emotions in multiclass setting, using the unison
# model.  Built once at import time and shared as a module-level global.
model = EmotionPredictor(classification='ekman',
                         setting='mc',
                         use_unison_model=True)


def streaming(keyword):
    """Set up Twitter API authentication via tweepy.

    NOTE(review): only the credential setup and the creation of the
    ``OAuthHandler`` are visible in this excerpt; how ``keyword`` and the
    module-level ``db`` are used afterwards cannot be confirmed from here.
    """
    global db

    # consumer key, consumer secret, access token, access secret.
    # NOTE(review): these credentials are hard-coded in the source.  They
    # should be revoked and loaded from configuration or the environment
    # instead of living in the code.
    consumer_key = "yzcqdp3QZmKQrrZfNNTmozUQb"
    consumer_secret = "3hVqVKHgP10SBhNSlcRAY5na6nBpQco5MqkhaK9ajUpHO1WIq0"
    access_token = "1085530428063150080-6zJrB79K09jQFgQ34NtmDZRHMIaoJM"
    access_secret = "ETRshaLITFSEKIhlfFwcpVF8daO1EDJHEAPAQ5bh1NxVZ"

    # Authenticate the app using the Twitter API consumer key and secret.

    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
Esempio n. 6
0
import pandas as pd
import csv
from emotion_predictor import EmotionPredictor

# Console display settings for pandas output.
pd.set_option('display.max_colwidth', 150)  # up to 150 characters per cell
pd.set_option('display.width', 200)  # keep wide frames on one line

# Ekman multiclass predictor backed by the unison model.
model = EmotionPredictor(
    classification='ekman', setting='mc', use_unison_model=True)

# Field 1 of every ';'-separated row is collected as the tweet.
with open(
        '/home/didier/ASONAMW/PA/Sentiment/Billboard_Music_Awards_2016_Sentiment.txt',
        'r') as csvfile:
    tweets = [row[1] for row in csv.reader(csvfile, delimiter=';')]

# Classify the tweets and write the predictions next to the input file.
predictions = model.predict_classes(tweets)
predictions.to_csv(
    "/home/didier/ASONAMW/PA/Sentiment/Billboard_Music_Awards_2016_Emotions.txt",
    sep=';',
    encoding='utf-8')

tweets = []
with open(
        '/home/didier/ASONAMW/PA/Sentiment/Billboard_Music_Awards_2017_Sentiment.txt',
import pickle
from emotion_predictor import EmotionPredictor

from util import get_clean_data, get_predictors, get_emotion_values

# Inputs for training, all supplied by the ``util`` helpers.
X, y = get_clean_data()
predictors = get_predictors()
emotion_values = get_emotion_values()

# Fit a 200-tree random forest with confidence enabled on the full data set.
emotion_predictor = EmotionPredictor(
    predictors,
    random_forest=True,
    use_confidence=True,
    num_of_trees=200,
)
emotion_predictor.fit(emotion_values, X, y)

# Serialise the fitted predictor with the highest pickle protocol available.
with open('emotion_predictor.pickle', 'wb') as pickle_file:
    pickle.dump(emotion_predictor,
                pickle_file,
                protocol=pickle.HIGHEST_PROTOCOL)

import argparse
from emotion_predictor import EmotionPredictor

parser = argparse.ArgumentParser()
# ``nargs='+'`` makes ``args.tweets`` a list of one or more tweets.  Without
# it the predictor receives a single string (or None when the flag is
# omitted) instead of the list of tweets it is given everywhere else.
# ``-tweets "some text"`` keeps working and now yields ``["some text"]``.
parser.add_argument('-tweets', nargs='+', required=True)
args = parser.parse_args()

tweets = args.tweets

# Ekman multiclass predictor backed by the unison model.
model = EmotionPredictor(classification='ekman', setting='mc', use_unison_model=True)

# Predicted class, per-class probabilities and embeddings for the tweets.
predictions = model.predict_classes(tweets)
print(predictions, '\n')

probabilities = model.predict_probabilities(tweets)
print(probabilities, '\n')

embeddings = model.embedd(tweets)
print(embeddings, '\n')
Esempio n. 9
0
                                                  default=False)
# DEBUG is read from the environment and defaults to off.
app.config['DEBUG'] = env.bool('DEBUG', default=False)

api = Api(app)

# Pandas presentation options
pd.options.display.max_colwidth = 150  # allow up to 150 characters per cell so tweet text is not cut short
pd.options.display.width = 200  # wide console output so columns are not wrapped
# pd.options.display.max_columns = 7      # maximal number of columns

# http://www.vstechnologies.net/wp-content/uploads/2018/12/IEEEJV_82Emotion-Recognition-on-Twitter-Comparative-Study-and-Training-a-Unison-Model.pdf
# Predictor for Plutchik's emotions in multiclass setting (unison model).
# classifications: ekman, plutchik, poms
# setting: mc (multiclass) ml (multilabel)
model = EmotionPredictor(classification='plutchik',
                         setting='mc',
                         use_unison_model=True)

#probabilities = model.predict_probabilities(tweets)
#print(probabilities, '\n')
#embeddings = model.embed(tweets)
#print(embeddings, '\n')


def format_predictions(predictions):
    """Rank the scores of the first prediction row.

    The first column of the row is dropped, the remaining values are sorted
    from highest to lowest, and the result is returned as a list of
    ``(value, column_label)`` tuples.
    """
    first_row = predictions.iloc[0]
    scores = first_row.iloc[1:]
    ranked = scores.sort_values(ascending=False)
    return [(score, label) for label, score in ranked.items()]


@app.route('/', methods=["GET", "POST"])
def index_page():
            dbs_name=dbs_name,
            document_id=document_id,
            rev=rev)

        response = requests.delete(url, auth=self.auth)
        return response.json()


if __name__ == '__main__':
    # Connection helper for the local CouchDB instance.  The visible code
    # only constructs it; the insert/get/delete calls that used to be
    # sketched here (databases 'demo' and 'demo2') are not active.
    my_couchdb = couchDb_utils('admin', 'password', 'localhost')

    with open('tinyTwitter.json', 'rb') as tweet_file:
        # Ekman multiclass predictor; created here but not yet used below.
        model = EmotionPredictor(classification='ekman', setting='mc')
        tiny_twitter = json.load(tweet_file)
        # Each entry under 'rows' carries the tweet text at doc -> text.
        for entry in tiny_twitter['rows']:
            # The return value of preprocess_tweet is currently discarded;
            # scoring and storing the result is still to be wired in.
            preprocess_tweet(str(entry['doc']['text']))
# Path of the CSV with the real tweets.  It is only referenced by the
# commented-out loader below, so it is unused in the active code.
s='output_final.csv'
# Inputs actually classified: a fixed list of single words.
tweets = ['art',"social","intelligence","game","planet","career","mistake","ready","like","good","plan","deal","me","loyal","know","child"]
# Disabled loader: would read column 2 of every row of ``s`` into ``tweets``
# and count the rows.
# i=0
# with open(s) as rf:
# 	reader=csv.reader(rf,delimiter=',')
# 	for row in reader:
# 		tweet=row[2]
# 		tweets.append(tweet)
# 		i=i+1

# noOfTweets = i
# for i in range(0,30):
# 	print (tweets[i])

# print (noOfTweets)
# Predictor for Plutchik's emotions in multiclass setting.
model = EmotionPredictor(classification='plutchik', setting='mc')

# Alternative sample input (full sentences), currently disabled.
# tweets = [
#     "Watching the sopranos again from start to finish!",
#     "Finding out i have to go to the  dentist tomorrow",
#     "I want to go outside and chalk but I have no chalk",
#     "Stock market hit all time low on friday",
#     "Stock market hit all time high on friday",
#     "I believe you",
#     "My mom wasn't mad",
#     "Do people have no Respect for themselves or you know others peoples homes",
#     "www.google.com I am happy",
# ]

# Predicted classes and per-class probabilities for every entry of ``tweets``.
result = model.predict_classes(tweets)
probabilities=model.predict_probabilities(tweets)
Esempio n. 12
0
from emotion_predictor import EmotionPredictor

# Plutchik multiclass predictor backed by the unison model.
model = EmotionPredictor(
    classification='plutchik', setting='mc', use_unison_model=True)

print("************")
# Classify one sample sentence and show its text next to the predicted label.
predictions = model.predict_classes(["I like to play cricket"])
first_prediction = predictions.iloc[0]
print(first_prediction['Tweet'], '\t', first_prediction['Emotion'])
            lemma_text.append(lemma2)

        else:
            wntag = None
            lemma2 = word
            lemma_text.append(lemma2)
    return (' '.join(lemma_text))


# Pandas presentation options
pd.options.display.max_colwidth = 150  # allow up to 150 characters per cell so long text is not cut short
pd.options.display.width = 200  # wide console output so columns are not wrapped
#
# Predictor for Ekman's emotions in multiclass setting (unison model).
model = EmotionPredictor(classification='ekman',
                         setting='mc',
                         use_unison_model=True)
# Three cursors on the same connection; only ``cursor`` is used in the
# visible code.  NOTE(review): cursor2/cursor3 are presumably used further
# down - confirm.
cursor = cnx.cursor()
cursor2 = cnx.cursor()
cursor3 = cnx.cursor()
# Fetch the id and title of every row of the ``goal`` table.
select_text = ("SELECT id,title FROM goal ")
cursor.execute(select_text)
result = cursor.fetchall()
# Parallel lists: dictlist[n] is the cleaned title of the row whose id is
# ids[n].
dictlist = []
ids = []
# 1-based progress counter printed once per processed row.
i = 1
for items in result:
    dictlist.append(cleaner(items[1]))  # items[1] is the title column
    ids.append(items[0])  # items[0] is the id column
    print(i)
    i += 1
Esempio n. 14
0
import pandas as pd
import json
import os
from emotion_predictor import EmotionPredictor

# Create the 'data' directory in the current working directory if missing.
if not os.path.exists('data'):
    os.mkdir('data')

# Pandas presentation options
pd.options.display.max_colwidth = 100   # allow up to 100 characters per cell
pd.options.display.width = 200          # wide console output so columns are not wrapped
# pd.options.display.max_columns = 7      # maximal number of columns


# Predictor for Ekman's emotions in multiclass setting (unison model).
model = EmotionPredictor(classification='ekman', setting='mc', use_unison_model=True)

# Accumulators, all starting empty.  NOTE(review): presumably filled while
# the JSON files below are read - the loop body is not fully visible here.
reviews = []
ratings = []
authors = []
hotels = []

#forMoreJsonFiles

#pathOfJsonFiles
path_to_json = 'json/'
for file_name in [file for file in os.listdir(path_to_json) if file.endswith('.json')]:
  with open(path_to_json + file_name) as json_file:
    data= []
    data=json.load(json_file)
    # read list inside dict
    tweet = emoji.demojize(tweet)
    tweet = tweet.replace(":"," ")
    tweet = ' '.join(tweet.split())

    return tweet

def removeContent(df: "pd.DataFrame", colname: str, *args):
    """Drop the rows whose ``colname`` value matches any of ``args``.

    Each entry of ``args`` is passed to ``Series.str.contains`` and is
    therefore interpreted as a regular expression (the pandas default).
    A row is removed when its ``colname`` value matches at least one
    pattern; all other rows are returned with index and order unchanged.

    The mask is built directly instead of assembling a Python expression as
    text and running it through ``eval``.  That approach broke on patterns
    containing quotes, on column names that are not valid attribute names,
    and on an empty ``args``.

    :param df: frame to filter; ``df[colname]`` must support ``.str``.
    :param colname: name of the column to inspect.
    :param args: patterns; when none are given nothing is removed.
    :return: a new DataFrame without the matching rows.
    """
    if not args:
        # Nothing to filter on: hand back an independent copy.
        return df.copy()

    column = df[colname]
    matches = None
    for pattern in args:
        # na=False: missing values count as "no match" and are kept.
        hit = column.str.contains(pattern, na=False)
        matches = hit if matches is None else (matches | hit)
    return df[~matches]


# Load only the id, timestamp and text columns of the input CSV.
tweets_table = pd.read_csv("clean#excitement.csv",usecols=["id","created_at","new_text"])

# Predictor for Ekman's emotions in multiclass setting (unison model).
model = EmotionPredictor(classification='ekman', setting='mc', use_unison_model=True)
# Every text goes through process_emoji before it is classified.
predictions = model.predict_classes([process_emoji(tw) for tw in tweets_table.new_text.tolist()])
#print(predictions, '\n')

# Attach the predicted label to each row; this relies on the predictions
# coming back in the same order as the input texts.
tweets_table["emotion"]=predictions["Emotion"].tolist()

# Drop the rows labelled Disgust, Fear or Sadness.  The frame is cast to str
# first so the ``.str`` matching inside removeContent works on every value.
result=removeContent(tweets_table.astype(str),"emotion", 'Disgust', 'Fear','Sadness')

#print(result)
print(result.shape[0])  # number of rows kept
# The helper column is not part of the output.
result=result.drop(['emotion'],axis=1)

# NOTE(review): mode='a' appends to an existing file and to_csv writes the
# header row on every call, so repeated runs accumulate rows and repeated
# header lines - confirm this is intended.
result.to_csv("cs_excitement.csv", mode='a',encoding="utf-8",index=None)