def get_emotion_sequence(posts, EMO_RESOURCES):
    """Build a per-sentence dominant-emotion sequence and an aggregate profile.

    Each post is cleaned and split into sentences; every sentence gets an
    emotion profile from get_emotion_profile_per_post.

    Args:
        posts: iterable of raw text posts.
        EMO_RESOURCES: emotion lexicon bundle, passed through to
            get_emotion_profile_per_post.

    Returns:
        tuple (emo_seq, agg_profile):
            emo_seq: list of dominant emotion labels, one entry per emotion
                tied at the sentence's maximum score (only when that maximum
                is positive).
            agg_profile: dict summing each emotion's score across all
                sentences whose dominant score is positive.
    """
    emo_seq = []
    agg_profile = {}
    for post in posts:
        clean_post = text_utils.clean_text(post)
        for sent in sent_tokenize(clean_post):
            profile, _seq = get_emotion_profile_per_post(sent, EMO_RESOURCES)
            if not profile:
                continue
            # Single scan for the dominant (emotion, score) pair instead of
            # calling max() twice on the same items.
            max_emotion, max_emo_val = max(
                profile.items(), key=operator.itemgetter(1))
            if max_emo_val > 0:
                for key, val in profile.items():
                    # One sequence entry per emotion tied at the max score
                    # (preserves the original tie behaviour — TODO confirm
                    # ties were meant to emit duplicates).
                    if val == max_emo_val:
                        emo_seq.append(max_emotion)
                    # Accumulate every emotion's score for this sentence.
                    agg_profile[key] = agg_profile.get(key, 0) + val
    return emo_seq, agg_profile
def cleanText(inpath):
    """Convert the CSV at *inpath* to a spreadsheet and write a cleaned copy.

    Reads every data row of the first sheet of *inpath*, cleans the text in
    column 1 with text_utils.clean_text, and writes (id, cleaned text) pairs
    to a new workbook.

    Args:
        inpath: path to the source spreadsheet/CSV file.
    """
    output = r'F:\projects-he\nlp-emotion-analysis-Jeloh\nlp-emotion-analysis-core\data\coronavirus_reddit_raw_comments.xls'
    csv_xls.csv_to_xlsx_pd(inpath, output)
    # Open the source workbook (xlrd) and prepare the output workbook (xlwt).
    # The original's dangling "a_sheet" token and the unused
    # load_workbook(inpath) result were removed — they were dead/broken code.
    indata = xlrd.open_workbook(inpath)
    book = Workbook(encoding='utf-8')
    outdata = book.add_sheet('sheet1')
    table = indata.sheets()[0]
    nrows = table.nrows
    # Skip the header row (row 0); clean the text column of each data row.
    for i in range(1, nrows):
        alldata = table.row_values(i)
        clean_text_1 = text_utils.clean_text(alldata[1])
        outdata.write(i, 1, alldata[0])
        outdata.write(i, 2, clean_text_1)
    # Save once, after all rows are written, via the Workbook — xlwt sheets
    # have no save() method, so the original outdata.save(...) would raise.
    # NOTE(review): the path ends in .csv but xlwt emits .xls binary data —
    # confirm downstream consumers expect this.
    book.save(
        r"F:\projects-he\nlp-emotion-analysis-Jeloh\nlp-emotion-analysis-core\data\AfterClean.csv"
    )
'NEGATION': NEGATION_MAP, 'INTENSIFIERS': INTENSIFIER_MAP, 'PHYSICAL': PHYSICAL } return EMO_RESOURCES results_df = pd.DataFrame() df = pd.read_csv(path) print(df.columns) print(df['emotion'].unique()) EMO_RESOURCES = load_emotion_dictionaries() for i, row in df.iterrows(): # if(i>20):continue row_dict = row.to_dict() # print() sentence = row['utterance'] clean_text_1 = text_utils.clean_text(sentence) emotion_profile, emo_seq = emotion_extractor.get_emotion_profile_per_post( clean_text_1, EMO_RESOURCES) print(emotion_profile) row_dict.update(emotion_profile) print(row_dict) results_df = results_df.append(row_dict, ignore_index=True) results_df.to_csv( r"E:\Projects\Emotion_detection_gihan\finbert_experiments\evaluations\twitter_emotion_evaluations.csv" )
with open('models/emotions/intensifier_vocab.pkl', 'rb') as f: INTENSIFIER_MAP = pickle.load(f) with open('models/emotions/negation_vocab.pkl', 'rb') as f: NEGATION_MAP = pickle.load(f) with open('models/clinical_info/physical.pkl', 'rb') as f: PHYSICAL = pickle.load(f) EMO_RESOURCES = {'EMOTIONS': EMOTION_MAP, 'NEGATION': NEGATION_MAP, 'INTENSIFIERS': INTENSIFIER_MAP, 'PHYSICAL': PHYSICAL} return EMO_RESOURCES if __name__ == '__main__': EMO_RESOURCES = load_emotion_dictionaries() text_1 = 'still i feel very sad about the unexpected incident. hopefully the pain will be less, and i am grateful for..' clean_text_1 = text_utils.clean_text(text_1) emotion_profile = emotion_extractor.get_emotion_profile_per_post(clean_text_1, EMO_RESOURCES) clinical_info = clinical_info_extractor.get_physical_sym_profile(clean_text_1, EMO_RESOURCES) keyphrases = keyphrase_extractor.analyze_keyphrases(clean_text_1) print(keyphrases) print(clinical_info) print(emotion_profile)