def emotion_storyline():
    empty_synopsis = "It looks like we don't have a Synopsis for this title yet."
    file_list = get_dataset_list()
    for file in file_list:
        with open(file) as data_file:
            data = json.load(data_file)
        for item in data:
            item_id = item['id']
            with open('../synopsis/' + item_id + '.txt', 'r') as f:
                synopsis = f.read()
            # Fall back to the storyline (or description) when there is no synopsis.
            if empty_synopsis in synopsis:
                script_txt = item['meta'].get('storyline')
                if script_txt is None:
                    script_txt = item.get('description')
            else:
                script_txt = synopsis
            # script_txt may be None or empty if no fallback text exists.
            if not script_txt:
                continue
            text_object = NRCLex(script_txt)
            raw_emotion_scores = text_object.raw_emotion_scores
            s = sum(raw_emotion_scores.values())
            if s == 0:
                continue  # no affect words found; avoid division by zero
            for key, value in raw_emotion_scores.items():
                item[key] = value / s
        with open(file, 'w') as result_file:
            json.dump(data, result_file)
def get_sentiment_breakdown(string):
    text_object = NRCLex(string)
    raw_scores = text_object.raw_emotion_scores
    affect_dict = text_object.affect_dict
    emotion_total = 0
    sentiment_total = 0
    emotions = {}
    sentiments = {}
    # 'positive' and 'negative' are sentiments; everything else is an emotion.
    for key, value in raw_scores.items():
        if key != "positive" and key != "negative":
            emotion_total += value
        else:
            sentiment_total += value
    # Normalize each group so it sums to 1, guarding against empty groups
    # (the original divided unconditionally and could raise ZeroDivisionError).
    for key, value in raw_scores.items():
        if key != "positive" and key != "negative":
            if emotion_total > 0:
                emotions[key] = value / emotion_total
        else:
            if sentiment_total > 0:
                sentiments[key] = value / sentiment_total
    return {
        "emotions": emotions,
        "sentiments": sentiments,
        "raw_scores": raw_scores,
        "affect_dict": affect_dict,
    }
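# Usage sketch for get_sentiment_breakdown (the sample sentence is invented):
# 'emotions' and 'sentiments' are each normalized so the values within a
# group sum to 1.
breakdown = get_sentiment_breakdown("I love this film, though the ending scared me.")
print(breakdown["emotions"])    # e.g. {'joy': ..., 'fear': ...}
print(breakdown["sentiments"])  # e.g. {'positive': ..., 'negative': ...}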
def get_emotions(df):
    emotions = {
        'fear': [], 'anger': [], 'anticipation': [], 'trust': [],
        'surprise': [], 'positive': [], 'negative': [], 'sadness': [],
        'disgust': [], 'joy': []
    }
    for index, row in df.iterrows():
        tweet = row['Text']
        text_object = NRCLex(tweet)
        length_sentence = len(tweet.split())
        # raw_emotion_scores is just a dictionary, similar to the one above
        absolute_numbers = text_object.raw_emotion_scores
        for emot in emotions:
            # Catch only the expected failures instead of a bare except.
            try:
                val = absolute_numbers[emot] / length_sentence
            except (KeyError, ZeroDivisionError):
                val = 0
            emotions[emot].append(val)
    for e in emotions:
        df[e] = emotions[e]
    df.to_csv(
        '/Users/gabriel/PycharmProjects/Finance/Data/4B-Ungrouped non-ML sentiment/FB_1d_no_ML.csv'
    )
    return True
def emotions(books, name):
    # Drop stop words, then join the remaining words into a single string.
    # (The original encoded each cell to bytes and str()-ed the list, which
    # leaks b'...' markers into the text and breaks lexicon lookups.)
    words = [word for word in books['Text'] if word not in stop_words_file]
    text = ' '.join(words)
    emotions = NRCLex(text).raw_emotion_scores
    emotions = pd.DataFrame(emotions, index=[0])
    emotions = pd.melt(emotions)
    emotions.columns = ('Emotions', 'Count')
    emotions = emotions.sort_values('Count')
    # Plot emotions
    plt.figure(figsize=(12, 6))
    plt.title('{} and its emotional effects'.format(name))
    sns.set_style('dark')
    sns.set_context(context='notebook', font_scale=1.5)
    sns.barplot(x='Emotions', y='Count', data=emotions[0:8], palette='viridis')
def emotions(script):
    totalScript = script[0] + script[1]
    text_object = NRCLex(totalScript)
    # NRCLex also exposes .words, .sentences, .affect_list and .affect_dict;
    # the bare attribute accesses in the original were no-ops and are dropped.
    # raw_emotion_scores gives the actual frequency (count) of each emotion.
    print(text_object.raw_emotion_scores)
    # top_emotions lists the highest-scoring emotions.
    print(text_object.top_emotions)
    print(text_object.affect_frequencies)
    frequencies = text_object.affect_frequencies
    return list(frequencies.values())
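# Usage sketch: 'script' is assumed to be a two-part sequence of text (e.g.
# two halves of a screenplay); the return value follows the key order of
# affect_frequencies.
print(emotions(["The hero smiled warmly. ", "The villain fled in terror."]))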
def get_nrc_sentiments(df):
    emotion_cols = ['fear', 'anger', 'anticip', 'trust', 'surprise',
                    'positive', 'negative', 'sadness', 'disgust', 'joy']
    for col in emotion_cols:
        df[col] = 0
    df['text'] = df['text'].fillna("")
    df.reset_index(drop=True, inplace=True)
    for j in tqdm(range(df.shape[0])):
        d = NRCLex(df.loc[j, 'text']).affect_frequencies
        for col in emotion_cols:
            if col == 'anticip':
                # Some NRCLex versions keep 'anticip' as a zero placeholder and
                # report the real score under 'anticipation', so prefer the latter.
                df.loc[j, col] = d.get('anticipation', d.get('anticip', 0.0))
            else:
                df.loc[j, col] = d.get(col, 0.0)
    return df
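# Usage sketch with a made-up two-row frame: each row gains ten
# affect-frequency columns.
import pandas as pd
demo = pd.DataFrame({'text': ["what a joyful, wonderful day", "this is terrifying"]})
print(get_nrc_sentiments(demo)[['text', 'joy', 'fear']])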
def get_emotion_dict():
    FILES = {
        "Hans": Hans.words,
        "Fletcher": Fletcher.words,
        "Plankton": Plankton.words,
        "Snowball": Snowball.words,
        "HarleyQuinn": HarleyQuinn.words,
        "Jigsaw": Jigsaw.words,
        "Joker": Joker.words,
        "Vader": Vader.words,
        "Thanos": Thanos.words,
        "HannibalLecter": HannibalLecter.words,
        "JimMoriarty": JimMoriarty.words,
        "Scar": Scar.words
    }
    emotion_dict = {}
    for fn, words in FILES.items():
        word_dict = get_vader_score(words)
        polar_list = get_polar(word_dict)
        emotion_list = []
        for word in polar_list:
            emotion = NRCLex(word)
            emo = emotion.affect_dict
            # Keep only words that actually match the lexicon.
            if emo:
                emotion_list.append(emo)
        emotion_dict[fn] = emotion_list
    return emotion_dict
def get_score(text, search_for, exclude):
    # Skip texts containing any excluded word (whole-word match).
    for exclude_word in exclude:
        if re.search("\\b" + exclude_word + "\\b", text):
            return None
    # Score only texts containing at least one search word.
    for include_word in search_for:
        if re.search("\\b" + include_word + "\\b", text):
            return NRCLex(text).affect_frequencies['anger']
    return None
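# Usage sketch (both word lists are invented): a text is scored for anger
# only if it mentions a search word and none of the excluded ones.
print(get_score("the referee made me furious", ["referee"], ["politics"]))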
def classify_emotion(data):
    emotion_list = []
    for word in data:
        emotion = NRCLex(word)
        top_emo = emotion.top_emotions
        # Keep only words whose top emotion has a non-zero frequency.
        if top_emo[0][1] != 0.0:
            emotion_list.append(top_emo)
    return emotion_list
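# Usage sketch with a made-up token list: words whose top emotion has a
# non-zero frequency are kept, each as a list of (emotion, score) pairs.
print(classify_emotion(["happy", "terrible", "the"]))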
def main():
    #lines = f.readlines()
    lines = ['hello', "i'm happy", 'i enjoy watching netflix']
    score = []  # missing in the original, which caused a NameError on append
    for line in lines:
        l = preprocessing(line)
        score.append(NRCLex(l).raw_emotion_scores)
        # e.g. [('anticipation', 0.3333333333333333)]
        print(score)
    return score
def __getEmotions(text):
    """
    Get emotion affect frequencies for the input text.
    The values returned are continuous numbers between 0 & 1.
    Values are returned for the following emotions:
    fear, anger, trust, surprise, sadness, disgust, joy, anticipation

    :param text: the input text
    :return: a dict with a continuous value between 0-1 for each emotion
    """
    textObject = NRCLex(text)
    emotionsDict = dict(textObject.affect_frequencies)
    if 'anticipation' not in emotionsDict:
        emotionsDict['anticipation'] = 0.0
    # Drop the sentiment keys and the legacy 'anticip' placeholder; pop with a
    # default so a missing key cannot raise.
    for key in ['positive', 'negative', 'anticip']:
        emotionsDict.pop(key, None)
    return emotionsDict
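# Usage sketch: the returned dict holds only the eight NRC emotions; the
# sentiment keys and the 'anticip' placeholder are stripped.
print(__getEmotions("a surprising and delightful victory"))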
def get_features(song):
    """Use NRCLex to get emotional scores at the song level."""
    # Fixed output index for each affect.
    features = {
        'anger': 0, 'anticipation': 1, 'disgust': 2, 'fear': 3, 'joy': 4,
        'sadness': 5, 'surprise': 6, 'trust': 7, 'negative': 8, 'positive': 9
    }
    emotion_d = NRCLex(song).raw_emotion_scores
    vec = np.zeros(10)
    n_tokens = len(song.split())
    for k, v in emotion_d.items():
        # Normalize raw counts by the song's token count.
        vec[features[k]] = v / n_tokens
    return vec
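# Usage sketch with an invented lyric: raw counts are divided by the token
# count, giving a fixed-order ten-dimensional feature vector (expect
# non-zero entries at the joy/positive/fear/negative slots here).
print(get_features("joy joy fear"))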
def get_emotion_scores(text):
    """
    Input
    ----------
    text (string)

    Returns
    ----------
    emotions_output (list): two items:
        1) a dictionary of scores for each emotion, and
        2) a dictionary mapping each emotion to the words from the text
           associated with it
    """
    # Preprocess the text
    text = lowercase(text)
    text = strip_accents(text)
    text = strip_punctuation(text)
    text = strip_url(text)

    # Instantiate the text object
    text_object = NRCLex(text)

    # Get scores for: fear, anger, trust, surprise, sadness, disgust, joy, anticipation
    emotion_scores_dictionary = text_object.affect_frequencies
    keys = ['fear', 'anger', 'trust', 'surprise', 'sadness', 'disgust', 'joy', 'anticipation']
    filtered_emotion_scores_dictionary = dict(
        (k, round(emotion_scores_dictionary[k], 4))
        for k in keys if k in emotion_scores_dictionary)

    # affect_dict maps each word to its emotions; invert it so the keys are
    # emotions and the values are lists of words.
    words_emotions_dictionary = text_object.affect_dict
    emotions_words_dictionary = {}
    for word, word_emotions in words_emotions_dictionary.items():
        for emotion in word_emotions:
            emotions_words_dictionary.setdefault(emotion, []).append(word)

    # Keep the emotion/word associations for the eight emotions only.
    filtered_emotions_words_dictionary = dict(
        (k, emotions_words_dictionary[k]) for k in keys
        if k in emotions_words_dictionary)

    emotions_output = [filtered_emotion_scores_dictionary,
                       filtered_emotions_words_dictionary]
    return emotions_output
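# Usage sketch (assumes the lowercase/strip_* preprocessing helpers used
# above are defined elsewhere in the project):
scores, emotion_words = get_emotion_scores("The brave crew cheered at the rescue.")
print(scores)          # rounded frequency per emotion
print(emotion_words)   # e.g. {'joy': ['cheered', ...], ...}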
def multiCategorySentiment(text):
    text_object = NRCLex(text)
    # An earlier draft seeded a count dict with all ten affects, merged in
    # raw_emotion_scores and normalized by the total; affect_frequencies
    # already returns normalized frequencies, so it is used directly.
    return text_object.affect_frequencies
def find_main_emotion(text):
    emotion = NRCLex(text)
    top = emotion.top_emotions
    if not top:
        return ''
    if len(top) == 1:
        return top[0][0]
    if len(top) == 2:
        return top[0][0] + ' ' + top[1][0]
    # More than two tied emotions: return the name with the highest score.
    # (The original indexed top_emotions[i+1][i+1], iterated over len(text)
    # and assigned a score where a name was expected; all fixed here.)
    best_name, best_score = top[0]
    for name, score in top[1:]:
        if score > best_score:
            best_name, best_score = name, score
    return best_name
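# Usage sketch: one or two tied top emotions come back joined with a space;
# larger ties fall back to the single best-scoring name.
print(find_main_emotion("I dread the dark, haunted basement"))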
def populateEmotions(self, script):
    totalScript = script[0] + script[1]
    text_object = NRCLex(totalScript)
    frequencies = text_object.affect_frequencies
    # Look up frequencies by key: relying on dict position (as the original
    # did) breaks when NRCLex adds 'anticipation' alongside the legacy
    # 'anticip' placeholder.
    self.fear.append(frequencies.get('fear', 0.0))
    self.anger.append(frequencies.get('anger', 0.0))
    self.anticipation.append(
        frequencies.get('anticipation', frequencies.get('anticip', 0.0)))
    self.trust.append(frequencies.get('trust', 0.0))
    self.surprise.append(frequencies.get('surprise', 0.0))
    self.positive.append(frequencies.get('positive', 0.0))
    self.negative.append(frequencies.get('negative', 0.0))
    self.sadness.append(frequencies.get('sadness', 0.0))
    self.disgust.append(frequencies.get('disgust', 0.0))
    self.joy.append(frequencies.get('joy', 0.0))
    # Getting polarity and subjectivity
    blob_object = TextBlob(totalScript)
    self.polarity.append(blob_object.sentiment.polarity)
    self.subjectivity.append(blob_object.sentiment.subjectivity)
# Generate emotions per comment and store their result
overall_scores = {
    'fear': 0.0, 'anger': 0.0, 'anticip': 0.0, 'anticipation': 0.0,
    'trust': 0.0, 'surprise': 0.0, 'positive': 0.0, 'negative': 0.0,
    'sadness': 0.0, 'disgust': 0.0, 'joy': 0.0
}
for comment in comments_per_event[event]:
    comment_emotion = NRCLex(comment)
    for emotion in comment_emotion.affect_frequencies:
        overall_scores[emotion] += comment_emotion.affect_frequencies[emotion]
#print(overall_scores)

# Sum positive and negative emotions
emotion_types = {
    'positive': ['anticipation', 'trust', 'surprise', 'positive', 'joy'],
    'negative': ['fear', 'anger', 'negative', 'sadness', 'disgust']
}
emotion_scores = {'positive': 0.0, 'negative': 0.0}
for emotion_type in emotion_types:
    emotions = emotion_types[emotion_type]
    for emotion in emotions:
        emotion_scores[emotion_type] += overall_scores[emotion]
def get_sentiment_breakdown(string):
    text_object = NRCLex(string)
    frequencies = text_object.affect_frequencies
    return frequencies
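# Usage sketch: unlike the earlier get_sentiment_breakdown snippet, this
# variant returns NRCLex's flat affect_frequencies dict (all ten affects).
print(get_sentiment_breakdown("hopeful yet anxious"))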
def app():
    st.balloons()
    st.markdown("# Visualizations :art:")
    menu = ["Number of Tweets per Day", "Number of Retweets per Day",
            "Number of Likes per Day", "Most Common Tweets",
            "Sentiment Scores", "Common Entities"]
    choice = st.selectbox("View", menu)

    if choice == "Number of Retweets per Day":
        fig1 = px.histogram(covid_data, x="datetime", color="retweets",
                            title="Number of Retweets Per Day")
        st.write(fig1)
    elif choice == "Number of Likes per Day":
        fig2 = px.histogram(covid_data, x="datetime", color="likes",
                            title="Likes Per Day")
        st.write(fig2)
    elif choice == "Most Common Tweets":
        st.write("Word Cloud for Most Common Tweets")
        stop_words = get_stop_words('english')
        concat_quotes = ' '.join(
            [i for i in covid_data.text_without_stopwords.astype(str)])
        #print(concat_quotes[:10])
        stylecloud.gen_stylecloud(
            text=concat_quotes,
            icon_name='fab fa-twitter',
            palette='cartocolors.qualitative.Bold_9',
            background_color='white',
            output_name='tweets.png',
            collocations=False,
            custom_stopwords=stop_words
        )
        # Displaying image from a file
        Image(filename="tweets.png", width=780, height=780)
        st.image("tweets.png")

        # Display the most common words after stemming
        # Create separate columns
        table_col, input_col = st.beta_columns([3, 2])
        # Use tokenize or split, same results
        covid_data['text_stem'] = covid_data['text_stem'].apply(
            lambda x: str(x).split())
        # Count the frequency of words
        top = Counter(
            [item for sublist in covid_data['text_stem'] for item in sublist])
        with input_col:
            # Slider args are (min, max, default); the original passed a
            # default larger than the max, which Streamlit rejects.
            top_n = st.slider("How many of the common words do you want to see?",
                              0, 10, 5)
        temp = pd.DataFrame(top.most_common(top_n))
        temp.columns = ['common_words', 'count']
        with table_col:
            fig = px.pie(temp, values='count', names='common_words',
                         title='Top Common Words', hover_data=['common_words'],
                         color_discrete_sequence=px.colors.qualitative.G10)
            fig.update_layout(showlegend=False, width=450, height=450)
            st.write(fig)
    elif choice == "Sentiment Scores":
        pie_col, input_col = st.beta_columns([3, 2])
        # Convert the text_stem column to string type; NRCLex only takes
        # input of type str
        covid_data['text_stem'] = covid_data['text_stem'].astype(str)
        # Create a text object
        text_object = NRCLex(' '.join(covid_data['text_stem']))
        # Create a two-column dataframe from the raw emotion scores
        sentiment_scores = pd.DataFrame(
            list(text_object.raw_emotion_scores.items()))
        sentiment_scores = sentiment_scores.rename(
            columns={0: "Sentiment", 1: "Count"})
        with input_col:
            num_n = st.slider("Change Pie Chart Values Here", 0, 10, 5)
            sentiment_scores = sentiment_scores.head(num_n)
            btn = st.button("Show Table")
            colorscale = [[0, '#272D31'], [.5, '#ffffff'], [1, '#ffffff']]
            font = ['#FCFCFC', '#00EE00', '#008B00']
            if btn:
                fig = ff.create_table(sentiment_scores, colorscale=colorscale,
                                      font_colors=font)
                st.write(fig)
        with pie_col:
            fig = px.pie(sentiment_scores, values='Count', names='Sentiment',
                         title='Top Emotional Affects', hover_data=['Sentiment'],
                         color_discrete_sequence=px.colors.qualitative.Dark24)
            fig.update_traces(textposition='inside', textinfo='percent+label')
            fig.update_layout(showlegend=False, width=450, height=450,
                              font=dict(color='#383635', size=15))
            st.write(fig)
        # Create a dataframe with a dictionary of the sentiments
        st.title("Table Showing Words & Sentiments")
        sentiment_words = pd.DataFrame(list(text_object.affect_dict.items()),
                                       columns=['words', 'sentiments'])
        num_o = st.slider("Change table size", 0, 100, 50)
        sentiment_words = sentiment_words.head(num_o)
        fig = go.Figure(data=[go.Table(
            columnwidth=[1, 2],
            header=dict(values=list(sentiment_words[['words', 'sentiments']].columns),
                        fill_color='maroon',
                        align=['left', 'center'],
                        height=40,
                        font=dict(color='white', size=18)),
            cells=dict(values=[sentiment_words.words, sentiment_words.sentiments],
                       fill_color='lightseagreen',
                       align='left'))
        ])
        fig.update_layout(margin=dict(l=5, r=5, b=10, t=10))
        st.write(fig)
    elif choice == "Common Entities":
        st.write("Word Cloud for Most Common Entities")
        # Remove duplicate claims (not really needed since dropped already)
        words = covid_data.text_stem.unique()
        # NER list we'll use - perhaps could be expanded?
        nlp = en_core_web_sm.load()
        corpus = list(nlp.pipe(words[:700]))
        all_ents = defaultdict(int)
        for i, doc in enumerate(corpus):
            for ent in doc.ents:
                all_ents[str(ent)] += 1
        sorted_ents = pd.DataFrame(
            sorted(all_ents.items(), key=operator.itemgetter(1), reverse=True),
            columns=['entities', 'count'])
        stop_words = get_stop_words('english')
        hashtags = sorted_ents['entities'].dropna().tolist()
        unique_entities = " ".join(hashtags)
        stylecloud.gen_stylecloud(
            text=unique_entities,
            icon_name='fas fa-comments',
            palette='cartocolors.qualitative.Prism_8',
            background_color='white',
            output_name='entities.png',
            collocations=False,
            custom_stopwords=stop_words
        )
        # Displaying image from a file
        Image(filename="entities.png", width=780, height=780)
        st.image("entities.png")
    else:
        fig3 = px.histogram(covid_data, x="datetime",
                            title="Number of Tweets Per Day")
        st.write(fig3)
def QueryTwitter(search_string):
    # Fetching the configuration settings. The original hard-coded live API
    # credentials here; they are redacted below and should be loaded from a
    # config file or environment variables instead.
    key = "YOUR_CONSUMER_KEY"
    secret = "YOUR_CONSUMER_SECRET"
    access_token = "YOUR_ACCESS_TOKEN"
    access_secret = "YOUR_ACCESS_SECRET"

    # Authenticating: receiving access tokens
    auth = tweepy.OAuthHandler(consumer_key=key, consumer_secret=secret)
    auth.set_access_token(access_token, access_secret)
    # Instantiating the API with our access token
    api = tweepy.API(auth)

    tweet_list = []
    for tweet in limit_handled(tweepy.Cursor(api.search, q=search_string).items(50)):
        tweet_list.append(tweet)
    # We now extract details from the tweets and get the resultant DataFrame
    tweet_Data = filter_tweets(tweet_list)

    # In many places we have '@names', which carry no meaning, so they need
    # to be removed.
    def remove_pattern(text, pattern_regex):
        r = re.findall(pattern_regex, text)
        for i in r:
            text = re.sub(i, '', text)
        return text

    # We are keeping cleaned tweets in a new column called 'tidy_tweets'
    tweet_Data['tidy_tweets'] = np.vectorize(remove_pattern)(
        tweet_Data['translate'], r"@[\w]*: | *RT*")

    # Filter out all the words that contain links
    cleaned_tweets = []
    for index, row in tweet_Data.iterrows():
        words_without_links = [word for word in row.tidy_tweets.split()
                               if 'http' not in word]
        cleaned_tweets.append(' '.join(words_without_links))
    tweet_Data['tidy_tweets'] = cleaned_tweets

    tweet_Data['absolute_tidy_tweets'] = tweet_Data['tidy_tweets'].str.replace(
        "[^a-zA-Z# ]", "")

    # Filter out all the stop words
    stopwords_set = set(stopwords.words('english'))
    cleaned_tweets = []
    for index, row in tweet_Data.iterrows():
        words_without_stopwords = [
            word for word in row.absolute_tidy_tweets.split()
            if word not in stopwords_set and '#' not in word.lower()]
        cleaned_tweets.append(' '.join(words_without_stopwords))
    tweet_Data['absolute_tidy_tweets'] = cleaned_tweets

    tokenized_tweet = tweet_Data['absolute_tidy_tweets'].apply(lambda x: x.split())
    for i, tokens in enumerate(tokenized_tweet):
        tokenized_tweet[i] = ' '.join(tokens)
    tweet_Data['absolute_tidy_tweets'] = tokenized_tweet

    def generate_wordcloud_1(all_words):
        wordcloud = WordCloud(width=800, height=500, random_state=21,
                              max_font_size=100, relative_scaling=0.5,
                              colormap='Dark2').generate(all_words)
        plt.figure(figsize=(14, 10))
        plt.imshow(wordcloud, interpolation="bilinear")
        plt.axis('off')
        #plt.title("POSITIVE WORD CLOUD")
        plt.savefig('C:\\Users\\Admin\\Desktop\\twitter\\static\\poswc.png')

    def generate_wordcloud_2(all_words):
        wordcloud = WordCloud(width=800, height=500, random_state=21,
                              max_font_size=100, relative_scaling=0.5,
                              colormap='Dark2').generate(all_words)
        plt.figure(figsize=(14, 10))
        plt.imshow(wordcloud, interpolation="bilinear")
        plt.axis('off')
        #plt.title("NEGATIVE WORD CLOUD")
        plt.savefig('C:\\Users\\Admin\\Desktop\\twitter\\static\\negwc.png')

    all_words = ' '.join([text for text in tweet_Data['absolute_tidy_tweets']
                          [tweet_Data.sentiments_group == 'positive']])
    generate_wordcloud_1(all_words)
    all_words = ' '.join([text for text in tweet_Data['absolute_tidy_tweets']
                          [tweet_Data.sentiments_group == 'negative']])
    generate_wordcloud_2(all_words)

    # Function to collect hashtags
    def hashtag_extract(text_list):
        hashtags = []
        # Loop over the tweets in the list
        for text in text_list:
            ht = re.findall(r"#(\w+)", text)
            hashtags.append(ht)
        return hashtags

    def generate_hashtag_freqdist(hashtags):
        a = nltk.FreqDist(hashtags)
        d = pd.DataFrame({'Hashtag': list(a.keys()), 'Count': list(a.values())})
        # Selecting the top 25 most frequent hashtags
        d = d.nlargest(columns="Count", n=25)
        plt.figure(figsize=(16, 7))
        ax = sns.barplot(data=d, x="Hashtag", y="Count")
        plt.xticks(rotation=80)
        ax.set(ylabel='Count')
        plt.savefig('C:\\Users\\Admin\\Desktop\\twitter\\static\\hashtag.png')

    hashtags = hashtag_extract(tweet_Data['tidy_tweets'])
    hashtags = sum(hashtags, [])
    generate_hashtag_freqdist(hashtags)

    from nrclex import NRCLex
    words = ' '.join([text for text in tweet_Data['absolute_tidy_tweets']])
    text_object = NRCLex(words)
    nrc = text_object.affect_frequencies
    e = pd.DataFrame({'Emotion': list(nrc.keys()), 'Frequency': list(nrc.values())})
    plt.figure(figsize=(16, 7))
    ax = sns.barplot(data=e, x="Emotion", y="Frequency")
    plt.xticks(rotation=80)
    ax.set(ylabel='Frequency')
    plt.savefig('C:\\Users\\Admin\\Desktop\\twitter\\static\\nrc_lexicon.png')

    (doughnut, sentiment_map, sources_plot, sentiment_pie,
     retweet_table) = make_maps(tweet_Data)
    return (doughnut, sentiment_map, sources_plot, sentiment_pie, retweet_table)
def emotion_plotter(text):
    # text = "Astronaut science is best most perfect great thing"
    print(text)
    # ---EMOTION ANALYSIS---
    # Tokenize text
    text_tokens = []
    text_tokens.append(regexp_tokenize(text.lower(), r"[\w']+"))

    # Remove stop words and assign part of speech
    filtered_tokens = []
    for token in text_tokens[0]:
        if token not in stopwords.words('english'):
            filtered_tokens.append(pos_tag(word_tokenize(token)))

    # Lemmatize words (identify base words from other forms of the word)
    lemmatizer = WordNetLemmatizer()
    lemmatized_tokens = []
    morphy_tag = {'NN': 'n', 'JJ': 'a', 'VB': 'v', 'RB': 'r'}
    for token in filtered_tokens:
        for word, tag in token:
            morphy_pos = morphy_tag.get(tag, '')
            if morphy_pos in ["a", "n", "v"]:
                lemmatized_tokens.append(lemmatizer.lemmatize(word, pos=morphy_pos))
            else:
                lemmatized_tokens.append(lemmatizer.lemmatize(word))

    # Join lemmatized words back into a sentence
    lemmatized_text = " ".join(lemmatized_tokens)
    print(lemmatized_text)

    # Get emotions
    text_object = NRCLex(lemmatized_text)

    # Create emotion data for the plot: one (word, emotion) point per match
    emotion_data = {"words": [], "emotions": []}
    for word, emotions in text_object.affect_dict.items():
        for emotion in emotions:
            emotion_data["words"].append(word)
            emotion_data["emotions"].append(emotion.title())
    print(emotion_data)

    emotion_trace = {
        "x": emotion_data["words"],
        "y": emotion_data["emotions"],
        "mode": "markers",
        "marker": {"size": 40, "color": "midnightblue"}
    }
    emotion_plot_data = [emotion_trace]
    emotion_plot_layout = {
        "title": {"text": "Emotions Detected in Your Headline"},
        "xaxis": {
            "type": "category",
            "title": "Your Words",
        },
        "yaxis": {
            "type": "category",
            "categoryorder": "array",
            "categoryarray": [
                "Disgust", "Anger", "Fear", "Sadness", "Negative",
                "Anticipation", "Positive", "Surprise", "Trust", "Joy",
            ]
        }
    }
    return emotion_plot_data, emotion_plot_layout
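# Usage sketch (assumes the NLTK resources used above are installed): the
# returned trace/layout dicts follow plotly's figure schema, so they can be
# rendered directly.
import plotly.graph_objects as go
data, layout = emotion_plotter("Scientists celebrate stunning breakthrough")
go.Figure(data=data, layout=layout).show()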
    content_type='audio/mp3',
    model='en-US_NarrowbandModel',
    continuous=True).get_result()  # 'continious' in the original was a typo
#print(res)  # transcript

########################################################################
# Write transcript to .txt file
res_string = str(res)
# Brittle: slices the transcript out of the stringified JSON response by
# fixed offsets; indexing the parsed response would be more robust.
string = res_string[81:-26]
print('text --->', string)

#############################################################
# Emotions of text
from nrclex import NRCLex

#text_file = open('sad.txt')
#text = text_file.read()
text = string
text_object = NRCLex(text)
print(text_object.raw_emotion_scores)
print(text_object.top_emotions)
print(text_object.affect_frequencies)
]))
all_song_lyrics['lemma_count'] = all_song_lyrics['lyrics_lemmatized'].apply(
    lambda x: len(x.split(' ')))
all_song_lyrics['unique_lemmas_on_song'] = all_song_lyrics[
    'lyrics_lemmatized'].apply(lambda x: len(set(x.split(' '))))

# + [markdown] code_folding=[]
# ### NRC Lexicon
# -

from nrclex import NRCLex

# Instantiate text object (for best results, 'text' should be unicode).
all_song_lyrics.loc[:, 'nrc_emotions'] = all_song_lyrics['lyrics_clean'].apply(
    lambda x: NRCLex(x).raw_emotion_scores)
all_song_lyrics.loc[:, 'nrc_emotions_total_words'] = all_song_lyrics[
    'lyrics_clean'].apply(lambda x: len(NRCLex(x).words))
all_song_lyrics.head()

# Save the dataset up to this point
all_song_lyrics.to_csv('./all_song_lyrics_info.csv', index=False)
#flair_sentiment_df.head()

# ### Sentiments

classifier = TextClassifier.load('sentiment')
all_song_lyrics.head()
#all_song_lyrics.index.tolist()
if review:
    # st.markdown('## Name Entity Recognition (NER)')
    docx = nlp(review)
    spacy_streamlit.visualize_ner(docx, show_table=False,
                                  labels=nlp.get_pipe('ner').labels)

    col1, _, col2, _, col3 = st.beta_columns([8, 1, 6, 1, 6])
    col1.markdown('### Processed text')
    review_p = process_review(review)
    col1.markdown(review_p)

    ## Emotional sentiment
    col2.markdown('### Emotions')
    col2.table(pd.Series(NRCLex(review).raw_emotion_scores))

    col3.markdown('### Sentiment')
    blob = TextBlob(review)
    col3.markdown('**Note:** Positive = 1 & Negative = 0')
    col3.markdown(f'Polarity : {blob.sentiment.polarity:0.2f}')
    with col3.beta_expander('Looks like a wrong prediction?'):
        correct = st.selectbox('Select the correct label',
                               ('None', 'Positive', 'Neutral', 'Negative'))
        if correct != 'None':
            st.balloons()

elif tab == 'Aggregated Stats':
    st.markdown('## Aggregated Stats')
    df = load_data()
overall_scores = {
    'fear': 0.0, 'anger': 0.0, 'anticip': 0.0, 'anticipation': 0.0,
    'trust': 0.0, 'surprise': 0.0, 'positive': 0.0, 'negative': 0.0,
    'sadness': 0.0, 'disgust': 0.0, 'joy': 0.0
}
# Use the index to find the corresponding uncleaned comment
for comment_id in ordered_documents[event][topic]:
    comment = comments_per_event_uncleaned[event][comment_id]
    comment_emotion = NRCLex(comment)
    for emotion in comment_emotion.affect_frequencies:
        overall_scores[emotion] += comment_emotion.affect_frequencies[emotion]

# Sum positive and negative emotions
emotion_types = {
    'positive': ['anticipation', 'trust', 'surprise', 'positive', 'joy'],
    'negative': ['fear', 'anger', 'negative', 'sadness', 'disgust']
}
emotion_scores = {'positive': 0.0, 'negative': 0.0}
for emotion_type in emotion_types:
    emotions = emotion_types[emotion_type]
    for emotion in emotions:
        emotion_scores[emotion_type] += overall_scores[emotion]
def _process_fragment(fragment, xhtml_dict):
    # Annotate each fragment entry with its plain text and affect frequencies.
    for key, val in list(fragment.items()):
        fragment[key]['text'] = xhtml_dict[key]
        fragment[key]['emotion'] = NRCLex(xhtml_dict[key]).affect_frequencies
    return fragment
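# Usage sketch with invented fragment/xhtml dicts: each fragment entry is
# annotated in place with its text and its NRCLex affect frequencies.
fragment = {'p1': {}}
xhtml = {'p1': "a terrifying discovery"}
print(_process_fragment(fragment, xhtml))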
def analyze_album(album_id):
    tracks = []
    track_ids = []
    results = sp.album_tracks(album_id)
    tracks.extend(results['items'])
    while results['next']:
        results = sp.next(results)
        tracks.extend(results['items'])
    for track in tracks:
        track_ids.append(track['id'])

    analysis_json = sp.audio_features(tracks=track_ids)
    analysis_json = list(filter(None, analysis_json))
    tracks_json = sp.album_tracks(album_id)["items"]
    tracks_json = list(filter(None, tracks_json))
    analysis_df = json_normalize(analysis_json)
    tracks_df = json_normalize(tracks_json)
    df = analysis_df.merge(tracks_df, on='id', how='inner')

    album_name = sp.album(album_id)["name"]
    album_name = clean_lyrics(album_name)
    release_date = sp.album(album_id)["release_date"]
    artist = json_normalize(
        sp.album_tracks(album_id)["items"][0]["artists"])["name"][0]

    keys = {0: 'C', 1: 'C#', 2: 'D', 3: 'D#', 4: 'E', 5: 'F',
            6: 'F#', 7: 'G', 8: 'G#', 9: 'A', 10: 'A#', 11: 'B'}
    df["key"] = df['key'].map(keys, na_action='ignore')
    mode = {0: 'Minor', 1: 'Major'}
    df["mode"] = df['mode'].map(mode, na_action='ignore')
    df["duration"] = (df["duration_ms_x"] / (1000 * 60)) % 60
    df['track'] = df['track_number']
    df = df.loc[df["disc_number"] == 1]
    df = df.set_index('track_number')
    df["album_id"] = album_id

    sent_score = []
    song_lyrics = []
    new_titles = []
    genius_url = []
    genius_songid = []
    keywords = []
    affect_freq = []
    msttr = []
    lexical_depth = []
    cliche_word_perc = []
    cliche_total_count = []
    df["metacritic"] = search_metacritic(artist, album_name)

    for title in df["name"]:
        try:
            # Strip remaster/mono/featuring suffixes from the track title
            # (collapses the repeated split chains of the original).
            for marker in ("- Remaster", "[Remaster", "(Remaster",
                           "- Mono", "(Mono", "[Mono",
                           "(with", "[with", "(featuring",
                           "- featuring", "[featuring"):
                title = title.split(marker, 1)[0]
            new_titles.append(title)

            remote_song_info = request_song_info(title, artist)
            matching_artist = remote_song_info['result']['primary_artist']['name']
            matching_artist = matching_artist.lower()
            ratio = levenshtein_ratio_and_distance(artist.lower(),
                                                   matching_artist,
                                                   ratio_calc=True)
            if ratio > .6:
                url = remote_song_info['result']['url']
                genius_url.append(url)
                genius_songid.append(str(remote_song_info['result']['id']))
                lyrics = get_lyrics(url)
                flt = ld.flemmatize(clean_lyrics(lyrics))
                clean_flt = [x for x in flt if x.lower() not in excluded_words]
                spacy_stopwords = list(spacy.lang.en.stop_words.STOP_WORDS)
                depth = sum(
                    [1 for x in clean_flt if x.lower() not in spacy_stopwords])
                cliche_count = sum(
                    [1 for x in clean_flt if x.lower() in cliche_words])
                cliche_perc = cliche_count / depth
                if depth >= 5:
                    msttr.append(ld.msttr(clean_flt, window_length=100))
                    lexical_depth.append(depth)
                    cliche_word_perc.append(cliche_perc)
                    cliche_total_count.append(cliche_count)
                else:
                    msttr.append(None)
                    lexical_depth.append(None)
                    cliche_word_perc.append(None)
                    cliche_total_count.append(None)
                keywords.append(
                    return_keywords(preprocess(clean_lyrics(lyrics))))
                sent = sentiment_analyzer_scores(clean_lyrics(lyrics))
                sent = round((sent + 1) / 2, 3)
                sent_score.append(sent)
                text_object = NRCLex(lyrics)
                affect_freq.append(text_object.affect_frequencies)
                song_lyrics.append(clean_lyrics(lyrics))
            else:
                sent_score.append(None)
                song_lyrics.append(None)
                keywords.append(None)
                affect_freq.append(None)
                genius_url.append(None)
                genius_songid.append(None)
                msttr.append(None)
                lexical_depth.append(None)
                cliche_word_perc.append(None)
                cliche_total_count.append(None)
        except Exception:  # a bare except would also swallow KeyboardInterrupt
            sent_score.append(None)
            song_lyrics.append(None)
            keywords.append(None)
            affect_freq.append(None)
            # genius_url.append(None)
            # genius_songid.append(None)
            msttr.append(None)
            lexical_depth.append(None)
            cliche_word_perc.append(None)
            cliche_total_count.append(None)

    df['title'] = new_titles
    df["lyr_valence"] = sent_score
    df['mood'] = np.where(df['lyr_valence'].isnull(), df['valence'],
                          round((df["lyr_valence"] + df["valence"]) / 2, 3))
    df["mood_discrep"] = df["valence"] - df["lyr_valence"]
    df["lyrics"] = song_lyrics
    pos_neg(df, 'lyr_valence_des', 'lyr_valence')
    pos_neg(df, 'valence_des', 'valence')
    pos_neg(df, 'mood_des', 'mood')
    high_low(df, 'energy_des', 'energy')
    high_low(df, 'dance_des', 'danceability')
    df["artist"] = artist
    df["album_name"] = album_name
    df["release_date"] = release_date
    df["sp_id"] = df["id"]
    print(album_name)
    print(genius_songid)
    df["genius_songid"] = genius_songid
    df["url"] = genius_url
    df['keywords'] = keywords
    df['affect_freq'] = affect_freq
    df["lyr_valence"] = df["lyr_valence"].replace({np.nan: None})
    df["mood_discrep"] = df["mood_discrep"].replace({np.nan: None})
    df["lyr_valence_des"] = df["lyr_valence_des"].replace({'0': 'Not Found'})
    df['msttr'] = msttr
    df['lexical_depth'] = lexical_depth
    df['cliche_word_perc'] = cliche_word_perc
    df['cliche_total_words'] = cliche_total_count
    df["lexical_depth"] = df["lexical_depth"].replace({np.nan: None})
    df["msttr"] = df["msttr"].replace({np.nan: None})
    df["cliche_word_perc"] = df["cliche_word_perc"].replace({np.nan: None})
    df["cliche_total_words"] = df["cliche_total_words"].replace({np.nan: None})
    df = df.rename(columns={"valence": "mus_valence"})
    df = df.rename(columns={"external_urls.spotify": "external_urls_spotify"})

    energy_z = abs(stats.zscore(df["energy"]))
    mood_z = abs(stats.zscore(df["mood"]))
    mus_valence_z = abs(stats.zscore(df["mus_valence"]))
    dance_z = abs(stats.zscore(df["danceability"]))
    duration_z = abs(stats.zscore(df["duration"]))
    loudness_z = abs(stats.zscore(df["loudness"]))
    if None in df["msttr"].values:
        df["uniqueness"] = (energy_z + dance_z + duration_z + loudness_z +
                            mood_z) / 5
    else:
        lex_diversity = abs(stats.zscore(df["msttr"]))
        lyr_valence_z = abs(stats.zscore(df["lyr_valence"]))
        df["uniqueness"] = (energy_z + dance_z + duration_z + loudness_z +
                            lyr_valence_z + mus_valence_z + lex_diversity) / 7

    df = df[[
        "title", "energy", "mus_valence", "lyr_valence", "mood",
        "danceability", "loudness", "tempo", "key", "mode", "time_signature",
        "duration", "sp_id", "track", "lyrics", "speechiness", "acousticness",
        "instrumentalness", "liveness", "artist", "album_name", "disc_number",
        "explicit", "external_urls_spotify", "mood_discrep", "release_date",
        "uniqueness", "lyr_valence_des", "valence_des", "mood_des",
        "energy_des", "dance_des", "album_id", "url", "genius_songid",
        "keywords", "affect_freq", "metacritic", "msttr", "lexical_depth",
        "cliche_word_perc", "cliche_total_words"
    ]]
    df = df.to_dict('records')
    return df
#custom_tweet = "YES, i think yes think A bat cat loves think yes car car yes"
wordsInFile = numpy.array([])
for line in file_tokens:
    line = line.replace("Mike Mazzei: ", "")
    custom_tokens = remove_noise(word_tokenize(line))
    # Check for an empty line first so line[0] cannot raise an IndexError.
    if (len(line.strip()) != 0 and not line[0].isdigit()
            and not line.startswith("Jacob")):
        # This is the positive/negative classifier
        #print(classifier.classify(dict([token, True] for token in custom_tokens)), " : ", line)
        wordsInFile = numpy.append(wordsInFile, custom_tokens)

        # Cheer up EMOLex
        text_object = NRCLex(line)
        print(line)
        print(text_object.raw_emotion_scores)
        print(multi_topic_scorer(line, topic_dictionary, sim_thresh=0.7,
                                 return_hits=True))
        # print(custom_tokens)

# with open("/Users/thomaskennedy/downloads/interview1.txt", "r") as myfile:
#     custom = myfile.readlines()
# print(wordsInFile)
word_counts = Counter(wordsInFile)  # renamed: the original shadowed the Counter class
description = description.lower()
description = nltk.word_tokenize(description)
description = [word for word in description
               if word not in set(stopwords.words("english"))]
lemma = nltk.WordNetLemmatizer()
description = [lemma.lemmatize(word) for word in description]
description = " ".join(description)
description_list.append(description)

pd_tweet["normalized_text_new"] = description_list
pd_tweet.head(5)

# In[56]:

from nrclex import NRCLex
text_object = NRCLex(' '.join(pd_tweet['normalized_text_new']))

# In[57]:

text_object.affect_frequencies

# In[58]:

text_object.top_emotions

# In[59]:
def lyrics_to_emotions(artistName, songName, songLyrics, trackID):
    # No lyrics: return None for every emotion. (The original had a second,
    # inconsistent empty-lyrics check, removed here.)
    if songLyrics == "":
        return [artistName, songName, trackID] + [None] * 10

    # Output order matches the original return statement.
    emotion_names = ['positive', 'negative', 'anger', 'anticipation',
                     'disgust', 'fear', 'joy', 'sadness', 'surprise', 'trust']
    totals = {e: 0.0 for e in emotion_names}
    counts = {e: 0 for e in emotion_names}

    # Clean and tokenize the lyrics; tokenization segments the song lyrics
    # into atomic elements.
    raw = songLyrics.lower()
    tokens = tokenizer.tokenize(raw)

    # Remove stop words from the tokens.
    en_stop = get_stop_words('en')
    stopped_tokens = [t for t in tokens if t not in en_stop]
    print('Stopped')
    print(stopped_tokens)

    # Lemmatize tokens and drop one- and two-letter words.
    wnl = WordNetLemmatizer()
    lem_tokens = [wnl.lemmatize(word) for word in stopped_tokens]
    lem_tokens = [word for word in lem_tokens if len(word) > 2]
    print("lem_tokens")
    print(lem_tokens)

    # For each token, accumulate the non-zero scores per emotion. Averaging
    # happens once after the loop; the original divided the running total
    # inside the loop, repeatedly re-averaging already-averaged values.
    for item in lem_tokens:
        emotion = NRCLex(item)
        for name, score in emotion.top_emotions:
            if score != 0 and name in totals:
                print("word:", item, name, "score:", score)
                totals[name] += score
                counts[name] += 1

    averages = [totals[e] / counts[e] if counts[e] > 0 else 0
                for e in emotion_names]
    return [artistName, songName, trackID] + averages