Example #1
def emotion_storyline():
    empty_synopsis = "It looks like we don't have a Synopsis for this title yet."
    file_list = get_dataset_list()
    for file in file_list:
        with open(file) as data_file:
            data = json.load(data_file)
        for item in data:
            item_id = item['id']  # avoid shadowing the built-in id()
            with open('../synopsis/' + item_id + '.txt', 'r') as f:
                synopsis = f.read()
            if empty_synopsis in synopsis:
                script_txt = item['meta'].get('storyline')
                if script_txt is None:
                    script_txt = item.get('description')
            else:
                script_txt = synopsis
            if not script_txt:  # covers both None and the empty string
                continue
            text_object = NRCLex(script_txt)
            raw_emotion_scores = text_object.raw_emotion_scores
            s = sum(raw_emotion_scores.values())
            for key, value in raw_emotion_scores.items():
                item[key] = value / s
        with open(file, 'w') as result_file:
            json.dump(data, result_file)
    return
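The core of this example is turning raw NRC counts into proportions. A minimal standalone sketch of that step, assuming only that nrclex is installed (pip install nrclex):

from nrclex import NRCLex

def normalized_emotion_scores(text):
    # raw_emotion_scores is a dict of raw lexicon-hit counts per emotion
    scores = NRCLex(text).raw_emotion_scores
    total = sum(scores.values())
    if total == 0:  # no lexicon hits at all
        return {}
    return {emotion: count / total for emotion, count in scores.items()}

print(normalized_emotion_scores("I am thrilled but also a little afraid."))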
Example #2
def get_sentiment_breakdown(string):
    text_object = NRCLex(string)
    raw_scores = text_object.raw_emotion_scores
    affect_dict = text_object.affect_dict
    emotion_total = 0
    sentiment_total = 0
    emotions = {}
    sentiments = {}

    for key, value in raw_scores.items():
        if key != "positive" and key != "negative":
            emotion_total += value
        else:
            sentiment_total += value

    for key, value in raw_scores.items():
        if key != "positive" and key != "negative":
            emotions[key] = value / emotion_total
        else:
            sentiments[key] = value / sentiment_total

    return {
        "emotions": emotions,
        "sentiments": sentiments,
        "raw_scores": raw_scores,
        "affect_dict": affect_dict
    }
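A quick usage sketch; the exact keys and values depend on which words hit the NRC lexicon:

breakdown = get_sentiment_breakdown("The storm was terrifying, but the rescue was a joy.")
print(breakdown["emotions"])    # emotion proportions, e.g. shares for fear and joy
print(breakdown["sentiments"])  # positive/negative proportions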
Example #3
def get_emotions(df):
    emotions = {
        'fear': [],
        'anger': [],
        'anticipation': [],
        'trust': [],
        'surprise': [],
        'positive': [],
        'negative': [],
        'sadness': [],
        'disgust': [],
        'joy': []
    }
    for index, row in df.iterrows():
        tweet = row['Text']
        text_object = NRCLex(tweet)
        length_sentence = len(tweet.split())
        absolute_numbers = text_object.raw_emotion_scores  # a dict of raw counts, similar to the one above
        for emot in emotions:
            try:
                val = absolute_numbers[emot] / length_sentence
            except (KeyError, ZeroDivisionError):
                val = 0
            emotions[emot].append(val)
    for e in emotions:
        df[e] = emotions[e]
    df.to_csv(
        '/Users/gabriel/PycharmProjects/Finance/Data/4B-Ungrouped non-ML sentiment/FB_1d_no_ML.csv'
    )
    return True
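A sketch of the same per-row scoring without the hard-coded output path, under the same assumption of a DataFrame with a 'Text' column; normalizing raw counts by the row's token count matches the loop above:

from nrclex import NRCLex
import pandas as pd

def add_emotion_columns(df, text_col='Text'):
    def row_scores(text):
        # normalize raw lexicon counts by the number of tokens in the row
        tokens = text.split()
        scores = NRCLex(text).raw_emotion_scores
        return {k: v / len(tokens) if tokens else 0 for k, v in scores.items()}
    score_frame = pd.DataFrame([row_scores(t) for t in df[text_col]]).fillna(0)
    return pd.concat([df.reset_index(drop=True), score_frame], axis=1)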
Example #4
def emotions(books, name):
    words = [word for word in books['Text'] if word not in stop_words_file]
    text = ' '.join(words)  # join tokens into one unicode string for NRCLex (str() over encoded bytes produced b'...' junk)
    emotions = NRCLex(text)
    emotions = emotions.raw_emotion_scores
    emotions = pd.DataFrame(emotions, index=[0])
    emotions = pd.melt(emotions)
    emotions.columns = ('Emotions', 'Count')
    emotions = emotions.sort_values('Count')

    #plot emotions
    plt.figure(figsize=(12, 6))
    plt.title(('{} and its emotional effects').format(name))
    sns.set_style('dark')
    sns.set_context(context='notebook', font_scale=1.5)
    sns.barplot(x='Emotions', y='Count', data=emotions[0:8], palette='viridis')
Example #5
def emotions(script):
    totalScript = script[0] + script[1]
    text_object = NRCLex(totalScript)

    # Available inspection attributes:
    #   text_object.words        -> word list
    #   text_object.sentences    -> sentence list
    #   text_object.affect_list  -> affect list
    #   text_object.affect_dict  -> affect dictionary

    # Raw emotional counts: the actual frequency of each emotion
    print(text_object.raw_emotion_scores)

    # Highest emotions
    print(text_object.top_emotions)

    print(text_object.affect_frequencies)

    frequencies = text_object.affect_frequencies

    # return the affect frequency values in lexicon key order
    return list(frequencies.values())
Example #6
def get_nrc_sentiments(df):
    df['fear'] = 0
    df['anger'] = 0
    df['anticip'] = 0
    df['trust'] = 0
    df['surprise'] = 0
    df['positive'] = 0
    df['negative'] = 0
    df['sadness'] = 0
    df['disgust'] = 0
    df['joy'] = 0
    df['text'] = df['text'].fillna("")
    df.reset_index(drop=True, inplace=True)
    for j in tqdm(range(df.shape[0])):
        d = NRCLex(df.loc[j, 'text']).affect_frequencies
        df.loc[j, 'fear'] = d['fear']
        df.loc[j, 'anger'] = d['anger']
        # key name varies across nrclex versions: 'anticip' in older builds, 'anticipation' in newer ones
        df.loc[j, 'anticip'] = d.get('anticip', d.get('anticipation', 0.0))
        df.loc[j, 'trust'] = d['trust']
        df.loc[j, 'surprise'] = d['surprise']
        df.loc[j, 'positive'] = d['positive']
        df.loc[j, 'negative'] = d['negative']
        df.loc[j, 'sadness'] = d['sadness']
        df.loc[j, 'disgust'] = d['disgust']
        df.loc[j, 'joy'] = d['joy']
    return df
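A more compact variant under the same assumptions; note that nrclex exposes the anticipation frequency as 'anticip' in some versions and 'anticipation' in others, so this sketch falls back between the two:

from nrclex import NRCLex
import pandas as pd

EMOTIONS = ['fear', 'anger', 'trust', 'surprise', 'positive',
            'negative', 'sadness', 'disgust', 'joy']

def get_nrc_sentiments_compact(df):
    df['text'] = df['text'].fillna("")
    freqs = df['text'].apply(lambda t: NRCLex(t).affect_frequencies)
    for emotion in EMOTIONS:
        df[emotion] = freqs.apply(lambda d, e=emotion: d.get(e, 0.0))
    # key name for anticipation varies across nrclex versions
    df['anticip'] = freqs.apply(lambda d: d.get('anticip', d.get('anticipation', 0.0)))
    return df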
Example #7
def get_emotion_dict():
    FILES = {
        "Hans": Hans.words,
        "Fletcher": Fletcher.words,
        "Plankton": Plankton.words,
        "Snowball": Snowball.words,
        "HarleyQuinn": HarleyQuinn.words,
        "Jigsaw": Jigsaw.words,
        "Joker": Joker.words,
        "Vader": Vader.words,
        "Thanos": Thanos.words,
        "HannibalLecter": HannibalLecter.words,
        "JimMoriarty": JimMoriarty.words,
        "Scar": Scar.words
    }
    emotion_dict = {}

    for fn, words in FILES.items():
        word_dict = get_vader_score(words)
        polar_list = get_polar(word_dict)
        emotion_list = []

        for word in polar_list:
            emotion = NRCLex(word)
            emo = emotion.affect_dict
            if emo:
                emotion_list.append(emo)
        
        emotion_dict[fn] = emotion_list

    return emotion_dict
Example #8
def get_score(text, search_for, exclude):
    for exclude_word in exclude:
        # re.escape guards against words containing regex metacharacters
        if re.search(r"\b" + re.escape(exclude_word) + r"\b", text):
            return None
    for include_word in search_for:
        if re.search(r"\b" + re.escape(include_word) + r"\b", text):
            return NRCLex(text).affect_frequencies['anger']
    return None
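Usage sketch (the word lists here are hypothetical): the function returns the 'anger' affect frequency only when an include word matches and no exclude word does:

score = get_score("the furious crowd pushed forward",
                  search_for=["crowd"], exclude=["parade"])
print(score)  # a float when matched, otherwise None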
Example #9
def classify_emotion(data):
    emotion_list = []

    for word in data:
        emotion = NRCLex(word)
        top_emo = emotion.top_emotions
        if top_emo[0][1] != 0.0:
            emotion_list.append(top_emo)

    return emotion_list
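For context, top_emotions returns the (emotion, frequency) pairs tied for the highest frequency, which is why the zero check on the first pair above tells whether the word hit the lexicon at all. A quick check of its shape:

from nrclex import NRCLex
print(NRCLex("gift").top_emotions)  # non-zero pairs for a lexicon word
print(NRCLex("zzzz").top_emotions)  # all pairs at 0.0 for a miss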
Example #10
def main():
    #lines = f.readlines()
    lines = ['hello', 'i\'m happy', 'i enjoy watching netflix']
    score = []  # was referenced without being defined

    for line in lines:
        l = preprocessing(line)
        score.append(NRCLex(l).raw_emotion_scores)  # one raw-count dict per line
    print(score)
    return score
Example #11
def __getEmotions(text):
    """
    Get emotion affect frequencies for the input text. The values returned are continuous numbers between 0 & 1.
    Values are returned for the following emotions: fear, anger, trust, surprise, sadness, disgust, joy, anticipation

    :param text: the input text
    :return: a dict with a continuous value between 0-1 for each emotion
    """
    textObject = NRCLex(text)
    emotionsDict = textObject.affect_frequencies
    if 'anticipation' not in emotionsDict:
        emotionsDict['anticipation'] = 0.0
    # drop the sentiment keys; pop with a default since 'anticip' is absent
    # in some nrclex versions
    for key in ('positive', 'negative', 'anticip'):
        emotionsDict.pop(key, None)
    return emotionsDict
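A quick check of the returned shape; after the pops, only the eight emotion keys remain:

print(__getEmotions("a dark and fearful night"))
# e.g. {'fear': ..., 'anger': ..., 'trust': ..., 'surprise': ...,
#       'sadness': ..., 'disgust': ..., 'joy': ..., 'anticipation': ...}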
Example #12
def get_features(song):
    """Use NRCLex to get emotional scores at the song level."""
    features = {
        'anger': 0,
        'anticipation': 1,
        'disgust': 2,
        'fear': 3,
        'joy': 4,
        'sadness': 5,
        'surprise': 6,
        'trust': 7,
        'negative': 8,
        'positive': 9
    }

    emotion_d = NRCLex(song).raw_emotion_scores
    vec = np.zeros(10)

    for k, v in emotion_d.items():
        vec[features[k]] = (v / len(song.split()))

    return vec
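Usage sketch (assumes numpy is imported as np, as the function does); the vector positions follow the `features` index map above:

vec = get_features("happy happy joy joy fear")
print(vec)  # 10-dimensional vector of per-token emotion rates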
Example #13
def get_emotion_scores(text):

    """
    Input
    ----------
    text(string)

    Returns
    ----------
    emotions_dictionary (list): Two items in the list: 1)  a dictionary of scores 
                                for each emotions and 2) a dictionary of emotions
                                and words from the text associated with the emotion
    """

    # preprocess the text 
    text = lowercase(text) 
    text = strip_accents(text)
    text = strip_punctuation(text)
    text = strip_url(text) 

    # instantiate the text object 
    text_object = NRCLex(text)

    # get scores for: fear, anger, trust, surprise, sadness, disgust, joy, anticipation 
    emotion_scores_dictionary = text_object.affect_frequencies
    keys = ['fear', 'anger', 'trust', 'surprise', 'sadness', 'disgust', 'joy', 'anticipation']
    filtered_emotion_scores_dictionary = dict((k, round(emotion_scores_dictionary[k], 4)) for k in keys if k in emotion_scores_dictionary)

    # get a dictionary of the words corresponding to each emotion
    words_emotions_dictionary = text_object.affect_dict

    # invert the dictionary so the keys are emotions and the values are lists of words
    emotions_words_dictionary = {}
    for k,v in words_emotions_dictionary.items():
        for x in v:
            emotions_words_dictionary.setdefault(x,[]).append(k)

    # get emotion/word association for: fear, anger, trust, surprise, sadness, disgust, joy, anticipation
    filtered_emotions_words_dictionary = dict((k, emotions_words_dictionary[k]) for k in keys if k in emotions_words_dictionary)

    emotions_output = [filtered_emotion_scores_dictionary, filtered_emotions_words_dictionary]

    return emotions_output
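The inversion step above, isolated: affect_dict maps word -> [emotions], and the loop flips it to emotion -> [words]. A tiny worked example with a hand-made dict:

affect = {'fight': ['anger', 'fear'], 'happy': ['joy']}
inverted = {}
for word, emotions in affect.items():
    for emotion in emotions:
        inverted.setdefault(emotion, []).append(word)
print(inverted)  # {'anger': ['fight'], 'fear': ['fight'], 'joy': ['happy']}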
Example #14
def multiCategorySentiment(text):
    text_object = NRCLex(text)
    # countDict = dict({
    #     'fear': 0,
    #     'sadness': 0,
    #     'negative': 0,
    #     'disgust': 0,
    #     'anticip':0,
    #     'joy': 0,
    #     'trust': 0,
    #     'positive': 0,
    #     'surprise': 0,
    #     'anger': 0
    #     })
    # countDict.update(text_object.raw_emotion_scores)
    # total = sum(countDict.values())
    # if total == 0:
    #     return countDict
    # norm = {k: v/total for k, v in countDict.items()}
    return text_object.affect_frequencies  #norm
Example #15
def find_main_emotion(text):
    emotion = NRCLex(text)
    top = emotion.top_emotions
    if len(top) == 1:
        return top[0][0]
    elif len(top) == 2:
        return top[0][0] + ' ' + top[1][0]
    else:
        # top_emotions already ties at the maximum frequency; scan the
        # remaining pairs and keep the best-scoring name
        best_score = top[0][1]
        best_name = top[0][0]
        for name, score in top[1:]:
            if score > best_score:
                best_score = score
                best_name = name
        return best_name
Example #16
def populateEmotions(self, script):
    totalScript = script[0] + script[1]
    text_object = NRCLex(totalScript)
    #emotion_scores = text_object.raw_emotion_scores
    # NOTE: positional indexing assumes the nrclex key order
    # ('fear', 'anger', 'anticip', 'trust', 'surprise', 'positive',
    #  'negative', 'sadness', 'disgust', 'joy') and breaks if it changes
    frequency_of_emotions = list(text_object.affect_frequencies.values())
    self.fear.append(frequency_of_emotions[0])
    self.anger.append(frequency_of_emotions[1])
    self.anticipation.append(frequency_of_emotions[2])
    self.trust.append(frequency_of_emotions[3])
    self.surprise.append(frequency_of_emotions[4])
    self.positive.append(frequency_of_emotions[5])
    self.negative.append(frequency_of_emotions[6])
    self.sadness.append(frequency_of_emotions[7])
    self.disgust.append(frequency_of_emotions[8])
    self.joy.append(frequency_of_emotions[9])
    # getting polarity and subjectivity
    blob_object = TextBlob(totalScript)
    sentiments = list(blob_object.sentiment)
    self.polarity.append(sentiments[0])
    self.subjectivity.append(sentiments[1])
Example #17
    # Generate emotions per comment and store the result
    overall_scores = {
        'fear': 0.0,
        'anger': 0.0,
        'anticip': 0.0,
        'anticipation': 0.0,
        'trust': 0.0,
        'surprise': 0.0,
        'positive': 0.0,
        'negative': 0.0,
        'sadness': 0.0,
        'disgust': 0.0,
        'joy': 0.0
    }
    for comment in comments_per_event[event]:
        comment_emotion = NRCLex(comment)
        for emotion in comment_emotion.affect_frequencies:
            overall_scores[emotion] += comment_emotion.affect_frequencies[emotion]
    #print(overall_scores)

    #Sum positive and negative emotions
    emotion_types = {
        'positive': ['anticipation', 'trust', 'surprise', 'positive', 'joy'],
        'negative': ['fear', 'anger', 'negative', 'sadness', 'disgust']
    }
    emotion_scores = {'positive': 0.0, 'negative': 0.0}

    for emotion_type in emotion_types:
        emotions = emotion_types[emotion_type]
        for emotion in emotions:
Example #18
def get_sentiment_breakdown(string):
    text_object = NRCLex(string)
    frequencies = text_object.affect_frequencies
    return frequencies
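A usage sketch; affect_frequencies returns a fixed-key dict of frequencies over the text's lexicon hits (the exact key set varies slightly across nrclex versions, e.g. 'anticip' vs 'anticipation'):

freqs = get_sentiment_breakdown("hopeful yet anxious")
for emotion, freq in freqs.items():
    print(f"{emotion}: {freq:.3f}")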
Example #19
def app():
    st.balloons()
    
    st.markdown("# Visualizations :art:")

    menu = ["Number of Tweets per Day", "Number of Retweets per Day", 
    "Number of Likes per Day", "Most Common Tweets", "Sentiment Scores", "Common Entities"]
    choice = st.selectbox("View", menu)

    if choice == "Number of Retweets per Day":
        fig1 = px.histogram(covid_data, x="datetime", color="retweets", title="Number of Retweets Per Day")
        st.write(fig1)


    elif choice == "Number of Likes per Day":
        fig2 = px.histogram(covid_data, x="datetime", color="likes" ,title="Likes Per Day")
        st.write(fig2)


    elif choice == "Most Common Tweets":
        st.write("Word Cloud for Most Common Tweets")
        stop_words = get_stop_words('english')
        concat_quotes = ' '.join(
            [i for i in covid_data.text_without_stopwords.astype(str)])
        #print(concat_quotes[:10])
        stylecloud.gen_stylecloud(  text=concat_quotes,
                                    icon_name='fab fa-twitter',
                                    palette='cartocolors.qualitative.Bold_9',
                                    background_color='white',
                                    output_name='tweets.png',
                                    collocations=False,
                                    custom_stopwords=stop_words )

        #Displaying image from a file
        Image(filename="tweets.png", width=780, height=780)
        st.image("tweets.png")

        #Display the most common words after stemming
        #
        #Create separate columns
        table_col, input_col = st.beta_columns([3,2])

        covid_data['text_stem'] = covid_data['text_stem'].apply(lambda x: str(x).split())  # tokenize or split: same results here
        top = Counter([item for sublist in covid_data['text_stem'] for item in sublist]) #Counts the frequency of words

        with input_col:
            # slider signature is (label, min, max, default); the default must lie inside the range
            top_n = st.slider("How many of the common words do you want to see?", 0, 10, 5)
            temp = pd.DataFrame(top.most_common(top_n))
            temp.columns = ['common_words', 'count']
            #temp = temp.reset_index()

        with table_col:
            fig = px.pie(temp, values='count', names='common_words',
                         title='Top Common Words',
                         hover_data=['common_words'], color_discrete_sequence=px.colors.qualitative.G10)
            fig.update_layout(showlegend=False, width=450, height=450)
            st.write(fig)
            # colorscale = [[0, '#4d004c'], [.5, '#f2e5ff'], [1, '#ffffff']]
            # fig = ff.create_table(temp, height_constant=15, colorscale=colorscale)
            # st.write(fig)
        #st.write(temp.style.background_gradient(cmap = 'Blues'))
        
    elif choice == "Sentiment Scores":
        pie_col, input_col = st.beta_columns([3,2])
        # Convert the text_stem column to string type; nrclex only takes input of type str
        covid_data['text_stem'] = covid_data['text_stem'].astype(str)
        #Create a text object
        text_object = NRCLex(' '.join(covid_data['text_stem']))

        #Create a list from the  text object
        sentiment_scores = pd.DataFrame(list(text_object.raw_emotion_scores.items())) 
        #Create a dataframe of two columns
        sentiment_scores = sentiment_scores.rename(columns={0: "Sentiment", 1: "Count"})
        with input_col:
            num_n = st.slider("Change Pie Chart Values Here", 0, 10, 5)
            sentiment_scores = sentiment_scores.head(num_n)

            btn = st.button("Show Table")
            colorscale = [[0, '#272D31'], [.5, '#ffffff'], [1, '#ffffff']]
            font=['#FCFCFC', '#00EE00', '#008B00']
            if btn:
                fig =  ff.create_table(sentiment_scores, colorscale=colorscale,
                font_colors=font)
                st.write(fig)
        
        with pie_col:
            fig = px.pie(sentiment_scores, values='Count', names='Sentiment',
            title='Top Emotional Affects',
            hover_data=['Sentiment'], color_discrete_sequence=px.colors.qualitative.Dark24)
            fig.update_traces(textposition='inside', textinfo='percent+label')

            fig.update_layout(showlegend = False, width = 450, height = 450,
            font=dict(color='#383635', size=15)
            )

            st.write(fig)

        #Create a dataframe with a dictionary of the sentiments
        st.title("Table Showing Words & Sentiments")

        sentiment_words = pd.DataFrame(list(text_object.affect_dict.items()),columns = ['words','sentiments'])

        num_o = st.slider("Change table size", 0, 100, 50)
        sentiment_words = sentiment_words.head(num_o)
        
        fig = go.Figure(data=[go.Table(
            columnwidth=[1, 2],
            header=dict(values=list(sentiment_words[['words', 'sentiments']].columns),
                        fill_color='maroon',
                        align=['left', 'center'],
                        height=40,
                        font=dict(color='white', size=18)),
            cells=dict(values=[sentiment_words.words, sentiment_words.sentiments],
                       fill_color='lightseagreen',
                       align='left'))])

        fig.update_layout(margin=dict(l=5, r=5, b=10, t=10))
        st.write(fig)

    elif choice == "Common Entities":
        st.write("Word Cloud for Most Common Entities")

        # remove duplicate claims (Not really needed since dropped already)
        words = covid_data.text_stem.unique()
        # NER list we'll use - Perhaps could be expanded?
        nlp = en_core_web_sm.load()
        #nlp = spacy.load(en_core_web_sm)
        corpus = list(nlp.pipe(words[:700]))
        all_ents = defaultdict(int)
        for i, doc in enumerate(corpus):
            #print(i,doc)
            for ent in doc.ents:
                all_ents[str(ent)] += 1
        sorted_ents = pd.DataFrame(sorted(all_ents.items(), key=operator.itemgetter(1), reverse=True),columns = ['entities','count'])

        stop_words = get_stop_words('english')
        hashtags = sorted_ents['entities'].dropna().tolist()
        unique_entities=(" ").join(hashtags)
        # concat_quotes = ' '.join(
        #     [i for i in sorted_ents.entities.astype(str)])
        # #print(concat_quotes[:10])
        stylecloud.gen_stylecloud(  text=unique_entities,
                                    #file_path='concat_quotes',
                                    icon_name='fas fa-comments',
                                    palette='cartocolors.qualitative.Prism_8',
                                    background_color='white',
                                    output_name='entities.png',
                                    collocations=False,
                                    custom_stopwords=stop_words )
        
        #Displaying image from a file
        Image(filename="entities.png", width=780, height=780)
        st.image("entities.png")

    else:
        fig3 = px.histogram(covid_data, x="datetime", title="Number of Tweets Per Day")
        st.write(fig3)
Example #20
def QueryTwitter(search_string):

	#Fetching the configuration settings
	#NOTE: credentials redacted; supply your own Twitter API keys and tokens
	key = "<CONSUMER_KEY>"
	secret = "<CONSUMER_SECRET>"
	access_token = "<ACCESS_TOKEN>"
	access_secret = "<ACCESS_SECRET>"

	#Authenticating ::
	#Receiving Access Tokens
	auth = tweepy.OAuthHandler(consumer_key=key,consumer_secret=secret)
	auth.set_access_token(access_token, access_secret)

	#Instantiating the API with our Access Token
	api = tweepy.API(auth)

	tweet_list = []
	for tweet in limit_handled(tweepy.Cursor(api.search,q=search_string).items(50)):
		tweet_list.append(tweet)

	#We now extract details from the tweet and get the resultant DataFrame
	tweet_Data = filter_tweets(tweet_list)
#space for additional code
    #Many places contain '@name' mentions, which carry no meaning, so they need to be removed.
	def remove_pattern(text, pattern_regex):
		r = re.findall(pattern_regex, text)
		for i in r:
			text = re.sub(i, '', text)
		return text 
    # We are keeping cleaned tweets in a new column called 'tidy_tweets'
	tweet_Data['tidy_tweets'] = np.vectorize(remove_pattern)(tweet_Data['translate'], "@[\w]*: | *RT*")
	cleaned_tweets = []

	for index, row in tweet_Data.iterrows():
    # Here we are filtering out all the words that contains link
		words_without_links = [word for word in row.tidy_tweets.split() if 'http' not in word]
		cleaned_tweets.append(' '.join(words_without_links))
	tweet_Data['tidy_tweets'] = cleaned_tweets

	tweet_Data['absolute_tidy_tweets'] = tweet_Data['tidy_tweets'].str.replace("[^a-zA-Z# ]", "", regex=True)
      
	stopwords_set = set(stopwords.words('english'))
	cleaned_tweets = []

	for index, row in tweet_Data.iterrows():

        # filtering out all the stopwords
		words_without_stopwords = [word for word in row.absolute_tidy_tweets.split() if not word in stopwords_set and '#' not in word.lower()]

        # rejoin the remaining words into a cleaned tweet
		cleaned_tweets.append(' '.join(words_without_stopwords))

	tweet_Data['absolute_tidy_tweets'] = cleaned_tweets
	tokenized_tweet = tweet_Data['absolute_tidy_tweets'].apply(lambda x: x.split())
	for i, tokens in enumerate(tokenized_tweet):
		tokenized_tweet[i] = ' '.join(tokens)

	tweet_Data['absolute_tidy_tweets'] = tokenized_tweet
    
    
	def generate_wordcloud_1(all_words):
		wordcloud = WordCloud(width=800, height=500, random_state=21, max_font_size=100, relative_scaling=0.5, colormap='Dark2').generate(all_words)

		plt.figure(figsize=(14, 10))
		plt.imshow(wordcloud, interpolation="bilinear")
		plt.axis('off')
		#plt.title("POSITIVE WORD CLOUD")
		#plt.show()
		plt.savefig('C:\\Users\\Admin\\Desktop\\twitter\\static\\poswc.png')

	def generate_wordcloud_2(all_words):
		wordcloud = WordCloud(width=800, height=500, random_state=21, max_font_size=100, relative_scaling=0.5, colormap='Dark2').generate(all_words)

		plt.figure(figsize=(14, 10))
		plt.imshow(wordcloud, interpolation="bilinear")
		plt.axis('off')
		#plt.title("NEGATIVE WORD CLOUD")
		#plt.show()
		plt.savefig('C:\\Users\\Admin\\Desktop\\twitter\\static\\negwc.png')
        
	all_words = ' '.join([text for text in tweet_Data['absolute_tidy_tweets'][tweet_Data.sentiments_group == 'positive']])
	generate_wordcloud_1(all_words)
	all_words = ' '.join([text for text in tweet_Data['absolute_tidy_tweets'][tweet_Data.sentiments_group == 'negative']])
	generate_wordcloud_2(all_words)
    
    # function to collect hashtags
	def hashtag_extract(text_list):
		hashtags = []
		# Loop over the words in the tweet
		for text in text_list:
			ht = re.findall(r"#(\w+)", text)
			hashtags.append(ht)

		return hashtags

	def generate_hashtag_freqdist(hashtags):
		a = nltk.FreqDist(hashtags)
		d = pd.DataFrame({'Hashtag': list(a.keys()),
                      'Count': list(a.values())})
		# selecting the top 25 most frequent hashtags
		d = d.nlargest(columns="Count", n = 25)
		plt.figure(figsize=(16,7))
		ax = sns.barplot(data=d, x= "Hashtag", y = "Count")
		plt.xticks(rotation=80)
		ax.set(ylabel = 'Count')
    #plt.show()
		plt.savefig('C:\\Users\\Admin\\Desktop\\twitter\\static\\hashtag.png')
        
	hashtags = hashtag_extract(tweet_Data['tidy_tweets'])
	hashtags = sum(hashtags, [])
	generate_hashtag_freqdist(hashtags)
    
	from nrclex import NRCLex
	words = ' '.join([text for text in tweet_Data['absolute_tidy_tweets']])
	text_object = NRCLex(words)
	nrc=(text_object.affect_frequencies)
	e = pd.DataFrame({'Emotion': list(nrc.keys()),
                      'Frequency': list(nrc.values())})
	plt.figure(figsize=(16,7))
	ax = sns.barplot(data=e, x= "Emotion", y = "Frequency")
	plt.xticks(rotation=80)
	ax.set(ylabel = 'Frequency')
    #plt.show()
	plt.savefig('C:\\Users\\Admin\\Desktop\\twitter\\static\\nrc_lexicon.png')    


	(doughnut,sentiment_map,sources_plot,sentiment_pie,retweet_table) = make_maps(tweet_Data)
	#return tweet_Data
	return (doughnut,sentiment_map,sources_plot,sentiment_pie,retweet_table)
Example #21
def emotion_plotter(text):
    # text = "Astronaut science is best most perfect great thing"
    print(text)

    # ---EMOTION ANALYSIS---
    # Tokenize text
    text_tokens = []
    text_tokens.append(regexp_tokenize(text.lower(), "[\w']+")) 
    # print(text_tokens[0])

    # Remove stop words and assign part of speech
    filtered_tokens = []
    for token in text_tokens[0]:
        if token not in stopwords.words('english'):
            filtered_tokens.append(pos_tag(word_tokenize(token)))
    # print(filtered_tokens)

    # Lemmatize words (identify base words from other forms of the word)
    lemmatizer = WordNetLemmatizer()
    lemmatized_tokens = []
    morphy_tag = {'NN':'n', 'JJ':'a', 'VB':'v', 'RB':'r'}
    for token in filtered_tokens:
        for word, tag in token:
            # match on the first two letters so tags like 'NNS' or 'VBD' map too
            morphy_pos = morphy_tag.get(tag[:2], '')
            if morphy_pos in ["a", "n", "v"]:
                lemmatized_tokens.append(lemmatizer.lemmatize(word, pos=morphy_pos))
            else:
                lemmatized_tokens.append(lemmatizer.lemmatize(word))
    # print(lemmatized_tokens)
        
    # Join lemmatized words back into sentence
    lemmatized_text = " ".join(lemmatized_tokens)
    print(lemmatized_text)

    # Get emotions
    text_object = NRCLex(lemmatized_text)
    # print(text_object.words)
    # print(text_object.affect_dict)
    # print(text_object.raw_emotion_scores)
    # print(text_object.affect_frequencies)

    # Create emotion data for plot
    emotion_data = {"words": [], "emotions": []}
    for word, emotions in text_object.affect_dict.items():
        for emotion in emotions:
            emotion_data["words"].append(word)
            emotion_data["emotions"].append(emotion.title())

    print(emotion_data)

    emotion_trace = {
        "x": emotion_data["words"],
        "y": emotion_data["emotions"],
        "mode": "markers",
        "marker": {"size": 40, "color": "midnightblue"}
    }

    emotion_plot_data = [emotion_trace]

    emotion_plot_layout = {
        "title": {"text": "Emotions Detected in Your Headline"},
        "xaxis": {
            "type": "category",
            "title": "Your Words",
        },
        "yaxis": {
            "type": "category",
            "categoryorder": "array",
            "categoryarray": [
                "Disgust", 
                "Anger", 
                "Fear", 
                "Sadness", 
                "Negative",
                "Anticipation",
                "Positive",
                "Surprise",
                "Trust",
                "Joy",
            ]
        }
    }

    return emotion_plot_data, emotion_plot_layout
Example #22
                        content_type='audio/mp3',
                        model='en-US_NarrowbandModel',
                        continuous=True).get_result()
#print(res) #transcript

########################################################################

#write transcript to .txt file

res_string = str(res)
print('text --->', res_string[81:-26])
string = res_string[81:-26]

#############################################################

# Emotions of text

from nrclex import NRCLex

#text_file = open('sad.txt')
#text = text_file.read()
text = string

text_object = NRCLex(text)

#print(dir('text_object'))

print(text_object.raw_emotion_scores)
print(text_object.top_emotions)
print(text_object.affect_frequencies)
Example #23
    ]))

all_song_lyrics['lemma_count'] = all_song_lyrics['lyrics_lemmatized'].apply(
    lambda x: len(x.split(' ')))
all_song_lyrics['unique_lemmas_on_song'] = all_song_lyrics[
    'lyrics_lemmatized'].apply(lambda x: len(set(x.split(' '))))

# + [markdown] code_folding=[]
# ### NRC Lexicon
# -

from nrclex import NRCLex

#Instantiate text object (for best results, 'text' should be unicode).
all_song_lyrics.loc[:, 'nrc_emotions'] = all_song_lyrics['lyrics_clean'].apply(
    lambda x: NRCLex(x).raw_emotion_scores)
all_song_lyrics.loc[:, 'nrc_emotions_total_words'] = all_song_lyrics[
    'lyrics_clean'].apply(lambda x: len(NRCLex(x).words))

all_song_lyrics.head()

# Save dataset until here
all_song_lyrics.to_csv('./all_song_lyrics_info.csv', index=False)
#flair_sentiment_df.head()

# ### Sentiments

classifier = TextClassifier.load('sentiment')

all_song_lyrics.head()
#all_song_lyrics.index.tolist()
Example #24
    if review:
        # st.markdown('## Name Entity Recognition (NER)')
        docx = nlp(review)
        spacy_streamlit.visualize_ner(docx,
                                      show_table=False,
                                      labels=nlp.get_pipe('ner').labels)

        col1, _, col2, _, col3 = st.beta_columns([8, 1, 6, 1, 6])
        col1.markdown('### Processes text')
        review_p = process_review(review)
        col1.markdown(review_p)

        ## Emotional Sentiment
        col2.markdown('### Emotions')
        col2.table(pd.Series(NRCLex(review).raw_emotion_scores))

        col3.markdown('### Sentiment')
        blob = TextBlob(review)
        col3.markdown('**Note:** Positive = 1 & Negative = 0')
        col3.markdown(f'Polarity : {blob.sentiment.polarity:0.2f}')
        with col3.beta_expander('Looks like a wrong prediction?'):
            correct = st.selectbox('Select the correct label',
                                   ('None', 'Positive', 'Neutral', 'Negative'))
            if correct != 'None':
                st.balloons()

elif tab == 'Aggregated Stats':
    st.markdown('## Aggregated Stats')
    df = load_data()
Example #25
            'fear': 0.0,
            'anger': 0.0,
            'anticip': 0.0,
            'anticipation': 0.0,
            'trust': 0.0,
            'surprise': 0.0,
            'positive': 0.0,
            'negative': 0.0,
            'sadness': 0.0,
            'disgust': 0.0,
            'joy': 0.0
        }
        # Use the index to find the corresponding uncleaned comment
        for commend_id in ordered_documents[event][topic]:
            comment = comments_per_event_uncleaned[event][commend_id]
            comment_emotion = NRCLex(comment)
            for emotion in comment_emotion.affect_frequencies:
                overall_scores[emotion] += comment_emotion.affect_frequencies[emotion]

        #Sum positive and negative emotions
        emotion_types = {
            'positive':
            ['anticipation', 'trust', 'surprise', 'positive', 'joy'],
            'negative': ['fear', 'anger', 'negative', 'sadness', 'disgust']
        }
        emotion_scores = {'positive': 0.0, 'negative': 0.0}

        for emotion_type in emotion_types:
            emotions = emotion_types[emotion_type]
            for emotion in emotions:
Example #26
def _process_fragment(fragment, xhtml_dict):
    for key, val in list(fragment.items()):
        fragment[key]['text'] = xhtml_dict[key]
        fragment[key]['emotion'] = NRCLex(xhtml_dict[key]).affect_frequencies

    return fragment
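Usage sketch with hypothetical inputs: fragment carries per-key metadata and xhtml_dict holds the extracted text for the same keys:

from nrclex import NRCLex
fragment = {'ch1': {}}
xhtml_dict = {'ch1': 'A joyful reunion after a fearful journey.'}
result = _process_fragment(fragment, xhtml_dict)
print(result['ch1']['emotion'])  # affect frequencies for the chapter text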
Example #27
def analyze_album(album_id):
    tracks = []
    track_ids = []
    results = sp.album_tracks(album_id)
    tracks.extend(results['items'])
    while results['next']:
        results = sp.next(results)
        tracks.extend(results['items'])
    for track in tracks:
        track_ids.append(track['id'])
    analysis_json = sp.audio_features(tracks=track_ids)
    analysis_json = list(filter(None, analysis_json))
    tracks_json = sp.album_tracks(album_id)["items"]
    tracks_json = list(filter(None, tracks_json))
    analysis_df = json_normalize(analysis_json)
    tracks_df = json_normalize(tracks_json)
    df = analysis_df.merge(tracks_df, on='id', how='inner')
    album_name = sp.album(album_id)["name"]
    album_name = clean_lyrics(album_name)
    release_date = sp.album(album_id)["release_date"]

    artist = json_normalize(
        sp.album_tracks(album_id)["items"][0]["artists"])["name"][0]

    keys = {
        0: 'C',
        1: 'C#',
        2: 'D',
        3: 'D#',
        4: 'E',
        5: 'F',
        6: 'F#',
        7: 'G',
        8: 'G#',
        9: 'A',
        10: 'A#',
        11: 'B'
    }

    df["key"] = df['key'].map(keys, na_action='ignore')

    mode = {0: 'Minor', 1: 'Major'}

    df["mode"] = df['mode'].map(mode, na_action='ignore')

    df["duration"] = (df["duration_ms_x"] / (1000 * 60)) % 60

    df['track'] = df['track_number']
    df = df.loc[df["disc_number"] == 1]
    df = df.set_index('track_number')
    df["album_id"] = album_id

    sent_score = []
    song_lyrics = []
    new_titles = []
    genius_url = []
    genius_songid = []
    keywords = []
    affect_freq = []
    msttr = []
    lexical_depth = []
    cliche_word_perc = []
    cliche_total_count = []
    df["metacritic"] = search_metacritic(artist, album_name)

    for title in df["name"]:
        try:
            title = title.split("- Remaster", 1)[0]
            title = title.split("[Remaster", 1)[0]
            title = title.split("(Remaster", 1)[0]
            title = title.split("- Mono", 1)[0]
            title = title.split("(Mono", 1)[0]
            title = title.split("[Mono", 1)[0]
            title = title.split("(with", 1)[0]
            title = title.split("[with", 1)[0]
            title = title.split("(featuring", 1)[0]
            title = title.split("- featuring", 1)[0]
            title = title.split("[featuring", 1)[0]
            new_titles.append(title)
            remote_song_info = request_song_info(title, artist)
            matching_artist = remote_song_info['result']['primary_artist'][
                'name']
            matching_artist = matching_artist.lower()
            ratio = levenshtein_ratio_and_distance(artist.lower(),
                                                   matching_artist,
                                                   ratio_calc=True)
            if ratio > .6:
                url = remote_song_info['result']['url']
                genius_url.append(url)
                genius_songid.append(str(remote_song_info['result']['id']))
                lyrics = get_lyrics(url)
                flt = ld.flemmatize(clean_lyrics(lyrics))
                clean_flt = [x for x in flt if x.lower() not in excluded_words]
                spacy_stopwords = list(spacy.lang.en.stop_words.STOP_WORDS)
                depth = sum(
                    [1 for x in clean_flt if x.lower() not in spacy_stopwords])
                cliche_count = sum(
                    [1 for x in clean_flt if x.lower() in cliche_words])
                cliche_perc = cliche_count / depth
                if depth >= 5:
                    msttr.append(ld.msttr((clean_flt), window_length=100))
                    lexical_depth.append(depth)
                    cliche_word_perc.append(cliche_perc)
                    cliche_total_count.append(cliche_count)
                else:
                    msttr.append(None)
                    lexical_depth.append(None)
                    cliche_word_perc.append(None)
                    cliche_total_count.append(None)
                keywords.append(
                    return_keywords(preprocess(clean_lyrics(lyrics))))
                sent = sentiment_analyzer_scores(clean_lyrics(lyrics))
                sent = round((sent + 1) / 2, 3)
                sent_score.append(sent)
                text_object = NRCLex(lyrics)
                affect_freq.append(text_object.affect_frequencies)
                song_lyrics.append(clean_lyrics(lyrics))
            else:
                sent_score.append(None)
                song_lyrics.append(None)
                keywords.append(None)
                affect_freq.append(None)
                genius_url.append(None)
                genius_songid.append(None)
                msttr.append(None)
                lexical_depth.append(None)
                cliche_word_perc.append(None)
                cliche_total_count.append(None)
        except Exception:
            sent_score.append(None)
            song_lyrics.append(None)
            keywords.append(None)
            affect_freq.append(None)
            # genius_url.append(None)
            # genius_songid.append(None)
            msttr.append(None)
            lexical_depth.append(None)
            cliche_word_perc.append(None)
            cliche_total_count.append(None)

    df['title'] = new_titles
    df["lyr_valence"] = sent_score
    df['mood'] = np.where(df['lyr_valence'].isnull(), df['valence'],
                          round((df["lyr_valence"] + df["valence"]) / 2, 3))
    df["mood_discrep"] = df["valence"] - df["lyr_valence"]
    df["lyrics"] = song_lyrics
    pos_neg(df, 'lyr_valence_des', 'lyr_valence')
    pos_neg(df, 'valence_des', 'valence')
    pos_neg(df, 'mood_des', 'mood')
    high_low(df, 'energy_des', 'energy')
    high_low(df, 'dance_des', 'danceability')
    df["artist"] = artist
    df["album_name"] = album_name
    df["release_date"] = release_date
    df["sp_id"] = df["id"]
    print(album_name)
    print(genius_songid)
    df["genius_songid"] = genius_songid
    df["url"] = genius_url
    df['keywords'] = keywords
    df['affect_freq'] = affect_freq
    df["lyr_valence"] = df["lyr_valence"].replace({np.nan: None})
    df["mood_discrep"] = df["mood_discrep"].replace({np.nan: None})
    df["lyr_valence_des"] = df["lyr_valence_des"].replace({'0': 'Not Found'})
    df['msttr'] = msttr
    df['lexical_depth'] = lexical_depth
    df['cliche_word_perc'] = cliche_word_perc
    df['cliche_total_words'] = cliche_total_count
    df["lexical_depth"] = df["lexical_depth"].replace({np.nan: None})
    df["msttr"] = df["msttr"].replace({np.nan: None})
    df["cliche_word_perc"] = df["cliche_word_perc"].replace({np.nan: None})
    df["cliche_total_words"] = df["cliche_total_words"].replace({np.nan: None})

    df = df.rename(columns={"valence": "mus_valence"})
    df = df.rename(columns={"external_urls.spotify": "external_urls_spotify"})

    energy_z = abs(stats.zscore(df["energy"]))
    mood_z = abs(stats.zscore(df["mood"]))
    mus_valence_z = abs(stats.zscore(df["mus_valence"]))
    dance_z = abs(stats.zscore(df["danceability"]))
    duration_z = abs(stats.zscore(df["duration"]))
    loudness_z = abs(stats.zscore(df["loudness"]))
    if None in df["msttr"].values:
        df["uniqueness"] = (energy_z + dance_z + duration_z + loudness_z +
                            mood_z) / 5
    else:
        lex_diversity = abs(stats.zscore(df["msttr"]))
        lyr_valence_z = abs(stats.zscore(df["lyr_valence"]))
        df["uniqueness"] = (energy_z + dance_z + duration_z + loudness_z +
                            lyr_valence_z + mus_valence_z + lex_diversity) / 7
    df = df[[
        "title", "energy", "mus_valence", "lyr_valence", "mood",
        "danceability", "loudness", "tempo", "key", "mode", "time_signature",
        "duration", "sp_id", "track", "lyrics", "speechiness", "acousticness",
        "instrumentalness", "liveness", "artist", "album_name", "disc_number",
        "explicit", "external_urls_spotify", "mood_discrep", "release_date",
        "uniqueness", "lyr_valence_des", "valence_des", "mood_des",
        "energy_des", "dance_des", "album_id", "url", "genius_songid",
        "keywords", "affect_freq", "metacritic", "msttr", "lexical_depth",
        "cliche_word_perc", "cliche_total_words"
    ]]

    df = df.to_dict('records')
    return df
Example #28
    #custom_tweet = "YES, i think yes think A bat cat loves think yes car car yes"
    wordsInFile = numpy.array([])
    for line in file_tokens:
        line = line.replace("Mike Mazzei: ", "")
        custom_tokens = remove_noise(word_tokenize(line))

        # check for emptiness first so line[0] never raises IndexError
        if (len(line.strip()) != 0 and not line[0].isdigit()
                and not line.startswith("Jacob")):

            # This is positive/negative classifier
            #print(classifier.classify(dict([token, True] for token in custom_tokens)), " : ", line)
            wordsInFile = numpy.append(wordsInFile, custom_tokens)

            #Cheer up EMOLec
            text_object = NRCLex(line)
            print(line)
            print(text_object.raw_emotion_scores)
            print(
                multi_topic_scorer(line,
                                   topic_dictionary,
                                   sim_thresh=0.7,
                                   return_hits=True))

#     print(custom_tokens)

#     with open ("/Users/thomaskennedy/downloads/interview1.txt", "r") as myfile:
#         custom = myfile.readlines()
#     print(wordsInFile)

    word_counts = Counter(wordsInFile)  # avoid shadowing the Counter class
Example #29
    description = description.lower()
    description = nltk.word_tokenize(description)
    description = [word for word in description if not word in set(stopwords.words("english"))]
    lemma = nltk.WordNetLemmatizer()
    description = [lemma.lemmatize(word) for word in description]
    description = " ".join(description)
    description_list.append(description)
pd_tweet["normalized_text_new"] = description_list
pd_tweet.head(5)


# In[56]:


from nrclex import NRCLex 
text_object = NRCLex(' '.join(pd_tweet['normalized_text_new']))


# In[57]:


text_object.affect_frequencies


# In[58]:


text_object.top_emotions


# In[59]:
Example #30
def lyrics_to_emotions(artistName, songName, songLyrics, trackID):

    if songLyrics == "":
        return [artistName, songName, trackID, None, None, None, None, None, None, None, None, None, None]

    #init running totals and hit counts for each emotion
    emotion_names = ('positive', 'negative', 'anger', 'anticipation',
                     'disgust', 'fear', 'joy', 'sadness', 'surprise', 'trust')
    totals = {name: 0.0 for name in emotion_names}
    counts = {name: 0 for name in emotion_names}

    #clean and tokenize lyrics str 
    #tokenization segments song lyrics into atomic elements
    raw = songLyrics.lower()
    tokens = tokenizer.tokenize(raw)  # tokenize the lowercased lyrics

    #create English stop words list
    en_stop = get_stop_words('en')

    #remove stop words from tokens
    #creates a list of tokens w/o stop words
    stopped_tokens = [i for i in tokens if not i in en_stop]
    print('Stopped')
    print(stopped_tokens)

    #lemmatize tokens
    wnl = WordNetLemmatizer()
    lem_tokens = [wnl.lemmatize(word) for word in stopped_tokens]

    #remove one-two letter tokens 
    lem_tokens = [word for word in lem_tokens if len(word)>2]

    print("lem_tokens")
    print(lem_tokens)


    # Accumulate a running total and a hit count per emotion, then average
    # once at the end. (The original divided the running sum on every hit,
    # which repeatedly shrank the value instead of computing a mean.)
    for item in lem_tokens:
        #for each tokenized word, evaluate emotions
        emotion = NRCLex(item)
        #extract emotions with non-zero values
        for emo_name, emo_score in emotion.top_emotions:
            if emo_score != 0 and emo_name in totals:
                print("word: ", item, emo_name, "score: ", emo_score)
                totals[emo_name] += emo_score
                counts[emo_name] += 1

    #ENDOF extracting emotions from songLyrics
    #average each emotion over the number of tokens that expressed it
    averages = {name: (totals[name] / counts[name] if counts[name] else 0)
                for name in totals}
    positive, negative = averages['positive'], averages['negative']
    anger, anticipation = averages['anger'], averages['anticipation']
    disgust, fear = averages['disgust'], averages['fear']
    joy, sadness = averages['joy'], averages['sadness']
    surprise, trust = averages['surprise'], averages['trust']

    return [artistName, songName, trackID, positive, negative, anger, anticipation, disgust, fear, joy, sadness, surprise, trust]