def nlp_sentiment_analysis(movies, movies_data):
    """
    Build a dictionary of normalized sentiment data for each movie.

    For every movie in ``movies`` the polarity scores of each review's
    ``"reviewText"`` are summed per score key. The totals are passed to
    ``normalize_sentiment`` together with the number of reviews, and the
    result is stored under the movie. The finished dictionary is handed to
    ``visualize_sentiment`` before it is returned.

    :param movies: iterable of movie keys to analyze
    :param movies_data: lookup from movie key to a sized collection of
        reviews, each exposing its text under ``"reviewText"``
    :returns: dict mapping each movie to the value ``normalize_sentiment``
        produced for it
    :raises KeyError: presumably, when ``movies_data`` is a dict and a movie
        is missing from it, or a review dict has no ``"reviewText"`` entry
    """
    # Build the analyzer once and reuse it for every review instead of
    # re-creating it inside the innermost loop.
    analyzer = SentimentIntensityAnalyzer()

    sentiment_analysis = {}
    for movie in movies:
        reviews = movies_data[movie]

        # Running per-key totals of the polarity scores for this movie.
        sentiment_data = {}
        for review in reviews:
            review_scores = analyzer.polarity_scores(review["reviewText"])
            for key, score in review_scores.items():
                if key in sentiment_data:
                    sentiment_data[key] += score
                else:
                    sentiment_data[key] = score

        sentiment_analysis[movie] = normalize_sentiment(
            sentiment_data, len(reviews))

    visualize_sentiment(sentiment_analysis)
    return sentiment_analysis
def polarity(dataframe, review_column):
    """
    Calculate sentiment polarity scores for a text column of a dataframe.

    Rows containing missing values (in any column) are dropped, every
    remaining entry of ``review_column`` is scored, and one column per score
    key is added to the dataframe that is returned.

    :param dataframe: pandas DataFrame holding the text to score
    :param review_column: name of the column containing the review text
    :returns: the dataframe without the rows that had missing values,
        extended with one column per polarity score; when no rows remain it
        is returned without any score columns
    :raises ValueError: if ``dataframe`` is not a pandas DataFrame, if
        ``review_column`` is not a string, or if ``review_column`` is not a
        column of ``dataframe``
    """
    # Datatype checks
    if not isinstance(dataframe, pd.DataFrame):
        raise ValueError('dataframe is not a pandas dataframe')
    if not isinstance(review_column, str):
        raise ValueError('review_column is not a string')
    if review_column not in dataframe:
        raise ValueError('review_column is not in the dataframe')

    dataframe = dataframe.dropna()
    if dataframe.empty:
        # Nothing to score, so there are no score keys to turn into columns.
        return dataframe

    # One analyzer serves every row; there is no need to rebuild it per review.
    analyzer = SentimentIntensityAnalyzer()
    scores = [
        analyzer.polarity_scores(review)
        for review in dataframe[review_column]
    ]

    # Column names come from the first result. Values are looked up by key,
    # so neither the number of scores nor their order is hard-coded.
    score_columns = {
        key: np.array([row[key] for row in scores], dtype=float)
        for key in scores[0]
    }

    # assign() returns a new frame, so the columns are not written onto the
    # intermediate dropna() result.
    return dataframe.assign(**score_columns)