def nlp_sentiment_analysis(movies, movies_data):
    """
    Build, visualize and return the normalized sentiment data for each movie.

    For every movie in ``movies`` the polarity scores of all of its reviews
    are summed per score key. The totals are handed to
    ``normalize_sentiment`` together with the number of reviews, and the
    result is stored under the movie. The finished dictionary is passed to
    ``visualize_sentiment`` before it is returned.

    :params movies iterable: movies to analyze; each one is used as a key
        into ``movies_data``
    :params movies_data mapping: maps a movie to its reviews, where every
        review provides its text under the ``"reviewText"`` key
    :returns dict: movie -> value returned by ``normalize_sentiment``
    """
    # A single analyzer is shared by all reviews; constructing a new one for
    # every review only repeats its initialisation.
    analyzer = SentimentIntensityAnalyzer()

    sentiment_analysis = {}
    for movie in movies:
        reviews = movies_data[movie]
        sentiment_totals = {}
        for review in reviews:
            review_scores = analyzer.polarity_scores(review["reviewText"])
            for key, value in review_scores.items():
                if key in sentiment_totals:
                    sentiment_totals[key] += value
                else:
                    sentiment_totals[key] = value
        # NOTE(review): a movie without reviews reaches normalize_sentiment
        # as ({}, 0) -- confirm that helper copes with a zero count.
        sentiment_analysis[movie] = normalize_sentiment(
            sentiment_totals, len(reviews))

    visualize_sentiment(sentiment_analysis)
    return sentiment_analysis
# Example #2
def polarity(dataframe, review_column):
    """
    Takes a dataframe and the column name containing text to calculate the
    sentiment polarity of. Calculates polarity then appends to existing
    dataframe to return.

    Rows holding a missing value in any column are dropped first. Every
    remaining entry of ``review_column`` is scored with
    ``SentimentIntensityAnalyzer.polarity_scores`` and one column per score
    key is added to the result, named after that key. The frame passed in
    by the caller is not modified.

    :params dataframe dataframe: data containing the text to score
    :params review_column string: name of the column holding the text
    :returns dataframe: copy of the non-missing rows with the score columns
        appended; when no rows remain, the empty frame is returned without
        any score columns
    :raises ValueError: if ``dataframe`` is not a pandas dataframe,
        ``review_column`` is not a string, or the column does not exist
    """
    # Datatype checks
    if not isinstance(dataframe, pd.DataFrame):
        raise ValueError('dataframe is not a pandas dataframe')
    if not isinstance(review_column, str):
        raise ValueError('review_column is not a string')
    if review_column not in dataframe:
        raise ValueError('review_column is not in the dataframe')

    # Explicit copy: the score columns are added to a private frame rather
    # than to whatever object dropna() hands back.
    dataframe = dataframe.dropna().copy()
    if dataframe.empty:
        # Nothing to score, so there are no score keys to turn into columns.
        return dataframe

    # One analyzer for the whole column instead of one per row.
    analyzer = SentimentIntensityAnalyzer()
    all_scores = [analyzer.polarity_scores(review)
                  for review in dataframe[review_column]]

    # The first result defines the score columns and their order; the array
    # width follows the number of keys instead of a fixed 4.
    keys = list(all_scores[0])
    scores_array = np.array(
        [[scores[key] for key in keys] for scores in all_scores],
        dtype=float)

    for position, key in enumerate(keys):
        dataframe[key] = scores_array[:, position]

    return dataframe