Example #1
    def transform(raw_reviews: pd.DataFrame) -> pd.DataFrame:
        """
        Applies sentiment analysis on the reviews.
        """
        transformed_reviews = raw_reviews.copy()

        # prepare the sentiment analyzer
        nltk.download("vader_lexicon", download_dir=NLTK_DATA_DIRECTORY)
        nltk.data.path.append(NLTK_DATA_DIRECTORY)
        sentiment_analyzer = SentimentIntensityAnalyzer()

        sentiments = []
        for _, review in transformed_reviews["comments"].items():
            sentiment_compound = sentiment_analyzer.polarity_scores(
                review)["compound"]

            # classify the sentiment as positive, negative, or neutral
            if sentiment_compound >= 0.05:
                sentiment = "positive"
            elif sentiment_compound <= -0.05:
                sentiment = "negative"
            else:
                sentiment = "neutral"

            sentiments.append(sentiment)

        transformed_reviews["sentiments"] = sentiments

        return transformed_reviews
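A minimal usage sketch for the transform step above, with hypothetical review text (assumes pandas is imported as pd and NLTK_DATA_DIRECTORY points at a writable directory):

reviews = pd.DataFrame({"comments": ["Great location and a lovely host!",
                                     "The room was dirty and the staff were rude."]})
labelled = transform(reviews)
print(labelled[["comments", "sentiments"]])  # a 'sentiments' label is added per review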
Example #2
    def Analyze(self, request, context):
        """ The method that will be exposed to the snet-cli call command.

        :param request: incoming data
        :param context: object that provides RPC-specific information (timeout, etc).
        :return:
        """

        # In our case, request is an InputMessage() object (from .proto file)
        self.value = request.value

        # Convert to a JSON array
        sentence_list = json.loads(self.value)

        # Result list
        result_list = []

        # Sentiment analyzer instance
        analyzer = SentimentIntensityAnalyzer()

        for sentence_item in sentence_list:
            # Classifying sentences
            analysis = str(analyzer.polarity_scores(sentence_item["sentence"]))
            result_list.append({
                "id": sentence_item["id"],
                "analysis": analysis
            })

        # To respond we need to create an OutputMessage() object (from .proto file)
        self.result = OutputMessage()
        self.result.value = json.dumps(result_list)
        return self.result
Example #3
def sentiment_score(text):
    list_text = text.split('.')
    s = SentimentIntensityAnalyzer()
    list_scores = []
    for sentence in list_text:
        list_scores.append(s.polarity_scores(sentence)['compound'])
    return list_scores
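A brief usage sketch (assumes nltk's vader_lexicon resource has already been downloaded):

scores = sentiment_score("I loved the food. The service was painfully slow.")
print(scores)  # one compound value in [-1, 1] per '.'-separated fragment
# note: a trailing '.' produces an extra empty fragment that scores 0.0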
Example #4
def graph_eng():
    file = filedialog.askopenfilename(filetypes=(("Text files", "*.txt"),
                                                 ("all files", "*.*")))
    f = open(file)
    raw = f.read()
    sentences = nltk.sent_tokenize(raw)
    sid = SentimentIntensityAnalyzer()

    positive_values = []
    negative_values = []

    for sentence in sentences:
        ss = sid.polarity_scores(sentence)
        positive_values.append(ss.get('pos'))
        negative_values.append(ss.get('neg'))

    n_value = np.array(negative_values)
    p_value = np.array(positive_values)
    counts_value = np.arange(len(sentences))
    plt.plot(counts_value, p_value, counts_value, n_value)
    plt.show()
Example #5
def score_data(data):
    '''
        Computes VADER sentiment scores for every string in the data passed in.

        Input:
        -data: a pandas Series object containing strings to be scored

        Returns:
        pos, neu, neg, com list objects containing the respective scores
          for each string
    '''
    sid = SentimentIntensityAnalyzer()

    pos = []
    neu = []
    neg = []
    com = []
    for text in data:
        score = sid.polarity_scores(text)
        pos.append(score['pos'])
        neu.append(score['neu'])
        neg.append(score['neg'])
        com.append(score['compound'])

    return pos, neu, neg, com
Example #6
def perform_sentiment_analysis(text):
  sid = SentimentIntensityAnalyzer()
  scores = sid.polarity_scores(text)
  sentiment = {'neg': 'Negative', 'neu': 'Neutral', 'pos': 'Positive'}
  # compare only the three class scores; 'compound' is an aggregate, not a label
  strongest = max(sentiment, key=scores.get)

  return sentiment[strongest]
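A short usage sketch for the helper above (assumes the vader_lexicon resource is available):

print(perform_sentiment_analysis("What a fantastic, memorable evening!"))  # likely 'Positive'
print(perform_sentiment_analysis("The meeting starts at noon."))           # likely 'Neutral'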
Example #7
    def sentiment_analysis_per_sentence(self):
        """This function takes the json_object from the /sentiment-analysis-long url.
        It will perform a more detailed analysis on larger text structures.
        my dict object has the following parameters:
        title: title of something they want analyzed so that the data has a heading
        input_text: the actual text being analyzed
        Good parsing tip from: https://stackoverflow.com/questions/17618149/divide-string-by-line-break-or-period-with-python-regular-expressions"""

        if self.json_object is None or self.json_object.get('input_text') is None:
            return self.json_object
        else:
            input_text = self.json_object['input_text']

            sentences = [x for x in map(str.strip, input_text.split('.')) if x]

            sia = SentimentIntensityAnalyzer()
            # Getting the values:
            # Setting a list to hold all the sentiment_scores
            sentiment_scores = []
            for sentence in sentences:
                sentiment_scores.append(sia.polarity_scores(sentence))

            output_objects = self.sentiment_analysis_sentence_stats(
                sentiment_scores
            )  # call the helper that computes summary statistics over the scores

            return output_objects
Example #8
def SentimentAnalysis(_arg1, library='nltk'):
    '''
    Sentiment Analysis is a procedure that assigns a score from -1 to 1
    for a piece of text with -1 being negative and 1 being positive. For
    more information on the function and how to use it please refer to
    tabpy-tools.md
    '''
    if not (isinstance(_arg1[0], str)):
        raise TypeError

    library = library.lower()
    supportedLibraries = {'nltk', 'textblob'}

    if library not in supportedLibraries:
        raise ValueError

    scores = []
    if library == 'nltk':
        sid = SentimentIntensityAnalyzer()
        for text in _arg1:
            sentimentResults = sid.polarity_scores(text)
            score = sentimentResults['compound']
            scores.append(score)
    elif library == 'textblob':
        for text in _arg1:
            currScore = TextBlob(text)
            scores.append(currScore.sentiment.polarity)
    return scores
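A minimal usage sketch for the TabPy-style function above (assumes both nltk's vader_lexicon and the TextBlob package are installed):

texts = ["Absolutely loved it!", "This was a complete waste of money."]
print(SentimentAnalysis(texts))                      # VADER compound scores, one per text
print(SentimentAnalysis(texts, library='textblob'))  # TextBlob polarity scores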
Example #9
def get_sentiment_score(df, sentiment_command):
    if sentiment_command == "Yes":

        def join_sentiment_text(row):
            row = " ".join(row)
            return row

        df['sentiment_text'] = df['cleaned_text'].apply(join_sentiment_text)

        # Instantiating the sentiment intensity analyzer -
        sid = SentimentIntensityAnalyzer()

        # Finding sentiment of each tweet -
        df['sentiment_score'] = df['sentiment_text'].apply(
            lambda review: sid.polarity_scores(review))

        # Getting the sentiment from dictionary -
        def get_sentiment(score_dict):
            if score_dict['compound'] > 0.2:
                return 'Positive'
            elif score_dict['compound'] < -0.2:
                return 'Negative'
            else:
                return 'Neutral'

        # Storing the sentiment in a separate column
        df['sentiment'] = df['sentiment_score'].apply(get_sentiment)
        df.drop(['sentiment_text', 'sentiment_score'], axis=1, inplace=True)
    return df
Example #10
    def __init__(self):
        # each document is represented by a tuple (sentence, label)
        n_instances = 100
        subj_docs = [(sent, 'subj') for sent in subjectivity.sents(categories='subj')[:n_instances]]
        obj_docs = [(sent, 'obj') for sent in subjectivity.sents(categories='obj')[:n_instances]]

        # split subj and obj instances to keep a balanced, uniform class distribution in both train and test sets
        train_subj_docs = subj_docs[:80]
        test_subj_docs = subj_docs[80:100]
        train_obj_docs = obj_docs[:80]
        test_obj_docs = obj_docs[80:100]
        training_docs = train_subj_docs+train_obj_docs
        testing_docs = test_subj_docs+test_obj_docs

        #train classifier
        sentim_analyzer = SentimentAnalyzer()
        all_words_neg = sentim_analyzer.all_words([mark_negation(doc) for doc in training_docs])

        #use simple unigram word features, handling negation
        unigram_feats = sentim_analyzer.unigram_word_feats(all_words_neg, min_freq=4)
        sentim_analyzer.add_feat_extractor(extract_unigram_feats, unigrams=unigram_feats)

        #apply features to obtain a feature_value representations of our datasets
        training_set = sentim_analyzer.apply_features(training_docs)
        test_set = sentim_analyzer.apply_features(testing_docs)
        self.trainer = NaiveBayesClassifier.train
        self.classifier = sentim_analyzer.train(self.trainer, training_set)
        for key,value in sorted(sentim_analyzer.evaluate(test_set).items()):
            print('{0}: {1}'.format(key, value))
        self.sid = SentimentIntensityAnalyzer()
Example #11
    def intensivityAnalysis(self, request, context):

        # In our case, request is a InputMessage() object (from .proto file)
        self.value = request.value

        analyzer = SentimentIntensityAnalyzer()

        text = base64.b64decode(self.value)
        # Decode to string
        temp = text.decode('utf-8')
        # Split into an array of lines
        tempArray = temp.split("\n")
        # Result of sentences
        stringResult = ''

        # Generating result
        for line in tempArray:
            if line is not None:
                if len(line) > 1:
                    stringResult += line
                    stringResult += '\n'
                    stringResult += str(analyzer.polarity_scores(line))
                    stringResult += '\n\n'

        # Encoding result
        resultBase64 = base64.b64encode(str(stringResult).encode('utf-8'))

        # To respond we need to create an OutputMessage() object (from .proto file)
        self.result = OutputMessage()
        self.result.value = resultBase64
        # log.debug('add({},{})={}'.format(self.a, self.b, self.result.value))
        return self.result
Example #12
def sentiment_analysis(blurb, index):
    # Empty list is created for storing the results of the next segment
    # print(index, " ", blurb)
    # analyzed_sentences = []

    # We loop through all the reviews that we import from the file

    # A dictionary is created to store the data of one sentence temporarily
    data = {'compound': 0, 'neu': 0, 'neg': 0, 'pos': 0}

    # Reviews are taken, one at a time, from the review texts list
    #     blurb = df['blurb'][index]
    # And then the review is separated into sentences
    sentence_list = nltk.tokenize.sent_tokenize(blurb)

    # Then, Vader Analyzer from the NLTK Library is used to do a sentiment analysis of each of the sentences obtained
    #  from the review. This analyzer gives us four parameters in the result: Compound, Neutral, Positive and Negative
    vader_analyzer = SentimentIntensityAnalyzer()
    for text in sentence_list:
        temp = vader_analyzer.polarity_scores(text)
        for key in ('compound', 'neu', 'neg', 'pos'):
            # Here, an average of the parameters is taken for all the sentences obtained from the review to find the
            # Vader Analysis scores for the review
            if len(sentence_list) != 0:
                data[key] += temp[key] / len(sentence_list)

    # We add all the analysis scores in a list for later use

    return (index, data)
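A hypothetical call to the function above (assumes nltk's punkt tokenizer and vader_lexicon are downloaded):

idx, averaged = sentiment_analysis("The pitch was bold. The execution felt rushed.", 0)
print(idx, averaged)  # per-review averages of the compound/neu/neg/pos scores over its sentences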
Example #13
def analyze_sentiment_vader_lexicon(review, threshold=0.1, verbose=False):
    review = tn.strip_html_tags(review)
    review = tn.remove_accented_chars(review)
    review = tn.expand_contractions(review)

    analyze = SentimentIntensityAnalyzer()
    scores = analyze.polarity_scores(review)
    agg_score = scores["compound"]
    final_sentiment = "positive" if agg_score >= threshold else "negative"

    if verbose:
        positive = str(round(scores['pos'] * 100, 2)) + "%"
        final = round(agg_score, 2)
        negative = str(round(scores['neg'] * 100, 2)) + "%"
        neutral = str(round(scores['neu'] * 100, 2)) + "%"
        sentiment_frame = pd.DataFrame(
            [[final_sentiment, final, positive, negative, neutral]],
            columns=pd.MultiIndex(levels=[["SENTIMENT STATS: "],
                                          [
                                              "Predicted Sentiment ",
                                              "Polarity Score", "Positive",
                                              "Negative", "Neutral"
                                          ]],
                                  codes=[[0, 0, 0, 0, 0], [0, 1, 2, 3, 4]]))
        print(sentiment_frame)
    return final_sentiment
Example #14
def get_sentiment_features(input_file, output):
    # Positive words
    output['positive_num'] = (input_file['posemo'] / 100) * input_file['WC']
    output['positive_prop'] = input_file['posemo']
    # Negative Words
    output['negative_num'] = (input_file['negemo'] / 100) * input_file['WC']
    output['negative_prop'] = input_file['negemo']
    # Anxiety words
    output['anxiety_num'] = (input_file['anx'] / 100) * input_file['WC']
    output['anxiety_prop'] = input_file['anx']
    # Anger Words
    output['anger_num'] = (input_file['anger'] / 100) * input_file['WC']
    output['anger_prop'] = input_file['anger']
    # Sadness Words
    output['sadness_num'] = (input_file['sad'] / 100) * input_file['WC']
    output['sadness_prop'] = input_file['sad']
    # Overall Emotional Words
    output['overall_emotional_num'] = (input_file['affect'] /
                                       100) * input_file['WC']
    output['overall_emotional_prop'] = input_file['affect']
    senti_analyser = SentimentIntensityAnalyzer()
    # pass unprocessed text to sentiment analyser, but remove new lines and dashes (\n and -)
    output['average_sentiment_of_word'] = input_file['unprocessed_text'].apply(
        lambda x: senti_analyser.polarity_scores(" ".join(
            re.sub(r'[^\w\s!?.]', "", x).splitlines()))['compound'])
    return output
Example #15
def analysis():
    sid = SentimentIntensityAnalyzer()
    comments = ["islam is the worst religion"]
    for comment in comments:
        sentiment = sid.polarity_scores(comment)
        print(sentiment)
        print(word_tokenize(comment))
Example #16
    def extract_features(text):
        wordcount_pos = 0
        wordcount_neg = 0
        bigram_count_pos = 0
        bigram_count_neg = 0
        compound_scores = list()
        positive_scores = list()
        sia = SentimentIntensityAnalyzer()
        for sentence in nltk.sent_tokenize(text):
            for word in nltk.word_tokenize(sentence):
                if word.lower() in top_100_positive:
                    wordcount_pos += 1
                if word.lower() in top_100_negative:
                    wordcount_neg += 1
                if word in positive_bigram_finder.word_fd:
                    bigram_count_pos += 1
                if word in negative_bigram_finder.word_fd:
                    bigram_count_neg += 1
            compound_scores.append(sia.polarity_scores(sentence)["compound"])
            positive_scores.append(sia.polarity_scores(sentence)["pos"])

        # Adding 1 to the final compound score to always have positive numbers
        # since some classifiers you'll use later don't work with negative numbers.
        curr_features = [
            mean(compound_scores) + 1,
            mean(positive_scores), wordcount_pos, wordcount_neg,
            bigram_count_pos, bigram_count_neg
        ]
        return curr_features
Example #17
def sentiment_analyzer(input_data: dict) -> None:
    """Check package for spelling errors."""
    nltk.download('vader_lexicon')
    info("Processing tasks with sentimentanalyzer plugin now!")
    sid = SentimentIntensityAnalyzer()
    package_export_content_modules = get_value(CONTENT_MOD_STRING,
                                               input_data)[CONTENT_MOD_STRING]
    for values in package_export_content_modules:
        raw_task_data = get_task_data_listed(package_export_content_modules,
                                             values)
        for package in raw_task_data:
            for titles, task_item in raw_task_data[package].items():
                line_count = 0
                line_item = task_item.split("\n")
                for task_line_item in line_item:
                    line_count += 1
                    test_search = r"data:image/\S{1,4};base64"
                    x = re.findall(test_search, task_line_item)
                    if len(x) < 1 < len(task_line_item):
                        print(
                            f"Package: {package}\nTask Title: {titles}\nLine Count: {line_count}\nSentence Analyzed:"
                            f" {task_line_item}")
                        kvp = sid.polarity_scores(task_line_item)
                        for k in kvp:
                            print(f"{k}: {kvp[k]}")
                        print()
Example #18
def get_comments_and_parents(post):
    post.comments.replace_more(limit=None)
    comments = post.comments.list()
    vader_analyzer = SentimentIntensityAnalyzer()
    parents = []
    parents_scores = []
    rtn_comments = []
    scores = []
    for comment in comments:
        comment_parent = comment.parent()
        comment_scores = vader_analyzer.polarity_scores(comment.body)
        comment_scores_lst = [
            comment_scores["neg"], comment_scores["neu"],
            comment_scores["pos"], comment_scores["compound"]
        ]
        scores += [comment_scores_lst]
        try:
            parents += [comment_parent.body]
            parent_scores = vader_analyzer.polarity_scores(comment_parent.body)
            parent_scores_lst = [
                parent_scores["neg"], parent_scores["neu"],
                parent_scores["pos"], parent_scores["compound"]
            ]
            parents_scores += [parent_scores_lst]
            rtn_comments += [comment.body]
        except AttributeError:
            pass
    return ([comment.body for comment in comments], scores,
            [comment.score
             for comment in comments]), (parents, parents_scores, rtn_comments)
Example #19
def post_request_user():
    json_object = request.get_json()
    sa = Sentiment(json_object)
    string_plot = sa.get_string_from_object()['input_text']
    filtered_txt = sa.remove_stopwords(string_plot)
    sia = SentimentIntensityAnalyzer()
    dict_out = sia.polarity_scores(filtered_txt)
    return dict_out
Example #20
def init_model():
    # Train Catboost model to predict number of streams
    global model
    print('********* Training model... *********')
    official_competition_dataset = pd.read_csv('https://datahack2020dataset.s3.us-east-2.amazonaws.com/OfficialCompetitionDataset.csv')
    numerical_cols = ['auditory', 'beats_per_measure', 'beats_per_min', 'concert_probability',
                     'danceability', 'hype', 'instrumentalness', 'length_minutes',
                     'lyricism', 'nplays', 'positivity', 'volume'] # no hotness and critic rating because unlikely for amateur band
    categorical_features = ['major/minor', 'styles', 'tone', 'vulgar'] # no critic/reviewer_type/album/artist/name
    numerical_cols_no_nplays = [x for x in numerical_cols if x != 'nplays']
    y = official_competition_dataset['nplays']
    y = np.log1p(y)
    X = official_competition_dataset.drop('nplays', axis=1)
    X["reviewer_type"].fillna("contributor", inplace=True) 
    X["styles"].fillna("rock", inplace=True) 

    text_features = ['name', 'album', 'artist']
    extra_numerical_features = []
    sia = SentimentIntensityAnalyzer()
    for f in text_features:
        temp = [f'{f}_len', f'{f}_upper', f'{f}_sent_pos', f'{f}_sent_neg']
        X[f'{f}_len'] = X[f'{f}'].str.len()
        X[f'{f}_upper'] = X[f'{f}'].apply(lambda x: len([x for x in x.split() if x.isupper()]))
        X[f'{f}_sent_pos'] = X[f'{f}'].apply(lambda x: sia.polarity_scores(x)['pos'])
        X[f'{f}_sent_neg'] = X[f'{f}'].apply(lambda x: sia.polarity_scores(x)['neg'])
        extra_numerical_features = extra_numerical_features + temp
    X.drop(text_features, axis=1, inplace=True)

    X = X[numerical_cols_no_nplays + extra_numerical_features + categorical_features]
    print(X.head())
    cor_matrix = X.corr().abs()
    upper_tri = cor_matrix.where(np.triu(np.ones(cor_matrix.shape), k=1).astype(bool))
    to_drop = [column for column in upper_tri.columns if any(upper_tri[column] > 0.5)]
    print(to_drop)
    numerical_cols_no_nplays_no_highly_correlated_features = [x for x in numerical_cols_no_nplays if x not in to_drop]
    X = X.drop(to_drop, axis=1)
    print(X.head())
    print(X.columns)
    print(list(X.iloc[0]))
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
    global scaler
    scaler = MinMaxScaler()
    X_train[numerical_cols_no_nplays_no_highly_correlated_features] = scaler.fit_transform(X_train[numerical_cols_no_nplays_no_highly_correlated_features])
    X_test[numerical_cols_no_nplays_no_highly_correlated_features] = scaler.transform(X_test[numerical_cols_no_nplays_no_highly_correlated_features])
    print("**********************")
    print(numerical_cols_no_nplays_no_highly_correlated_features)

    model = catboost.CatBoostRegressor(depth=10, l2_leaf_reg=5, learning_rate=0.1, cat_features=categorical_features, logging_level="Silent")
    model.fit(X_train, y_train)
    pred = model.predict(X_test)
    n = X_test.shape[0]
    p = X_test.shape[1]
    r2 = r2_score(y_test, pred)
    print('r2 score: ', r2)
    print('adjusted r2 score: ', 1-(1-r2)*(n-1)/(n-p-1))
    print('root mean squared error: ', math.sqrt(mean_squared_error(y_test, pred)))
    print('mean absolute error: ', mean_absolute_error(y_test, pred))
    model.save_model('catboost_regressor')
Example #21
def get_features(paragraph):
    sia = SentimentIntensityAnalyzer()
    sentiment_score = sia.polarity_scores(paragraph)
    ret = []
    ret.append(sentiment_score['neg'])
    ret.append(sentiment_score['neu'])
    ret.append(sentiment_score['pos'])
    ret.append(sentiment_score['compound'])
    return ret
Example #22
def get_sentiments(text):
    l_pos = "pos"
    l_neg = "neg"
    l_neut = "neu"

    vader_analyzer = SentimentIntensityAnalyzer()
    sentiments = vader_analyzer.polarity_scores(text)

    return {key: sentiments[key] for key in [l_pos, l_neg, l_neut]}
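A short usage sketch (vader_lexicon assumed downloaded):

print(get_sentiments("The interface is clean, but the battery life is disappointing."))
# {'pos': ..., 'neg': ..., 'neu': ...} -- the 'compound' score is intentionally dropped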
Example #23
def polarity(text):
    """
    Output polarity scores for a text using Vader approach.

    :param text: a text whose polarity has to be evaluated.
    """
    
    vader_analyzer = SentimentIntensityAnalyzer()
    return (vader_analyzer.polarity_scores(text))
Example #24
def sentiment(text):
    vader_analyzer = SentimentIntensityAnalyzer()
    output = vader_analyzer.polarity_scores(text)

    if output['neg'] > 0.3:
        return 0, output['neg']
    elif output['pos'] > 0.3:
        return 1, output['pos']
    return 2, output['neu']
Example #25
def isPositiveMovieReview(review_id: str) -> bool:
    """True if the average of all sentence compound scores is positive."""
    text = nltk.corpus.movie_reviews.raw(review_id)
    sia = SentimentIntensityAnalyzer()
    scores = [
        sia.polarity_scores(sentence)["compound"]
        for sentence in nltk.sent_tokenize(text)
    ]
    return mean(scores) > 0
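A minimal usage sketch (assumes the movie_reviews, punkt, and vader_lexicon NLTK resources are downloaded and that mean comes from the statistics module):

review_id = nltk.corpus.movie_reviews.fileids(categories="pos")[0]
print(review_id, isPositiveMovieReview(review_id))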
Example #26
def save_articles_with_sentiment():
    articles = get_combined_articles()
    sid = SentimentIntensityAnalyzer()
    for i, article in enumerate(articles):
        print(i)
        ss = sid.polarity_scores(article['text'])
        article["polarity"] = ss
    with open('out/articles_train_data_with_sentiment.json', 'w') as fout:
        json.dump(articles, fout)
Example #27
def analyze_email_sentiment(email: str) -> bool:
    '''
    Analyze the email sentiment.
    Return True if the email has a positive compound sentiment,
    False otherwise.
    '''

    sia = SentimentIntensityAnalyzer()
    return sia.polarity_scores(email)["compound"] > 0
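A brief usage sketch (vader_lexicon assumed available):

email_body = "Thanks so much for the quick turnaround, this looks great!"
print(analyze_email_sentiment(email_body))  # True when the compound score is > 0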
Example #28
def extract_significant_words(review):
    score = 0
    mySentAnalyzer = SentimentIntensityAnalyzer()
    for word in review:
        score = mySentAnalyzer.polarity_scores(word)["compound"]
        if score != 0:
            if word not in significantWords:
                significantWords.append(word)
                significantWordPolarities.append(score)
Example #29
 def intensity_analyser_score(self, tweet):
     analyser = SentimentIntensityAnalyzer()
     analysis = analyser.polarity_scores(tweet)
     if analysis['compound'] >= 0.05:
         return 'positive'
     elif analysis['compound'] <= -0.05:
         return 'negative'
     else:
         return 'neutral'
Example #30
def demo_vader_instance(text):
    """
    Output polarity scores for a text using Vader approach.

    :param text: a text whose polarity has to be evaluated.
    """
    from nltk.sentiment import SentimentIntensityAnalyzer
    vader_analyzer = SentimentIntensityAnalyzer()
    print(vader_analyzer.polarity_scores(text))
Example #31
def demo_vader_instance(text):
    """
    Output polarity scores for a text using Vader approach.

    :param text: a text whose polarity has to be evaluated.
    """
    from nltk.sentiment import SentimentIntensityAnalyzer
    vader_analyzer = SentimentIntensityAnalyzer()
    print(vader_analyzer.polarity_scores(text))
Example #32
def main():
    t_start = timer()
    training_set, content_data = make_training_set()
    # print(training_set)
    print_dt('training_set', t_start)

    # --

    t_start = timer()
    # split the hashtag content now!!
    for twitter in content_data:
        if 'hashtag_content' in twitter.keys():
            twitter['hashtag_content'] = find_match_word(twitter['hashtag_content'].lower(), training_set)
    print_dt('split_hash_tag', t_start)

    # --

    t_start = timer()
    sid = SentimentIntensityAnalyzer()
    positive_tweets = []
    negative_tweets = []
    neutral_tweets = []
    compound_tweets = []

    for twitter in content_data:
        if 'hashtag_content' in twitter.keys():
            temp_content = process_content(twitter['twitter_content'] + ' '.join(twitter['hashtag_content']))
        else:
            temp_content = process_content(twitter['twitter_content'])
        temp_content = ' '.join(temp_content)
        result = sort_ordered_dict(sid.polarity_scores(temp_content))
        # dict views are not indexable in Python 3; take the first key explicitly
        top_key = next(iter(result))
        if top_key == 'pos':
            positive_tweets.append(temp_content)
        elif top_key == 'neg':
            negative_tweets.append(temp_content)
        elif top_key == 'neu':
            neutral_tweets.append(temp_content)
        elif top_key == 'compound':
            compound_tweets.append(temp_content)

    print_dt('sentiment_analysis', t_start)

    print('positive_tweets: ', len(positive_tweets))
    print('negative_tweets: ', len(negative_tweets))
    print('neutral_tweets: ', len(neutral_tweets))
    print('compound_tweets: ', len(compound_tweets))
Example #33
    def sentiment_filter(self, text_type):
        if self.sentiment == 'positive':
            sentiment_factor = .3
            sentiment = 'pos'
        elif self.sentiment == 'negative':
            sentiment_factor = .3
            sentiment = 'neg'
        elif self.sentiment == 'neutral':
            sentiment_factor = .3
            sentiment = 'neu'

        if text_type == 'Speech':
            text_type = self.corpus_speech
        elif text_type == 'Tweet':
            text_type = self.corpus_tweet

        sentences = sent_tokenize(text_type)
        sid = SentimentIntensityAnalyzer()
        for sentence in sentences:
            ss = sid.polarity_scores(sentence)
            if ss[sentiment] > sentiment_factor:
                self.tokens += word_tokenize(sentence)
Example #34
class Vader_Sentiment:
    sentiments = ('pos', 'neg', 'neu', 'compound')
    name = 'Vader'

    @wait_nltk_data
    def __init__(self):
        self.vader = SentimentIntensityAnalyzer()

    def transform(self, corpus, copy=True):
        scores = []
        for text in corpus.documents:
            pol_sc = self.vader.polarity_scores(text)
            scores.append([pol_sc[x] for x in self.sentiments])
        X = np.array(scores).reshape((-1, len(self.sentiments)))

        # set compute values
        shared_cv = SharedTransform(self)
        cv = [VectorizationComputeValue(shared_cv, col)
              for col in self.sentiments]

        if copy:
            corpus = corpus.copy()
        corpus.extend_attributes(X, self.sentiments, compute_values=cv)
        return corpus
Example #35
import csv
import re
import nltk
from nltk.sentiment import SentimentAnalyzer
from nltk.sentiment.util import *
from nltk.sentiment import SentimentIntensityAnalyzer


#output file is in the following format
'''tweet label positive negative neutral totalpos totalneg totalneu negation hashtag ? * ! capitalized capsPos capsNeg capsNeu'''

with open('../data/temppreprocessedTraining.data','rt') as f:
    reader=csv.reader(f, delimiter='\t')
    l=list(reader)

sid = SentimentIntensityAnalyzer()

f = open("../data/featuresTraining.data", 'w+')


for row in l:
    sentiment=row[2]
    tweet=row[3]
    tweet=tweet[:-2] 
    ss = sid.polarity_scores(tweet)
    f.write(tweet+" "+sentiment+" ")
    #for k in sorted(ss):
    #	print(k, ss[k])

    # positive, negative, and neutral polarities
    if ss['pos']>0.0:
Example #36
def demo_vader_tweets(n_instances=None, output=None):
    """
    Classify 10000 positive and negative tweets using Vader approach.

    :param n_instances: the number of total tweets that have to be classified.
    :param output: the output file where results have to be reported.
    """
    from collections import defaultdict
    from nltk.corpus import twitter_samples
    from nltk.sentiment import SentimentIntensityAnalyzer
    from nltk.metrics import (accuracy as eval_accuracy, precision as eval_precision,
        recall as eval_recall, f_measure as eval_f_measure)

    if n_instances is not None:
        n_instances = int(n_instances/2)

    fields = ['id', 'text']
    positive_json = twitter_samples.abspath("positive_tweets.json")
    positive_csv = 'positive_tweets.csv'
    json2csv_preprocess(positive_json, positive_csv, fields, strip_off_emoticons=False,
                        limit=n_instances)

    negative_json = twitter_samples.abspath("negative_tweets.json")
    negative_csv = 'negative_tweets.csv'
    json2csv_preprocess(negative_json, negative_csv, fields, strip_off_emoticons=False,
                        limit=n_instances)

    pos_docs = parse_tweets_set(positive_csv, label='pos')
    neg_docs = parse_tweets_set(negative_csv, label='neg')

    # We separately split subjective and objective instances to keep a balanced
    # uniform class distribution in both train and test sets.
    train_pos_docs, test_pos_docs = split_train_test(pos_docs)
    train_neg_docs, test_neg_docs = split_train_test(neg_docs)

    training_tweets = train_pos_docs+train_neg_docs
    testing_tweets = test_pos_docs+test_neg_docs

    vader_analyzer = SentimentIntensityAnalyzer()

    gold_results = defaultdict(set)
    test_results = defaultdict(set)
    acc_gold_results = []
    acc_test_results = []
    labels = set()
    num = 0
    for i, (text, label) in enumerate(testing_tweets):
        labels.add(label)
        gold_results[label].add(i)
        acc_gold_results.append(label)
        score = vader_analyzer.polarity_scores(text)['compound']
        if score > 0:
            observed = 'pos'
        else:
            observed = 'neg'
        num += 1
        acc_test_results.append(observed)
        test_results[observed].add(i)
    metrics_results = {}
    for label in labels:
        accuracy_score = eval_accuracy(acc_gold_results,
            acc_test_results)
        metrics_results['Accuracy'] = accuracy_score
        precision_score = eval_precision(gold_results[label],
            test_results[label])
        metrics_results['Precision [{0}]'.format(label)] = precision_score
        recall_score = eval_recall(gold_results[label],
            test_results[label])
        metrics_results['Recall [{0}]'.format(label)] = recall_score
        f_measure_score = eval_f_measure(gold_results[label],
            test_results[label])
        metrics_results['F-measure [{0}]'.format(label)] = f_measure_score

    for result in sorted(metrics_results):
        print('{0}: {1}'.format(result, metrics_results[result]))

    if output:
        output_markdown(output, Approach='Vader', Dataset='labeled_tweets',
            Instances=n_instances, Results=metrics_results)
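A hypothetical invocation of the demo above (assumes the twitter_samples and vader_lexicon NLTK resources are downloaded; intermediate CSV files are written to the working directory):

demo_vader_tweets(n_instances=200)                     # prints accuracy/precision/recall/F-measure
demo_vader_tweets(n_instances=200, output='vader.md')  # also writes a markdown report via output_markdown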
Example #37
def sentiment(text):
    vader_analyzer = SentimentIntensityAnalyzer()
    output = vader_analyzer.polarity_scores(text)
    return output
Example #38
 def __init__(self):
     self.vader = SentimentIntensityAnalyzer()
Example #39
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import sys
from github import Github
from nltk.sentiment import SentimentIntensityAnalyzer

g = Github("github_username", "github_password")

if len(sys.argv) > 1:
    username = sys.argv[1]
    repo = sys.argv[2]
else:
    username = input("username: ")
    repo = input("repo: ")

sid = SentimentIntensityAnalyzer()

for commit in g.get_user(username).get_repo(repo).get_commits():
    print(commit.commit.message)
    ss = sid.polarity_scores(commit.commit.message)
    for k in sorted(ss):
        print('{0}: {1}, '.format(k, ss[k]), end='')
    print()