Example #1
import operator

import textacy.similarity
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer


class Sentiment(object):

    def __init__(self):
        self._analyser = SentimentIntensityAnalyzer()
        self._return = {}

    def sentiment_analyzer_scores(self, sentence):
        score = self._analyser.polarity_scores(sentence)
        self._return["sentence"] = sentence
        self._return["score"] = score
        # print("{:-<40} {}\n".format(sentence, str(score)))
        return self._return

    def similarity(self, obj1, obj2, fuzzy_match=False, match_threshold=0.8):
        return textacy.similarity.jaccard(obj1, obj2, fuzzy_match=fuzzy_match, match_threshold=match_threshold)

    def hamming(self, str1, str2):
        return textacy.similarity.hamming(str1, str2)

    """
    Returns the sentiment with maximum score
    
    pos, neg or neu
    
    """

    def sentiment(self):
        self._return["score"].pop("compound", None)
        return max(self._return["score"].items(), key=operator.itemgetter(1))[0]
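A minimal usage sketch for the class above, assuming the imports shown at the top of the example are installed:

# Illustrative usage only; method names come from the Sentiment class above.
s = Sentiment()
result = s.sentiment_analyzer_scores("VADER is smart, handsome, and funny.")
print(result["score"])                           # dict with 'neg', 'neu', 'pos' and 'compound'
print(s.sentiment())                             # label with the highest score: "pos", "neg" or "neu"
print(s.similarity("sentiment", "sentiments"))   # needs a textacy version whose jaccard() accepts fuzzy_match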
Example #2
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer


def handler(context, event):
    body = event.body.decode('utf-8')
    context.logger.debug_with('Analyzing ', 'sentence', body)

    analyzer = SentimentIntensityAnalyzer()

    score = analyzer.polarity_scores(body)

    return str(score)
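A quick local test sketch for the handler above; the _Context and _Event classes below are hypothetical stand-ins for the objects a serverless runtime such as nuclio would normally supply:

# Hypothetical stand-in objects for a local smoke test.
class _Logger:
    def debug_with(self, msg, *args):
        print(msg, args)

class _Context:
    logger = _Logger()

class _Event:
    body = b"The phone is super cool."

print(handler(_Context(), _Event()))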
def onTrigger(context, session):
  flow_file = session.get()
  if flow_file is not None:
    sentiment = VaderSentiment()
    session.read(flow_file,sentiment)
    analyzer = SentimentIntensityAnalyzer()
    vs = analyzer.polarity_scores(sentiment.content)
    flow_file.addAttribute("positive",str(vs['pos']))
    flow_file.addAttribute("negative",str(vs['neg']))
    flow_file.addAttribute("neutral",str(vs['neu']))
    session.transfer(flow_file, REL_SUCCESS)
def fetch_tweets(api, name):
    """
    Given a tweepy API object and the screen name of the Twitter user,
    create a list of tweets where each tweet is a dictionary with the
    following keys:

       id: tweet ID
       created: tweet creation date
       retweeted: number of retweets
       text: text of the tweet
       hashtags: list of hashtags mentioned in the tweet
       urls: list of URLs mentioned in the tweet
       mentions: list of screen names mentioned in the tweet
       score: the "compound" polarity score from vader's polarity_scores()

    Return a dictionary containing key-value pairs:

       user: user's screen name
       count: number of tweets
       tweets: list of tweets, each tweet is a dictionary

    For efficiency, create a single Vader SentimentIntensityAnalyzer()
    per call to this function, not per tweet.
    """

    ret_user_info = dict()
    tweets = []
    user = api.get_user(name)
    ret_user_info['user'] = user.screen_name
    ret_user_info['count'] = user.statuses_count
    analyzer = SentimentIntensityAnalyzer()
    raw_tweets = api.user_timeline(screen_name=name, count=100)
    for raw_tweet in raw_tweets:
        tweet = dict()
        tweet['id'] = raw_tweet.id_str
        tweet['created'] = raw_tweet.created_at.date()
        tweet['retweeted'] = raw_tweet.retweet_count
        tweet['text'] = raw_tweet.text
        tweet['hashtags'] = [h['text'] for h in raw_tweet.entities['hashtags']]
        tweet['urls']     = [u['expanded_url'] for u in raw_tweet.entities['urls']]
        tweet['mentions'] = [m['screen_name'] for m in raw_tweet.entities['user_mentions']]
        tweet['score']    = analyzer.polarity_scores(raw_tweet.text)['compound']
        tweets.append(tweet)
    ret_user_info['tweets'] = tweets

    return ret_user_info
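A hedged usage sketch for fetch_tweets; the credentials below are placeholders, and the classic tweepy v3-style OAuth 1.0a handler assumed by the function above is used:

import tweepy

# Placeholder credentials -- substitute real keys before running.
auth = tweepy.OAuthHandler("CONSUMER_KEY", "CONSUMER_SECRET")
auth.set_access_token("ACCESS_TOKEN", "ACCESS_TOKEN_SECRET")
api = tweepy.API(auth)

info = fetch_tweets(api, "nasa")
print(info["user"], info["count"])
for t in info["tweets"][:5]:
    print(t["created"], t["score"], t["text"][:60])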
Example #5
import csv
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

worst = 0
worstLine = ""
diffs = []
mean = 0
std = 0
line_count = 0
trainScores = {}
vaderScores = {}
analyzer = SentimentIntensityAnalyzer()

with open('../../../data/train.csv', mode='r') as csv_file:
    csv_reader = csv.DictReader(csv_file)
    for row in csv_reader:
        if line_count == 0:
            print(f'Column names are {", ".join(row)}')
            line_count += 1
        trainScores[row["comment_text"]] = float(row["target"])
        #print(f'{row["id"]}, {row["target"]} => {row["comment_text"]}')
        vs = analyzer.polarity_scores(row["comment_text"])
        vaderScores[row["comment_text"]] = float(vs["neg"])
        diffs.append((trainScores[row["comment_text"]] -
                      vaderScores[row["comment_text"]])**2)
        mean += diffs[line_count - 1]
        if (diffs[line_count - 1] > worst):
            worst = diffs[line_count - 1]
            worstLine = f'{row["comment_text"]}\n' + str(
                vaderScores[row["comment_text"]]) + ',' + str(
                    trainScores[row["comment_text"]]) + ',' + str(worst)
        line_count += 1
        if (line_count > 30000):
            break
    print(f'Processed {line_count} lines.')

print(worstLine)

# In[30]:


senti_analyzer = SentimentIntensityAnalyzer()


# In[229]:


compound_score = []

for sen in dfs['Tweets']:
    
    compound_score.append(senti_analyzer.polarity_scores(sen)['compound'])


# In[230]:


dfs['Compound Score'] = compound_score


# In[231]:


Sentiment = []

for i in compound_score:
    # assumed completion (the source snippet is cut off): the usual +/-0.05 compound thresholds
    Sentiment.append('Positive' if i >= 0.05 else 'Negative' if i <= -0.05 else 'Neutral')
Example #7
def sentiment(s):
	from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
	analyzer = SentimentIntensityAnalyzer()
	vs = analyzer.polarity_scores(s)
	return vs
Example #8
def get_sentiment(text):
    analyser = SentimentIntensityAnalyzer()
    # the scores dict is ordered neg, neu, pos, compound, so the last value is the compound score
    return list(analyser.polarity_scores(text).values())[-1]
Example #9
def dictionary(sentence):
    # instantiate sentiment analyzer object
    sent_obj = SentimentIntensityAnalyzer()

    # dict has 'neg', 'pos', 'neu', and 'compound'
    return sent_obj.polarity_scores(sentence)
Example #10
def vadersent(text):
    analyzer = SentimentIntensityAnalyzer()
    vs = analyzer.polarity_scores(text)
    return vs['neg'], vs['neu'], vs['pos']
Example #11
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import pandas as pd

analyser = SentimentIntensityAnalyzer()
df = pd.read_csv('data_sentiment.csv', encoding='utf-8')
print(df.head())

# VADER results
vaderresult = []

# count of cases where the VADER label matches the product review rating
truecount = 0

for i in range(df.id.count()):
    # positive if the compound score is greater than 0.05, negative if it is below -0.05
    compound = analyser.polarity_scores(df.textcontent[i]).get("compound")
    if compound > 0.05:
        vaderresult.append(1)
    elif compound < -0.05:
        vaderresult.append(0)
    else:
        vaderresult.append(3)

    print(df.textcontent[i], df.reviewrating[i], vaderresult[i])

    # for a more detailed look..
    #print(df.textcontent[i], analyser.polarity_scores(df.textcontent[i]))

for i in range(df.id.count()):
    if vaderresult[i] == df.reviewrating[i]:
        truecount = truecount + 1
def analyse(sentence):
	analyser = SentimentIntensityAnalyzer()
	score = analyser.polarity_scores(sentence)
	return score['compound']   #-0.05 to 0.05 is neutral
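The comment above uses the conventional VADER cut-offs; a small illustrative helper (the function name is ours, not part of the original example) that turns the compound score into a label:

def label_from_compound(compound):
    # conventional thresholds: >= 0.05 positive, <= -0.05 negative, otherwise neutral
    if compound >= 0.05:
        return "positive"
    if compound <= -0.05:
        return "negative"
    return "neutral"

print(label_from_compound(analyse("This movie was great!")))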
Example #13
def sentiment_analysis(content):
    senti_analyzer=SentimentIntensityAnalyzer()
    return senti_analyzer.polarity_scores(content)
Example #14
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

analyser = SentimentIntensityAnalyzer()


def sentiment_analyzer_scores(sentence):
    score = analyser.polarity_scores(sentence)
    print("{:-<40} {}".format(sentence, str(score)))


sentiment_analyzer_scores("The phone is super cool.")

# --- examples -------
sentences = [
    "VADER is smart, handsome, and funny.",  # positive sentence example
    "VADER is smart, handsome, and funny!",  # punctuation emphasis handled correctly (sentiment intensity adjusted)
    "VADER is very smart, handsome, and funny.",  # booster words handled correctly (sentiment intensity adjusted)
    "VADER is VERY SMART, handsome, and FUNNY.",  # emphasis for ALLCAPS handled
    "VADER is VERY SMART, handsome, and FUNNY!!!",  # combination of signals - VADER appropriately adjusts intensity
    "VADER is VERY SMART, uber handsome, and FRIGGIN FUNNY!!!",  # booster words & punctuation make this close to ceiling for score
    "VADER is not smart, handsome, nor funny.",  # negation sentence example
    "The book was good.",  # positive sentence
    "At least it isn't a horrible book.",  # negated negative sentence with contraction
    "The book was only kind of good.",  # qualified positive sentence is handled correctly (intensity adjusted)
    "The plot was good, but the characters are uncompelling and the dialog is not great.",  # mixed negation sentence
    "Today SUX!",  # negative slang with capitalization emphasis
    "Today only kinda sux! But I'll get by, lol",  # mixed sentiment example with slang and constrastive conjunction "but"
    "Make sure you :) or :D today!",  # emoticons handled
    "Catch utf-8 emoji such as such as 💘 and 💋 and �",  # emojis handled
    "Not bad at all"  # Capitalized negation
]

for sentence in sentences:
    vs = analyser.polarity_scores(sentence)
    print("{:-<65} {}".format(sentence, str(vs)))
Example #15
def sentiment_analysis(text):
    """Determine if sentiment is positive, negative, or neutral
    algorithm to figure out if sentiment is positive, negative or neutral
    uses sentiment polarity from TextBlob, VADER Sentiment and
    sentiment from text-processing URL
    could be made better :)
    """

    # pass text into sentiment url
    sentiment_url = get_sentiment_from_url(text, sentimentURL)

    # pass text into TextBlob
    text_tb = TextBlob(text)

    # pass text into VADER Sentiment
    analyzer = SentimentIntensityAnalyzer()
    text_vs = analyzer.polarity_scores(text)

    if sentiment_url is None:
        if text_tb.sentiment.polarity <= 0 and text_vs['compound'] <= -0.5:
            sentiment = "negative"  # very negative
        elif text_tb.sentiment.polarity <= 0 and text_vs['compound'] <= -0.1:
            sentiment = "negative"  # somewhat negative
        elif text_tb.sentiment.polarity == 0 and text_vs[
                'compound'] > -0.1 and text_vs['compound'] < 0.1:
            sentiment = "neutral"
        elif text_tb.sentiment.polarity >= 0 and text_vs['compound'] >= 0.1:
            sentiment = "positive"  # somewhat positive
        elif text_tb.sentiment.polarity > 0 and text_vs['compound'] >= 0.1:
            sentiment = "positive"  # very positive
        else:
            sentiment = "neutral"
    else:
        if text_tb.sentiment.polarity < 0 and text_vs[
                'compound'] <= -0.1 and sentiment_url == "negative":
            sentiment = "negative"  # very negative
        elif text_tb.sentiment.polarity <= 0 and text_vs[
                'compound'] < 0 and sentiment_url == "neutral":
            sentiment = "negative"  # somewhat negative
        elif text_tb.sentiment.polarity >= 0 and text_vs[
                'compound'] > 0 and sentiment_url == "neutral":
            sentiment = "positive"  # somewhat positive
        elif text_tb.sentiment.polarity > 0 and text_vs[
                'compound'] >= 0.1 and sentiment_url == "positive":
            sentiment = "positive"  # very positive
        else:
            sentiment = "neutral"

    # calculate average polarity from TextBlob and VADER
    polarity = (text_tb.sentiment.polarity + text_vs['compound']) / 2
    # output sentiment polarity
    print("Sentiment Polarity: " + str(polarity))

    # output sentiment subjectivity (TextBlob)
    print("Sentiment Subjectivity: " + str(text_tb.sentiment.subjectivity))

    # output sentiment
    print("Sentiment (url): " + str(sentiment_url))
    print("Sentiment (algorithm): " + str(sentiment))

    return polarity, text_tb.sentiment.subjectivity, sentiment
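A usage sketch for sentiment_analysis; it assumes the module already defines get_sentiment_from_url, sentimentURL, TextBlob and SentimentIntensityAnalyzer as in the fragment above:

polarity, subjectivity, label = sentiment_analysis(
    "The plot was good, but the characters are uncompelling.")
print(polarity, subjectivity, label)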
Example #16
        except:
            print("ERROR: WORD ELONGATION CLEANER FAILED")

        # grab subreddit team
        comment_subr = comment.subreddit.display_name
        if comment_subr == "nfl":
            comment_team = comment.author_flair_text
            if (comment_team not in team_names): continue
        else:
            comment_team = teams_dict[comment_subr]
            comment_subr = "team"

        # get sentiment
        try:
            #comment_sent = getSentiment(comment.body)
            comment_sent = vader.polarity_scores(
                comment_text_clean)['compound']
        except:
            print("ERROR: SENTIMENT ANALYSIS FAILED")

        # sentiment heuristics
        if comment_text_clean in [
                "let's go", "lets go", "f**k yes", "f**k yeah"
        ]:
            comment_sent = .7
        elif comment_text_clean in ["boo"]:
            comment_sent = -1

        # get goodell boo-meter
        if "boo" in comment_text_clean.split(
                " ") or comment_text_clean == "boo":
            comment_boo = 1
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# create sentiment analyzer object
analyzer = SentimentIntensityAnalyzer()

score = analyzer.polarity_scores('programming is fun :)')
print(score)
import pandas as pd
import os
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer


continuing_file_path = os.path.join(os.getcwd(), 'input.csv')

d = pd.read_csv(continuing_file_path, #loading that dataset
                encoding = "ISO-8859-1",
                header=None
               )


analyzer = SentimentIntensityAnalyzer()
#for sentence in sentences:
#    vs = analyzer.polarity_scores(sentence)
#    print("{:-<65} {}".format(sentence, str(vs)))

df = pd.DataFrame(columns=['neg', 'neu', 'pos','compound', 'text'])
print(df)

for index, row in d.iterrows():
    vs = analyzer.polarity_scores(row[0])
    vs['text'] = row[0]
    df = df.append(vs, ignore_index=True)

df.to_csv("output.csv",index=None, header=True,encoding="utf-8")
posts_without_punct = [re.sub(r'[^a-z\s]','',posts) for posts in mbti_clean['posts']]
words = [posts.split() for posts in posts_without_punct]
stemmer = PorterStemmer()
for row in range(len(words)):
    words[row] = " ".join([stemmer.stem(word) for word in words[row] if word not in list(stop_words.ENGLISH_STOP_WORDS) and len(word) >= 3])
vectorizer = CountVectorizer(min_df=25)
word_count = vectorizer.fit_transform(words)
word_count_df = pd.DataFrame(data = word_count.toarray(), columns = vectorizer.get_feature_names())
word_count_df.insert(loc=0, column='true_type', value=mbti['type'])
word_count_df.head()

analyzer = SentimentIntensityAnalyzer()

total_compound_score = []
for i in range(len(mbti)):
    score = pd.Series([analyzer.polarity_scores(post)['compound'] for post in mbti['split_posts'].iloc[i]]).mean()
    total_compound_score.append(score)
    
mbti_features['compound_score'] = total_compound_score
total_pos_score = []
for i in range(len(posts_without_punct)):
    score = pd.Series([analyzer.polarity_scores(post)['pos'] for post in mbti['split_posts'].iloc[i]]).mean()
    total_pos_score.append(score)
mbti_features['pos_score'] = total_pos_score
total_neg_score = []
for i in range(len(posts_without_punct)):
    score = pd.Series([analyzer.polarity_scores(post)['neg'] for post in mbti['split_posts'].iloc[i]]).mean()
    total_neg_score.append(score)
mbti_features['neg_score'] = total_neg_score
total_neu_score = []
for i in range(len(posts_without_punct)):
Example #20
    new_row['basepk2'] = row['basepk2']
    sentence2 = " ".join(row['speechtext2'].split())
    new_row['speechtext2'] = sentence2
    new_row['stance'] = row['stance']

    argument1 = np.array([row['speechtext1']])
    argument2 = np.array([row['speechtext2']])

    X1_test = pd.Series(argument1)
    X2_test = pd.Series(argument2)

    sentiment_arr_test = np.array([])
    cosine_similarity_test = np.array([])

    for i, v in X1_test.items():
        snt = analyser.polarity_scores(X1_test[i])
        comp = snt['compound']
        cosine_distance = cosine_distance_wordembedding_method(
            X1_test[i], X2_test[i])
        sentiment_arr_test = np.concatenate((sentiment_arr_test, [comp]))
        cosine_similarity_test = np.concatenate(
            (cosine_similarity_test, [cosine_distance]))

    sentiment_arr_test = sentiment_arr_test.reshape(
        sentiment_arr_test.shape[0], -1)
    sentiment_arr_test = sparse.csr_matrix(
        sparse.csr_matrix(sentiment_arr_test))

    cosine_similarity_test = cosine_similarity_test.reshape(
        cosine_similarity_test.shape[0], -1)
    cosine_similarity_test = sparse.csr_matrix(
def one_day_window_model():
    files = glob.glob('../DATA/A*/A*/*.json')
    sentimentAnalyzer = SentimentIntensityAnalyzer()

    with open('tweet_sentiment.csv', 'w+') as sfl:
        for file in files:
            with open(file) as fl:
                lines = fl.readlines()
                tweets = json.loads(lines[0])
                for tweet in tweets:
                    date = time.strftime('%Y/%m/%d',
                                         time.localtime(int(tweet['time'])))
                    scores = sentimentAnalyzer.polarity_scores(tweet['text'])
                    sfl.write(date + ',' + str(scores['pos']) + ',' +
                              str(scores['neg']) + ',' + str(scores['neu']) +
                              ',' + str(scores['compound']))
                    sfl.write('\n')

    prices = pd.read_csv('../DATA/CHARTS/APPLE1440.csv').values
    all_tweets = pd.read_csv('tweet_sentiment.csv').values

    with open('features.csv', 'w+') as fl:
        for price in prices:
            current_date = datetime.strptime(price[0], '%Y.%m.%d').date()
            previous_date = current_date - timedelta(days=1)
            tweets = all_tweets[all_tweets[:, 0] == previous_date.strftime(
                '%Y/%m/%d')]

            if len(tweets) != 0:
                if float(price[5]) > float(price[2]):
                    label = "1"
                else:
                    label = "0"

                for tweet in tweets:
                    fl.write(price[0] + ',' + str(tweet[1]) + ',' +
                             str(tweet[2]) + ',' + str(tweet[3]) + ',' +
                             str(tweet[4]) + ',' + label)
                    fl.write('\n')

    dataset = pd.read_csv('features.csv')
    X = dataset.iloc[:, [1, 2, 3, 4]].values
    y = dataset.iloc[:, 5].values

    scaler = StandardScaler()
    X[:, :] = scaler.fit_transform(X[:, :])

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

    rf = RandomForestClassifier(n_estimators=500,
                                criterion='entropy',
                                max_depth=3)
    rf.fit(X_train, y_train)
    y_pred = rf.predict(X_test)
    cm = confusion_matrix(y_test, y_pred)
    print("Random Forest Accuracy: " +
          str((cm[0, 0] + cm[1, 1]) /
              (cm[0, 0] + cm[1, 1] + cm[1, 0] + cm[0, 1])))

    svc = SVC(kernel='poly', random_state=0)
    svc.fit(X_train, y_train)
    y_pred = svc.predict(X_test)
    cm = confusion_matrix(y_test, y_pred)
    print("SVM Accuracy: " + str((cm[0, 0] + cm[1, 1]) /
                                 (cm[0, 0] + cm[1, 1] + cm[1, 0] + cm[0, 1])))

    mlp = MLPClassifier(hidden_layer_sizes=(100, 100, 100, 100),
                        random_state=10)
    mlp.fit(X_train, y_train)
    y_pred = mlp.predict(X_test)
    cm = confusion_matrix(y_test, y_pred)
    print("MLP Accuracy: " + str((cm[0, 0] + cm[1, 1]) /
                                 (cm[0, 0] + cm[1, 1] + cm[1, 0] + cm[0, 1])))
Example #22
## Streaming comments from reddit
comment_count = 0
while True:
    try:
        # list of subreddits to be tracked -- you can add the ones you think are important to track
        subreddit = reddit.subreddit(
            "wallstreetbets+investing+stocks+pennystocks+weedstocks+StockMarket+Trading+Daytrading+algotrading"
        )
        for comment in subreddit.stream.comments(skip_existing=True):
            current_time = datetime.datetime.now()
            subreddit = str(comment.subreddit)
            author = str(comment.author)
            title = str(comment.link_title)
            body = str(comment.body)
            # very rare situation - less than 0.1% of comments are longer than 2000 characters
            if len(body) > 2000:
                body = "data is too large"
            vs = analyzer.polarity_scores(unidecode(body))
            sentiment = vs['compound']
            db = (current_time, subreddit, title, body, author, sentiment)
            mycursor.execute(sqlFormula, db)
            mydb.commit()
            comment_count += 1
            print(comment_count)
    # Catch exceptions so that errors don't hit the API repeatedly and the code doesn't crash on the VM
    except Exception as e:
        print('Exception')
        print(str(e))
        time.sleep(10)
Example #23
    # print("Distinuguished:", submission.distinguished)
    # print("URL:", submission.url)
    # sub_info = [submission.id, submission.title, submission.author, submission.score, submission.num_comments,
    #  submission.upvote_ratio, submission.stickied, submission.link_flair_text, submission.distinguished, submission.permalink, submission.url]
    # data.append(sub_info)
    submission.comments.replace_more(limit=None)
    for comment in submission.comments.list():
        # This for loop will go through all the coments in the comment tree
        # print("Comment Author:", comment.author)
        # print("Body:", comment.body)
        # print("Replies:", len(comment.replies))
        # print("Score:", comment.score)
        # print("Link ID:", comment.link_id)
        # print("Parent ID:", comment.parent_id)
        # print("Permalink:", comment.permalink)
        score = analyzer.polarity_scores(comment.body)
        comment_info = [
            submission.title, comment.author, comment.body,
            len(comment.replies), comment.score, comment.id, comment.link_id,
            comment.parent_id, comment.permalink, score['neg'], score['neu'],
            score['pos'], score['compound']
        ]
        comment_data.append(comment_info)
        # print(analyzer.polarity_scores(comment.body))

# df = pd.DataFrame(data, columns=columns)
# df.to_csv('Ayahuasca_hot.csv')
# df = pd.DataFrame(comment_data, columns=comment_columns)
# df.to_csv('Ayahuasca_hot_100_comments.csv')
df = pd.DataFrame(comment_data, columns=comment_columns)
df.to_csv('Ayahuasca_comments_sentiment.csv')
def motion_classifier(text):
    from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
    analyser = SentimentIntensityAnalyzer()
    score = analyser.polarity_scores(text)
    return score['compound']
Example #25
tokenized, _, _ = st.tokenize_sentences(TEST_SENTENCES)

model = deepmoji_emojis(maxlen, PRETRAINED_PATH)

prob = model.predict(tokenized)

for i, t in enumerate(TEST_SENTENCES):
    t_tokens = tokenized[i]
    t_score = [t]
    t_prob = prob[i]
    ind_top = top_elements(t_prob, 3)
    ind_top = [x if t_prob[x] >= 0.1 else -1 for x in ind_top]
    output[i].extend(ind_top)
    output[i].append(sum(x > -1 for x in ind_top))

    snt = analyzer.polarity_scores(output[i][1])
    output[i].append(snt['compound'])
    out_int = convert_vals(output[i][2:])
    output[i].append(out_int)

    # try:
    #     print("=============")
    #     print(output[i])
    #     decode_int(output[i][-1])
    #     print('http://bubbleup-api.herokuapp.com/posts/' + str(output[i][0]))
    #     print("=============")
    # except Exception as e:
    #     print("Exception at row {}!".format(i))
    #     print(str(e))
    # sys.stdout.flush()
Example #26
import pickle

data = pd.read_sql_query(
    "SELECT id_str,created_at,text FROM new_target_tweets;", conn)
conn.close()
print("data retrieved")

print("dataset has: " + str(len(data)) + " rows")

sentences = data['text']
analyzer = SentimentIntensityAnalyzer()
vs = list()
count = 0
for sentence in sentences:
    count += 1
    vs.append(analyzer.polarity_scores(sentence)['compound'])
    if count % 100000 == 0:
        print(str(round(count / len(sentences) * 100, 2)) + "% completed")
print('Vader Scoring Complete')


def become_a_pickle(data, file_name):
    '''Stores data in a pickle file'''
    with open(file_name, 'wb') as file:
        pickle.dump(data, file, protocol=pickle.HIGHEST_PROTOCOL)
    print('Python', type(data), 'object has been pickled')


def serve_pickle(file_name):
    '''Retrieves data stored in a pickle file
    returns python object originally saved in pickle file'''
    with open(file_name, 'rb') as file:
        return pickle.load(file)
Example #27
def sentiment_analyzer_scores_vader(sentence):
    analyser = SentimentIntensityAnalyzer()
    score = analyser.polarity_scores(sentence)
    return score
Example #28
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

#initialize the sentiment analyzer
analyzer = SentimentIntensityAnalyzer()

score = analyzer.polarity_scores('The movie is great')

#this prints the pos, neg and neu proportions plus the normalized (-1 to 1) compound score
print(score)
def tw_sent(text):
    sid = SentimentIntensityAnalyzer()
    ss = sid.polarity_scores(text)
    #print(type(ss))
#    print(ss)
    return(ss)
Example #30
class TextAnalysis(AbstractUtil):
    def __init__(self):
        self.analyzer = SentimentIntensityAnalyzer()
        # self.test()

    def analyse(self,  text):
        # The compound score is computed by summing the valence scores of each word in the lexicon, adjusted according
        # to the rules, and then normalized to be between -1 (most extreme negative) and +1 (most extreme positive).
        # https://github.com/cjhutto/vaderSentiment
        return self.analyzer.polarity_scores(text)["compound"]

    # returns the article object and the analysis result
    def analyse_web_page_article(self, url):
        article = Article(url)
        article.download()
        article.parse()
        return article, self.analyse(article.text)

    # return a list of high influential value websites
    @staticmethod
    def get_high_value_websites():
        return [
            "https://www.youtube.com"
                ]

    @staticmethod
    def is_analysable_url(url):
        url_ending = str(url).split(".")[-1]
        return url_ending.lower() not in IMAGE_ENDINGS

    # official account tweets that can be used for testing purposes
    def test(self):
        texts = [
            "So excited at what I am working on for the future.  I don’t get to talk about what I am actively doing on a daily basis because it’s far ahead of our messaging but I am beyond excited about it! #substratum $sub",
            "Have you read about VeChain and INPI ASIA's integration to bring nanotechnology for digital identity to the VeChainThor blockchain? NDCodes resist high temperature, last over 100 years, are incredibly durable and invisible to the naked eye",
            "Crypto market update: BTC holds near $9K, ETH rising over $640, BCH grows 85% on the week",
            "Extremely excited & proud to announce that #Substratum Node is NOW Open Source! https://github.com/SubstratumNetwork/SubstratumNode …#NetNeutrality $SUB #cryptocurrency #bitcoin #blockchain #technology #SubSavesTheInternet",
            "A scientific hypothesis about how cats, infected with toxoplasmosis, are making humans buy Bitcoin was presented at last night's BAHFest at MIT.",
            "Net Neutrality Ends! Substratum Update 4.23.18",
            "One more test from @SubstratumNet for today. :)",
            "Goldman Sachs hires crypto trader as head of digital assets markets",
            "Big news coming! Scheduled to be 27th/28th April... Have a guess...😎",
            "A great step to safer #exchanges: @WandXDapp Joins REMME’s 2018 Pilot Program for testing functionality of certificate-based signup and login for end users. https://medium.com/remme/wandx-joins-remmes-2018-pilot-program-588379aaea4d … #nomorepasswords #blockchain #crypto $REM"
            "omeone transferred $99 million in litecoin — and it only cost them $0.40 in fees. My bank charges me a hell of a lot more to transfer a hell of a lot less. Can we hurry up with this crypto/blockchain revolution I'm tired of paying fees out of my ass to a bunch of fat cats",
            "This week's Theta Surge on http://SLIVER.tv  isn't just for virtual items... five PlayStation 4s will be given out to viewers that use Theta Tokens to reward the featured #Fortnite streamer! Tune in this Friday at 1pm PST to win!",
            "The European Parliament has voted for regulations to prevent the use of cryptocurrencies in money laundering and terrorism financing. As long as they have good intention i don' t care.. but how much can we trust them??!?!"
            "By partnering with INPI ASIA, the VeChainThor Platform incorporates nanotechnology with digital identification to provide solutions to some of the worlds most complex IoT problems.",
            "Thanks to the China Academy of Information and Communication Technology, IPRdaily and Nashwork for organizing the event.",
            "Delivered a two hour open course last week in Beijing. You can tell the awareness of blockchain is drastically increasing by the questions asked by the audience. But people need hand holding and business friendly features to adopt the tech.",
            "Introducing the first Oracle Enabler tool of the VeChainThor Platform: Multi-Party Payment Protocol (MPP).",
            "An open letter from Sunny Lu (CEO) on VeChainThor Platform.",
            "VeChain has finished the production of digital intellectual property services with partner iTaotaoke. This solution provides a competitive advantage for an industry in need of trust-free reporting and content protections.#GoVeChain",
            "Special thanks to @GaboritMickael to have invited @vechainofficial to present our solution and make a little demo to @AccentureFrance",
            "VeChain’s COO, @kfeng027, is invited to ‘Crypto Media Collection Vo.1’ held at DeNA’s campus by Coinjinja in Tokyo, one of the largest cryptocurrency information platforms. Kevin’s speech begins at 16:35 UTC+9, livestreamed via https://ssl.twitcasting.tv/coinjinja ",
            "VeChain will pitch their solutions potentially landing a co-development product with LVMH.  In attendance will be CEOs Bill McDermott (SAP), Chuck Robbins (CISCO), Ginni Rometty (IBM), and Stephane Richard (Orange) as speakers -",
            "As the only blockchain company selected, VeChain is among 30 of 800+ hand-picked startups to compete for the second edition of the LVMH Innovation Award. As a result, VeChain has been invited to join the Luxury Lab LVMH at Viva Technology in Paris from May 24-26, 2018.",
            "VeChain to further its partnership with RFID leader Xiamen Innov and newly announced top enterprise solution provider CoreLink by deploying a VeChainThor enterprise level decentralized application - AssetLink.",
            "Today, a group of senior leaders from TCL's Eagle Talent program visited the VeChain SH office. @VeChain_GU demonstrated our advanced enterprise solutions and it's relation to TCL's market. As a result, we're exploring new developments within TCL related to blockchain technology.",
            "VeChain announces a partnership with eGrid, a leading publicly listed ERP, SCM and CRM solution provider to synergistically provide comprehensive blockchain technology backing for a significant portion of China’s automobile industry.",
            "We are glad to be recognized as Top 10 blockchain technology solution providers in 2018. outprovides a platform for CIOs and decision makers to share their experiences, wisdom and advice. Read the full version article via",
            "Talked about TOTO at the blockchain seminar in R University of Science and Technology business school last Saturday. It covered 3000 MBA students across business schools in China."
        ]
        for text in texts:
            print(str(self.analyse(text)) + " => "+str(DecoderEncoder.encode_into_bytes(text)))
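A hedged usage sketch for the TextAnalysis class above; it assumes AbstractUtil, IMAGE_ENDINGS and the newspaper-style Article class used by the fragment are already defined in the surrounding module:

ta = TextAnalysis()
print(ta.analyse("Crypto market update: BTC holds near $9K"))   # compound score in [-1, 1]

# Scoring a full web page (the URL here is a placeholder).
url = "https://www.example.com/some-article"
if ta.is_analysable_url(url):
    article, compound = ta.analyse_web_page_article(url)
    print(article.title, compound)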
    def searchid_db(self):
        self.input = self.idpat.get()
        # execute sql

        sql = "SELECT * FROM appointments WHERE id LIKE ?"
        self.res = c.execute(sql, (self.input,))
        for self.row in self.res:
            self.id1 = self.row[0]
            self.name1 = self.row[1]
            self.time = self.row[5]
            self.idpatient = self.idpat.get()
            self.uname = Label(self.master, text="PATIENT'S NAME", font=('arial 18 bold'))
            self.uname.place(x=0, y=260)
            self.utime = Label(self.master, text="ANALYSIS TYPE", font=('arial 18 bold'))
            self.utime.place(x=0, y=300)

            # entries for each labels==========================================================

            self.ent1 = Entry(self.master, width=30)
            self.ent1.place(x=350, y=260)
            self.ent1.insert(END, str(self.name1))

            self.ent5 = Entry(self.master, width=30)
            self.ent5.place(x=350, y=300)
            self.ent5.insert(END, str(self.time))


        if(self.time=="audio" or self.time == "AUDIO"):
            tkinter.messagebox.showinfo("Alert", "PLEASE ENTER YOUR AUDIO REVIEW")
            root.filename = filedialog.askopenfilename(initialdir="/", title="Select file",
                                                       filetypes=(("Audio files", "*.wav"), ("all files", "*.*")))
            print(root.filename)
            self.uraudio= Label(self.master, text="ENTER AUDIO LOCATION", font=('arial 18 bold'))
            self.uraudio.place(x=0, y=350)
            self.uraudio1 = Entry(self.master, width=40)
            self.uraudio1.place(x=350, y=350)
            self.uraudio1.insert(END, str(root.filename))
            self.audiofile = self.uraudio1.get()
            self.textfile="NULL"
            if self.audiofile == '':
                tkinter.messagebox.showinfo("Warning", "Please Fill Up All Boxes")
            else:
                self.add = Button(self.master, text="ADD", width=12, height=1, bg='steelblue', command=self.adddb1)
                self.add.place(x=400, y=402)
            audio_file = (self.audiofile)
            r = sr.Recognizer()
            with sr.AudioFile(audio_file) as source:
                audio = r.record(source)
                try:
                    print("audio file contain " + r.recognize_google(audio))
                    autxt = r.recognize_google(audio)
                except sr.UnknownValueError:
                    print("Google speech recognizer cant understand your audio")
                except sr.RequestError:
                    print("Couldnt get the result from Google Speach Recognition")

            from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
            # load the SentimentIntensityAnalyser
            analyser = SentimentIntensityAnalyzer()
            sentence = autxt
            # find the polarity scores, i.e. sentiment score in form of positive, negative, neutral and compound scores
            scores = analyser.polarity_scores(sentence)
            print(scores)
            self.positive=scores['pos']
            self.negative = scores['neg']
            self.compound= scores['compound']
            self.box = Label(self.right, text="POSITIVE RATING", font=('arial 14 bold'))
            self.box.place(x=20, y=60)
            self.box1 = Entry(self.right, width=35)
            self.box1.place(x=20, y=100)
            self.box1.insert(END, str(scores['pos']))
            self.boxa = Label(self.right, text="NEGATIVE RATING", font=('arial 14 bold'))
            self.boxa.place(x=20, y=140)
            self.box12 = Entry(self.right, width=35)
            self.box12.place(x=20, y=180)
            self.box12.insert(END, str(scores['neg']))
            self.boxB = Label(self.right, text="OVERALL DESCRIPTION OF RATING", font=('arial 14 bold'))
            self.boxB.place(x=20, y=220)
            self.box13 =Entry(self.right, width=35)
            self.box13.place(x=20, y=260)
            self.box14 = Entry(self.right, width=35)
            self.box14.place(x=20, y=300)
            self.box13.insert(END, str(scores['compound']))

            if (scores['pos'] > scores['neg']):
                if (scores['pos'] > 0.25):
                    a="STRONGLY POSITIVE"
                    print("STRONGLY POSITIVE")
                    self.box14.insert(END, str(a))

                else:
                    b="WEAKLY POSITIVE"
                    print("WEAKLY POSITIVE")
                    self.box14.insert(END, str(b))

            else:
                if (scores['neg'] > 0.25):
                    a="STRONGLY NEGATIVE"
                    print("STRONGLY NEGATIVE")
                    self.box14.insert(END, str(a))

                else:
                    b="WEAKLY NEGATIVE"
                    print("WEAKLY NEGATIVE")
                    self.box14.insert(END, str(b))
        else:
            tkinter.messagebox.showinfo("Alert", "PLEASE ENTER YOUR TEXT REVIEW")
            self.urtext= Label(self.master, text="ENTER TEXT REVIEW", font=('arial 18 bold'))
            self.urtext.place(x=0, y=350)
            self.urtext1 = Entry(self.master, width=40)
            self.urtext1.place(x=350, y=350)
            # ADD button
            self.add = Button(self.master, text="ADD", width=12, height=1, bg='steelblue', command=self.adddb)
            self.add.place(x=400, y=402)
Example #32
class NewsRater(object):
    '''Rates News Articles via sentiment analysis.'''
    def __init__(self):
        '''Constructor'''

        self.logger = logging.getLogger()

        self.analyzer = SentimentIntensityAnalyzer()

        self.logger.info('StockNewsRater Loaded.')

    def rate_news(self, articles, stock):
        '''Rates a list of Stock Articles.
        
        Arguments:
            articles {list} -- The articles to rate.
            stock {Stock} -- The Stock the Articles relate to.
        '''

        self.logger.info('Rating News')

        for article in articles:

            # Don't rate articles you've already rated.
            if Article.get_or_none(url=article.url) is not None: continue

            if self.__publish_date_acceptable(article):

                self.logger.info('Scoring ' + article.url)

                # Rate the title and summary.
                title_score = self.__score_content(
                    article.title) if article.title is not None else 0

                summary_score = self.__score_content(
                    article.summary) if article.summary is not None else 0

                if title_score != 0 or summary_score != 0:

                    # Save all the ratings.
                    article.save()

                    if title_score != 0:
                        self.__save_content(stock, article, title_score,
                                            article.title)

                    if summary_score != 0:
                        self.__save_content(stock, article, summary_score,
                                            article.summary)

    def __publish_date_acceptable(self, article):
        '''Determines if the publish date for a given Article is within an acceptable range.
        
        Arguments:
            article {Article} -- The Article whose publish date to review.
        
        Returns:
            bool -- True if the Article's publish date is acceptable.
        '''

        publish_date_valid = False

        # Create a date object from the Article's publish date.
        pub_date = datetime.strptime(article.publish_date, '%Y-%m-%d').date()

        # TODO Determine the best range to use. 1 week may be too long.
        # Determine the number of days between today and the publish date.
        # The subtraction results in a datetime.timedelta object, which is why the .days property is called on the result of date.today() - pub_date.
        publish_date_valid = (date.today() - pub_date).days <= 7

        return publish_date_valid

    def __score_content(self, content_text):
        '''Scores the given text content.
        
        Arguments:
            content_text {str} -- The text content to score.
        
        Returns:
            float -- The score of the text content.
        '''

        avg_score = 0

        # Converts a chunk of text into individual sentences.
        sentences = tokenize.sent_tokenize(content_text)

        if len(sentences) > 0:

            total_score = 0

            # Score each sentence in the text individually.
            # The polarity_scores method returns a variety of values, 'compound' is the composite of them all.
            for sentence in sentences:
                total_score += self.analyzer.polarity_scores(
                    sentence)['compound']

            # Average all the sentence scores.
            avg_score = total_score / len(sentences)

        return avg_score

    def __save_content(self, stock, article, score, scored_content):
        '''Saves the scored content in the database.
        
        Arguments:
            stock {Stock} -- The Stock associated with the content.
            article {Article} -- The Article associated with the content.
            score {float} -- The content score.
            scored_content {str} -- The content that was scored.
        '''

        # Only saves the data if it hasn't already been saved.
        if ArticleScore.get_or_none(article=article,
                                    score=score,
                                    scored_content=scored_content) is None:
            ArticleScore.create(article=article,
                                score=score,
                                scored_content=scored_content).save()

        if StockArticle.get_or_none(stock_ticker=stock,
                                    article=article) is None:
            StockArticle.create(article=article, stock_ticker=stock).save()
all_tweets = []
for tweet in tweepy.Cursor(api.search, q=search_input, tweet_mode='extended', lang="en", result_type='recent').items(57):
  all_tweets.append(tweet.full_text)

tweets_clean = []
for tweet in all_tweets:
    tweet = p.clean(tweet)
    tweet = ' '.join(re.sub(':', ' ', tweet).split())
    tweets_clean.append(tweet)

positive_l = []
negative_l = []
neutral_l = []

for tweet in tweets_clean:
  if analyser.polarity_scores(tweet).get('compound') >= 0.05:
    positive_l.append(tweet)
  elif analyser.polarity_scores(tweet).get('compound') <= -0.05:
    negative_l.append(tweet)
  else:
    neutral_l.append(tweet)

positive = perc_response(positive_l)
negative = perc_response(negative_l)
neutral = perc_response(neutral_l)

# For the second hash word
all_tweets1 = []
for tweet in tweepy.Cursor(api.search, q=search_input1, tweet_mode='extended', lang="en", result_type='recent').items(57):
  all_tweets1.append(tweet.full_text)
Example #34
                                                word = word.replace(char, "")
                                        # check if the word contains a number or is a stopword
                                        if not any(char.isdigit()
                                                   for char in word):
                                            if word not in stopwords:
                                                # stem words to avoid duplication by pluralization
                                                word = stem.stem(word)
                                                articleWords.append(word)
                                except ValueError:
                                    print("JsonDecodeError for file " +
                                          articleTitle)
                        for i in range(len(articleWords)):
                            if not stringArticle:
                                stringArticle = articleWords[i]
                            else:
                                stringArticle = stringArticle + " " + articleWords[
                                    i]
                        scores = analyzer.polarity_scores(stringArticle)
                        with open(
                                "C:/Users/caire/Desktop/OutputData/ClassifyArticlesContentandTitle/OutputWordSentiment/"
                                + s + ".txt",
                                'a',
                                encoding='utf-8') as newFile:
                            newFile.write(
                                str(scores.get("pos")) + ", " +
                                str(scores.get("neu")) + ", " +
                                str(scores.get("neg")) + ", " +
                                str(scores.get("compound")) + "\n")
                        stringArticle = ""
                        articleWords.clear()
Example #35
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

analyser = SentimentIntensityAnalyzer()

print(analyser.polarity_scores("This sucks"))
Example #36
# Target Search Term

target_names = ["@BBC","@CBS","@CNN","@FoxNews","@nytimes"]
sentiment_elements=["neg",'neu','pos','compound',"created_at",'text']
index2=pd.MultiIndex.from_product([[str(i) for i in range(100)], target_names],names=['#', 'Source'])

tweet_df=pd.DataFrame(columns=sentiment_elements,index=index2)

for name in target_names:
        public_tweets = api.user_timeline(screen_name=name,count=100,tweet_mode='extended')
        i_th=-1
        for tweet in public_tweets:
        # Run Vader Analysis on each tweet
            i_th+=1 
            result= analyzer.polarity_scores(tweet["full_text"])
            tweet_df.loc[(str(i_th),name),"compound"]=result["compound"]
            tweet_df.loc[(str(i_th),name),"pos"]=result["pos"]
            tweet_df.loc[(str(i_th),name),"neu"]=result["neu"]
            tweet_df.loc[(str(i_th),name),"neg"]=result["neg"]
            tweet_df.loc[(str(i_th),name),"created_at"]=datetime.strptime(tweet['created_at'], "%a %b %d %H:%M:%S %z %Y")
            tweet_df.loc[(str(i_th),name),"text"]=tweet["full_text"]
    
tweet_df['Tweets ago']=tweet_df.index.get_level_values(0)
tweet_df['Tweets ago']=pd.to_numeric(tweet_df['Tweets ago']);
tweet_df['compound']=pd.to_numeric(tweet_df['compound']);
tweet_df.head()
tweet_df=tweet_df.reset_index(level=[0,1])
tweet_df.head()

Example #37
            time.sleep(config.sleep_idle_secs)
            continue

        #Run sentiment analysis on the batch
        logging.info("Found {0} unscored docs. Calculating sentiment scores with Vader...".format(len(hits)))
        updates = []
        for hit in hits:
            text, quoted_text = sentiment_helpers.get_tweet_text(hit)
            text = sentiment_helpers.clean_text_for_vader(text)
            action = {
                "_op_type": "update",
                "_id": hit.meta["id"],
                "doc": {
                    "sentiment": {
                        "vader": {
                            "primary": vader.polarity_scores(text)["compound"]
                        }
                    }
                }
            }
            if quoted_text is not None:
                quoted_text = sentiment_helpers.clean_text_for_vader(quoted_text)
                quoted_concat_text = "{0} {1}".format(quoted_text, text)
                action["doc"]["sentiment"]["vader"]["quoted"] = vader.polarity_scores(quoted_text)["compound"]
                action["doc"]["sentiment"]["vader"]["quoted_concat"] = vader.polarity_scores(quoted_concat_text)["compound"]

            updates.append(action)

        #Issue the bulk update request
        logging.info("Making bulk request to Elasticsearch with {0} update actions...".format(len(updates)))
        bulk(es, updates, index=config.elasticsearch_index_name, chunk_size=len(updates))
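The fragment above assumes a VADER analyzer and an Elasticsearch client created earlier in the script (sentiment_helpers and config are the project's own modules and are not shown); a minimal sketch of that assumed setup:

import logging
from elasticsearch import Elasticsearch
from elasticsearch.helpers import bulk
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# Assumed setup for the update loop above.
logging.basicConfig(level=logging.INFO)
vader = SentimentIntensityAnalyzer()
es = Elasticsearch(["http://localhost:9200"])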