def handler(context, event):
    body = event.body.decode('utf-8')
    context.logger.debug_with('Analyzing ', 'sentence', body)
    analyzer = SentimentIntensityAnalyzer()
    score = analyzer.polarity_scores(body)
    return str(score)
def onTrigger(context, session):
    flow_file = session.get()
    if flow_file is not None:
        sentiment = VaderSentiment()
        session.read(flow_file, sentiment)
        analyzer = SentimentIntensityAnalyzer()
        vs = analyzer.polarity_scores(sentiment.content)
        flow_file.addAttribute("positive", str(vs['pos']))
        flow_file.addAttribute("negative", str(vs['neg']))
        flow_file.addAttribute("neutral", str(vs['neu']))
        session.transfer(flow_file, REL_SUCCESS)
class Sentiment(object):
    def __init__(self):
        self._analyser = SentimentIntensityAnalyzer()
        self._return = {}

    def sentiment_analyzer_scores(self, sentence):
        score = self._analyser.polarity_scores(sentence)
        self._return["sentence"] = sentence
        self._return["score"] = score
        # print("{:-<40} {}\n".format(sentence, str(score)))
        return self._return

    def similarity(self, obj1, obj2, fuzzy_match=False, match_threshold=0.8):
        return textacy.similarity.jaccard(obj1, obj2, fuzzy_match=fuzzy_match,
                                          match_threshold=match_threshold)

    def hamming(self, str1, str2):
        return textacy.similarity.hamming(str1, str2)

    def sentiment(self):
        """Return the sentiment with the maximum score: pos, neg or neu."""
        self._return["score"].pop("compound", None)
        return max(self._return["score"].items(), key=operator.itemgetter(1))[0]
def fetch_tweets(api, name):
    """
    Given a tweepy API object and the screen name of the Twitter user,
    create a list of tweets where each tweet is a dictionary with the
    following keys:

        id: tweet ID
        created: tweet creation date
        retweeted: number of retweets
        text: text of the tweet
        hashtags: list of hashtags mentioned in the tweet
        urls: list of URLs mentioned in the tweet
        mentions: list of screen names mentioned in the tweet
        score: the "compound" polarity score from vader's polarity_scores()

    Return a dictionary containing key-value pairs:

        user: user's screen name
        count: number of tweets
        tweets: list of tweets, each tweet is a dictionary

    For efficiency, create a single Vader SentimentIntensityAnalyzer() per
    call to this function, not per tweet.
    """
    ret_user_info = dict()
    tweets = []
    user = api.get_user(name)
    ret_user_info['user'] = user.screen_name
    ret_user_info['count'] = user.statuses_count
    analyzer = SentimentIntensityAnalyzer()
    raw_tweets = api.user_timeline(screen_name=name, count=100)
    for raw_tweet in raw_tweets:
        tweet = dict()
        tweet['id'] = raw_tweet.id_str
        tweet['created'] = raw_tweet.created_at.date()
        tweet['retweeted'] = raw_tweet.retweet_count
        tweet['text'] = raw_tweet.text
        tweet['hashtags'] = [h['text'] for h in raw_tweet.entities['hashtags']]
        tweet['urls'] = [u['expanded_url'] for u in raw_tweet.entities['urls']]
        tweet['mentions'] = [m['screen_name'] for m in raw_tweet.entities['user_mentions']]
        tweet['score'] = analyzer.polarity_scores(raw_tweet.text)['compound']
        tweets.append(tweet)
    ret_user_info['tweets'] = tweets
    return ret_user_info
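# --- Hypothetical usage sketch for fetch_tweets (not part of the original snippet) ---
# Assumes tweepy credentials live in a local config module, as in the notebook
# snippet later in this collection; the screen name "nasa" is a placeholder.
import tweepy
from config import consumer_key, consumer_secret, access_token, access_token_secret

auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth)

user_info = fetch_tweets(api, "nasa")
print(user_info['user'], user_info['count'])
for t in user_info['tweets'][:5]:
    # print the date, compound sentiment score, and a preview of each tweet
    print(t['created'], t['score'], t['text'][:60])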
def artistanalyzer_result():
    # user_input = "jay z"
    user_input = request.form['user_input']
    user_input = user_input.replace(' ', '-')

    ### start timer ###
    start = datetime.now()

    ### scraping song links ###
    print('--- scraping song links ---')
    source = requests.get(
        f'https://www.songlyrics.com/{user_input}-lyrics/').text
    soup = BeautifulSoup(source, 'lxml')
    songlist = soup.find('div', class_='listbox')
    tracklist = songlist.find('table', class_='tracklist').tbody
    song_links = []
    artist_details = []
    for song in tracklist.find_all('tr', itemprop="itemListElement"):
        if song.td.text in [str(x) for x in range(50 + 1)]:
            link = song.find('a')['href']
            if link not in song_links:
                song_links.append(link)

    ### collecting song details ###
    print('--- scraping song details text ---')
    for val in song_links:
        song_title = val[27:-1].split('/', 1)[1]
        song_title = song_title[:-6].replace('-', ' ').capitalize()
        artist_name = user_input.replace('-', ' ').title()

        ### scraping song text ###
        songsource = requests.get(val).text
        soup2 = BeautifulSoup(songsource, 'lxml')
        block = soup2.find('div', id='songLyricsContainer')
        if block.find('p').text:
            text = block.find('p').text
            if 'feat.' not in text:
                permitted = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ "
                songtext = text.lower()
                songtext = ' '.join(word for word in songtext.split()
                                    if word[0] != '[')
                songtext = songtext.replace("\n", " ").strip()
                songtext = "".join(c for c in songtext if c in permitted)
                songtext = songtext.replace("  ", " ").capitalize()
                artist_details.append([artist_name, song_title, songtext])

    ### create a data frame ###
    print('--- Data Frame ---')
    df = pd.DataFrame(artist_details,
                      columns=['Artist Name', 'Song Title', 'Song Text'])

    ### analysing text ###
    # def sent_to_words(sentence):
    #     yield(gensim.utils.simple_preprocess(str(sentence), deacc=True))
    top_words = []
    num_words = []
    pol_sent = []
    analyzer = SentimentIntensityAnalyzer()
    for val in df['Song Text']:
        res = len(val.split())
        num_words.append(res)
        val = remove_stopwords(val.lower())
        polar = analyzer.polarity_scores(val)
        es = list(polar.items())
        pol = list(es[-1])  # the ('compound', score) pair
        song_1 = ' '.join(word for word in val.split() if len(word) > 3)
        split_val = song_1.split()
        count = Counter(split_val)
        comm = count.most_common(5)
        top_words.append(comm)
        pol_sent.append(pol[1])
    df['Num Words'] = num_words
    df['Top Words'] = top_words
    df['Pol Scores'] = pol_sent
    length_song = df['Num Words'].max()
    long_song = list(df.loc[df['Num Words'] == length_song, 'Song Title'])
    df = df[[
        'Artist Name', 'Song Title', 'Song Text', 'Num Words', 'Top Words',
        'Pol Scores'
    ]]
    list_2 = list(df['Pol Scores'])
    pol_list = []
    for val in list_2:
        if val > .50:
            val_ = ['Positive']
            pol_list.append(val_)
        elif val < .50:
            val_ = ['Negative']
            pol_list.append(val_)
        else:  # val == .50
            val_ = ['Neutral']
            pol_list.append(val_)
    df['+/-'] = pol_list
    df = df[df['Song Text'] != 'We do not have the lyrics for soon come yet']
    table = HTML(df.to_html(classes='table table-striped'))
    pos_out = df['+/-'].mode()
    total_pol = pos_out[0][0]
    artist_name = df["Artist Name"][0]
    num_songs = len(df)
    num_words = df["Num Words"].sum()
    longest_song = long_song[0]
    longestword_count = df['Num Words'].max()
    dict_data = {
        'Artist Name': artist_name,
        'Number of Songs': num_songs,
        'Number of Words': num_words,
        'Longest Song': longest_song,
        'Longest Song Word Count': longestword_count,
        "Artist's Overall Polarity": total_pol
    }

    ### data frame ###
    # print(df.shape)
    # print(df.head())
    print(f'Artist Name: {artist_name}')
    print(f'Total number of songs: {num_songs}')
    print(f'Total Number of Words: {num_words}')
    print(f'Song with most words: {longest_song}, word count: {longestword_count}')
    print(f'Overall polarity: {total_pol}')
    print('-----')

    #### finish timer ###
    print('--- runtime ---')
    break1 = datetime.now()
    print("Elapsed time: {0}".format(break1 - start))  # show timer
    return render_template('artistanalyzer.html',
                           tables=[table],
                           data=dict_data.items())
def sentiment_analysis(content):
    senti_analyzer = SentimentIntensityAnalyzer()
    return senti_analyzer.polarity_scores(content)
import time

import pandas as pd
from sklearn.metrics import confusion_matrix
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

data_source_url = "https://raw.githubusercontent.com/javaidnabi31/Word-Embeddding-Sentiment-Classification/master/movie_data.csv"
df = pd.read_csv(data_source_url)
# print(df.head(3))

sentimentValues = []
x_train = df.loc[:24999, 'review'].values
y_train = df.loc[:24999, 'sentiment'].values
x_test = df.loc[25000:50000, 'review'].values
y_test = df.loc[25000:50000, 'sentiment'].values

db = pd.DataFrame.from_dict(y_test)
db.columns = ['yb']

analyzer = SentimentIntensityAnalyzer()
start_time = time.time()
for x in x_test:
    score = analyzer.polarity_scores(x)
    if score["compound"] < 0.0:
        result = 0
    elif score['compound'] > 0.0:
        result = 1
    else:
        # A compound score of exactly 0.0 was unhandled in the original code,
        # which could leave `result` undefined; treat it as positive here.
        result = 1
    sentimentValues.append(result)

dt = pd.DataFrame.from_dict(sentimentValues)
dt.columns = ['sentimentValues']

results = confusion_matrix(db['yb'], dt['sentimentValues'])
print('Confusion Matrix')
print(results)
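# --- Optional follow-up (not in the original snippet) ---
# A quick accuracy figure from the same predictions, using scikit-learn's
# accuracy_score alongside the confusion matrix above, plus the elapsed time
# measured from start_time.
from sklearn.metrics import accuracy_score

print('Accuracy:', accuracy_score(db['yb'], dt['sentimentValues']))
print('Elapsed:', time.time() - start_time, 'seconds')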
def analyze(text):
    analyser = SentimentIntensityAnalyzer()
    score = analyser.polarity_scores(text)
    return score['compound']
def sentiment_scores_pos(sentence):
    # Create a SentimentIntensityAnalyzer object.
    sid_obj = SentimentIntensityAnalyzer()
    sentiment_dict = sid_obj.polarity_scores(sentence)
    print("Overall sentiment dictionary is : ", sentiment_dict)
    return sentiment_dict['pos'] * 100
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
# Adapted from https://github.com/cjhutto/vaderSentiment
import csv

import numpy as np
import pandas as pd

inputFile = open('Clean_file.csv')
fileObject = csv.reader(inputFile)
sentimentIntensityAnalyzer = SentimentIntensityAnalyzer()

dataFrame = pd.DataFrame(columns=['tweets', 'sentiments', 'scores'])
scores = []
sentiments = []
tweets = []
# map the argmax index to a sentiment label (the original left an unused
# switch_demo stub here)
sentiment_labels = ['positive', 'negative', 'neutral']

for index, row in enumerate(fileObject):
    readTweets = sentimentIntensityAnalyzer.polarity_scores(row[3])
    positive = readTweets['pos']
    negative = readTweets['neg']
    neutral = readTweets['neu']
    npArray = np.array([positive, negative, neutral])
    label_index = np.argmax(npArray)
    sentiments.append(sentiment_labels[label_index])
    scores.append(np.max(npArray))
    tweets.append(row[3])

dataFrame['tweets'] = tweets
dataFrame['sentiments'] = sentiments
dataFrame['scores'] = scores
dataFrame.to_csv('File_sentimental.csv', index=False)
def get_analyzer(lexicon=None):
    if lexicon is None:
        return SentimentIntensityAnalyzer()
    lexicon_file_contents = "\n".join(get_lexicon_file_lines(lexicon))
    return CustomSentimentIntensityAnalyzer(lexicon_file_contents)
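# --- Hypothetical sketch (not part of the original): one way the
# CustomSentimentIntensityAnalyzer used above could be implemented. It assumes
# the lexicon contents use VADER's own tab-separated format
# ("token<TAB>mean_valence<TAB>...") and simply rebuilds the lexicon dict on
# top of the stock analyzer.
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer


class CustomSentimentIntensityAnalyzer(SentimentIntensityAnalyzer):
    def __init__(self, lexicon_file_contents):
        super().__init__()  # load the default lexicon and booster tables first
        custom_lexicon = {}
        for line in lexicon_file_contents.splitlines():
            if not line.strip():
                continue
            token, measure = line.split("\t")[0:2]
            custom_lexicon[token] = float(measure)
        self.lexicon = custom_lexicon  # replace the default word valences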
#!/usr/bin/env python3
import pandas as pnd

dataoutput = pnd.read_csv("tweets.csv")
SentiFinal = pnd.DataFrame()

import nltk
from nltk.tokenize import word_tokenize
from nltk import pos_tag, pos_tag_sents

tweets = dataoutput["text"].map(lambda x: x.lower())
dataoutput["tweetinput"] = tweets

from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

output = SentimentIntensityAnalyzer()
negative = []
positive = []
neutral = []
compound = []
sentiAnalysis = []
sentiScore = []
for tweet in tweets:
    sentiValues = output.polarity_scores(tweet)
    negative.append(sentiValues["neg"])
    positive.append(sentiValues["pos"])
    neutral.append(sentiValues["neu"])
    compound.append(sentiValues["compound"])
    if sentiValues["neg"] > sentiValues["pos"]:
        sentiAnalysis.append("Negative")
    elif sentiValues["pos"] > sentiValues["neg"]:
        # The original snippet breaks off after the "Negative" branch; these
        # two branches complete the obvious pos/neu cases so every tweet gets a label.
        sentiAnalysis.append("Positive")
    else:
        sentiAnalysis.append("Neutral")
class TextAnalyzer:
    def __init__(
        self,
        # Model names: joeddav/xlm-roberta-large-xnli, facebook/bart-large-mnli
        model_name_or_path: str = None,
        initialize_model: bool = False,
        analyzer_config: AnalyzerConfig = None,
    ):
        self.classifier_model_name = model_name_or_path
        self.analyzer_config = analyzer_config or AnalyzerConfig(use_sentiment_model=False)

        if initialize_model is True or self.classifier_model_name is not None:
            from transformers import pipeline
            self.classifier_model = pipeline("zero-shot-classification", model=model_name_or_path)
            self.vader_sentiment_analyzer = None
        else:
            from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
            self.vader_sentiment_analyzer = SentimentIntensityAnalyzer()
            # The original assigned `self.sentiment_model = None` here, leaving
            # `self.classifier_model` undefined on this path even though the
            # other methods check it; initialize the attribute that is actually used.
            self.classifier_model = None

    def _init_classifier_model(self, classifier_model_name: str):
        if self.classifier_model is not None:
            raise AttributeError("Classifier already initialized")
        from transformers import pipeline
        self.classifier_model = pipeline("zero-shot-classification", model=classifier_model_name)

    def init_vader_sentiment_analyzer(self):
        if self.vader_sentiment_analyzer is not None:
            raise AttributeError("Classifier already initialized")
        from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
        self.vader_sentiment_analyzer = SentimentIntensityAnalyzer()

    def _get_sentiment_score_from_vader(self, text: str) -> float:
        if self.vader_sentiment_analyzer is None:
            self.init_vader_sentiment_analyzer()
        scores = self.vader_sentiment_analyzer.polarity_scores(text)
        return scores["compound"]

    def _classify_text_from_model(
        self, text: str, labels: List[str], multi_class_classification: bool = True
    ) -> Dict[str, float]:
        if self.classifier_model is None:
            self._init_classifier_model(self.classifier_model_name)
        scores_data = self.classifier_model(text, labels, multi_class=multi_class_classification)
        score_dict = {label: score for label, score in zip(scores_data["labels"], scores_data["scores"])}
        return dict(sorted(score_dict.items(), key=lambda x: x[1], reverse=True))

    def analyze_input(
        self,
        source_response_list: List[AnalyzerRequest],
        analyzer_config: AnalyzerConfig = None,
        **kwargs
    ) -> List[AnalyzerResponse]:
        analyzer_config = analyzer_config or self.analyzer_config
        analyzer_output: List[AnalyzerResponse] = []

        labels = analyzer_config.labels or []
        if "positive" not in labels:
            labels.append("positive")
        if "negative" not in labels:
            labels.append("negative")

        for source_response in source_response_list:
            classification_map = {}
            if not analyzer_config.use_sentiment_model:
                sentiment_value = self._get_sentiment_score_from_vader(source_response.processed_text)
                if sentiment_value < 0.0:
                    classification_map["negative"] = -sentiment_value
                    classification_map["positive"] = 1.0 - classification_map["negative"]
                else:
                    classification_map["positive"] = sentiment_value
                    classification_map["negative"] = 1.0 - classification_map["positive"]
            else:
                classification_map = self._classify_text_from_model(
                    source_response.processed_text,
                    labels,
                    analyzer_config.multi_class_classification,
                )

            analyzer_output.append(
                AnalyzerResponse(
                    processed_text=source_response.processed_text,
                    meta=source_response.meta,
                    classification=classification_map,
                    source_name=source_response.source_name,
                )
            )

        return analyzer_output
def init_vader_sentiment_analyzer(self):
    if self.vader_sentiment_analyzer is not None:
        raise AttributeError("Classifier already initialized")
    from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
    self.vader_sentiment_analyzer = SentimentIntensityAnalyzer()
def __init__(self):
    self.sentiment_analyzer = SentimentIntensityAnalyzer()
def vader_sentiment_raw(text):
    analyzer = SentimentIntensityAnalyzer()
    sent = analyzer.polarity_scores(text)['compound']
    return str(sent)
import string

import pandas as pd
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

df = pd.read_csv(r'G:\python projects\FinallyMajorProject\Book1.csv')

# method 1: simple analysis with VADER
vs = SentimentIntensityAnalyzer()

# converting to lower case
df['review'] = df['review'].apply(lambda x: x.lower())


# removing punctuation
def removerpunct(text):
    for punctuation in string.punctuation:
        text = text.replace(punctuation, '')
    return text


df['review'] = df['review'].apply(lambda x: removerpunct(x))

# predicting values with VADER
df['compound_pred'] = df['review'].apply(
    lambda x: vs.polarity_scores(x)['compound'])

# separating negative and positive values into the 'pred' column
df['pred'] = df['compound_pred'].apply(lambda x: 'positive'
                                       if x > 0 else 'negative')
def feature_encoder(dataobjects):
    """
    Features included in the code are:
        1. sentiment scores: pos, neg and neu
        easiness-to-read scales:
        2. flesch reading,
        3. dale_chall reading,
        4. gunning_fog score,
        5. smog_index and
        6. text standard scores.
        all these scores are included in the entire feature set
        7. perspective api scores (toxicity scores for the entire text)
        8. politeness score
        9. impoliteness score
        10. politeness strategies
        11. POS tags
    :param dataobjects: reads the data objects (data frame) which incorporate the text
    :return: a feature encoded matrix of numeric entities for the entire data set
    """
    nlp = spacy.load('en_core_web_sm')
    feature_dict = {}
    feature_set = {}
    cnt = 0
    for line in dataobjects:
        if cnt == 0:
            cnt = 1
            continue
        feature_dict[cnt] = {}
        text = line[2]

        # sentiment scores: pos, neg and neutral
        analyzer = SentimentIntensityAnalyzer()
        vs = analyzer.polarity_scores(text)
        feature_dict[cnt]['pos'] = vs['pos']
        feature_dict[cnt]['neg'] = vs['neg']
        feature_dict[cnt]['neu'] = vs['neu']
        feature_set['pos'] = 1
        feature_set['neg'] = 1
        feature_set['neu'] = 1

        # easiness-to-read scores: flesch reading
        sc = textstat.flesch_reading_ease(text)
        feature_dict[cnt]['easiness'] = sc
        feature_set['easiness'] = 1

        # easiness-to-read scores: dale chall reading
        sc = textstat.dale_chall_readability_score(text)
        feature_dict[cnt]['easiness_dale'] = sc
        feature_set['easiness_dale'] = 1

        # easiness-to-read scores: gunning fog reading
        sc = textstat.gunning_fog(text)
        feature_dict[cnt]['easiness_fog'] = sc
        feature_set['easiness_fog'] = 1

        # easiness-to-read scores: smog index reading
        sc = textstat.smog_index(text)
        feature_dict[cnt]['easiness_smog'] = sc
        feature_set['easiness_smog'] = 1

        # easiness-to-read scores: text standard reading
        sc = textstat.text_standard(text, float_output=False)
        feature_dict[cnt]['easiness_standard'] = sc
        feature_set['easiness_standard'] = 1

        # preprocessing text to make it readable for the Perspective API
        stry = str(text)
        sent = ''
        for a in stry:
            if (a == ' ' or ('A' <= a <= 'Z') or ('a' <= a <= 'z')
                    or ('0' <= a <= '9') or a == '?' or a == '.'):
                sent += a

        # Perspective API call for the toxicity score.
        # Note: the original built the request body as a string with unquoted
        # keys; json.dumps produces the equivalent, valid JSON payload.
        data = json.dumps({
            'comment': {'text': sent},
            'languages': ['en'],
            'requestedAttributes': {'TOXICITY': {}},
        })
        response = requests.post(
            'https://commentanalyzer.googleapis.com/v1alpha1/comments:analyze',
            headers=headers,
            params=params,
            data=data)
        j = json.loads(response.text)
        # The original nested four identical try/except blocks here; a single
        # lookup with a fallback has the same effect.
        feature_dict[cnt]['toxicity'] = 0.0
        try:
            feature_dict[cnt]['toxicity'] = j['attributeScores']['TOXICITY'][
                'summaryScore']['value']
        except Exception:
            feature_dict[cnt]['toxicity'] = 0.0
        feature_set['toxicity'] = 1

        # politeness strategies and politeness score features
        sc = get_scores_strategies_token_indices(text)
        feature_dict[cnt]['score_polite'] = sc['score_polite']
        feature_dict[cnt]['score_impolite'] = sc['score_impolite']
        feature_set['score_polite'] = 1
        feature_set['score_impolite'] = 1
        # print(feature_dict[cnt]['score_polite'])
        for a in sc['strategies']:
            feature_dict[cnt][a] = 1
            feature_set[a] = 1

        # POS tags in the text
        doc = nlp(text)
        for token in doc:
            if str(token.pos_) not in feature_set:
                feature_set[str(token.pos_)] = 1
            if str(token.pos_) not in feature_dict[cnt]:
                feature_dict[cnt][str(token.pos_)] = 1
            else:
                feature_dict[cnt][str(token.pos_)] += 1
        cnt += 1

    # creating a systematic feature matrix from the feature set
    feature_matrix = []
    for i in range(1, cnt):
        feature_list = []
        for key in feature_set.keys():
            if key in feature_dict[i]:
                feature_list.append(feature_dict[i][key])
            else:
                feature_list.append(0.0)
        feature_matrix.append(feature_list)
    return feature_matrix
import pandas as pd
import numpy as np
import nltk
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

analyzer = SentimentIntensityAnalyzer()

df1 = pd.read_csv('./interview1.csv')
app1 = df1['app'].tolist()
mental_health1 = df1['mental_health'].tolist()

app1_sum = 0
for sentence in app1:
    app1_sum += analyzer.polarity_scores(sentence)['compound']

mhealth1_sum = 0
for sentence in mental_health1:
    mhealth1_sum += analyzer.polarity_scores(sentence)['compound']

print("Interview 1")
print("app_avg = {}".format(app1_sum / len(app1)))
print("mental_health_avg = {}\n".format(mhealth1_sum / len(mental_health1)))

df2 = pd.read_csv('./interview2.csv')
app2 = df2['app'].tolist()
mental_health2 = df2['mental_health'].tolist()
def analyze_vader_sentiment(text):
    vader_analyzer = SentimentIntensityAnalyzer()
    sentiment_dict = vader_analyzer.polarity_scores(text)
    return sentiment_dict['compound']
# (These first two statements sit inside the TextBlob loop from the preceding,
# omitted part of the script.)
temp = pd.DataFrame({'Tweets': twt.iloc[i, 5],
                     'Polarity': blob.sentiment.polarity}, index=[0])
FinalResults = FinalResults.append(temp)

FinalResults['Sentiment'] = FinalResults['Polarity'].apply(
    lambda x: 'Positive' if x > 0 else 'Negative' if x < 0 else 'Neutral')
FinalResults['Sentiment'].describe()
# Results: Most of the tweets are Neutral

# Sentiment Analysis using Vader
FinalResults_Vader = pd.DataFrame()

# Creating engine
analyzer = SentimentIntensityAnalyzer()

# Run Engine
for i in range(0, twt.shape[0]):
    snt = analyzer.polarity_scores(twt.iloc[i, 5])
    temp = pd.DataFrame({'Tweets': twt.iloc[i, 5],
                         'Polarity': list(snt.items())[3][1]},  # the 'compound' score
                        index=[0])
    FinalResults_Vader = FinalResults_Vader.append(temp)

FinalResults_Vader['Sentiment'] = FinalResults_Vader['Polarity'].apply(
    lambda x: 'Positive' if x > 0 else 'Negative' if x < 0 else 'Neutral')
FinalResults_Vader['Sentiment'].describe()
# Results: Most of the tweets are Negative
class AnalyzerConfig(AppConfig):
    name = 'sentimentmonitor.apps.analyzer'
    label = 'analyzer'
    predictor = SentimentIntensityAnalyzer()  # Using VADER
def vaderLexicon():
    # wrapper for getting the lexicon from vaderSentiment
    sa = SentimentIntensityAnalyzer()
    return sa.lexicon
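# --- Hypothetical usage sketch (not part of the original) ---
# The lexicon returned above is a plain dict mapping each token to its mean
# valence rating, so individual word scores can be inspected directly.
lex = vaderLexicon()
print(len(lex), 'lexicon entries')
print(lex.get('great'), lex.get('terrible'))  # positive vs. negative valence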
def __init__(self):
    print("Vader")
    self.analyzer = SentimentIntensityAnalyzer()
def sentiment(s):
    # wrapper for the vaderSentiment polarity scores.
    # note - this seems to work for strings without tokenization
    sa = SentimentIntensityAnalyzer()
    return sa.polarity_scores(text=s)
def gen_weight(date, comp, dfstk):
    # inputs to the function (convert it as arguments)
    datenow = datetime.now().date()
    # date = input("enter the date:(2019-05-30)")
    # date = "2019-06-04"
    company = ""
    # date = input("enter the date:(2019-04-17)")
    if comp == "CSCO":
        company = "cisco"
    elif comp == "MSFT":
        company = "microsoft"
    print(company, dfstk)
    # company = input("enter the name of the company:(cisco)")
    # company = "cisco"
    header = ['Id', 'Text', 'price_date', 'Time', 'Followers count']

    # changing to datetime format and fetching the first day's data
    print("\ncollecting tweets from each date...\n")
    date = datetime.strptime(date, "%Y-%m-%d").date()
    url = ("https://raw.githubusercontent.com/d4datas/twitterdata/master/" +
           str(company) + "-final-" + str(date) + ".csv")
    df = pd.read_csv(url,
                     sep=r'\^',
                     error_bad_lines=False,
                     names=header,
                     usecols=['Id', 'Text', 'price_date', 'Followers count'],
                     skiprows=1,
                     engine='python')

    # iteratively collect data till yesterday and concatenate
    datenow = datenow - timedelta(days=1)
    while date < datenow:
        print(date)
        date = date + timedelta(days=1)
        url = ("https://raw.githubusercontent.com/d4datas/twitterdata/master/" +
               str(company) + "-final-" + str(date) + ".csv")
        df1 = pd.read_csv(
            url,
            sep=r'\^',
            error_bad_lines=False,
            names=header,
            usecols=['Id', 'Text', 'price_date', 'Followers count'],
            skiprows=1,
            engine='python')
        df = pd.concat([df, df1], ignore_index=True)

    # delete duplicates in terms of id and text
    df.drop_duplicates(subset='Id', keep=False, inplace=True)
    df.drop_duplicates(subset='Text', keep=False, inplace=True)

    # changing date to a common format (optional)
    # df['Date'] = pd.to_datetime(df['Date'])
    # df['Date'] = df['Date'].dt.date

    # preprocessing and replacing text in each row using re and preprocessor
    print("starting preprocessing of each row...\n")
    # time.sleep(2)
    for j, tweet_text in df.iterrows():
        print(j)
        tweet_text = df.at[j, 'Text']
        tweet_text = re.sub(',', ' ', tweet_text)
        sentence = tweet_text.lower()  # convert to lower case
        # removing hashtags, line breaks and tabs (with space)
        fsen = re.sub(" #| & |\n|\t", " ", sentence)
        # removing hashtags, line breaks and tabs (without space)
        fsen = re.sub("#| \n| \t|\.", "", fsen)
        fsen = re.sub(r'\d+', '', fsen)  # removing numbers
        # removing links from the text
        sen = fsen.split(" ")
        lsen = []
        for i in sen:
            if "https://" not in str(i):
                # lsen.append(i)
                if "http://" not in str(i):
                    lsen.append(i)
        tweet_text = " ".join(lsen)
        tweet_text = p.clean(tweet_text)
        sen = tweet_text.split(" ")
        lsen = []
        for i in sen:
            if "/" not in str(i):
                lsen.append(i)
        tweet_text = " ".join(lsen)
        # lstrip() returns a new string; the original discarded the result
        tweet_text = tweet_text.lstrip()
        df.at[j, 'Text'] = tweet_text

    df.drop_duplicates(subset='Text', keep=False, inplace=True)
    # print(df)
    # df.to_csv("csco_filtered.csv")

    # sentiment analysis starts
    analyser = SentimentIntensityAnalyzer()
    print("\nanalysing sentiment for each date data...\n")

    # extracting the dates to iterate
    dates = df['price_date'].unique().tolist()
    # print("list created")
    f = open('sentiment_data.csv', 'a')
    f.write("price_date,negative,positive,neutral,compound,subjectivity,polarity\n")
    f.close()

    def calcualte_senti(data, df):
        date = data
        mask = (df['price_date'] == str(date))
        df = df.loc[mask]
        total_rows = 1
        sum_pos = sum_neu = sum_pol = sum_sub = sum_comp = sum_neg = 0
        f = open('sentiment_data.csv', 'a')
        sen_writer = csv.writer(f)
        for text in df['Text']:
            total_rows = total_rows + 1
            vad = analyser.polarity_scores(text)
            analysis = TextBlob(text)
            pol = round(analysis.polarity, 4)
            sub = round(analysis.subjectivity, 4)
            sum_neg = sum_neg + vad['neg']
            sum_pos = sum_pos + vad['pos']
            sum_neu = sum_neu + vad['neu']
            sum_comp = sum_comp + vad['compound']
            sum_pol = sum_pol + pol
            sum_sub = sum_sub + sub

        # finding weighted values
        # print(sum_neg)
        neg_val = sum_neg / total_rows
        pos_val = sum_pos / total_rows
        neu_val = sum_neu / total_rows
        sub_val = sum_sub / total_rows
        pol_val = sum_pol / total_rows
        comp_val = sum_comp / total_rows
        sum_neg = sum_pos = sum_neu = sum_pol = sum_sub = sum_comp = 0

        # appending to csv
        f.write(','.join([
            str(date) + "," + str(neg_val) + "," + str(pos_val) + "," +
            str(neu_val) + "," + str(comp_val) + "," + str(sub_val) + "," +
            str(pol_val)
        ]) + '\n')
        f.close()
        # print(str(date)+","+str(neg_val)+","+str(pos_val)+","+str(neu_val)+","+str(comp_val)+","+str(sub_val)+","+str(pol_val))

    for date in dates:
        print(".")
        calcualte_senti(date, df)

    # dropping unwanted rows
    df = pd.read_csv("sentiment_data.csv", error_bad_lines=False)
    df['price_date'] = pd.to_datetime(df['price_date'],
                                      format='%Y-%m-%d',
                                      errors='coerce')
    df = df[pd.notnull(df['price_date'])]
    # print(df)

    # removing the temporary file
    os.remove("sentiment_data.csv")

    # scaling the compound score to the 0.8-1.2 range (min-max method)
    print("\nscaling data...\n")
    # time.sleep(2)
    # OldRange = (OldMax - OldMin)
    oldrange = 1.5
    oldmin = -.5
    newmin = .8
    # NewRange = (NewMax - NewMin)
    newrange = 0.4
    l = []
    for data in df['compound']:
        # NewValue = (((OldValue - OldMin) * NewRange) / OldRange) + NewMin
        newvalue = (((float(data) - oldmin) * newrange) / oldrange) + newmin
        print(str(data) + '====>' + str(newvalue))
        l.append(newvalue)
    df['sentiment'] = l
    # print(df)
    # df1.to_csv('scaledsentiment.csv')

    # dfstk = pd.read_csv("CSCO.csv", error_bad_lines=False)
    # print(dfstk)

    # merging sentiment data and stock data
    print("merging data...\n")
    # time.sleep(2)
    df['price_date'] = pd.to_datetime(df['price_date'])
    dfstk['price_date'] = pd.to_datetime(dfstk['price_date'])
    dfmrg = pd.merge(df, dfstk, on="price_date")
    # print(dfmrg)
    # dfmrg.to_csv("sentistock.csv")

    # creating a copy with the needed columns (adjust for needed columns)
    dfnl = dfmrg[[
        'price_date', 'open_price', 'close_price', 'low_price', 'high_price',
        'adj_close_price', 'volume', 'sentiment'
    ]].copy()

    # adding the weighted value to the dataframe
    print("adding weightage...\n")
    # time.sleep(2)
    dfnl['weighted_close_price'] = dfnl['close_price'] * dfnl['sentiment']
    dfnl['weight_adj_close_price'] = dfnl['adj_close_price'] * dfnl['sentiment']

    return dfnl
    # dfnl.to_csv("final_data.csv")
df.loc[df['score'] == 0, 'pred'] = 1
df.loc[df['score'] > 0, 'pred'] = 2
len(df[df['pred'] == df['label_binary']])

# confusion matrix with afinn
len(df)
confusion_matrix(df['pred'], df['label_binary'])

df111 = pd.DataFrame()
df111['aaa'] = Y_test1
len(df111[df111['aaa'] == 1])
len(df111)

# vader
analyser = SentimentIntensityAnalyzer()
score = []
for i in range(len(df.index)):
    my_dict = analyser.polarity_scores(df.iloc[[i]]['tweet_text'].tolist()[0])
    if 'compound' in my_dict:
        del my_dict['compound']
    score.append(max(my_dict.items(), key=operator.itemgetter(1))[0])
    print(i)

df['score'] = score
df['pred'] = 0
df.loc[df['score'] == 'neg', 'pred'] = 0
df.loc[df['score'] == 'neu', 'pred'] = 1
df.loc[df['score'] == 'pos', 'pred'] = 2
len(df[df['pred'] == df['label_binary']])
def tw_sent(text):
    sid = SentimentIntensityAnalyzer()
    ss = sid.polarity_scores(text)
    # print(type(ss))
    # print(ss)
    return ss
def __init__(self):
    self._analyser = SentimentIntensityAnalyzer()
    self._return = {}
"VADER is not smart, handsome, nor funny.", # negation sentence example "VADER is smart, handsome, and funny!", # punctuation emphasis handled correctly (sentiment intensity adjusted) "VADER is very smart, handsome, and funny.", # booster words handled correctly (sentiment intensity adjusted) "VADER is VERY SMART, handsome, and FUNNY.", # emphasis for ALLCAPS handled "VADER is VERY SMART, handsome, and FUNNY!!!", # combination of signals - VADER appropriately adjusts intensity "VADER is VERY SMART, uber handsome, and FRIGGIN FUNNY!!!", # booster words & punctuation make this close to ceiling for score "The book was good.", # positive sentence "The book was kind of good.", # qualified positive sentence is handled correctly (intensity adjusted) "The plot was good, but the characters are uncompelling and the dialog is not great.", # mixed negation sentence "At least it isn't a horrible book.", # negated negative sentence with contraction "Make sure you :) or :D today!", # emoticons handled "Today SUX!", # negative slang with capitalization emphasis "Today only kinda sux! But I'll get by, lol" # mixed sentiment example with slang and constrastive conjunction "but" ] analyzer = SentimentIntensityAnalyzer() for sentence in sentences: vs = analyzer.polarity_scores(sentence) print("{:-<65} {}".format(sentence, str(vs))) #note: depending on how you installed (e.g., using source code download versus pip install), you may need to import like this: #from vaderSentiment import SentimentIntensityAnalyzer # --- examples ------- sentences = [ "I love Sitoluama.", "I LOVE Sitoluama.", "I love Sitoluama!", "I love Sitoluama!!!", "I love Sitoluama :)", "I don't love Sitoluama.",
import time
import sqlite3

from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

analyser = SentimentIntensityAnalyzer()

db = sqlite3.connect('queso.db')
cursor = db.cursor()
for row in cursor.execute(
        '''SELECT nick, compound FROM "poo" group by nick ORDER BY compound ASC LIMIT 3'''):
    print(row[0])
def __init__(self):
    self.analyzer = SentimentIntensityAnalyzer()
import csv
import jsonlines
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

d = {"Jan": "01", "Feb": "02", "Mar": "03", "Apr": "04", "May": "05"}
reader = jsonlines.open("data/tweet_idno4.jsonl")
count = 0
analyzer = SentimentIntensityAnalyzer()
writer = csv.writer(open('tweet_sentiment5.csv', 'w', encoding='utf-8', newline=''))
writer.writerow(["Tweet_VS", "Date", "go"])
for obj in reader:
    # obj = reader.read()
    vs = analyzer.polarity_scores(obj['full_text'])
    time = obj['created_at'].split()
    writer.writerow([vs['compound'], "2020-" + d[time[1]] + "-" + time[2], "go"])
    # x = float(vs['compound'])
    # # print(time)
    # if x > 0:
    #     print(x, "positive")
    # elif x == 0:
    #     print(x, "neutral")
    # else:
    #     print(x, "negative")
    count += 1
    if count % 10000 == 0:
        print(count)
def reviews(biz_id):
    """Sentiment score for restaurant"""

    googlemaps_api = os.environ['GOOGLE_API_KEY']

    # we stored lat and lng as parameters / form-style dictionary values
    # in the HREF of the restaurant name in the HTML that loops over each restaurant div.
    # we are extracting that information here, passing it to reviews.html in jinja,
    # and then passing that info to JS through data attributes
    latitude = request.args.get('lat')
    longitude = request.args.get('lng')
    restaurant_name = request.args.get('name')

    review = db.session.query(
        Review.review).filter(Review.biz_id == biz_id).all()

    final_list_d = []
    for tup in review:
        final_list_d.append(tup[0])

    analyser = SentimentIntensityAnalyzer()
    sum_compound_score = 0
    analyzed_reviews = []
    pos_word_list = set()
    neu_word_list = set()
    neg_word_list = set()
    word_cloud_dict = {}

    for sentence in final_list_d:
        snt = analyser.polarity_scores(sentence)
        sum_compound_score += snt['compound']
        tokenized_sent = word_tokenize(sentence)
        for word in tokenized_sent:
            word = word.lower()
            word_compound_score = analyser.polarity_scores(word)['compound']
            if word_compound_score > 0:
                pos_word_list.add(word)
                # word_cloud_dict[word] = (word_compound_score * 1000)
            elif word_compound_score < 0:
                neg_word_list.add(word)
                # word_cloud_dict[word] = (word_compound_score * 1000)
            else:
                neu_word_list.add(word)
                # word_cloud_dict[word] = (word_compound_score * 1000)
        analyzed_reviews.append(sentence + str(snt))

    for word in pos_word_list:
        pos_score = analyser.polarity_scores(word)
        word_cloud_dict[word] = pos_score['compound'] * 1000
        # print(word, pos_score)
    for word in neg_word_list:
        neg_score = analyser.polarity_scores(word)
        word_cloud_dict[word] = neg_score['compound'] * 1000
        # print(word, neg_score)
    # print(word_cloud_dict)

    avg_compound_score = sum_compound_score / len(analyzed_reviews)
    avg_compound_score = ("%.3f" % avg_compound_score)

    return render_template("reviews.html",
                           restaurant_name=restaurant_name,
                           biz_id=biz_id,
                           avg_score_for_restaurant=avg_compound_score,
                           data=analyzed_reviews,
                           api_key=googlemaps_api,
                           latitude=latitude,
                           longitude=longitude)
def semantic_summary(features, all_sentences):
    dir = os.path.dirname(__file__)
    sid = SentimentIntensityAnalyzer(dir + "/stoplists/vader_lexicon.txt")
    options = []
    for feature in features:
        option = {"feature": feature[0], "neg": 0, "pos": 0, "frq": feature[1]}
        support_sentences = find_support_sentences(feature[0], all_sentences)
        sentences_with_orientation = {"pos": [], "neg": []}
        for ss in support_sentences:
            s = ss[0]
            stars = ss[1]
            helpful = ss[2]
            # feats = dict([(word, True) for word in nltk.wordpunct_tokenize(s)])  # if word not in english_stopwords
            opt = ""
            scores = sid.polarity_scores(s)
            if scores["compound"] + 0.7 * (stars - 3) / 2 < -0.2:
                opt = "neg"
            elif scores["compound"] + 0.7 * (stars - 3) / 2 > 0.2:
                opt = "pos"
            # manual correction
            # positives
            if any(mw in nltk.word_tokenize(s) for mw in ["positives", "pros"]):
                opt = "pos"
            elif any(mw in nltk.word_tokenize(s) for mw in ["negatives"]):
                opt = "neg"
            if opt != "":
                option[opt] = int(option[opt] + 1 + helpful * 0.5)
                sentences_with_orientation[opt].append([
                    s,
                    (scores["compound"] + 0.7 * (float(stars) - 3) / 2) *
                    float(helpful * 0.3 + 1)
                ])
        if (len(sentences_with_orientation["pos"]) +
                len(sentences_with_orientation["neg"])) < 5:
            # options.pop(feature[0], None)
            continue
        if len(sentences_with_orientation["pos"]) > 0:
            sorted_sentences_with_orientation_pos = sorted(
                sentences_with_orientation["pos"], key=lambda v: v[1], reverse=True)
            # option["pos_summary"] = sorted_sentences_with_orientation_pos[0]
            option["pos_summary"] = top_summary(
                "\n ".join([t[0] for t in sentences_with_orientation["pos"]]),
                1, rank_summarizer)
        else:
            option["pos_summary"] = []
        if len(sentences_with_orientation["neg"]) > 0:
            sorted_sentences_with_orientation_neg = sorted(
                sentences_with_orientation["neg"], key=lambda v: v[1])
            # option["neg_summary"] = sorted_sentences_with_orientation_neg[0]
            # option["neg_summary"].append(top_summary("\n ".join([t[0] for t in sentences_with_orientation["neg"]]), 1))
            option["neg_summary"] = top_summary(
                "\n ".join([t[0] for t in sentences_with_orientation["neg"]]),
                1, rank_summarizer)
        else:
            option["neg_summary"] = []
        options.append(option)
    options = sorted(options, key=lambda v: v["neg"] + v["pos"], reverse=True)
    return options
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

sentence = input('Enter the sentence for analysis: ')
analyzer = SentimentIntensityAnalyzer()
sentiment = analyzer.polarity_scores(sentence)

if sentiment['compound'] >= 0.05:
    sentence_category = 'Positive'
elif -0.05 <= sentiment['compound'] < 0.05:
    sentence_category = 'Neutral'
else:
    sentence_category = 'Negative'

sentence_data = dict()
print('This sentence is:', sentence_category)
print(dict(sentiment))
import sys
from decimal import Decimal

sys.path.append("../")

import numpy as np
import tweepy

from config import (consumer_key, consumer_secret, access_token,
                    access_token_secret)

# In[114]:

# Import and Initialize Sentiment Analyzer
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

analyzer = SentimentIntensityAnalyzer()

# In[115]:

# Setup Tweepy API Authentication
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth, parser=tweepy.parsers.JSONParser())

# In[116]:

# Target Search Term
class TextAnalysis(AbstractUtil):
    def __init__(self):
        self.analyzer = SentimentIntensityAnalyzer()
        # self.test()

    def analyse(self, text):
        # The compound score is computed by summing the valence scores of each word in the lexicon,
        # adjusted according to the rules, and then normalized to be between -1 (most extreme
        # negative) and +1 (most extreme positive).
        # https://github.com/cjhutto/vaderSentiment
        return self.analyzer.polarity_scores(text)["compound"]

    # returns the article object and the analysis result
    def analyse_web_page_article(self, url):
        article = Article(url)
        article.download()
        article.parse()
        return article, self.analyse(article.text)

    # return a list of high influential value websites
    @staticmethod
    def get_high_value_websites():
        return [
            "https://www.youtube.com",
        ]

    @staticmethod
    def is_analysable_url(url):
        url_ending = str(url).split(".")[-1]
        return url_ending.lower() not in IMAGE_ENDINGS

    # official account tweets that can be used for testing purposes
    def test(self):
        texts = [
            "So excited at what I am working on for the future. I don’t get to talk about what I am actively doing on a daily basis because it’s far ahead of our messaging but I am beyond excited about it! #substratum $sub",
            "Have you read about VeChain and INPI ASIA's integration to bring nanotechnology for digital identity to the VeChainThor blockchain? NDCodes resist high temperature, last over 100 years, are incredibly durable and invisible to the naked eye",
            "Crypto market update: BTC holds near $9K, ETH rising over $640, BCH grows 85% on the week",
            "Extremely excited & proud to announce that #Substratum Node is NOW Open Source! https://github.com/SubstratumNetwork/SubstratumNode …#NetNeutrality $SUB #cryptocurrency #bitcoin #blockchain #technology #SubSavesTheInternet",
            "A scientific hypothesis about how cats, infected with toxoplasmosis, are making humans buy Bitcoin was presented at last night's BAHFest at MIT.",
            "Net Neutrality Ends! Substratum Update 4.23.18",
            "One more test from @SubstratumNet for today. :)",
            "Goldman Sachs hires crypto trader as head of digital assets markets",
            "Big news coming! Scheduled to be 27th/28th April... Have a guess...😎",
            # A comma was missing after the next item in the original, which silently
            # concatenated it with the tweet that follows (and dropped its leading "S").
            "A great step to safer #exchanges: @WandXDapp Joins REMME’s 2018 Pilot Program for testing functionality of certificate-based signup and login for end users. https://medium.com/remme/wandx-joins-remmes-2018-pilot-program-588379aaea4d … #nomorepasswords #blockchain #crypto $REM",
            "Someone transferred $99 million in litecoin — and it only cost them $0.40 in fees. My bank charges me a hell of a lot more to transfer a hell of a lot less. Can we hurry up with this crypto/blockchain revolution I'm tired of paying fees out of my ass to a bunch of fat cats",
            "This week's Theta Surge on http://SLIVER.tv isn't just for virtual items... five PlayStation 4s will be given out to viewers that use Theta Tokens to reward the featured #Fortnite streamer! Tune in this Friday at 1pm PST to win!",
            # Same missing-comma fix for the next pair of items.
            "The European Parliament has voted for regulations to prevent the use of cryptocurrencies in money laundering and terrorism financing. As long as they have good intention i don' t care.. but how much can we trust them??!?!",
            "By partnering with INPI ASIA, the VeChainThor Platform incorporates nanotechnology with digital identification to provide solutions to some of the worlds most complex IoT problems.",
            "Thanks to the China Academy of Information and Communication Technology, IPRdaily and Nashwork for organizing the event.",
            "Delivered a two hour open course last week in Beijing. You can tell the awareness of blockchain is drastically increasing by the questions asked by the audience. But people need hand holding and business friendly features to adopt the tech.",
            "Introducing the first Oracle Enabler tool of the VeChainThor Platform: Multi-Party Payment Protocol (MPP).",
            "An open letter from Sunny Lu (CEO) on VeChainThor Platform.",
            "VeChain has finished the production of digital intellectual property services with partner iTaotaoke. This solution provides a competitive advantage for an industry in need of trust-free reporting and content protections.#GoVeChain",
            "Special thanks to @GaboritMickael to have invited @vechainofficial to present our solution and make a little demo to @AccentureFrance",
            "VeChain’s COO, @kfeng027, is invited to ‘Crypto Media Collection Vo.1’ held at DeNA’s campus by Coinjinja in Tokyo, one of the largest cryptocurrency information platforms. Kevin’s speech begins at 16:35 UTC+9, livestreamed via https://ssl.twitcasting.tv/coinjinja ",
            "VeChain will pitch their solutions potentially landing a co-development product with LVMH. In attendance will be CEOs Bill McDermott (SAP), Chuck Robbins (CISCO), Ginni Rometty (IBM), and Stephane Richard (Orange) as speakers -",
            "As the only blockchain company selected, VeChain is among 30 of 800+ hand-picked startups to compete for the second edition of the LVMH Innovation Award. As a result, VeChain has been invited to join the Luxury Lab LVMH at Viva Technology in Paris from May 24-26, 2018.",
            "VeChain to further its partnership with RFID leader Xiamen Innov and newly announced top enterprise solution provider CoreLink by deploying a VeChainThor enterprise level decentralized application - AssetLink.",
            "Today, a group of senior leaders from TCL's Eagle Talent program visited the VeChain SH office. @VeChain_GU demonstrated our advanced enterprise solutions and it's relation to TCL's market. As a result, we're exploring new developments within TCL related to blockchain technology.",
            "VeChain announces a partnership with eGrid, a leading publicly listed ERP, SCM and CRM solution provider to synergistically provide comprehensive blockchain technology backing for a significant portion of China’s automobile industry.",
            "We are glad to be recognized as Top 10 blockchain technology solution providers in 2018. outprovides a platform for CIOs and decision makers to share their experiences, wisdom and advice. Read the full version article via",
            "Talked about TOTO at the blockchain seminar in R University of Science and Technology business school last Saturday. It covered 3000 MBA students across business schools in China.",
        ]
        for text in texts:
            print(str(self.analyse(text)) + " => " + str(DecoderEncoder.encode_into_bytes(text)))