def on_data(self, raw_data): tweet = loads(raw_data) try: text = tweet['text'] if tweet.get('retweeted_status') is None and 'RT @' not in text: if tweet.get('coordinates') is None: # TODO: Check for rate limit. If rate limited, then perform location inference nouns = self._get_nouns(tweet_text=text) # bf = BilateralFriends(user_id=tweet['user']['id'], twitter_api=self.api) # loc_occurrence_count = bf.get_location_occurrence() tweet_nouns = defaultdict(int) for noun in nouns: tweet_nouns[noun] += 1 self.corpus[tweet['user']['id']] = {'id': tweet['user']['id'], 'location': tweet['user']['location'], # 'bilateral_friends_location_occurrences': loc_occurrence_count, 'text_nouns': tweet_nouns} loc_inf = LocationInference(user=self.corpus[tweet['user']['id']], local_words=self.local_words, geo_words=self.geo_words) inferred_location = loc_inf.get_location() print inferred_location print 'Predicted location:', inferred_location[0] tweet['coordinates'] = {'type': 'Point', 'coordinates': [LOCATIONS[inferred_location[0]][1], LOCATIONS[inferred_location[0]][0]]} print tweet['coordinates'] sentiment_analyzer = SentimentIntensityAnalyzer() sentiment_score = sentiment_analyzer.polarity_scores(text=text)['compound'] tweet['sentiment'] = sentiment_score current_time_ms = int(round(time() * 1000)) tweet['time_inserted'] = current_time_ms print text, ': ', str(sentiment_score) STREAM_BUFFER.insert(tweet) except KeyError, v: print 'KeyError: ', v
def main(): parser = argparse.ArgumentParser(description="Reads in output from " + "downloadGroupmeMessages and runs a sentiment analysis") parser.add_argument("inFile", help="The file containing the stored messages") parser.add_argument("--outFile", default="out.txt", help="Results go here") args = parser.parse_args() print("\nThis program prints the most negative and positive users of the chat ranked according to their average score from the VADER sentiment intensity analyzer in the NLTK. Not super accurate, but it's a fun conversation starter") print("The program takes a few seconds to run, and requires that you have some of the NLTK corpora installed.") with open(args.inFile, 'r') as infile: infile.readline() analyzer = SentimentIntensityAnalyzer() negList = [] positiveList = [] counter = PostSentimentCounter() for line in infile: line = line.split('\t') message = line[3] id = line[0] name = line[1] sentDict = analyzer.polarity_scores(message) counter.countPost(id, name, sentDict) counter.printSentimentLeaderboards()
def sentiment_analytis_text(self,text_insert): text = text_insert token_text = tokenize.sent_tokenize(text) sid = SentimentIntensityAnalyzer() over_all_sentiment = 0 count = 0 for sentence in token_text: score = sid.polarity_scores(sentence) # Create over all sentiment score over_all_sentiment += score.get("compound") # If sentence is not neuteral add to sentence count for average if (score.get("compound") > 0.1): count += 1 # Calculate average sentiment if count > 0: average_sentiment = over_all_sentiment/count else: average_sentiment = over_all_sentiment return average_sentiment
def add_sentiment(self): print 'Adding sentiment...', sia = SentimentIntensityAnalyzer() for sentiment in ('pos', 'neg', 'neu', 'compound'): sentify = lambda s: sia.polarity_scores(s[:200])[sentiment] self.df['sentiment_' + sentiment] = self.df['story body'].apply(sentify) print 'done'
def get_tweets(q, today): r = api.request( "search/tweets", {"q": "%s since:%s" % (q, today), "count": "100", "result_type": "recent", "lang": "en"} ) data = (json.loads(r.text))["statuses"] sid = SentimentIntensityAnalyzer() all_tweets = [] for i in range(0, len(data)): text = data[i]["text"].encode("ascii", "ignore").decode("ascii") if "RT" in text: RT = True else: RT = False others = text.count("@") sent = TextBlob(text) valance = sent.sentiment.polarity NLTK = sid.polarity_scores(text) tweet_data = { "tweetID": data[i]["id"], "created_at": data[i]["created_at"], "text": text, "textblob": valance, "NLTK": NLTK["compound"], "RT": RT, "others": others, } # print(data[i]) all_tweets.append(tweet_data) return all_tweets
def nltk_sentiment(tweets): sentiment = [] sid = SentimentIntensityAnalyzer() for tweet in tweets: st = sid.polarity_scores(tweet) sentiment.append(st['compound']) return sentiment
def analyze_sentiment_vader_lexicon(review, threshold=0.1, verbose=False): # pre-process text review = normalize_accented_characters(review) review = html_parser.unescape(review) review = strip_html(review) # analyze the sentiment for review analyzer = SentimentIntensityAnalyzer() scores = analyzer.polarity_scores(review) # get aggregate scores and final sentiment agg_score = scores['compound'] final_sentiment = 'positive' if agg_score >= threshold\ else 'negative' if verbose: # display detailed sentiment statistics positive = str(round(scores['pos'], 2)*100)+'%' final = round(agg_score, 2) negative = str(round(scores['neg'], 2)*100)+'%' neutral = str(round(scores['neu'], 2)*100)+'%' sentiment_frame = pd.DataFrame([[final_sentiment, final, positive, negative, neutral]], columns=pd.MultiIndex(levels=[['SENTIMENT STATS:'], ['Predicted Sentiment', 'Polarity Score', 'Positive', 'Negative', 'Neutral']], labels=[[0,0,0,0,0],[0,1,2,3,4]])) print sentiment_frame return final_sentiment
def add_sentiment_to_comments(): sia = SentimentIntensityAnalyzer() for story_comment_list in comments.values(): for comment in story_comment_list: if "text" in comment: comment["sentiment"] = sia.polarity_scores(comment["text"]) print(comment) # here's where to add sentiment using nltk to text
def analyze(posts): post_json = setup_json() #for post, replies in posts.iteritems() sid = SentimentIntensityAnalyzer() for key, value in posts.iteritems(): nustring = ' '.join(value[0]).replace("u'", "") ss = sid.polarity_scores(nustring) for k in sorted(ss): if k is "compound": entry = {} entry['name'] = int(ss[k]*len(nustring)) entry['size'] = len(nustring) if ss[k] == 0.0: post_json['children'][1]['children'].append(entry) elif ss[k] < -0.8: post_json['children'][2]['children'][2]['children'].append(entry) elif ss[k] < -0.4: post_json['children'][2]['children'][1]['children'].append(entry) elif ss[k] < -0.0: post_json['children'][2]['children'][0]['children'].append(entry) elif ss[k] < 0.4: post_json['children'][0]['children'][0]['children'].append(entry) elif ss[k] < 0.8: post_json['children'][0]['children'][1]['children'].append(entry) else: post_json['children'][0]['children'][2]['children'].append(entry) return post_json
def sentiment_by_subreddit(): phrase = urllib.quote(request.form["text"]) year = urllib.quote(request.form["year"]) sid = SentimentIntensityAnalyzer() year_str = str(year) if int(year) > 2014: year_str += "_01" query = '''SELECT subreddit, body, score FROM (SELECT subreddit, body, score, RAND() AS r1 FROM [fh-bigquery:reddit_comments.''' + year_str + '''] WHERE REGEXP_MATCH(body, r'(?i:''' + phrase + ''')') AND subreddit IN (SELECT subreddit FROM (SELECT subreddit, count(*) AS c1 FROM [fh-bigquery:reddit_comments.''' + year_str + '''] WHERE REGEXP_MATCH(body, r'(?i:'''+phrase+''')') AND score > 1 GROUP BY subreddit ORDER BY c1 DESC LIMIT 10)) ORDER BY r1 LIMIT 5000) ''' bigquery_service = build('bigquery', 'v2', credentials=credentials) try: query_request = bigquery_service.jobs() query_data = { 'query': query, 'timeoutMs': 30000 } query_response = query_request.query( projectId=bigquery_pid, body=query_data).execute() except HttpError as err: print('Error: {}'.format(err.content)) raise err subreddit_sentiments = defaultdict(list) subreddit_total = defaultdict(int) if 'rows' in query_response: rows = query_response['rows'] sentiments = [] for row in rows: subreddit = row['f'][0]['v'] body = row['f'][1]['v'] score = int(row['f'][2]['v']) sentiment_values = [] lines_list = tokenize.sent_tokenize(body) for sentence in lines_list: if phrase.upper() in sentence.upper():#(regex.search(sentence)): s = sid.polarity_scores(sentence) sentiment_values.append(s['compound']) comment_sentiment = float(sum(sentiment_values))/len(sentiment_values) subreddit_sentiments[subreddit].append((comment_sentiment, score)) subreddit_total[subreddit] += int(score) subreddit_sentiments = {subreddit:1 + float(sum([float(pair[0])*float(pair[1]) for pair in sentiment_list]))/subreddit_total[subreddit] for subreddit, sentiment_list in subreddit_sentiments.items()} result = sorted(subreddit_sentiments.items(), key = lambda(k,v): (-v,k)) return json.dumps(result)
def get_unique_tweets(self, data_dict): # TODO: Implement filter to check if Tweet text starts with 'RT' """ :param data_dict: :return: """ flag = False try: text = data_dict['text'].encode('ascii', 'ignore').lower() # Check for 'retweeted_status' in metadata field to determine # if tweet is a retweet (1st check) if 'retweeted_status' not in data_dict: url_match = URL.match(text) # Check if link contains url if url_match: match_group = url_match.group() if len(self.key_list) > 0: if any(match_group in item for item in self.key_list): flag = True if flag is False: data_dict['text'] = match_group print "Inserted text: " + data_dict['text'] + '\n' self.key_list.append(match_group) sid = SentimentIntensityAnalyzer() ss = sid.polarity_scores(text) print ss['compound'] score = ss['compound'] if score < 0: score += (3 * score) for w in GOOGLE: if w in text and self.google_price >= 0: self.google_price = score self.google_text = text for w in MICROSOFT: if w in text and self.microsoft_price >= 0: self.microsoft_price = score self.microsoft_text = text for w in FACEBOOK: if w in text and self.facebook_price >= 0: self.facebook_price = score self.facebook_text = text p.trigger('test_channel', 'my_event', {'google': self.google_price, 'microsoft': self.microsoft_price, 'facebook': self.facebook_price}) p.trigger('tweet_channel', 'my_event', { 'google_text': self.google_text, 'microsoft_text': self.microsoft_text, 'facebook_text' : self.facebook_text }) self.google_price = 0 self.microsoft_price = 0 self.facebook_price = 0 else: self.key_list.append(url_match.group()) except TypeError, e: print >> sys.stderr, e self.log_error(str(e))
def vader(self): sid = SentimentIntensityAnalyzer() results = {'neg': 0.0, 'pos': 0.0, 'neu': 0.0, 'compound': 0.0} ss = sid.polarity_scores(self.text) for k in sorted(ss): results[k] += ss[k] return results
def sentimentScore(sentences): analyzer = SentimentIntensityAnalyzer() results = [] for sentence in sentences: vs = analyzer.polarity_scores(sentence) print("vs: " + str(vs)) results.append(vs) return results
def avg_message_sentiment_helper(self, message): sentences = tokenize.sent_tokenize(message) sid = SentimentIntensityAnalyzer() sentence_sentiments = [] for sentence in sentences: ss = sid.polarity_scores(sentence) sentence_sentiments.append(ss['compound']) return np.mean(sentence_sentiments)
def computeVaderScore(self,sentence): sid = SentimentIntensityAnalyzer() ss = sid.polarity_scores(sentence) retList = [] for k in sorted(ss): retList.append(ss[k]) return retList
def negative_msg_count(self): sid = SentimentIntensityAnalyzer() msg_count = 0 for sentence in self.get_message_lst(): ss = sid.polarity_scores(sentence) if ss['compound'] < 0: msg_count += 1 return msg_count
def sentiment(sentence): sid = SentimentIntensityAnalyzer() ss = sid.polarity_scores(sentence) if float(ss['neg']) > float(ss['pos']) : return -1*float(ss['neg']) elif float(ss['neg']) < float(ss['pos']): return float(ss['pos']) else: return 0
def process(self, tup): # extract the sentence sentence = tup.values[0] sid = SentimentIntensityAnalyzer() ss = sid.polarity_scores(sentence) tuple_result = (str(ss['neg']),str(ss['pos']),str(ss['neu'])) self.emit(tuple_result)
def get_sentiment_score(sentence): score_dict = {} sid = SentimentIntensityAnalyzer() ss = sid.polarity_scores(sentence) for k in sorted(ss): # print('{0}: {1}, '.format(k, ss[k]), end='') score_dict[k] = ss[k] return score_dict
def vader_sentiment_scores(text_array): sid = SentimentIntensityAnalyzer() assert all([type(t) == type('') for t in text_array]) vs_dict = {'neg': [], 'neu': [], 'pos': [], 'compound': []} for i, text in enumerate(text_array): if i % 10000 == 0: print(i) vs = sid.polarity_scores(text) for key, value in vs.items(): vs_dict[key].append(value) return vs_dict
def getSentiments(sentences): from nltk import tokenize sid = SentimentIntensityAnalyzer() sentimtents = [None] * len(sentences) i = 0 for sentence in sentences: ss = sid.polarity_scores(sentence) sentimtents[i] = ss["compound"] i = i + 1 return sentimtents
def personAvgSentiment(person, month = None, year = None): from nltk.sentiment.vader import SentimentIntensityAnalyzer comp = 0 sid = SentimentIntensityAnalyzer() if month: msgLst = fullMessageListMonth(person, month, year) else: msgLst = fullMessageList(person) for message in msgLst: sentimentDict = sid.polarity_scores(message) comp += sentimentDict['compound'] return comp/len(msgLst) if len(msgLst) != 0 else 0
def get_score(candidate,sent_weight,twitter_positions,fb_positions,official_positions,nyt_positions): """ " Get position similarity score between candidate and user. Computed " using cosine similarity. " " Args: " candidate: candidate name " twitter_positions: Elasticsearch results from twitter index " fb_positions: Elasticsearch results from fb index " official_positions: Elasticsearch results from official index " nyt_positions: Elasticsearch results form NYT index " Returns: " Similarity score between candidate and user " " Author: Kersing Huang <*****@*****.**>, Matthew Garber """ from nltk.sentiment.vader import SentimentIntensityAnalyzer text = [] by_candidate = lambda x: x['candidate'] == candidate and len(x['result']['hits']['hits']) > 0 t = filter(by_candidate,twitter_positions) if len(t) > 0: text.append(t[0]['result']['hits']['hits'][0]['_source']['sm_text']) #end if fb = filter(by_candidate,fb_positions) if len(fb) > 0: text.append(fb[0]['result']['hits']['hits'][0]['_source']['sm_text']) #end if sia = SentimentIntensityAnalyzer() # get candidate polarity scores candidate_scores = sia.polarity_scores("".join(text)) # get user polarity scores user_input = json.loads(web.data()) user_scores = sia.polarity_scores(user_input['position']) # compute cosine similarity of these polarity scores u_len = vector_length(user_scores) c_len = vector_length(candidate_scores) sentiment_score = vector_dot(candidate_scores,user_scores)/(u_len*c_len) official = filter(by_candidate,official_positions) nyt = filter(by_candidate,nyt_positions) relevance_score_sum = 0 for source in [t, fb, official, nyt]: if source: relevance_score_sum += source[0]['score'] relevance_score = relevance_score_sum/4 weighted_score = (sent_weight * sentiment_score) + ((1 - sent_weight) * relevance_score) return weighted_score
def sentiAnalyze(): analyzer = SentimentIntensityAnalyzer() for candidate in tweets: for week in tweets[candidate]: for tweet in tweets[candidate][week]: if candidate not in result['sum']: result['sum'][candidate] = {} if 'total' not in result['sum'][candidate]: result['sum'][candidate]['total'] = 0 if week not in result['sum'][candidate]: result['sum'][candidate][week] = 0 result['sum'][candidate][week] += 1 result['sum'][candidate]['total'] += 1 score = analyzer.polarity_scores(tweet[2]) if (score['pos'] - score['neg']) > DISTINCT_THRESHOLD: if candidate not in pos_tweets: pos_tweets[candidate] = {} if week not in pos_tweets[candidate]: pos_tweets[candidate][week] = [] pos_tweets[candidate][week].append(tweet) if candidate not in result['pos']: result['pos'][candidate] = {} if 'total' not in result['pos'][candidate]: result['pos'][candidate]['total'] = 0 if week not in result['pos'][candidate]: result['pos'][candidate][week] = 0 result['pos'][candidate][week] += 1 result['pos'][candidate]['total'] += 1 if (score['neg'] - score['pos']) > DISTINCT_THRESHOLD: if candidate not in neg_tweets: neg_tweets[candidate] = {} if week not in neg_tweets[candidate]: neg_tweets[candidate][week] = [] neg_tweets[candidate][week].append(tweet) if candidate not in result['neg']: result['neg'][candidate] = {} if 'total' not in result['neg'][candidate]: result['neg'][candidate]['total'] = 0 if week not in result['neg'][candidate]: result['neg'][candidate][week] = 0 result['neg'][candidate][week] += 1 result['neg'][candidate]['total'] += 1 return
def vader_sentiment(df): sith = SentimentIntensityAnalyzer() sentiment = [] for sentence in df.Message: sent = sith.polarity_scores(sentence) #sent_total = sent['pos'] - sent['neg'] sentiment.append(sent['compound']) df['sentiment'] = sentiment return df
def get_news_sentiment(self,team): print '------------------------------' print 'Scanning sentiment for: ',team print '------------------------------' story_limit=5 neg=0 pos=0 visible_text='' base_url='https://www.google.co.uk/search?hl=en&gl=uk&tbm=nws&authuser=0&q=football+european+championships'+team.replace(' ','%20') req = urllib2.Request(base_url,headers = {'User-Agent': 'Mozilla/5.0'} ) print base_url page = urllib2.urlopen(req) soup = BeautifulSoup(page) # remove scripts and tags for script in soup(["script", "style"]): script.extract() # rip it out # find the links sentence_count=0 story_count=0 for a in soup.findAll('a'): if story_count<story_limit: # ignore internal google links and remove the tracking guff from the end of the URL if ('google' not in a.attrs['href'] and '/search?q=football+european+championships' not in a.attrs['href'] and 'http://' in a.attrs['href']): print a.attrs['href'].replace('/url?q=','').split('&')[0] story_req=urllib2.Request(a.attrs['href'].replace('/url?q=','').split('&')[0],headers={'User-Agent': 'Mozilla/5.0'}) try: story_page=urllib2.urlopen(story_req,timeout=5) story_soup=BeautifulSoup(story_page) # analyse text visible_text = story_soup.getText() neg=0 pos=0 sentences = tokenize.sent_tokenize(visible_text) sid = SentimentIntensityAnalyzer() # for each sentence in the story, get the sentiment/polarity for sentence in sentences: ss = sid.polarity_scores(sentence) neg=neg+ss['neg'] pos=pos+ss['pos'] sentence_count=sentence_count+ len(sentences) score=score+pos-neg # print out a story by story sentiment for logging print 'Sentiment: ',(pos-neg)/len(sentences) except socket.timeout as e: print type(e) except urllib2.HTTPError: print 'failed on url: ',a.attrs['href'] story_count=story_count+1 # return the average net polarity/sentiment return (pos-neg)/sentence_count
def ProcessReviews(df, ptype): parse_type = ptype # Divide reviews into individual sentences sentences = df['text'].apply(tokenizetext) # Stick the sentences back into the dataframe df['sentlist'] = sentences d1, d2, d3 = [], [], [] d4, d5, d6 = [], [], [] # Initialize the sentiment vader analyzer sid = SentimentIntensityAnalyzer() # Loop over sentences and process them for i in range(0, df.shape[0]): sent_list = df['sentlist'][i] for sentence in sent_list: sent_raw = ''.join(sentence) sent_pro = strip_punctuation(sent_raw) sent_pro = rmstopwords(sent_pro) sent_pro = lemmatize(sent_pro) sentiment = sid.polarity_scores(sent_raw)['compound'] if parse_type[0] == 'ngram': pos = ngrams(sent_pro, ptype[1]) elif parse_type == 'chunk': pos = extract_candidate_chunks(sent_pro) elif parse_type == 'rake': pos = rake_object.run(sent_raw) pos = ['_'.join(word[0].split()) for word in pos] for j in pos: d1.append(df['date'][i]) d2.append(df['location'][i]) d3.append(df['rating'][i]) d4.append(j), d5.append(sentiment) d6.append(sent_raw) # Put everything in a dataframe processed_df = pd.DataFrame() processed_df['date'] = d1 processed_df['location'] = d2 processed_df['rating'] = d3 processed_df['aspects'] = d4 processed_df['sentiment'] = d5 processed_df['context'] = d6 # Remove any entry where the sentence # was determined to be neutral processed_df = processed_df[(processed_df['sentiment'] != 0)] return processed_df
def clean_df(user_df): user_df['simple_text'] = user_df['text'].apply(lambda x: remove_links(x)) #A warning will appear if Twython is not installed sid = SentimentIntensityAnalyzer() user_df['sentiment'] = user_df['text'].apply(lambda x: sid.polarity_scores(x)) #keep only fairly positive tweets pos_user_tweets = user_df[user_df['sentiment'].apply(lambda x: x['neg']) < .45] pos_user_tweets['stemmed_text'] = pos_user_tweets['simple_text'].apply(lambda tweet: remove_punctuation(tweet)) pos_user_tweets['stripped_text'] = pos_user_tweets['stemmed_text'].apply(lambda tweet: remove_irrelevant_terms(tweet)) return pos_user_tweets
def run(): nltk_dl_dir = nltk.downloader.Downloader()._download_dir vader_path = os.path.join(nltk_dl_dir, 'sentiment', 'vader_lexicon.zip') if not os.path.exists(vader_path): nltk.download('vader_lexicon') sia = SentimentIntensityAnalyzer() fb_at = click.prompt(__initial_message, type=str, prompt_suffix='>> ') for post in FacebookHandler(fb_at).get_posts(): try: ps = sia.polarity_scores(post['message']) click.echo('%s:\n%s\n' % (post['message'], ps)) except KeyError: pass
def sentiment_analysis(): sid = SentimentIntensityAnalyzer() for country in countries: for query in queries: for evaluator in evaluators[country]: f = open(country + "/" + query + "/suggestions_" + query + "_" + evaluator + ".csv", 'r') g = open(country + "/" + query + "/suggestions_" + query + "_" + evaluator + "_sentiments.csv", 'w') for record in f: sentence = record.split(",")[1] ss = sid.polarity_scores(sentence) record = record.split("\n")[0] record += ", " + str(ss) + "\n" g.write(record)
def sentimentText(chatname): #Text group sentiment text = getListChat(chatname) sia = SentimentIntensityAnalyzer() #Needs to be a string of phrases return sia.polarity_scores(text)
class StdOutListener(StreamListener): def __init__(self, time_interval=60, term='trump'): self.sid = SentimentIntensityAnalyzer() self.anomalyDetector = probabilisticEWMA(term) self.interval = time_interval self.tweetBuffer = deque(maxlen=50) self.negCount = 0 self.siesta = 0 self.nightnight = 0 self.trend = 0 self.start = time.time() def on_data(self, data): if (time.time() - self.start) < self.interval: try: tweet = json.loads(data)['text'] processedTweet = processTweet(tweet) sentiment = self.sid.polarity_scores( processedTweet)['compound'] if sentiment < 0.0: self.negCount += 1 except: pass else: # Number of negative mentions per elapsed time (in seconds) frequency = self.negCount / (time.time() - self.start) # Reseting configurations self.negCount = 0 self.start = time.time() self.tweetBuffer.append(frequency) anom = self.anomalyDetector.predict(self.tweetBuffer) status = 0 # 0 for normal, 1 for positive anomaly, -1 for negative anomaly # Check if the last added data point produced an anomaly if anom: if anom[-1] == len(self.tweetBuffer) - 2: if self.tweetBuffer[-1] > self.tweetBuffer[-2]: self.trend = 1 status = 1 #beforeSpike = self.tweetBuffer[-2] #print '[' + str(datetime.now()) + '] Alert: anomaly detected' else: self.trend = 0 status = -1 #else: # if frequency < beforeSpike: # self.trend = 0 jsonData = { "status": status, "trend": self.trend, "frequency": frequency, "timestamp": datetime.utcnow().isoformat() } postData(jsonData) print "Frequency: ", frequency, " | Trend: ", self.trend, " | Status: ", status return True def on_error(self, status_code): print 'Error:', str(status_code) if status_code == 420: sleepy = 60 * math.pow(2, self.siesta) print "A reconnection attempt will occur in " + \ str(sleepy/60) + " minutes." time.sleep(sleepy) self.siesta += 1 else: sleepy = 5 * math.pow(2, self.nightnight) print "A reconnection attempt will occur in " + \ str(sleepy) + " seconds." time.sleep(sleepy) self.nightnight += 1 return True
cursor.execute( """ INSERT INTO results (stock, mentions) VALUES (%s, %s) """, (i, symbols[i])) cnx.commit() # print(f"{i}: {symbols[i]}") # times.append(symbols[i]) # top.append(f"{i}: {symbols[i]}") # Applying Sentiment Analysis scores, s = {}, {} vader = SentimentIntensityAnalyzer() # adding custom words from data.py vader.lexicon.update(new_words) picks_sentiment = list(symbols.keys())[0:picks_ayz] for symbol in picks_sentiment: stock_comments = a_comments[symbol] for cmnt in stock_comments: score = vader.polarity_scores(cmnt) if symbol in s: s[symbol][cmnt] = score else: s[symbol] = {cmnt: score} if symbol in scores: for key, _ in score.items(): scores[symbol][key] += score[key]
positiveSent = 0 negativeSent = 0 neutralSent = 0 polarity = 0 tweets = [] positive_tweets = [] negative_tweets = [] neutral_tweets = [] begin_time = datetime.datetime.now() for tweet in vaccine_tweets: tweets.append(tweet) analysis = TextBlob(tweet) score = SentimentIntensityAnalyzer().polarity_scores(tweet) neg = score['neg'] neu = score['neu'] pos = score['pos'] comp = score['compound'] polarity += analysis.sentiment.polarity if neg > pos: negative_tweets.append(tweet) negativeSent += 1 elif pos > neg: positive_tweets.append(tweet) positiveSent += 1 elif pos == neg: neutral_tweets.append(tweet)
def scrap_news(): parsed_news = [] from datetime import date today = date.today() # NLTK VADER for sentiment analysis from nltk.sentiment.vader import SentimentIntensityAnalyzer # New words and values new_words = { 'crushes': 10, 'beats': 5, 'misses': -5, 'trouble': -10, 'falls': -100, } # Instantiate the sentiment intensity analyzer with the existing lexicon vader = SentimentIntensityAnalyzer() # Update the lexicon vader.lexicon.update(new_words) # Use these column names columns = ['ticker', 'date', 'time', 'headline'] for url in url_list: r = requests.get(url, headers=headers) soup = BeautifulSoup(r.text, 'html.parser') news = soup.find('table', {'id': 'news-table'}) tr = news.findAll('tr') for x in tr: text = x.get_text() date_scrape = x.td.text.split() headline = x.a.text if len(date_scrape) == 1: time = date_scrape[0] else: date = date_scrape[0] time = date_scrape[1] # Extract the ticker from the file name, get the string up to the 1st '_' ticker = url.split('=')[1] # Append ticker, date, time and headline as a list to the 'parsed_news' list parsed_news.append([ticker, date, time, headline]) # print(parsed_news[:10]) # Convert the list of lists into a DataFrame scored_news = pd.DataFrame(parsed_news, columns=columns) # Iterate through the headlines and get the polarity scores scores = scored_news['headline'].apply(vader.polarity_scores) # Convert the list of dicts into a DataFrame scores_df = pd.DataFrame.from_records(scores) # Join the DataFrames scored_news = scored_news.join(scores_df) # Convert the date column from string to datetime scored_news['date'] = pd.to_datetime(scored_news.date).dt.date # print(scored_news.tail()) # DF TO EXCEL todays_news = scored_news[scored_news['date'] == today] writer = ExcelWriter('sentiment_analysis.xlsx') todays_news.to_excel(writer, 'sentiment') writer.save()
selectDF_quotes_hdfs = selectDF_quotes.select( simulate_time("latestUpdate").cast("Timestamp").alias("Datetime"), simulate_price("latestPrice").alias("latestPrice"), "symbol") selectDF_quotes_es = selectDF_quotes.select( get_id_now("latestUpdate").alias("depotid"), simulate_time("latestUpdate").cast("String").alias("date"), simulate_price("latestPrice").cast("float").alias("latestPrice"), "symbol") # news selectDF_news = parsedDF_news \ .select(F.explode(F.array("news_data")))\ .select("col.*") \ .dropna() ##################### sentiment analysis of news ############################# sia = SentimentIntensityAnalyzer() #update lexicon with open('lexicon_data/final_lex.json', 'r') as fp: final_lex = json.load(fp) final_lex.update(sia.lexicon) sia.lexicon = final_lex def sentiment_analysis(txt1, txt2): text = txt1 + ' ' + txt2 return sia.polarity_scores(text)['compound'] sentiment_analysis_udf = F.udf(sentiment_analysis, FloatType()) ##############################################################################
def get_features(track_id): features_results = sp.audio_features([track_id]) json_features = json.dumps(features_results) features_data = json.loads(json_features) # Convert features dictionary to a list features_list = list(features_data[0].values()) return features_list client_credentials_manager = SpotifyClientCredentials() sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager) sentiment_analyzer = SIA() # IDs of monthly playlists from November 2016 to November 2017 playlist_ids = [ "07zqCIPCroFMKSajvERGvE", "30PgYnoeT2PAgFNuYLR5qd", "1vS1nakUrLYkTd3W0yRMYe", "3scPGVlAn7d74uXRtFnmUC", "5LzBRPVAPYUssZ4ZESnRmH", "6hDHXewz8qBTezvONSqzyl", "00riJCYiVJ1yptAXtv2h6k", "0HxFI5dOlKztf38T9sa0cF", "7EFWm7Mjy6GLJHOEgKEblM", "6YAG0Li1BoUkmhc8iycY6l", "7Iw0yI71QX59zyFq0kAZTS", "69XTCqVzbSWPMLucSvzlLl", "7pRnKuQMkmntEj7Nnj94r0" ] # Audio features feature_names = [ "danceability", "energy", "key", "loudness", "mode", "speechiness",
def sa_tweets(tweets_df, image_path, image_title): """ apply a sentiment classifier to a dataframe of tweets Given a dataframe including a 'text' field, it runs a sentiment classifier and it returns an augmented version of the original dataframe that includes the results of the analysis in the fields 'neg', 'neu', pos', and 'compound'. It also generates a bar graph of the results. Parameters ---------- tweets_df: DataFrame It must include a 'text' field. image_path: str filepath for the bar graph image_title:str image caption Returns ------- DataFrame a tweets' dataframe including the original fields and the new fields 'neg', 'neu', pos', and 'compound' that contain the results of the analysis. """ tweets_df['cleaned_text'] = tweets_df['text'].apply(preprocess_tweet_text) #show the dataframe #tweets_df.head sid = SentimentIntensityAnalyzer() tweets_df['neg'] = 0.0 tweets_df['neu'] = 0.0 tweets_df['pos'] = 0.0 tweets_df['compound'] = 0.0 for i in range(len(tweets_df)): aux_list = dict_converter( sid.polarity_scores(tweets_df.loc[i, 'cleaned_text'])) tweets_df.loc[i, 'neg'] = aux_list[0][1] tweets_df.loc[i, 'neu'] = aux_list[1][1] tweets_df.loc[i, 'pos'] = aux_list[2][1] tweets_df.loc[i, 'compound'] = aux_list[3][1] print(tweets_df[['cleaned_text', 'compound']]) thres = 0.25 num_pos = len(tweets_df[tweets_df.compound >= thres]) num_neg = len(tweets_df[tweets_df.compound <= -thres]) num_neu = len(tweets_df[(tweets_df.compound < thres) & (tweets_df.compound > -thres)]) num_tweets = len(tweets_df.index) per_pos = (num_pos / num_tweets) * 100 per_neg = (num_neg / num_tweets) * 100 per_neu = (num_neu / num_tweets) * 100 print("positive tweets: " + str(num_pos)) print("negative tweets: " + str(num_neg)) print("neutral tweets: " + str(num_neu)) classification = ['Positive', 'Negative', 'Hard to classify/Neutral'] scores = [num_pos, num_neg, num_neu] percentages = [per_pos, per_neg, per_neu] y_pos = np.arange(len(classification)) #plt.bar(y_pos,scores, align='center', color=['#EE442F', '#00ff00', '#0000ff']) plt.bar(y_pos, percentages, align='center', color=['#ABC3C9', '#E0DCD3', '#CCBE9F']) plt.ylabel("Percentage") plt.xticks(y_pos, classification) plt.title(image_title + '(' + str(num_tweets) + ' tweets)') plt.savefig(image_path + '.png') plt.close('all') #plt.show() return tweets_df
def __init__(self, master=None): """This class creates a window with two frames.""" Frame.__init__(self, master) self.grid() self.master.title("Reddit conversation sentiment.") # Configuring the window with rows and columns. for r in range(6): self.master.rowconfigure(r, weight=1) for c in range(6): self.master.columnconfigure(c, weight=1) # Creates two frames self.frame1 = Frame(master, height=100, width=300, bg="white") self.frame1.grid(row=0, column=0, rowspan=6, columnspan=3, sticky=W + E + N + S) self.frame2 = Frame(master, height=100, width=150) self.frame2.grid(row=0, column=3, rowspan=6, columnspan=3, sticky=W + E + N + S) # Creates labels that explain the different filters Label(self.frame2, text="Min. number of participants:").grid(column=0, row=0, sticky='w') Label(self.frame2, text="Max. number of participants:").grid(column=0, row=1, sticky='w') Label(self.frame2, text="Min. length of conversation:").grid(column=0, row=2, sticky='w') Label(self.frame2, text="Max. length of conversation:").grid(column=0, row=3, sticky='w') Label(self.frame2, text="Min. response time in minutes:").grid(column=0, row=4, sticky='w') Label(self.frame2, text="Max. response time in minutes:").grid(column=0, row=5, sticky='w') Label(self.frame2, text="Sentiment:").grid(column=0, row=6, sticky='w') # Create update button Button(self.frame2, text="Update", command=self.start).grid(column=1, row=12) # Create filter buttons self.min_participants = StringVar(self.frame2) self.min_participants.set("2") self.opt = OptionMenu(self.frame2, self.min_participants, "2", "3", "4", "5", "6", "7", "8", "9", "10").grid(column=1, row=0) self.max_participants = StringVar(self.frame2) self.max_participants.set("10") self.opt2 = OptionMenu(self.frame2, self.max_participants, "2", "3", "4", "5", "6", "7", "8", "9", "10").grid(column=1, row=1) # Create entry for min comments self.min_comments = Entry(self.frame2, width=6) self.min_comments.insert(0, "2") self.min_comments.grid(column=1, row=2) # Create entry for max comments self.max_comments = Entry(self.frame2, width=6) self.max_comments.insert(10, "10") self.max_comments.grid(column=1, row=3) # Creates spinbox for min time self.time_value = StringVar(self.frame2) self.time_value.set("0") self.min_time = Spinbox(self.frame2, from_=0, to=2880, increment=1, textvariable=self.time_value, width=4).grid(column=1, row=4) # Creates spinbox for max time self.time_value2 = IntVar(self.frame2) self.time_value2.set("2880") self.max_time = Spinbox(self.frame2, from_=-0, to=2880, increment=1, textvariable=self.time_value2, width=4).grid(column=1, row=5) # Creates an option menu where you can pick the sentiment you want self.sentiment = StringVar(self.frame2) self.sentiment.set("Neutral") self.opt3 = OptionMenu(self.frame2, self.sentiment, "Neutral", "Positive", "Negative").grid(column=1, row=6) # Create a menu instance menu = Menu(self.master) self.master.config(menu=menu) # Create the file objects menu_file = Menu(menu) menu.add_cascade(menu=menu_file, label='File') menu_file.add_command(label='Open', command=self.load) menu_file.add_command(label='Exit', command=self.master.quit) menu_edit = Menu(menu) menu.add_cascade(menu=menu_edit, label='Search') menu_edit.add_command(label='Search username', command=self.search_username) menu_3 = Menu(menu) menu.add_cascade(menu=menu_3, label='Credentials') menu_3.add_command(label='Reset', command=self.reset) menu_3.add_command(label='Change', command=self.newcredentials) # Adds a seperator between menufiles. 
menu_file.add_separator() # Creates text in the window with a scrollbar wich can be edited . self.S = Scrollbar(self.frame1) self.T = Text(self.frame1) self.S.pack(side=RIGHT, fill=Y) self.T.pack(side=LEFT, fill=Y) self.S.config(command=self.T.yview) self.T.config(yscrollcommand=self.S.set) # Logging in on Reddit. self.username = "******" self.password = "******" self.client_id = "CLIENT_ID" self.client_secret = "CLIENT_SECRET" self.bot_login() self.file = None self.sid = SentimentIntensityAnalyzer()
from speech_helpers import convert_to_wav, show_pydub_stats, transcribe_audio from nltk.sentiment.vader import SentimentIntensityAnalyzer sid = SentimentIntensityAnalyzer()
import ssl try: _create_unverified_https_context = ssl._create_unverified_context except AttributeError: pass else: ssl._create_default_https_context = _create_unverified_https_context nltk.download('punkt') nltk.download('sentiwordnet') nltk.download('wordnet') nltk.download('vader_lexicon') nlp = spacy.load("en_core_web_md") sid = SentimentIntensityAnalyzer() text_with_enteties = "" text_with_pos = "" with open("../tasks/02-structural-linguistics/data/examiner-headlines.txt" ) as file: data = file.readlines() statistic = {'entity': 0, 'more lvl': 0, 'most lvl': 0, 'polarity': 0} for line in tqdm(data): have_entenies = False have_more_lvl = False have_most_lvl = False have_polarity = False doc = nlp(line) for token in doc:
def initial_sentiment_tools(): rs = textacy.resources.DepecheMood(lang="en", word_rep="lemmapos") sid = SentimentIntensityAnalyzer() return rs, sid
data_df['sentiment_lemmatized'] = data_df['tweet_lemmatized'].apply( lambda x: TextBlob(x).sentiment) data_df[['sentiment_lemmatized', 'tweet_lemmatized']].head(10) all_words = ' '.join([text for text in data_df['tweet_lemmatized']]) wordcloud = WordCloud(width=800, height=500, random_state=21, max_font_size=110).generate(all_words) plt.figure(figsize=(10, 7)) plt.imshow(wordcloud, interpolation="bilinear") plt.axis('off') plt.title("Most Common words in column Tweet Lemmatized") plt.show() sid = SentimentIntensityAnalyzer() data_df['sentiment_lemmatized'] = data_df['tweet_lemmatized'].apply( lambda x: sid.polarity_scores(x)) data_df['sentiment_lemmatized'].head(10) def convert(x): if x < -0.05: return 0 elif -0.05 < x < 0.05: return 1 else: return 2 data_df['label_lemmatized'] = data_df['sentiment_lemmatized'].apply(
def main(): print("scraping...") f = open("news.txt", "r") apiKey = f.readline().strip('\n') # https://newsapi.org/docs/endpoints/sources sources = [] google_us_news_index_URL = "https://newsapi.org/v2/sources?category=business&language=en&country=us&apiKey={}".format( apiKey) google_response = requests.get(google_us_news_index_URL) if (google_response.status_code != 200): print(google_response) return google_json = json.loads(google_response.content) for source in range(len(google_json['sources'])): sources.append(google_json['sources'][source]['id']) print("# of sources: ", len(sources)) source_string = ','.join(sources) print(source_string) # overwriting sources so I can test with only one source # source_string = "bloomberg" query_date_from = date.today().replace(month=date.today().month - 1) # print(str(query_date)) # query_date_to = "&" + date.today().replace(month=date.today().month-1) # to query for specific words and such query = "q=AAPL&" # initialize sentiment analyzer sid = SentimentIntensityAnalyzer() # https://newsapi.org/docs/endpoints/everything for page in range(1, 6): google_news_URL = "http://newsapi.org/v2/everything?{}sources={}&from={}&language=en&page={}&apiKey={}".format( query, source_string, query_date_from, page, apiKey) print(google_news_URL + '\n') google_response = requests.get(google_news_URL) if (google_response.status_code != 200): print(google_response) print(google_response.text) return # only get a certain number of results, need to use page=___ in request to get other results # google_content = open("google_reponse.json", "w") google_json = json.loads(google_response.content) # json.dump(google_json, google_content) # google_content.close() for article in range(len(google_json['articles'])): print(google_json['articles'][article]['source']['id']) print(google_json['articles'][article]['publishedAt']) print(google_json['articles'][article]['title']) # print(google_json['articles'][article]['description']) # get vader scores (sentiment analysis) for article titles scores = sid.polarity_scores( google_json['articles'][article]['title']) for k in sorted(scores): print('{0}: {1}, '.format(k, scores[k]), end='') print() print("-------------------") # Database schema # companies : id , name , symbol # articles : id , source_id , author , date , title , description , compound_vader , neg_vader , neu_vader , pos_vader # if we also store sources, we could get the average of the vader scores they give for different companies (source x vader x company) # would need to be stored in a different table # sources table and sources_company_vader table # start of cnn request, couldn't get api key '''
def doAll(trainFileName, testFileName): trainSet = makeListEntries(trainFileName) testSet = makeListEntries(testFileName) """**************************************""" # data listTrainText = makeListText(trainSet) listTestText = makeListText(testSet) # target listTrainStars = makeListStars(trainSet) listTestStars = makeListStars(testSet) """*************************************""" # could do CountVectorizer cv = CountVectorizer(stop_words='english') trainCVMatr = cv.fit_transform(listTrainText) testCVMatr = cv.transform(listTestText) # could do TfidfVectorizer # tv = TfidfVectorizer(stop_words = 'english') # trainTVMatr = cv.fit_transform(listTrainText) # testTVMatr = cv.transform(listTestText) """*************************************""" # using CountVectorizer LR_CV_model = LogisticRegression(multi_class='multinomial', max_iter=1000, class_weight='balanced') LR_CV_model.fit(trainCVMatr, listTrainStars) # get it to predict LR_CV_prediction = LR_CV_model.predict(testCVMatr) # get accuracy score LR_CV_score = metrics.accuracy_score(listTestStars, LR_CV_prediction) LR_CV_f1 = metrics.f1_score(listTestStars, LR_CV_prediction, average='micro') LR_CV_r2 = metrics.r2_score(listTestStars, LR_CV_prediction, multioutput='variance_weighted') LR_my = betterScoring(listTestStars, LR_CV_prediction) # this is the bit with the tfidf vectorizer # LR_TV_model = LogisticRegression(multi_class = 'multinomial', max_iter=1000) # LR_TV_model.fit(trainTVMatr, listTrainStars) # get it to predict # LR_TV_prediction = LR_TV_model.predict(testTVMatr) # get accuracy score # LR_TV_score = metrics.accuracy_score(listTestStars, LR_TV_prediction) # what do the data say? #print("Multiclass, logistic regression, CountVectorizer: " + str(LR_CV_score)) #print("Multiclass, logistic regression, TfidfVectorizer: " + str(LR_TV_score)) """*************************************""" # using CountVectorizer NB_CV_model = MultinomialNB() NB_CV_model.fit(trainCVMatr, listTrainStars) # get it to predict NB_CV_prediction = NB_CV_model.predict(testCVMatr) # get accuracy score NB_CV_score = metrics.accuracy_score(listTestStars, NB_CV_prediction) NB_CV_f1 = metrics.f1_score(listTestStars, NB_CV_prediction, average='micro') NB_CV_r2 = metrics.r2_score(listTestStars, NB_CV_prediction, multioutput='variance_weighted') NB_my = betterScoring(listTestStars, NB_CV_prediction) # this is the bit with the tfidf vectorizer # NB_TV_model = MultinomialNB() # NB_TV_model.fit(trainCVMatr, listTrainStars) # get it to predict # NB_TV_prediction = NB_TV_model.predict(testTVMatr) # get accuracy score # NB_TV_score = metrics.accuracy_score(listTestStars, NB_TV_prediction) # what do the data say? 
#print("Naive Bayes, CountVectorizer: " + str(NB_CV_score)) # print("Naive Bayes, TfidfVectorizer: " + str(NB_TV_score)) """*************************************""" sid = SentimentIntensityAnalyzer() listOfRes = [] data2 = [json.loads(line) for line in open(testFileName, 'r')] for entry in data2: listOfRes.append(sid.polarity_scores(entry['review_body'])['compound']) scaledRes = [] size = len(listOfRes) for i in range(size): num = listOfRes[i] score = -1 if num >= q0 and num < q1: score = 1 elif num >= q1 and num < q2: score = 2 elif num >= q2 and num < q3: score = 3 elif num >= q3 and num < q4: score = 4 elif num >= q4 and num <= q5: score = 5 # add score back in scaledRes.append(score) vader_acc = metrics.accuracy_score(listTestStars, scaledRes) vader_f1 = metrics.f1_score(listTestStars, scaledRes, average='micro') vader_r2 = metrics.r2_score(listTestStars, scaledRes, multioutput='variance_weighted') vader_my = betterScoring(listTestStars, scaledRes) """*************************************""" # dealing with the ordinal regression ord_model = OrdinalClassifier(DecisionTreeClassifier()) ord_model.fit(trainCVMatr, listTrainStars) ord_model_prediction = ord_model.predict(testCVMatr) size = len(listTestStars) for i in range(size): if (ord_model_prediction[i] < 1): ord_model_prediction[i] = 1 ord_acc = metrics.accuracy_score(listTestStars, ord_model_prediction) ord_f1 = metrics.f1_score(listTestStars, ord_model_prediction, average='micro') ord_r2 = metrics.r2_score(listTestStars, ord_model_prediction, multioutput='variance_weighted') ord_my = betterScoring(listTestStars, ord_model_prediction) """*************************************""" # trying mord arr = np.asarray(listTrainStars) clf2 = mord.LogisticAT(alpha=1.) clf2.fit(trainCVMatr, arr) clf2_prediction = clf2.predict(testCVMatr) LAT_acc = metrics.accuracy_score(listTestStars, clf2_prediction) LAT_f1 = metrics.f1_score(listTestStars, clf2_prediction, average='micro') LAT_r2 = metrics.r2_score(listTestStars, clf2_prediction, multioutput='variance_weighted') LAT_my = betterScoring(listTestStars, clf2_prediction) #print('AccuracyScore of LogisticAT %s' % #metrics.accuracy_score(listTestStars, clf2.predict(testCVMatr))) clf3 = mord.LogisticIT(alpha=1.) clf3.fit(trainCVMatr, arr) clf3_prediction = clf3.predict(testCVMatr) LIT_acc = metrics.accuracy_score(listTestStars, clf3_prediction) LIT_f1 = metrics.f1_score(listTestStars, clf3_prediction, average='micro') LIT_r2 = metrics.r2_score(listTestStars, clf3_prediction, multioutput='variance_weighted') LIT_my = betterScoring(listTestStars, clf3_prediction) #print('AccuracyScore of LogisticIT %s' % #metrics.accuracy_score(listTestStars, clf3.predict(testCVMatr))) clf4 = mord.LogisticSE(alpha=1.) 
clf4.fit(trainCVMatr, arr) clf4_prediction = clf4.predict(testCVMatr) LSE_acc = metrics.accuracy_score(listTestStars, clf4_prediction) LSE_f1 = metrics.f1_score(listTestStars, clf4_prediction, average='micro') LSE_r2 = metrics.r2_score(listTestStars, clf4_prediction, multioutput='variance_weighted') LSE_my = betterScoring(listTestStars, clf4_prediction) #print('AccuracyScore of LogisticSE %s' % #metrics.accuracy_score(listTestStars, clf4.predict(testCVMatr))) """*************************************""" # return value categoryName = trainFileName.replace("dataset/prodAnalysis/train_", "") categoryName = categoryName.replace(".json", "") return [ categoryName, LR_CV_score, LR_CV_f1, LR_CV_r2, LR_my, NB_CV_score, NB_CV_f1, NB_CV_r2, NB_my, vader_acc, vader_f1, vader_r2, vader_my, ord_acc, ord_f1, ord_r2, ord_my, LAT_acc, LAT_f1, LAT_r2, LAT_my, LIT_acc, LIT_f1, LIT_r2, LIT_my, LSE_acc, LSE_f1, LSE_r2, LSE_my, ]
import re from nltk.sentiment.vader import SentimentIntensityAnalyzer as SIA import nltk from matplotlib import pyplot as plt import datetime from datetime import date import time import seaborn as sns import math import numpy as np import pandas as pd from sklearn.preprocessing import StandardScaler from sklearn.metrics import mean_squared_error nltk.download('vader_lexicon') sia = SIA() #=============================================================================== #dataset #=============================================================================== response = pd.read_html( "https://finance.yahoo.com/quote/GOOG/history?period1=1555525800&period2=1571337000&interval=1d&filter=history&frequency=1d" ) df = pd.DataFrame() for data in response: df = data df = df[:-1] df = df.reindex(index=df.index[::-1]) datetime.datetime.strptime(df['Date'][0], '%b %d, %Y') df.loc[:, 'Date'] = pd.to_datetime(df['Date'], format='%b %d, %Y') df.columns = [str(x).lower().replace(' ', '_') for x in df.columns] df.sort_values(by='date', inplace=True, ascending=True)
#How to perform sentiment analysis in python? import nltk nltk.download('vader_lexicon') nltk.download('punkt') from nltk.sentiment.vader import SentimentIntensityAnalyzer sid = SentimentIntensityAnalyzer() message_text = '''kiddy is a good boy''' print(message_text) scores = sid.polarity_scores(message_text) for key in sorted(scores): print('{0}: {1}, '.format(key, scores[key]), end='') #output: #good #compound: 0.4404, neg: 0.0, neu: 0.0, pos: 1.0 #bad #compound: -0.5423, neg: 1.0, neu: 0.0, pos: 0.0 #pranay is a bad boy. he is a psycho. i hate him. i want to kill him. #compound: -0.9118, neg: 0.536, neu: 0.405, pos: 0.059,
def get_dressed(data, kb): result = [] translator = str.maketrans('', '', string.punctuation) remove_terms = ['#goldenglobes', 'golden globes', '#goldenglobe', 'golden globe', 'goldenglobes', 'goldenglobe', 'golden', 'globe', 'globes'] stop = remove_terms include_terms = ['dress', 'fashion', 'red', 'carpet', 'haute couture', 'gown', 'design', 'look'] for tweet in data: tweet = re.sub("\d+", "", tweet) #strip nums tweet = re.sub(r'http\S+', '', tweet) #strip urls tweet = re.sub(r'#\S+', '', tweet) #strip hashtags tweet = tweet.translate(translator) #strip non-alphanumeric characters for i in stop: for j in tweet: if i.lower() in j.lower(): tweet.remove(j) if any(term in tweet for term in include_terms): result.append(tweet) # extract people and put in dictionary with compound scores sentiment_analyzer = SentimentIntensityAnalyzer() score_dict = {} best_dressed_dict = {} worst_dressed_dict = {} for tweet in result: all_scores = sentiment_analyzer.polarity_scores(tweet) for k in sorted(all_scores): if k == 'compound': useful_score = all_scores[k] if tweet: # Get all possible bigrams & trigrams in a tweet gram = list(nltk.everygrams(tweet.split(), 2, 3)) # Filter through and append to list for tweet for g in gram: if len(g) == 2: if bool(re.match(r'\b[A-Z][a-z]+\b', g[0])) and bool(re.match(r'\b[A-Z][a-z]+\b', g[1])): name = ' '.join(g).lower() if useful_score > 0: if name in best_dressed_dict: best_dressed_dict[name] += useful_score else: best_dressed_dict[name] = useful_score if useful_score < 0: if name in worst_dressed_dict: worst_dressed_dict[name] += useful_score else: worst_dressed_dict[name] = useful_score else: if bool(re.match(r'\b[A-Z][a-z]+\b', g[0])) and bool(re.match(r'\b[A-Z][a-z]+\b', g[1])) and bool(re.match(r'\b[A-Z][a-z]+\b', g[2])): name = ' '.join(g).lower() if useful_score > 0: if name in best_dressed_dict: best_dressed_dict[name] += useful_score else: best_dressed_dict[name] = useful_score if useful_score < 0: if name in worst_dressed_dict: worst_dressed_dict[name] += useful_score else: worst_dressed_dict[name] = useful_score # look in kb for matches final_dict = {} for best, worst in zip(best_dressed_dict, worst_dressed_dict): if best.lower() in kb: final_dict[best] = best_dressed_dict[best] if worst.lower() in kb: final_dict[worst] = worst_dressed_dict[worst] # get key with max value best_dressed = [] worst_dressed = [] while len(best_dressed) < 5: best_person = max(final_dict.items(), key=lambda k: k[1])[0] worst_person = min(final_dict.items(), key=lambda k: k[1])[0] best_dressed.append(best_person) worst_dressed.append(worst_person) del final_dict[best_person] del final_dict[worst_person] return best_dressed, worst_dressed
import sys import re import matplotlib.pyplot as plt import nltk from utilities import cleanText from nltk.sentiment.vader import SentimentIntensityAnalyzer sentiment_analyzer = SentimentIntensityAnalyzer() def analyze(name): linesList = cleanText(name) neutral, negative, positive = 0, 0, 0 for index, sentence in enumerate(linesList): # print("Processing {0}%".format(str((index * 100) / len(linesList)))) if re.match(r'^[\w]', sentence) is None: continue scores = sentiment_analyzer.polarity_scores(sentence) scores.pop('compound', None) maxAttribute = max(scores, key=lambda k: scores[k]) if maxAttribute == "neu": neutral += 1 elif maxAttribute == "neg": negative += 1 else: positive += 1
#print(len(word_and_adjectives_hash_synset)) #print(len(lem_nouns)) new_most_common_nouns=updated_lem_nouns.most_common(20) # print("------------------------------------------------------------------------") for mcn in new_most_common_nouns: d = word_and_adjectives_hash_synset[mcn[0]] d= d.most_common(10) print(mcn[0], mcn[1]) # for i in d: # print (" ",i) #sentiment analysis from nltk.sentiment.vader import SentimentIntensityAnalyzer sid = SentimentIntensityAnalyzer() #calculate average score for each feature for mcn in new_most_common_nouns: d = word_and_adjectives_hash_synset[mcn[0]] d= d.most_common(100) average_score=0 total_adj=1 print(mcn[0]) for i in d: cmp_score=sid.polarity_scores(i[0])["compound"] #Ignore neutral compound score [0], for negetive score, consider it as 0, and positive as 1 if cmp_score!=0: if cmp_score < 0: cmp_score=0 else:
def get_vader_sentiment(textstr): # Instantiate the sentiment intensity analyzer vader = SentimentIntensityAnalyzer() return vader.polarity_scores(textstr)
import os import json import shutil import time from collections import Counter from nltk.corpus import stopwords from nltk.sentiment.vader import SentimentIntensityAnalyzer from nltk.util import ngrams from .dataset import TwitterDataSet from .tokenizer import Tokenizer default_sid = SentimentIntensityAnalyzer() DEFAULT_CONFIG_DIR = "config" DEFAULT_INPUT_DATA_DIR = "data" DEFAULT_OUTPUT_DATA_DIR = "processed_data" DEFAULT_RESULT_DIR = "results" DEFAULT_EXCEPTION_LIST = ['HASH_TAG', 'URL', 'I', 'AMP'] MAX_N_GRAM = 4 TOP_N_VALUES = 5 class Analyzer: tokenizer = Tokenizer() def __init__(self, config_dir=DEFAULT_CONFIG_DIR, data_dir=DEFAULT_INPUT_DATA_DIR,
def manageRequest(): # some useful initialisation print('--->', 'TR' in request.form.values()) theInputForm = InputTextForm() userText = "and not leave this empty!" typeText = "You should write something ..." language = "EN" language = request.form['lang'] # which language? # POST - retrieve all user submitted data if theInputForm.validate_on_submit(): userText = theInputForm.inputText.data typeText = "Your own text" # DEBUG flash('read: %s' % typeText) stemmingEnabled = request.form.get("stemming") stemmingType = TextAnalyser.NO_STEMMING if stemmingEnabled: if request.form.get("engine"): stemmingType = TextAnalyser.STEM else: stemmingType = TextAnalyser.LEMMA else: stemmingType = TextAnalyser.NO_STEMMING #flash('read: %s' % stemmingEnabled) # Which kind of user action ? if 'TR' in request.form.values(): print("888888888888888") blob = TextBlob(userText, analyzer=NaiveBayesAnalyzer()) translate_to = language print("translate_response:--------->", translate_to) print(blob, translate_to) if (translate_to.lower() == 'french'): resp = str(blob.translate(to='fr')) elif (translate_to.lower() == 'telugu'): resp = str(blob.translate(to='te')) elif (translate_to.lower() == 'hindi'): resp = str(blob.translate(to='hi')) elif (translate_to.lower() == 'german'): resp = str(blob.translate(to='de')) elif (translate_to.lower() == 'greek'): resp = str(blob.translate(to='el')) elif (translate_to.lower() == 'spanish'): resp = str(blob.translate(to='es')) return render_template('translation_results.html', title='Translation', translation=resp) elif 'BA' in request.form.values(): # GO Text Analysis print("------------") myText = TextAnalyser(userText, language) # new object myText.preprocessText( lowercase=theInputForm.ignoreCase.data, removeStopWords=theInputForm.ignoreStopWords.data, stemming=stemmingType) # display all user text if short otherwise the first fragment of it if len(userText) > 99: fragment = userText[:99] + " ..." else: fragment = userText # check that there is at least one unique token to avoid division by 0 if myText.uniqueTokens() == 0: uniqueTokensText = 1 else: uniqueTokensText = myText.uniqueTokens() # render the html page return render_template('results.html', title='Text Analysis', inputTypeText=typeText, originalText=fragment, numChars=myText.length(), numSentences=myText.getSentences(), numTokens=myText.getTokens(), uniqueTokens=uniqueTokensText, commonWords=myText.getMostCommonWords(10)) else: print("11111111111111") sid = SentimentIntensityAnalyzer() scores = sid.polarity_scores(userText) if scores['compound'] > 0: sentiment = 'Positive' elif scores['compound'] < 0: sentiment = 'Negative' else: sentiment = 'Neutral' return render_template('sentiment.html', title='Text Analysis', sentiment_scores=scores, sentiment=sentiment)
full_df=features_df, previous=True) print("Done process thread 2") features_df['following_staff_post_id'] = features_df.apply(process_thread, axis=1, full_df=features_df, filter_for_staff=True) print("Done process thread 3") features_df['previous_staff_post_id'] = features_df.apply(process_thread, axis=1, full_df=features_df, previous=True, filter_for_staff=True) print("Done process thread 4") # drop unneeded columns for model # 'root_post' and 'thread' are the same drop_features = ['body', 'root_post'] features_df = features_df.drop(drop_features, axis=1) print('-- Writing data to {} -- '.format(features_location)) features_df.to_csv(features_location, index=False) return features_df if __name__ == "__main__": vader = SentimentIntensityAnalyzer() lexicon = Empath() main()
def get_sentiments(text): analyzer = SentimentIntensityAnalyzer() return analyzer.polarity_scores(text)
import json import nltk from nltk.sentiment.vader import SentimentIntensityAnalyzer import pandas as pd import numpy as np nltk.download('vader_lexicon') pointMachine = SentimentIntensityAnalyzer() def sentiment_analyzer_scores(x): score = pointMachine.polarity_scores(x) return score def create_dataframe(): my_dataframe = pd.read_json('data/results.json', orient='records') my_dataframe['sentiment'] = my_dataframe['tweet'].apply( lambda x: 'NaN' if pd.isnull(x) else sentiment_analyzer_scores(x)) my_dataframe['num'] = my_dataframe['sentiment'].apply( lambda score_dict: score_dict['compound']) my_dataframe['num_results'] = my_dataframe['num'].apply( lambda c: 'positive' if c > 0.05 else ('negative' if c < -0.05 else 'neutral')) process_json = my_dataframe.to_json(orient='index') parsed = json.loads(process_json) parsed = json.dumps(parsed) return parsed
class Application(Frame):
    def __init__(self, master=None):
        """This class creates a window with two frames."""
        Frame.__init__(self, master)
        self.grid()
        self.master.title("Reddit conversation sentiment.")

        # Configuring the window with rows and columns.
        for r in range(6):
            self.master.rowconfigure(r, weight=1)
        for c in range(6):
            self.master.columnconfigure(c, weight=1)

        # Creates two frames
        self.frame1 = Frame(master, height=100, width=300, bg="white")
        self.frame1.grid(row=0, column=0, rowspan=6, columnspan=3,
                         sticky=W + E + N + S)
        self.frame2 = Frame(master, height=100, width=150)
        self.frame2.grid(row=0, column=3, rowspan=6, columnspan=3,
                         sticky=W + E + N + S)

        # Creates labels that explain the different filters
        Label(self.frame2, text="Min. number of participants:").grid(column=0, row=0, sticky='w')
        Label(self.frame2, text="Max. number of participants:").grid(column=0, row=1, sticky='w')
        Label(self.frame2, text="Min. length of conversation:").grid(column=0, row=2, sticky='w')
        Label(self.frame2, text="Max. length of conversation:").grid(column=0, row=3, sticky='w')
        Label(self.frame2, text="Min. response time in minutes:").grid(column=0, row=4, sticky='w')
        Label(self.frame2, text="Max. response time in minutes:").grid(column=0, row=5, sticky='w')
        Label(self.frame2, text="Sentiment:").grid(column=0, row=6, sticky='w')

        # Create update button
        Button(self.frame2, text="Update", command=self.start).grid(column=1, row=12)

        # Create filter option menus
        self.min_participants = StringVar(self.frame2)
        self.min_participants.set("2")
        self.opt = OptionMenu(self.frame2, self.min_participants, "2", "3", "4",
                              "5", "6", "7", "8", "9", "10").grid(column=1, row=0)
        self.max_participants = StringVar(self.frame2)
        self.max_participants.set("10")
        self.opt2 = OptionMenu(self.frame2, self.max_participants, "2", "3", "4",
                               "5", "6", "7", "8", "9", "10").grid(column=1, row=1)

        # Create entry for min comments
        self.min_comments = Entry(self.frame2, width=6)
        self.min_comments.insert(0, "2")
        self.min_comments.grid(column=1, row=2)

        # Create entry for max comments
        self.max_comments = Entry(self.frame2, width=6)
        self.max_comments.insert(0, "10")
        self.max_comments.grid(column=1, row=3)

        # Creates spinbox for min time
        self.time_value = StringVar(self.frame2)
        self.time_value.set("0")
        self.min_time = Spinbox(self.frame2, from_=0, to=2880, increment=1,
                                textvariable=self.time_value, width=4).grid(column=1, row=4)

        # Creates spinbox for max time
        self.time_value2 = IntVar(self.frame2)
        self.time_value2.set(2880)
        self.max_time = Spinbox(self.frame2, from_=0, to=2880, increment=1,
                                textvariable=self.time_value2, width=4).grid(column=1, row=5)

        # Creates an option menu where you can pick the sentiment you want
        self.sentiment = StringVar(self.frame2)
        self.sentiment.set("Neutral")
        self.opt3 = OptionMenu(self.frame2, self.sentiment, "Neutral", "Positive",
                               "Negative").grid(column=1, row=6)

        # Create a menu instance
        menu = Menu(self.master)
        self.master.config(menu=menu)

        # Create the file objects
        menu_file = Menu(menu)
        menu.add_cascade(menu=menu_file, label='File')
        menu_file.add_command(label='Open', command=self.load)
        menu_file.add_command(label='Exit', command=self.master.quit)

        menu_edit = Menu(menu)
        menu.add_cascade(menu=menu_edit, label='Search')
        menu_edit.add_command(label='Search username', command=self.search_username)

        menu_3 = Menu(menu)
        menu.add_cascade(menu=menu_3, label='Credentials')
        menu_3.add_command(label='Reset', command=self.reset)
        menu_3.add_command(label='Change', command=self.newcredentials)

        # Adds a separator between menu entries.
        menu_file.add_separator()

        # Creates text in the window with a scrollbar which can be edited.
        self.S = Scrollbar(self.frame1)
        self.T = Text(self.frame1)
        self.S.pack(side=RIGHT, fill=Y)
        self.T.pack(side=LEFT, fill=Y)
        self.S.config(command=self.T.yview)
        self.T.config(yscrollcommand=self.S.set)

        # Logging in on Reddit.
        self.username = "******"
        self.password = "******"
        self.client_id = "CLIENT_ID"
        self.client_secret = "CLIENT_SECRET"
        self.bot_login()
        self.file = None
        self.sid = SentimentIntensityAnalyzer()

    def newcredentials(self):
        """This function asks username, password, client id and client secret.
        It then writes the answers to credentials.txt."""
        self.username = simpledialog.askstring("Input", "What is the username?")
        self.password = simpledialog.askstring("Input", "What is the password?")
        self.client_id = simpledialog.askstring("Input", "What is the client id?")
        self.client_secret = simpledialog.askstring(
            "Input", "What is the client secret?")
        self.bot_login()

    def reset(self):
        """This function rewrites credentials.txt with the lines of the back-up.txt."""
        self.username = "******"
        self.password = "******"
        self.client_id = "_J4D2EctRUBJvg"
        self.client_secret = "Xx0CvhFNIHhA_d5AfmDhx8sEYOw"
        self.bot_login()

    def bot_login(self):
        """Reads every line from credentials.txt and logs in reddit praw."""
        self.r = praw.Reddit(username=self.username,
                             password=self.password,
                             client_id=self.client_id,
                             client_secret=self.client_secret,
                             user_agent="Human Interaction IK bot")
        try:
            self.r.user.me()
        except:
            self.T.delete(1.0, END)
            print(
                "Your credentials are invalid. Please fill in the correct credentials."
            )
            self.T.insert(
                END,
                "Your credentials are invalid. Please fill in the correct credentials."
            )

    def filterinput(self, data):
        """Checks which filters are set and passes the remaining conversations to the print function.
        It checks for number of participants, length of the conversation, amount of time in between and sentiment."""
        try:
            min_participants = self.min_participants.get()
            max_participants = self.max_participants.get()
            min_comments = self.min_comments.get()
            max_comments = self.max_comments.get()
            sentiment = self.sentiment.get()
            min_time = int(self.time_value.get()) * 60
            max_time = int(self.time_value2.get()) * 60
            count = 1
            conv_lst = []
            new_conv = []
            for user_dic in self.file:
                conversation = user_dic["Conversation"]
                participants = user_dic["Participants"]
                if (len(conversation) >= int(min_comments)
                        and len(conversation) <= int(max_comments)
                        and participants >= int(min_participants)
                        and participants <= int(max_participants)):
                    sent_analyze = self.sentimentfinder(conversation)
                    if sent_analyze == sentiment:
                        prev_timestamp = False
                        for dic in conversation:
                            datetimeFormat = '%Y-%m-%d %H:%M:%S'
                            comment = self.r.comment(id=dic["Comment"])
                            timestamp = str(
                                datetime.datetime.fromtimestamp(
                                    comment.created_utc))
                            count += 1
                            if prev_timestamp:
                                diff = datetime.datetime.strptime(
                                    timestamp, datetimeFormat
                                ) - datetime.datetime.strptime(
                                    prev_timestamp, datetimeFormat)
                                if min_time < diff.seconds and diff.seconds < max_time:
                                    new_conv.append(dic)
                            prev_timestamp = timestamp
                        conv_lst.append({
                            "User": self.user,
                            "Conversation": new_conv,
                            "Participants": participants
                        })
                        new_conv = []
            self.print_by_order(conv_lst)
        except TclError:
            pass

    def sentimentfinder(self, lst):
        """Finds the sentiment of a conversation and returns it."""
        oldsentiment = 0
        sentimentset = set()
        for dic in lst:
            comment = self.r.comment(id=dic["Comment"])
            scores = self.sid.polarity_scores(comment.body)
            newsentiment = scores['compound']
            if oldsentiment > newsentiment:
                sentimentset.add('Negative')
            elif newsentiment > oldsentiment:
                sentimentset.add('Positive')
            else:
                sentimentset.add('Neutral')
            oldsentiment = newsentiment
        # Only report a sentiment when the whole conversation moves in one direction
        if len(sentimentset) == 1:
            return list(sentimentset)[0]

    def load(self):
        """This function allows the user to load in json files."""
        file_path = filedialog.askopenfilename(title="Select file",
                                               filetypes=[('JSON file', '.json')])
        if file_path:
            self.T.delete(1.0, END)
            with open(file_path, encoding='utf-8') as f:
                data = json.loads(f.read())
                self.print_by_order(data)
                self.file = data

    def start(self):
        """Runs the comment retrieval in a separate thread so the GUI does not freeze."""
        threading.Thread(target=self.get_comments, daemon=True).start()

    def search_username(self):
        """This function allows the user to look for a different username."""
        check = True
        while check:
            try:
                self.user = simpledialog.askstring(
                    "Input", "Please insert a username")
                check = False
            except:
                print("The window glitched. Please try again.")
                pass
        self.start()

    def get_comments(self):
        """This function searches for all the comments made by the given username.
        It makes dictionaries as following:
        {User, Participants, Conversation: {Timestamp, Comment, User}}"""
        check = True
        while check:
            if not self.file:
                try:
                    self.user = simpledialog.askstring(
                        "Input", "Please insert a username")
                    if self.user:
                        check = False
                except:
                    print("The window glitched. Please try again.")
Please try again.") pass else: break conv_lst = [] count = 0 try: self.r.redditor(self.user).fullname author_lst = [] for comment in self.r.redditor(self.user).comments.new(limit=100): count += 1 conversation = [] ancestor = comment refresh_counter = 0 while not ancestor.is_root: ancestor = ancestor.parent() if refresh_counter % 9 == 0: ancestor.refresh() refresh_counter += 1 if 2 < len(ancestor.replies) < 10: author_lst.append(ancestor.author) if ancestor.author: conversation.append({ "Timestamp": ancestor.created_utc, "Comment": str(ancestor.id), "User": ancestor.author.name }) else: conversation.append({ "Timestamp": ancestor.created_utc, "Comment": str(ancestor.id), "User": "******" }) for reply in ancestor.replies: try: author_lst.append(reply.author) conversation.append({ "Timestamp": reply.created_utc, "Comment": str(reply.id), "User": reply.author.name }) except: pass conversation = [ dict(tupleized) for tupleized in set( tuple(sorted(item.items())) for item in conversation) ] author_lst = list(set(author_lst)) conv_dic = { "User": self.user, "Conversation": conversation, "Participants": len(author_lst) } conv_lst.append(conv_dic) conversation = [] author_lst = [] self.file = conv_lst self.filterinput(conv_lst) except: print("This username is not available.") def print_by_order(self, data): """This function inserts the conversations in the correct order into the left frame. The correct order is determined by the timestamp.""" self.T.delete(1.0, END) try: dic = data[0] self.user = dic["User"] conv_file = open(self.user + '.json', 'w') json.dump(data, conv_file) self.file = data for user_dic in data: count = 0 conversation = user_dic["Conversation"] prev_timestamp = False for dic in conversation: comment = self.r.comment(id=dic["Comment"]) timestamp = str( datetime.datetime.fromtimestamp(comment.created_utc)) count += 1 datetimeFormat = '%Y-%m-%d %H:%M:%S' if prev_timestamp: diff = abs( datetime.datetime.strptime(timestamp, datetimeFormat) - datetime.datetime.strptime(prev_timestamp, datetimeFormat)) print("({0}) {1} {2}:\t{3}\n".format( count, diff, comment.author, comment.body.split('\n', 1)[0])) self.T.insert( END, "({0}) {1} {2}:\t{3}\n".format( count, diff, comment.author, comment.body.split('\n', 1)[0])) else: print("({0}{1:<9} {2}:\t{3}\n".format( count, ")", comment.author, comment.body.split('\n', 1)[0])) self.T.insert( END, "({0}{1:<9} {2}:\t{3}{2}\n".format( count, ")", comment.author, comment.body.split('\n', 1)[0])) prev_timestamp = timestamp self.T.insert(END, "\n") conv_file.close() except: self.T.insert(END, "No conversations were found.") print("No conversations were found")
data_all = json_data['data']['children']
num_of_posts = 0
while len(data_all) <= 100:
    time.sleep(2)
    last = data_all[-1]['data']['name']
    url = 'https://www.reddit.com/r/politics/.json?after=' + str(last)
    req = requests.get(url, headers=hdr)
    data = json.loads(req.text)
    data_all += data['data']['children']
    if num_of_posts == len(data_all):
        break
    else:
        num_of_posts = len(data_all)

sia = SIA()
pos_list = []
neg_list = []
for post in data_all:
    res = sia.polarity_scores(post['data']['title'])
    # Uncomment the following line to get the polarity results
    # for each post as shown in the article's image
    # print(res)
    if res['compound'] > 0.2:
        pos_list.append(post['data']['title'])
    elif res['compound'] < -0.2:
        neg_list.append(post['data']['title'])

with open("pos_news_titles.txt", "w", encoding='utf-8', errors='ignore') as f_pos:
    for post in pos_list:
        # Write each positive headline on its own line
        f_pos.write(post + "\n")
def search_user(user, count):
    try:
        user_timeline = twitter.get_user_timeline(screen_name=user,
                                                  count=count,
                                                  tweet_mode='extended')
        print('Tweets successfully retrieved!')
    except:
        raise ValueError('Tweets could not be retrieved!')

    ################################################################################
    # tweets = tw.Cursor(api.search,
    #                    q=search_words,
    #                    lang="en",
    #                    since=date_since).items(50)

    # Create JSON file from tweets
    print('Creating JSON file...')
    filename = user + '.json'
    with open(filename, 'w', encoding='utf-8') as file:
        json.dump(user_timeline, file, sort_keys=True, indent=4)
    print('{} created'.format(filename))
    tweet_file = filename

    # Read JSON file
    print("Reading the file...")
    tweets_2 = read_json(tweet_file)

    # Initialize tweet tokenizer
    tTokenizer = TweetTokenizer()
    # Initialize NLTK sentiment analyzer
    sid = SentimentIntensityAnalyzer()
    # Initialize accumulators for the averages
    tss1 = 0.0
    tss2 = 0.0

    # Sentiment analysis
    print("Analyzing...")
    print(
        """--------------------------------------------------------------------------------------------------------------------------------
TextBlob |  NLTK | Tweet text                                                               | Sentences |  Words | Unique Words
--------------------------------------------------------------------------------------------------------------------------------"""
    )
    for tweet in tweets_2:
        # Get the text from the tweet, whichever field carries it
        if tweet['truncated']:
            line = tweet['extended_tweet']['full_text']
        elif 'text' in tweet:
            line = tweet['text']
        else:
            line = tweet['full_text']
        # Delete leading & trailing whitespace
        line = line.strip()
        # Remove links
        line = remove_links(line)
        # Remove newlines
        line = line.replace('\n', ' ')
        # Remove non-ASCII characters
        line = NormalizeText.remove_nonascii(line)
        # Split into sentences
        tweet_sent = sent_tokenize(line)
        # Split into words
        tweet_word = tTokenizer.tokenize(line)
        tweet_unique = list(set(tweet_word))  # Eliminate duplicated words
        # Analyse sentiment
        ss1 = TextBlob(line)
        ss2 = sid.polarity_scores(line)
        # Update accumulators
        tss1 += ss1.sentiment.polarity
        tss2 += ss2['compound']
        # Add the scores to the tweet so they end up in the JSON file
        tweet.update({
            'sentiment': {
                'textblob': ss1.sentiment.polarity,
                'nltk': ss2['compound']
            }
        })
        # Show results
        print('{:8.2f} | {:5.2f} | {:72.72} | {:9d} | {:6d} | {:12d}'.format(
            ss1.sentiment.polarity, ss2['compound'], line, len(tweet_sent),
            len(tweet_word), len(tweet_unique)))

    print('\n-------------------------')
    print('Sentiment analysis summary:')
    print('TextBlob Average {:.2f}'.format(tss1 / len(tweets_2)))
    print('NLTK Average     {:.2f}'.format(tss2 / len(tweets_2)))

    # Update the JSON file with the sentiment scores
    print('\n')
    print('*****************')
    print('Updating JSON file...')
    with open(filename, 'w', encoding='utf-8') as file:
        json.dump(tweets_2, file, sort_keys=True, indent=4)
    print('File {} updated. Process complete!'.format(filename))

    return tweets_2
def __init__(self): self.analyzer = SentimentIntensityAnalyzer()
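# The fragment above only shows a constructor that caches a SentimentIntensityAnalyzer.
# Below is a minimal, hypothetical wrapper in the same spirit (the class name and
# compound() method are made up for illustration, not the original author's API);
# it mainly shows why building the analyzer once and reusing it is preferable to
# re-reading the VADER lexicon on every call.
from nltk.sentiment.vader import SentimentIntensityAnalyzer


class SentimentScorer:
    def __init__(self):
        # Construct the analyzer once; polarity_scores() can then be called cheaply.
        self.analyzer = SentimentIntensityAnalyzer()

    def compound(self, text):
        """Return only the overall compound score in [-1, 1]."""
        return self.analyzer.polarity_scores(text)['compound']


if __name__ == "__main__":
    scorer = SentimentScorer()
    print(scorer.compound("What a pleasant surprise!"))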