def on_data(self, data):
    """Handle one raw payload from the tweet stream.

    Extracts text, follower count and timestamp, scores the text's
    sentiment, pretty-prints the record, persists it to the current
    day's JSON file (rolling the previous day into a zip at midnight),
    and updates the running average sentiment.

    Returns True so the stream stays connected.
    """
    tweetJSON = json.loads(data)
    lineJSON = {}
    try:
        sentiment_line = tweetJSON['text']
        lineJSON['text'] = tweetJSON['text']
        lineJSON['followers_count'] = tweetJSON['user']['followers_count']
        lineJSON['timestamp_ms'] = tweetJSON['timestamp_ms']
    except KeyError:
        # Non-tweet payloads (e.g. delete notices) lack these keys;
        # score an empty string instead of crashing the stream.
        sentiment_line = ""
    sentiment_score = sentiment.analyse(sentiment_line)
    lineJSON['sentiment'] = sentiment_score
    self.pp.pprint(lineJSON)
    # Index 7 of struct_time is tm_yday (day of year): archive the old
    # file and start a new one when the day rolls over.
    now = time.localtime(time.time())[7]
    if self.today != now:
        self.moveToZip(self.today)
        self.today = now
    self.save_as_JSON(lineJSON, self.today)
    if sentiment_score != 0:
        self.no_of_tweets += 1
        self.total_sentiment_score += sentiment_score
    # Guard the division instead of swallowing ZeroDivisionError.
    if self.no_of_tweets:
        print("AVG sentiment: " + str(self.total_sentiment_score / self.no_of_tweets))
    return True
def eval(file_name, choice):
    """Evaluate content for potentially suicidal text.

    choice == 1: run speech recognition on *file_name* and classify the
    single transcript, returning a verdict string.
    Otherwise: treat *file_name* as a text file, classify each line, and
    return the percentage of lines both classifiers flag (averaged).

    NOTE(review): this shadows the builtin ``eval``; the name is kept
    because external callers depend on it.
    """
    if choice == 1:
        text = [recognize(file_name)]
    else:
        with open(file_name, "rt") as f:
            lines = f.readlines()
        print(lines)
        text = list(lines)
    nb_count = 0
    svm_count = 0
    print(text)
    total = len(text)
    for entry in text:
        # Only run the classifiers on entries the sentiment analyser
        # scores as negative/neutral (<= 0.1).
        if analyse(entry) <= 0.1:
            if predict_nb(entry) == 'suicidal':
                nb_count += 1
            if predict_svm(entry) == 'suicidal':
                svm_count += 1
    # Average of the two classifiers' positive counts.
    avg_count = (nb_count + svm_count) / 2
    if choice == 1:
        # avg_count == 1 means both classifiers flagged the transcript.
        if avg_count == 1:
            return 'Potentially suicidal content.'
        return 'No suicidal content found.'
    print(avg_count)
    print(total)
    if total == 0:
        # Empty input file: previously raised ZeroDivisionError.
        return 0.0
    percentage = (avg_count / total) * 100
    return percentage
def _dump(frame, csv_name):
    # Announce and persist a pipeline stage's output frame.
    print('Dataset dumped to: ' + csv_name)
    frame.to_csv(csv_name, index=False, encoding='utf-8')

if '--chunk' in flags:
    print('Running Tag and Chunker')
    news_df['chunked_text'] = chunker.tag_and_chunk(news_df['full_text'])
    _dump(news_df, 'news_chunked.csv')
if '--stanford' in flags:
    print('Running StanfordParser')
    news_df['stanford_text'] = stanford.parse(news_df['full_text'])
    _dump(news_df, 'news_stanford.csv')
if '--dep_parse' in flags:
    print('Running Dep Parser')
    news_df['deps_text'] = dependency_parser.parse(news_df['full_text'])
    _dump(news_df, 'news_dependency.csv')
if '--named_ents' in flags:
    print('Running Named Entities Extracter')
    # This stage produces its own frame rather than a new column.
    named_ents = named_entities.parse(news_df['full_text'])
    _dump(named_ents, 'news_named_ents.csv')
if '--sentiment' in flags:
    print('Running Sentiment Analysis')
    # Sentiment stage also receives the category labels.
    sentiments = sentiment.analyse(news_df['full_text'],
                                   list(news_df['news_category']))
    _dump(sentiments, 'news_sentiment.csv')
import sentiment
import json

# sentiment.analyse expects a list of (review string, rating) tuples.
# This example uses academic dataset https://www.yelp.com/academic_dataset
# It's around 200 mb so I won't be uploading it to github.
labMTPath = "happiness.txt"
# Raw string: the original relied on Python passing unknown escapes like
# "\Y" through unchanged, which is deprecated. Value is identical.
yelp = r"..\Yelp\yelp_phoenix_academic_dataset\yelp_academic_dataset_review.json"


def loadYelpData(path):
    """Parse the Yelp academic review dump (one JSON object per line)
    into a list of (review text, star rating as float) tuples."""
    reviews = []
    with open(path, "r") as f:
        for line in f:
            record = json.loads(line)  # renamed: don't shadow builtin dict
            reviews.append((record["text"], float(record["stars"])))
    return reviews


sentiment.analyse(loadYelpData(yelp), labMTPath, "yelpSentiment.pkl")
sentiment.visualise("yelpSentiment.pkl")
# sentiment.analyse expects a list of (review string, rating) tuples.
# This example uses academic dataset https://www.yelp.com/academic_dataset
# It's around 200 mb so I won't be uploading it to github.
labMTPath = "happiness.txt"
grPath = "../../data/goodreads.20130510.txt"


def loadYelpData(path):
    """Parse a Yelp-style dump (one JSON object per line) into a list
    of (review text, star rating as float) tuples."""
    reviews = []
    with open(path, "r") as f:
        for line in f:
            record = json.loads(line)  # renamed: don't shadow builtin dict
            reviews.append((record["text"], float(record["stars"])))
    return reviews


def loadGoodreadsData(path):
    """Load a pickled dict with parallel 'reviews'/'stars' lists into a
    list of (review, stars) tuples.

    NOTE(review): pickle.load on external data is unsafe for untrusted
    input — acceptable only for this local research dataset.
    """
    # The original opened the file without closing it (handle leak).
    with open(path, "rb") as input_:
        data = pickle.load(input_)
    return list(zip(data["reviews"], data["stars"]))


data = loadGoodreadsData(grPath)
sentiment.analyse(data, labMTPath, "goodreadsSentiment.pkl")
sentiment.visualise('goodreadsSentiment.pkl')