import re

from textblob import TextBlob

from sentiment import sentiment_score


def main():
    filepath = 'example_tweets.txt'
    cnt = 1
    with open(filepath) as fp:
        for line in fp:  # iterate directly so the first line is not skipped
            if line.strip() == '':
                continue
            print('original message: ' + line.strip())
            # Strip @mentions, URLs, and remaining punctuation in one pass.
            clean_message = ' '.join(re.sub(
                r"(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+://\S+)", " ", line).split())
            # Strip @mentions only.
            clean_message2 = ' '.join(re.sub(r"(@[A-Za-z0-9]+)", " ", line).split())
            clean_message3 = ' '.join(re.sub(r"(@[A-Za-z0-9]+)", " ", line).split())
            # Strip URLs only.
            clean_message4 = ' '.join(re.sub(r"(\w+://\S+)", " ", line).split())
            # Four hashtag-stripping variants, from greediest to narrowest.
            clean_message5 = ' '.join(re.sub(r"(#.+)", " ", line).split())
            clean_message6 = ' '.join(re.sub(r"(#.*$)", " ", line).split())
            clean_message7 = ' '.join(re.sub(r"(#.*)", " ", line).split())
            clean_message8 = ' '.join(re.sub(r"(#\w+)", " ", line).split())
            # Combined pattern: hashtags, @mentions, punctuation, and URLs.
            clean_message9 = ' '.join(re.sub(
                r"(#\w+)|(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+://\S+)",
                " ", line).split())
            print('cleaned message: ' + clean_message)
            print('cleaned message2: ' + clean_message2)
            print('cleaned message3: ' + clean_message3)
            print('cleaned message4: ' + clean_message4)
            print('cleaned message5: ' + clean_message5)
            print('cleaned message6: ' + clean_message6)
            print('cleaned message7: ' + clean_message7)
            print('cleaned message8: ' + clean_message8)
            print('cleaned message9: ' + clean_message9)
            print('original sentiment score: ' + str(sentiment_score(line)))
            print('cleaned sentiment score: ' + str(sentiment_score(clean_message)))
            analysis = TextBlob(clean_message)
            print('cleaned TextBlob sentiment score: ' + str(analysis.sentiment.polarity))
            print('\n')
            cnt += 1
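For reference, a hypothetical before/after for the combined pattern used in clean_message9 (the sample tweet is invented, not taken from example_tweets.txt):

# in : "@bob check https://t.co/abc #wow this rocks!!!"
# out: "check this rocks"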
def count_senti(row, counts):
    """Fold one tweet row into per-city counters and return the updated dict."""
    try:
        value = row.value
        coordinates = value[2]['boundingBox']['coordinates'][0][0]
        longitude = coordinates[0]
        latitude = coordinates[1]
        language = value[3]
        if language != 'en':
            return counts
        city = get_geo(latitude, longitude)
        if city == '':
            return counts
        score = sentiment_score(value[0])
    except Exception:
        # Malformed rows (missing geo data, wrong shape, etc.) are skipped.
        return counts
    data_city = counts.get(city, {})
    data_city['totalTweet'] = data_city.get('totalTweet', 0) + 1
    data_city['totalSenti'] = data_city.get('totalSenti', 0) + score
    if score > 0.1:
        data_city['totalPos'] = data_city.get('totalPos', 0) + 1
        data_city.setdefault('totalNeg', 0)
    else:
        data_city['totalNeg'] = data_city.get('totalNeg', 0) + 1
        data_city.setdefault('totalPos', 0)
    counts[city] = data_city
    return counts
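A minimal usage sketch for count_senti, assuming it is folded over rows shaped like the ones the function indexes into; `rows` is a stand-in for whatever iterable the surrounding job provides:

counts = {}
for row in rows:  # `rows` is a placeholder for the tweet rows being reduced
    counts = count_senti(row, counts)

for city, stats in counts.items():
    avg = stats['totalSenti'] / stats['totalTweet']
    print(city, 'avg:', avg, 'pos:', stats['totalPos'], 'neg:', stats['totalNeg'])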
def preprocessTweets(data):
    # Decode each row's text bytes and append a sentiment score per row.
    processed = []
    for i in data:
        tmp = []
        tmp.append(i[0].decode("utf-8"))
        tmp.append(i[1])
        tmp.append(len(i[0]))
        tmp.append(i[2])
        tmp.append(i[-1])
        tmp.append(sentiment_score(i[0].decode("utf-8")))
        processed.append(tmp)
    return processed
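A hedged usage sketch for preprocessTweets; the exact column layout of the input rows is an assumption (only i[0] being UTF-8 text bytes is implied by the code itself):

rows = [(b"I love this!", "alice", 3, "2017-05-01")]  # hypothetical row shape
for text, user, length, col2, last, score in preprocessTweets(rows):
    print(user, text, score)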
def post(self):
    tweet = self.get_argument("tweet", default="")
    hashtag = self.get_argument("hashtag", default="")
    t = tornado.template.Template(html)
    if tweet:
        # Score a single tweet pasted into the form.
        score = sentiment_score(tweet)
        self.write(t.generate(tweet_senti=str(score), hashtag_senti="0"))
    elif hashtag:
        # Score the 100 most recent tweets for the hashtag and average them.
        tweets = api.search(hashtag, count=100)
        tweets = [tweet.text for tweet in tweets]
        scores = sentiment_scores_of_sents(tweets)
        for score, tweet in zip(scores, tweets):
            print(score, tweet)
        mean_score = np.mean(scores)
        self.write(t.generate(tweet_senti="0", hashtag_senti=str(mean_score)))
    else:
        self.write(t.generate(tweet_senti="0", hashtag_senti="0"))
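A minimal sketch of how this handler might be wired into a Tornado application; the handler class name and port are assumptions, not taken from the original file:

import tornado.ioloop
import tornado.web

def make_app():
    # `SentimentHandler` stands in for the class containing post() above.
    return tornado.web.Application([(r"/", SentimentHandler)])

if __name__ == "__main__":
    make_app().listen(8888)  # port 8888 is an arbitrary choice
    tornado.ioloop.IOLoop.current().start()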
from sentiment import sentiment_score

__author__ = 'ravi'

print(sentiment_score("go die"))
# Calculate individual and average sentiment score
# Individual sentiment score
from sentiment import sentiment_score
import json

data = []
for line in open('z_test_data.txt'):
    b = line
    a = sentiment_score(b)
    data.append([b, a])

with open('z_saved_data.txt', 'w') as outfile:
    json.dump(data, outfile)
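The header also mentions an average score, which the script above does not compute; a minimal sketch of deriving it from the saved [text, score] pairs (this aggregation step is an assumption, not part of the original script):

import json

with open('z_saved_data.txt') as infile:
    data = json.load(infile)

scores = [score for _text, score in data]
print('average sentiment:', sum(scores) / len(scores))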
def on_status(self, status):
    """
    This method is called whenever new data arrives from the live stream.
    We asynchronously push this data to Kafka queue.
    """
    try:
        tweet = status._json
        mentions = tweet["entities"]["user_mentions"]
        # Only keep geotagged, non-retweet tweets that mention another user.
        if mentions and tweet["place"] is not None \
                and not tweet["text"].startswith("RT"):
            to_user_id = mentions[0]["id_str"]
            to_user = api.get_user(to_user_id)._json
            if to_user["profile_location"] is not None:
                to_coords = self.geolocator.geocode(
                    to_user["profile_location"]["name"])
                from_coords = self.geolocator.geocode(
                    tweet["place"]["full_name"])
                # Score English tweets; everything else defaults to 'Pos'.
                if tweet["lang"].startswith("en"):
                    sentscore = sentiment_score(tweet["text"])
                    sentiment = "Pos" if sentscore > 0.5 else "Neg"
                else:
                    sentiment = "Pos"
                if to_coords is not None and from_coords is not None:
                    # Build the payload with json.dumps instead of manual
                    # string concatenation so quoting is always correct.
                    payload = json.dumps({
                        "text": tweet["text"],
                        "language": tweet["lang"],
                        "sentiment": sentiment,
                        "fromLocation": tweet["place"]["full_name"],
                        "fromLocationLat": str(from_coords.latitude),
                        "fromLocationLong": str(from_coords.longitude),
                        "toLocation": to_user["profile_location"]["name"],
                        "toLocationLat": str(to_coords.latitude),
                        "toLocationLong": str(to_coords.longitude),
                    })
                    print(payload)
                    self.producer.send_messages(
                        'twitterstream', payload.encode('utf-8'))
    except Exception as e:
        # Catch any unicode/shape errors and ignore them
        # to avoid breaking the application.
        print(e)
        return False
    return True
file1 = open(OUTPUT_FOLDER_NAME + '/' + keyword['word'] + '.output', 'r')
lines = file1.readlines()
sttime = json.loads(lines[0])['created_at']
all_likes = 0
all_retweets = 0
score = 0
num = 0
for line in lines:
    data = json.loads(line)
    msg = data['tweet']
    likes = data['likes_count']
    retweets = data['retweets_count']
    like_score = likes * 0.5
    retweet_score = retweets * 1.0
    sentiment = sentiment_score(msg)
    # Only count clearly polarized tweets (outside the neutral 0.3-0.7 band).
    if sentiment < 0.3 or sentiment > 0.7:
        if multiplier == -1:
            sentiment = 1 - sentiment
        # Each tweet contributes with weight 1 + 0.5*likes + 1.0*retweets.
        score += sentiment + like_score + retweet_score
        num += abs(1 + like_score + retweet_score)
        # Save featured tweets: short, no mentions, no links.
        if len(msg) <= featured_char_limit and "@" not in msg and "https://" not in msg:
            featured.append(data)
    all_likes += likes
    all_retweets += retweets
score = score / num
averages.append(score)
# print("likes: ", all_likes, ", retweets", all_retweets)
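A worked example of the weighting above, with made-up numbers: a tweet with sentiment 0.8, 10 likes, and 2 retweets adds 0.8 + 10*0.5 + 2*1.0 = 7.8 to score and 1 + 10*0.5 + 2*1.0 = 8.0 to num, so heavily liked or retweeted tweets dominate the final weighted average.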
df = pd.read_csv(item_path)
item_columns = df.columns
for review in review_list:
    if review in item_columns:
        str_list = df[review]
        score_list = []
        count = 0
        total_count = len(str_list)
        print(total_count)
        tqdm_iter = tqdm(enumerate(list(str_list)))
        for i in tqdm_iter:
            try:
                str_item = str(i[1])
                count += 1
                score_list.append(sentiment_score(str_item))
            except Exception:
                # Keep the raw value as a placeholder when scoring fails.
                log_error(f, i[0], str(i[1]))
                count += 1
                score_list.append(str(i[1]))
        score_list = pd.DataFrame(score_list)
        df = pd.concat([df, score_list], axis=1)
        # Rename the appended column to '<review>_score'.
        ori_columns = list(df.columns)
        ori_columns[-1] = review + '_score'
        df.columns = pd.Series(ori_columns)
        save_path = result_path + item
        df.to_csv(save_path)
        print(df)
def main():
    # message = "happiness is what"
    # print(sentiment_score(message))
    print(sentiment_score(u"I love you"))
def delete_document(self, dbs_name, document_id, rev):
    url = 'http://{ip_address}:5984/{dbs_name}/{document_id}?rev={rev}'.format(
        ip_address=self.ip_address,
        dbs_name=dbs_name,
        document_id=document_id,
        rev=rev)
    response = requests.delete(url, auth=self.auth)
    return response.json()


if __name__ == '__main__':
    my_couchdb = couchDb_utils('admin', 'password', 'localhost')
    # res = my_couchdb.insert_document('demo', {'_id': 'second_record', 'init_balance': 1500})
    # res = my_couchdb.get_document('demo', 'second_record')
    # res = my_couchdb.delete_document('demo', 'second_record', "1-9528dce32655253d363029732a718a23")
    # print(res)
    with open('tinyTwitter.json', 'r') as f:
        tiny_twitter = json.loads(f.read())
    twitters = tiny_twitter['rows']
    for twitter in twitters:
        text = twitter['doc']['text']
        # Strip shortened t.co links before scoring.
        matched = re.findall(r'https?://t\.co/.{10}', text, re.MULTILINE)
        for strip_str in matched:
            text = text.replace(strip_str, '')
        score = sentiment_score(text)
        data = {'positiveness': score, 'tweet': text}
        my_couchdb.insert_document('demo', data)
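insert_document is called above but only delete_document is shown; a minimal sketch of what it might look like, mirroring delete_document and the standard CouchDB HTTP API (the body below is an assumption, not the original implementation):

def insert_document(self, dbs_name, document):
    # POST to /{db} lets CouchDB assign an _id unless one is supplied.
    url = 'http://{ip_address}:5984/{dbs_name}'.format(
        ip_address=self.ip_address, dbs_name=dbs_name)
    response = requests.post(url, json=document, auth=self.auth)
    return response.json()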