def calculateGrowthAndSentiment(self):
    # Earliest timestamp (milliseconds) becomes the cluster's creation time.
    mintime = min(map(lambda x: x[1], self.text_data))
    self.created_at = mintime
    self.first_growth_time = mintime
    self.hourly_growth_rate = [0]
    self.hourly_sentiment = [0]
    self.hourly_accum_sentiment = [0]
    self.hourly_keywords = [['']]
    self.hourly_tags = [['']]
    hourly_tweets = dict()
    hourly_tweets_accum = dict()
    maxidx = 0

    # Bucket every (document vector, text metadata) pair into its hourly slot.
    for tw in zip(self.documents, self.text_data):
        idx = int((tw[1][1] - mintime) / 3600 / 1000)
        if idx > maxidx:
            maxidx = idx
        while idx >= len(self.hourly_growth_rate):
            self.hourly_growth_rate.append(0)
            self.hourly_sentiment.append(0)
            self.hourly_accum_sentiment.append(0)
            self.hourly_keywords.append([''])
            self.hourly_tags.append([''])
        self.hourly_growth_rate[idx] += 1
        if idx not in hourly_tweets:
            hourly_tweets[idx] = []
        hourly_tweets[idx].append(tw)
        for i in range(idx + 1):
            if i not in hourly_tweets_accum:
                hourly_tweets_accum[i] = []
            hourly_tweets_accum[i].append(tw)

    # Per-hour keywords, tags, sentiment and growth rate.
    for idx in range(maxidx + 1):
        if idx in hourly_tweets_accum:
            hta = list(map(lambda x: x[1][0][0], hourly_tweets_accum[idx]))
            tmp = cluster_exporter.getTagsForTexts(self, hta)
            self.hourly_keywords[idx] = cluster_exporter.get_keywords_for_message_list(hta, idfs)[:3]
            if tmp is not None:
                self.hourly_tags[idx] = tmp[:3]
            else:
                self.hourly_tags[idx] = ['', '', '']
        if idx in hourly_tweets:
            cluster_vector = np.mean(list(map(lambda x: x[0], hourly_tweets[idx])), axis=0)
            ht = list(map(lambda x: x[1][0][0], hourly_tweets[idx]))
            self.hourly_sentiment[idx] = getSentiment(cluster_vector)
            self.hourly_growth_rate[idx] = len(hourly_tweets[idx])
        if idx in hourly_tweets_accum and len(hourly_tweets_accum[idx]) > 0:
            cluster_vector = np.mean(list(map(lambda x: x[0], hourly_tweets_accum[idx])), axis=0)
            self.hourly_accum_sentiment[idx] = getSentiment(cluster_vector)
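# A small worked example of the hour-bucket index used above, assuming the second
# element of each text_data entry is a Unix timestamp in milliseconds (as the
# /3600/1000 divisor implies). The timestamp values are made-up for illustration.
mintime = 1_500_000_000_000            # first tweet, lands in hourly bucket 0
t = mintime + 2 * 3600 * 1000 + 1      # a tweet just over two hours later
idx = int((t - mintime) / 3600 / 1000)
print(idx)                             # -> 2, i.e. the third hourly bucket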
def Sentiment():
    text = request.values.get("text")
    score = sentiment.getSentiment(text)
    data = {"score": score}
    return json.dumps(data)
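# A minimal client-side sketch for the view above, assuming it is registered at a
# hypothetical "/sentiment" route on a local Flask app; the route path and port
# are assumptions, not values from the snippet. request.values picks the "text"
# field up from either the query string or a form-encoded body.
import requests

resp = requests.post("http://localhost:5000/sentiment", data={"text": "I love this!"})
print(resp.json()["score"])  # the score produced by sentiment.getSentiment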
def generateReply(message):
    pos = getPOS(message)
    sentiment = getSentiment(message)

    # If an error occurred getting POS
    if not pos:
        return "I am not functioning at the moment. Perhaps check your API keys."

    # If the user greeted
    if pos[0][0].lower() in greetings:
        return random.choice(greetings_responses)

    # If the user said 'You are ... {adjective} ...'
    youAreJJ = findYouAreJJ(pos)
    if youAreJJ:
        if sentiment >= 0.5:
            return "Thank you, I know I'm " + youAreJJ + "."
        else:
            return "No! I'm not " + youAreJJ + "!"

    # If the user said 'I am ... {adjective} ...'
    IAmJJ = findIAmJJ(pos)
    if IAmJJ:
        if sentiment >= 0.5:
            return "I'm happy for you that you're " + IAmJJ + "."
        else:
            return "Don't be mean on yourself. I'm sure you're not really " + IAmJJ + "!"

    if sentiment > 0.5:
        return "I'm happy to hear that!"
    else:
        return "I feel sad about that."
def board_list():
    sentiment_result = getSentiment()
    world_result = getWordCloud()
    print(sentiment_result)
    return render_template("spray.html")
def symbolSentiment(symbol, methods=["GET", "POST"]):
    if request.method == "POST":
        symbol = request.form["symbol"]
    sentimentData = getSentiment(symbol)
    return sentimentData
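# In Flask the "methods" list normally belongs on the route decorator rather than
# in the view signature. The sketch below shows the assumed registration for a
# view like the one above; the "/sentiment/<symbol>" path is a hypothetical
# example, and getSentiment is the project's helper, assumed to return something
# Flask can serialize (a string or dict).
from flask import Flask, request

app = Flask(__name__)

@app.route("/sentiment/<symbol>", methods=["GET", "POST"])
def symbol_sentiment(symbol):
    if request.method == "POST":
        symbol = request.form["symbol"]
    return getSentiment(symbol)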
def generateExperimentGroupResponse(user_message, user_id=0):
    """ Given a message from the user, generates an answer to the user. """
    user_sentiment = getSentiment(user_message)
    context = chatbot.create_client_context(user_id)
    bot_message = context.bot.ask_question(context, user_message)
    bot_sentiment = getSentiment(bot_message)
    positive_user = user_sentiment > 0
    attempts = 1
    # If the user's sentiment is negative, the bot's response should be more
    # negative, and vice versa for positive sentiment.
    while ((positive_user and (bot_sentiment >= user_sentiment))
           or ((not positive_user) and (bot_sentiment <= user_sentiment))):
        if attempts > SYNONYM_SWAP_ATTEMPT_LIMIT:
            break
        try:
            candidate_message = mutateMessage(bot_message)
            candidate_sentiment = getSentiment(candidate_message)
            if positive_user:
                if candidate_sentiment > bot_sentiment:
                    bot_sentiment = candidate_sentiment
                    bot_message = candidate_message
            else:
                if candidate_sentiment < bot_sentiment:
                    bot_sentiment = candidate_sentiment
                    bot_message = candidate_message
        except SynonymNotFound:
            # If swapping a synonym is not possible, it will not be possible on
            # subsequent tries either.
            attempts += SYNONYM_SWAP_ATTEMPT_LIMIT
            break
        attempts += 1

    bot_emoji = getEmoji(bot_message)
    return bot_message + bot_emoji
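# The function above relies on a mutateMessage helper that is not shown. Below is
# a minimal sketch of one possible implementation based on WordNet synonym swaps,
# assuming NLTK is installed and the wordnet corpus has been downloaded
# (nltk.download("wordnet")). The SynonymNotFound exception and the single-word
# swap strategy are assumptions, not the project's actual code.
import random
from nltk.corpus import wordnet


class SynonymNotFound(Exception):
    pass


def mutateMessage(message):
    # Try words in random order and swap the first one that has a WordNet synonym.
    words = message.split()
    for i in random.sample(range(len(words)), len(words)):
        synonyms = {
            lemma.name().replace("_", " ")
            for synset in wordnet.synsets(words[i])
            for lemma in synset.lemmas()
            if lemma.name().lower() != words[i].lower()
        }
        if synonyms:
            words[i] = random.choice(sorted(synonyms))
            return " ".join(words)
    raise SynonymNotFound(message)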
def index():
    if request.form:
        # Multipart form
        data = request.form
    else:
        # JSON
        data = request.get_json()
    text = data.get("text")
    sentiment_threshold = float(
        data.get("sentiment_threshold", DEFAULT_SENTIMENT_THRESHOLD))
    return jsonify({"words": getSentiment(text, sentiment_threshold)})
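# A minimal JSON client sketch for the view above, assuming it is mounted at the
# root path of a local Flask app; the URL and the example threshold value are
# assumptions, not values from the snippet.
import requests

payload = {"text": "The service was great but the food was awful",
           "sentiment_threshold": 0.3}
resp = requests.post("http://localhost:5000/", json=payload)
print(resp.json()["words"])  # the "words" payload produced by getSentiment(text, threshold)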
def DoodleView(request, id):
    current_doodle = Doodle.objects.get(pk=id)
    response = ("This is where %s doodle goes. Expect to see comments here"
                % current_doodle.title)
    template = loader.get_template("Doodlers/doodle.html")
    comments = current_doodle.comment_set.all()

    # Score every comment and count the negative ones.
    scores = {}
    num_bad = 0
    for comment in comments:
        scores[comment.id] = sentiment.getSentiment(comment.comment_text)
        if scores[comment.id] < 0:
            num_bad += 1

    # render() expects a plain dict for the context, not a RequestContext.
    context = {
        "doodle": current_doodle,
        "comments": comments,
        "scores": scores,
        "num_bad": num_bad,
    }
    return render(request, "Doodlers/doodle.html", context)
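# A hedged sketch of how the view above might be wired into a URLconf; the URL
# pattern, route name and the "Doodlers" app module path are assumptions inferred
# only from the template path in the snippet.
from django.urls import path
from Doodlers import views

urlpatterns = [
    path("doodle/<int:id>/", views.DoodleView, name="doodle-detail"),
]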
def bulkJsonData(json_file, _index, whatStuff):
    json_list = c.getDataFromFile(json_file)
    for doc in json_list:
        json_doc = json.loads(doc)
        sentiment = [0, 0, 0]

        # Clean the text in comments and the title from special characters and
        # emojis left over after the JSON conversion.
        if "data" in json_doc:
            my_text_location = json_doc["data"][0]["comment"]
            my_text = my_text_location["comment"]
            # Get sentiment
            sentiment = s.getSentiment(my_text)
            clean_my_text = c.cleanText(my_text)
            my_text_location.update([("comment", clean_my_text)])
            json_doc.update([("all_text", clean_my_text)])
            if "group" in my_text_location:
                my_group = my_text_location["group"]
                clean_my_group = c.cleanText(my_group)
                my_text_location.update([("group", clean_my_group)])

        my_title = json_doc["title"]
        clean_my_title = c.cleanText(my_title)
        json_doc.update([("title", clean_my_title)])

        # Add sentiment
        json_doc.update([("mySentiment", sentiment[0])])
        json_doc.update([("sentPositive", sentiment[1])])
        json_doc.update([("sentNegative", sentiment[2])])

        # Add load_type, used later for filtering
        json_doc.update([("load_type", whatStuff)])
        json_doc.update([("source_type", "facebook")])

        new_doc = str(json_doc).replace("'", '"')
        # Use a generator ('yield') so that the data isn't all loaded into memory.
        if '{"index"' not in new_doc:
            yield {"_index": _index, "_id": uuid.uuid4(), "_source": new_doc}
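# The generator above yields Elasticsearch bulk actions. A minimal sketch of
# feeding it to the official client's bulk helper, assuming a local cluster; the
# host, index name and file path are placeholders, not values from the snippet.
from elasticsearch import Elasticsearch
from elasticsearch.helpers import bulk

es = Elasticsearch("http://localhost:9200")
bulk(es, bulkJsonData("comments.json", "facebook-comments", "comments"))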
def doc_sentiment(params, data):
    print(params['begin_date'], params['fq'])
    parsed = get_json(params)
    ret = 0
    status = parsed['status']
    if status == "OK":
        resp = parsed['response']
        docs = resp['docs']
        for doc in docs:
            sentiment = 0
            for p in doc:
                var = doc[p]
                if var is not None:
                    if p == "headline":
                        var = var['main']
                    if p == "keywords":
                        var = [e['value'] for e in var]
                        var = " ".join(var)
                    sentiment += getSentiment(var)
            ret += sentiment
    return ret
def tweets_handler(api, query, cur_date, prev_date, sentiments, all_locs):
    """
    This method handles the operations on tweets, from extracting them from the
    Twitter API, to cleaning them and getting sentiments from them.
    """
    global count1
    # Calling subroutine to get tweet objects from the specified range
    tweets = getTweets(api, query, cur_date, prev_date)
    count1 += 1
    if count1 == 7:
        print("Data Collected", flush=True)
        print("Calculating Sentiments", flush=True)
    # Calling subroutine to open up the tweet batches
    tweets = open_tweet_obj(tweets)
    # Calling subroutine to remove duplicate tweets, if given by Twitter
    tweets = remove_duplicate_tweets(tweets)
    # Calling subroutine to extract the tweet text and tweeter location from the
    # tweet object; now tweets = {"text": .., "loc": ..}
    tweets = extract_data(tweets)
    # Calling subroutine for removing promotional tweets
    tweets = remove_promotional_tweets(tweets)
    # Calling subroutine for cleaning the tweets
    # tweets_text = tweet_cleaner(tweets)
    cur_day_locations = make_loc_dict(tweets)
    # Calling subroutine for getting sentiment from the tweets
    cur_day_sentiment = getSentiment(tweets, cur_day_locations)
    # Updating the sentiments list
    thread_no = current_thread().name
    sentiments[int(thread_no) - 1] = cur_day_sentiment
    all_loc = merge(all_locs, cur_day_locations)
def getSentiment(path):
    new_data = []

    # Facebook posts
    if os.path.isfile(path + 'json-facebook_data/posts/your_posts_1.json'):
        json_file = open(path + 'json-facebook_data/posts/your_posts_1.json',
                         encoding="utf8", errors='ignore')
        json_list = [line.strip() for line in json_file]
        for doc in json_list:
            json_doc = json.loads(doc)
            if 'all_text' in json_doc:
                my_text = json_doc["all_text"]
                my_time = json_doc["timestamp"]
                sentiment = s.getSentiment(my_text)
                new_data.append({"date": my_time, "mySentiment": sentiment[0],
                                 "sentPositive": sentiment[1], "sentNegative": sentiment[2],
                                 "type": "post", "source": "facebook"})

    # Facebook posts in groups
    if os.path.isfile(path + 'json-facebook_data/groups/your_posts_and_comments_in_groups_fixed.json'):
        json_file1 = open(path + 'json-facebook_data/groups/your_posts_and_comments_in_groups_fixed.json',
                          encoding="utf8", errors='ignore')
        json_list1 = [line.strip() for line in json_file1]
        for doc in json_list1:
            json_doc = json.loads(doc)
            my_time = json_doc["timestamp"]
            if 'data' in json_doc:
                for dt in json_doc['data']:
                    if 'post' in dt:
                        my_text = dt["post"]
                        sentiment = s.getSentiment(my_text)
                        new_data.append({"date": my_time, "mySentiment": sentiment[0],
                                         "sentPositive": sentiment[1], "sentNegative": sentiment[2],
                                         "type": "postGroup", "source": "facebook"})

    # Facebook comments
    if os.path.isfile(path + 'json-facebook_data/comments/comments.json'):
        json_file2 = open(path + 'json-facebook_data/comments/comments.json',
                          encoding="utf8", errors='ignore')
        json_list2 = [line.strip() for line in json_file2]
        for doc in json_list2:
            json_doc = json.loads(doc)
            my_time = json_doc["timestamp"]
            if "data" in json_doc:
                my_text_location = json_doc["data"][0]["comment"]
                my_text = my_text_location["comment"]
                sentiment = s.getSentiment(my_text)
                new_data.append({"date": my_time, "mySentiment": sentiment[0],
                                 "sentPositive": sentiment[1], "sentNegative": sentiment[2],
                                 "type": "comment", "source": "facebook"})

    # Twitter
    if os.path.isfile(path + 'json-twitter_data/tweet.json'):
        json_file3 = open(path + 'json-twitter_data/tweet.json',
                          encoding="utf8", errors='ignore')
        json_list3 = [line.strip() for line in json_file3]
        for doc in json_list3:
            json_doc1 = json.loads(doc)
            if 'tweet' in json_doc1:
                json_doc = json_doc1["tweet"]
            else:
                json_doc = json_doc1
            if not json_doc['full_text'].startswith("RT @"):
                my_text = json_doc["full_text"]
                my_time = json_doc["created_at"]
                sentiment = s.getSentiment(my_text)
                new_data.append({"date": my_time, "mySentiment": sentiment[0],
                                 "sentPositive": sentiment[1], "sentNegative": sentiment[2],
                                 "type": "tweet", "source": "twitter"})

    return new_data
# Dictionary of emotion counts
emotions = {
    'anger': 0,
    'fear': 0,
    'anticipation': 0,
    'surprise': 0,
    'sadness': 0,
    'joy': 0,
    'disgust': 0,
    'negative': 0,
    'positive': 0
}

search_results = api.search(q=trend + ' -filter:retweets', count=100, lang='en')
for tweet in search_results:
    # Clean the tweet, get its sentiment, and add it to the emotion counts
    text = tweet.text
    text = helper.clean_tweet(text)
    emotion = sentiment.getSentiment(text)
    for e in emotion:
        emotions[e] = emotions.get(e) + 1

# The emotion with the most tweets wins
trendsEmotions.append(max(emotions, key=emotions.get))

# Change the colour of the hue
hue_control.controlHue(mode(trendsEmotions))
print('done ' + mode(trendsEmotions))
time.sleep(180)
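# A small self-contained example of the two selection steps used above, assuming
# mode here is statistics.mode: max(..., key=dict.get) picks the emotion with the
# highest count for one trend, and mode picks the most common winner across
# trends. The counts below are made-up illustration values.
from statistics import mode

emotions = {'anger': 2, 'joy': 7, 'sadness': 1}
print(max(emotions, key=emotions.get))   # -> 'joy'

trendsEmotions = ['joy', 'anger', 'joy']
print(mode(trendsEmotions))              # -> 'joy'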
def bulkJsonData(json_file, _index, whatStuff):
    json_list = c.getDataFromFile(json_file)
    for doc in json_list:
        json_doc = json.loads(doc)
        sentiment = [0, 0, 0]
        # Use a generator ('yield') so that the data isn't all loaded into memory.
        if '{"index"' not in doc:
            # Clean the text in posts and the title from special characters and
            # emojis left over after the JSON conversion.
            if 'data' in json_doc:
                for dt in json_doc['data']:
                    if 'post' in dt:
                        my_text = dt["post"]
                        # Get sentiment
                        sentiment = s.getSentiment(my_text)
                        clean_my_text = c.cleanText(my_text)
                        dt.update([("post", clean_my_text)])
                        json_doc.update([("all_text", clean_my_text)])
            if 'attachments' in json_doc:
                for att in json_doc['attachments']:
                    if 'data' in att:
                        for dt in att['data']:
                            if 'external_context' in dt:
                                if 'name' in dt["external_context"]:
                                    my_text2 = dt["external_context"]["name"]
                                    clean_my_text2 = c.cleanText(my_text2)
                                    dt["external_context"].update([("name", clean_my_text2)])
                            if 'media' in dt:
                                my_title2 = dt['media']['title']
                                clean_my_title2 = c.cleanText(my_title2)
                                dt['media'].update([("title", clean_my_title2)])
                                if 'description' in dt['media']:
                                    my_description = dt['media']["description"]
                                    clean_my_description = c.cleanText(my_description)
                                    dt['media'].update([("description", clean_my_description)])
                            if 'place' in dt:
                                my_loc = dt["place"]["coordinate"]
                                my_lat = my_loc["latitude"]
                                my_lon = my_loc["longitude"]
                                new_my_loc = [my_lon, my_lat]
                                dt["place"].update([("location", new_my_loc)])
            if 'title' in json_doc:
                my_title = json_doc["title"]
                clean_my_title = c.cleanText(my_title)
                json_doc.update([("title", clean_my_title)])

            # Add sentiment
            json_doc.update([("mySentiment", sentiment[0])])
            json_doc.update([("sentPositive", sentiment[1])])
            json_doc.update([("sentNegative", sentiment[2])])

            # Add load_type, used later for filtering
            json_doc.update([("load_type", whatStuff)])
            json_doc.update([("source_type", "facebook")])

            new_doc = str(json_doc).replace("'", '"')
            yield {"_index": _index, "_id": uuid.uuid4(), "_source": new_doc}
def product(id):
    fileName = id + ".txt"
    srcComment = "comments/" + fileName
    srcPhrase = "phrases/" + fileName

    # If this id has already been searched, load the cache.
    if os.path.exists(srcPhrase):
        print("loading cache of phrase")
    else:
        if os.path.exists(srcComment):
            print("loading cache of text")
        else:
            f = open('id.txt', 'w')
            f.write(id)
            f.close()
            scrapper('id.txt')
            print('********** scrapping finished *************')

        # Move the review to the AutoPhrase directory and run it.
        desComment = "../AutoPhrase/data/"
        copyfile(srcComment, desComment + 'review.txt')
        # subprocess.call(['../AutoPhrase/auto_phrase.sh'])
        call("./auto_phrase.sh", cwd="../AutoPhrase", shell=True)
        copyfile("../AutoPhrase/models/AutoPhrase.txt", "phrases/" + fileName)

    response = {}
    frequencies = []
    phrase_file = 'phrases/' + fileName
    with open(phrase_file) as fp:
        lines = fp.readlines()

    # Turn the top-scoring phrases into word-cloud entries, scaling the size by
    # quality score and rank.
    count = 0
    for line in lines:
        if count > 100:
            break
        lineContent = line.split()
        if len(lineContent) >= 2:
            score = float(lineContent[0])
            separator = " "
            phrase = separator.join(lineContent[1:])
            if score == 1:
                continue
            if count <= 8:
                frequencies.append({"text": phrase, "size": int(score * 80)})
            elif count <= 40:
                frequencies.append({"text": phrase, "size": int(score * 50)})
            elif count <= 70:
                frequencies.append({"text": phrase, "size": int(score * 40)})
            elif count <= 100 and score > 0.3:
                frequencies.append({"text": phrase, "size": max(20, int(score * 20))})
            count += 1

    senti = getSentiment(id)
    response['frequency'] = frequencies
    response['sentiment'] = senti
    return jsonify(response)
def bulkJsonData(json_file, _index, whatStuff):
    json_list = c.getDataFromFile(json_file)
    for doc in json_list:
        # Use a generator ('yield') so that the data isn't all loaded into memory.
        if '{"index"' not in doc:
            json_doc1 = json.loads(doc)
            sentiment = [0, 0, 0]
            if 'tweet' in json_doc1:
                json_doc = json_doc1["tweet"]
            else:
                json_doc = json_doc1

            my_text = json_doc["full_text"]
            # Get sentiment (skip retweets)
            if not my_text.startswith("RT"):
                sentiment = s.getSentiment(my_text)
            clean_my_text = c.cleanText(my_text)
            json_doc.update([("full_text", clean_my_text)])

            my_text2 = json_doc["source"]
            clean_my_text2 = c.cleanText(my_text2)
            json_doc.update([("source", clean_my_text2)])

            # Does not like "False", needs to be "false" !!!
            # my_text1 = json_doc["retweeted"]
            # clean_my_text1 = c.cleanText(str(my_text1))
            # json_doc.update([ ("retweeted", clean_my_text1) ])
            # if 'truncated' in json_doc:
            #     my_text3 = json_doc["truncated"]
            #     clean_my_text3 = c.cleanText(str(my_text3))
            #     json_doc.update([ ("truncated", clean_my_text3) ])
            # if 'favorited' in json_doc:
            #     my_text4 = json_doc["favorited"]
            #     clean_my_text4 = c.cleanText(str(my_text4))
            #     json_doc.update([ ("favorited", clean_my_text4) ])
            # if 'possibly_sensitive' in json_doc:
            #     my_text5 = json_doc["possibly_sensitive"]
            #     clean_my_text5 = c.cleanText(str(my_text5))
            #     json_doc.update([ ("possibly_sensitive", clean_my_text5) ])

            if 'in_reply_to_screen_name' in json_doc:
                my_name = json_doc["in_reply_to_screen_name"]
                clean_my_name = c.cleanText(my_name)
                json_doc.update([("in_reply_to_screen_name", clean_my_name)])

            if 'user_mentions' in json_doc["entities"]:
                for usr in json_doc["entities"]['user_mentions']:
                    my_name1 = usr["name"]
                    clean_my_name1 = c.cleanText(my_name1)
                    usr.update([("name", clean_my_name1)])
                    my_name2 = usr["screen_name"]
                    clean_my_name2 = c.cleanText(my_name2)
                    usr.update([("screen_name", clean_my_name2)])

            for usr in json_doc["entities"]['urls']:
                my_name3 = usr["url"]
                clean_my_name3 = c.cleanText(my_name3)
                usr.update([("url", clean_my_name3)])
                my_name4 = usr["expanded_url"]
                clean_my_name4 = c.cleanText(my_name4)
                usr.update([("expanded_url", clean_my_name4)])
                my_name5 = usr["display_url"]
                clean_my_name5 = c.cleanText(my_name5)
                usr.update([("display_url", clean_my_name5)])

            # if 'media' in json_doc:
            #     my_media = json_doc["media"]
            #     if 'additional_media_info' in my_media:
            #         my_name6 = my_media["additional_media_info"]
            #         clean_my_name6 = c.cleanText(my_name6)
            #         my_media.update([ ("additional_media_info", clean_my_name6) ])

            # Add sentiment
            json_doc.update([("mySentiment", sentiment[0])])
            json_doc.update([("sentPositive", sentiment[1])])
            json_doc.update([("sentNegative", sentiment[2])])

            # Add load_type, used later for filtering
            json_doc.update([("load_type", whatStuff)])
            json_doc.update([("source_type", "twitter")])

            new_doc = str(json_doc).replace("'", '"')
            # print(new_doc)
            new_doc = new_doc.replace("False", "false")
            new_doc = new_doc.replace("True", "true")
            yield {"_index": _index, "_id": uuid.uuid4(), "_source": new_doc}
def SetValues():
    # Function to return a floating point representation to the front end
    comments = request.form.getlist('comments[]')
    links = request.form.getlist('links[]')
    return jsonify({"data": getSentiment(comments, links)})  # ValueArray
usernames = json.load(usernamesFile)
results = []

# enumerate() yields (index, item), so i is the position and d is the username record.
for i, d in enumerate(usernames):
    print("checking", d['pageID'])
    con = sqlite3.connect('scraperwiki.sqlite')
    query = 'select * from comments where pageID="{pageID}"'.format(
        pageID=d['pageID'])
    table = sql.read_sql(query, con)
    # print(table)
    rows = table.iterrows()
    abuseTweets = []
    for row in rows:
        sentimentResult = sentiment.getSentiment(row[1]['message'])
        print(sentimentResult)
        results.append({
            "pol_id": i,
            "partyCode": d["partyCode"],
            "gender": d["gender"],
            "message": row[1]['message'],
            "positive": sentimentResult['positive'],
            "negative": sentimentResult['negative'],
            "createdTime": row[1]['createdTime']
        })

# print(results)
with open('facebook-sentiment-test.csv', 'w') as csvoutput:
    dict_writer = csv.DictWriter(csvoutput, results[0].keys())
    dict_writer.writeheader()
    dict_writer.writerows(results)