def find_happiest_state(sentiment_file, tweet_file):
    """Print the US state whose tweets have the highest average sentiment.

    Args:
        sentiment_file (string): Filename of a text file containing the
            space-delimited AFINN dictionary of word:sentiment-value pairs.
        tweet_file (string): Filename of a text file containing a string
            representation of a JSON tweet object on each line.

    Returns:
        (None) prints the happiest state's abbreviation to stdout.
    """
    sentiment_dict = build_dict(sentiment_file)
    # key: state abbreviation
    # value: [running tweet count, running average sentiment score]
    state_sentiment = {}
    with open(tweet_file, 'r') as tweets:
        for tweet in tweets:
            tweet = json.loads(tweet)
            # Coursera grader raises KeyError, wrap in try/except block
            try:
                place = tweet['place']
                if place is not None and place['country'] == 'United States':
                    # full_name looks like "City, ST"; the last
                    # whitespace-separated token is the state abbreviation.
                    tweet_state = place['full_name'].split()[-1]
                    if tweet_state in states:
                        current_score = score_tweet(tweet['text'],
                                                    sentiment_dict)
                        # First item is the running count, second the average
                        count, avg = state_sentiment.get(tweet_state, [0, 0])
                        updated_count = count + 1
                        # BUGFIX: float() guards against silent integer
                        # truncation of the average under Python 2.
                        updated_score = ((count * avg + current_score)
                                         / float(updated_count))
                        state_sentiment[tweet_state] = [updated_count,
                                                        updated_score]
            except KeyError:
                pass
    # Sort by score (second number in the list of values), highest first;
    # .items() + a plain lambda replaces Py2-only .iteritems() and
    # tuple-unpacking lambdas.
    sorted_states = sorted(state_sentiment.items(),
                           key=lambda kv: (kv[1][1], kv[0]),
                           reverse=True)
    # BUGFIX: the original printed sorted_states[1][0] -- the SECOND entry --
    # despite its comment saying "the first entry". After a descending sort
    # the happiest state is index 0.
    print(sorted_states[0][0])
def report_UNK_avg_sentiment(sentiments_file_name, tweet_file_name):
    '''Keeps a running dictionary of the average sentiment score for any
    words not found in the AFINN sentiment word score text file.

    An unseen word's per-occurrence score is estimated as the average word
    sentiment of the tweet it appears in, then averaged with all of its
    other occurrences across the tweet file. Could return the dictionary
    to keep a running affect dictionary between sessions.

    Args:
        sentiments_file_name (string): Filename of a text file containing
            the space-delimited AFINN dictionary of word:sentiment-value
            pairs.
        tweet_file_name (string): Filename of a text file containing a
            string representation of a JSON tweet object on each line.

    Returns:
        (None) prints word/score pairs to stdout
    '''
    AFINN_dict = build_dict(sentiments_file_name)
    # Running dictionary of words NOT in AFINN, kept separate from AFINN
    # since those words are already scaled.
    #   key: word
    #   value: [times seen, running average score]
    new_word_AFINN = {}
    with open(tweet_file_name, 'r') as tweets:
        for tweet in tweets:
            # Extract the text of the tweet from the json object/string
            tweet = extract_txt_from_json_string(tweet)
            tweet = strip_punct(tweet)
            tweet_words = [word.lower() for word in tweet.split()]
            # BUGFIX: an empty tweet (e.g. punctuation-only) would divide
            # by zero below.
            if not tweet_words:
                continue
            total_tweet_sentiment = score_tweet(tweet, AFINN_dict)
            # Per-word estimate for unseen words: the tweet's average word
            # sentiment. Loop-invariant, so hoisted out of the word loop.
            word_sent = total_tweet_sentiment / float(len(tweet_words))
            for word in tweet_words:
                try:
                    # If word is in AFINN, print word:value.
                    # BUGFIX: was `assert(AFINN_dict[word])` -- asserts are
                    # stripped under -O, and a legitimate score of 0 would
                    # raise an uncaught AssertionError. The plain lookup
                    # raises KeyError on a miss all by itself.
                    print('%s %s' % (word, AFINN_dict[word]))
                except KeyError:
                    # Word not in AFINN: fold this occurrence into its
                    # running average. Default to [0 seen, 0 score].
                    count, avg = new_word_AFINN.get(word, [0, 0])
                    count += 1
                    # BUGFIX: the original computed
                    # (old_avg + word_sent) / count, which under-weights
                    # history; a running mean must re-weight the old
                    # average by its previous count.
                    new_score = (avg * (count - 1) + word_sent) / count
                    new_word_AFINN[word] = [count, new_score]
                    print('%s %s' % (word, new_score))
# NOTE(review): this is a near-byte duplicate of the report_UNK_avg_sentiment
# defined earlier in this file; at import time this later definition shadows
# the earlier one. Consider deleting one of the two copies.
def report_UNK_avg_sentiment(sentiments_file_name, tweet_file_name):
    '''Keeps a running dictionary of the average sentiment score for any
    words not found in the AFINN sentiment word score text file.

    An unseen word's per-occurrence score is estimated as the average word
    sentiment of the tweet it appears in, then averaged with all of its
    other occurrences across the tweet file. Could return the dictionary
    to keep a running affect dictionary between sessions.

    Args:
        sentiments_file_name (string): Filename of a text file containing
            the space-delimited AFINN dictionary of word:sentiment-value
            pairs.
        tweet_file_name (string): Filename of a text file containing a
            string representation of a JSON tweet object on each line.

    Returns:
        (None) prints word/score pairs to stdout
    '''
    AFINN_dict = build_dict(sentiments_file_name)
    # Running dictionary of words NOT in AFINN, kept separate from AFINN
    # since those words are already scaled.
    #   key: word
    #   value: [times seen, running average score]
    new_word_AFINN = {}
    with open(tweet_file_name, 'r') as tweets:
        for tweet in tweets:
            # Extract the text of the tweet from the json object/string
            tweet = extract_txt_from_json_string(tweet)
            tweet = strip_punct(tweet)
            tweet_words = [word.lower() for word in tweet.split()]
            # BUGFIX: an empty tweet (e.g. punctuation-only) would divide
            # by zero below.
            if not tweet_words:
                continue
            total_tweet_sentiment = score_tweet(tweet, AFINN_dict)
            # Per-word estimate for unseen words: the tweet's average word
            # sentiment. Loop-invariant, so hoisted out of the word loop.
            word_sent = total_tweet_sentiment / float(len(tweet_words))
            for word in tweet_words:
                try:
                    # If word is in AFINN, print word:value.
                    # BUGFIX: was `assert (AFINN_dict[word])` -- asserts are
                    # stripped under -O, and a legitimate score of 0 would
                    # raise an uncaught AssertionError. The plain lookup
                    # raises KeyError on a miss all by itself.
                    print('%s %s' % (word, AFINN_dict[word]))
                except KeyError:
                    # Word not in AFINN: fold this occurrence into its
                    # running average. Default to [0 seen, 0 score].
                    count, avg = new_word_AFINN.get(word, [0, 0])
                    count += 1
                    # BUGFIX: the original computed
                    # (old_avg + word_sent) / count, which under-weights
                    # history; a running mean must re-weight the old
                    # average by its previous count.
                    new_score = (avg * (count - 1) + word_sent) / count
                    new_word_AFINN[word] = [count, new_score]
                    print('%s %s' % (word, new_score))