def getStateCodeFromLocation(location):
    """Resolve a free-form location string to a key of ``states``.

    Resolution order:
      1. a two-character string that is itself a key of ``states``
         (compared upper-cased);
      2. a string equal to a full state name, looked up in ``inv_states``;
      3. a state name occurring anywhere inside the cleaned location text.

    Presumably ``states`` maps two-letter codes to full state names
    (verify against the ``states`` module).

    Returns the matching key of ``states``, or '' when nothing matches.
    """
    code = location.upper()
    if len(code) == 2 and code in states:
        return code

    # inv_states is keyed by lower-cased names, so the lookup must be
    # lower-cased as well; an upper-cased key could never match.
    name = location.lower()
    if name in inv_states:
        return inv_states[name]

    cleanLocation = cleanString(location).lower()

    # Prefer the longest name found so that a name contained in another
    # (e.g. "kansas" inside "arkansas") does not win by iteration order.
    bestCode = ''
    bestLength = -1
    for shortName, longName in states.items():
        candidate = longName.lower()
        if candidate in cleanLocation and len(candidate) > bestLength:
            bestCode = shortName
            bestLength = len(candidate)
    return bestCode
import sys
import json
import re

from states import states

# Reverse lookup: lower-cased value of ``states`` -> its key.
inv_states = {v.lower(): k for k, v in states.items()}


def cleanString(string):
    """Return *string* lower-cased with noise removed.

    Replaces @mentions, any character that is not an ASCII letter, digit,
    space or tab, and ``scheme://...`` runs with a space, then collapses
    all whitespace to single spaces.
    """
    stripped = re.sub(
        r"(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)", " ", string
    )
    return ' '.join(stripped.split()).lower()


def parseSentimentFile(fp):
    """Read a tab-delimited ``term<TAB>score`` file into a dict.

    Args:
        fp: an iterable of text lines (e.g. an open file).

    Returns:
        dict mapping each term to its score as an int.

    Raises:
        ValueError: if a non-blank line does not have exactly two
            tab-separated fields, or the score is not an integer.
    """
    scores = {}
    for line in fp:
        if not line.strip():
            # Blank lines (such as a trailing empty line) carry no entry.
            continue
        term, score = line.split("\t")  # The file is tab-delimited.
        scores[term] = int(score)  # int() tolerates the trailing newline.
    return scores


def getTweetState(tweet):
    """Return the state code for a tweet dict, or whatever
    ``getStateCodeFromLocation`` yields for an empty location.

    The tweet's ``place`` name is used when the place is a US
    ``admin``-type place; otherwise the user's profile ``location`` is
    used. Missing or empty ``place`` / ``user`` entries are treated as
    "no location" instead of raising.
    """
    place = tweet.get('place')
    user = tweet.get('user') or {}

    location = ''
    if (
        place
        and place.get('country_code') == 'US'
        and place.get('place_type') == 'admin'
    ):
        location = place.get('name') or ''
    elif user.get('location'):
        location = user['location']

    return getStateCodeFromLocation(location)


def getStateCodeFromLocation(location): location = location.upper()