def wordCounter(dictData, tweet_clean, tweet_geo):
    """Count a tweet's words for its city and mirror each count into MongoDB.

    Args:
        dictData: Mutable mapping of ``city -> {word: count}``; updated in
            place.
        tweet_clean: Iterable of words to count.
        tweet_geo: Indexable whose first element is itself indexable; its
            index-1 and index-0 values are passed, in that order, to
            ``getCity.getCity``.
            (presumably a coordinate pair -- TODO confirm which is lat/lon)

    Returns:
        None. Returns early, leaving the remaining words uncounted, as soon
        as ``db.WordCount`` has no document for the resolved city.
    """
    city = getCity.getCity(tweet_geo[0][1], tweet_geo[0][0])

    for word in tweet_clean:
        # Update the in-memory tally first; the database write below mirrors it.
        city_counts = dictData.setdefault(city, {})
        city_counts[word] = city_counts.get(word, 0) + 1

        # Re-read the city document on every word: its ``data`` array may have
        # grown during the previous iteration.
        doc = db.WordCount.find_one({'city': city})
        if doc is None:
            return

        if db.WordCount.find_one({"city": city, "data.word": word}):
            # The word already has an array entry: overwrite it in place via
            # the positional ``$`` operator. The trailing True is the third
            # positional argument (``upsert`` in pymongo's legacy update()).
            db.WordCount.update(
                {"city": city, "data.word": word},
                {'$set': {'data.$.word': word,
                          'data.$.count': city_counts[word]}},
                True)
        else:
            # Array indexes are zero-based, so the first free slot is
            # len(data). Using len(data) + 1 would skip a slot on every insert.
            slot = 'data.' + str(len(doc['data']))
            # NOTE(review): a new entry is stored with count 1 even when the
            # in-memory count is higher -- confirm this is intended.
            db.WordCount.update(
                {"city": city, "data.word": {'$exists': True}},
                {'$set': {slot + '.word': word, slot + '.count': 1}})
def processWordCount(jsonFile, tweet_text, tweet_geo):
    """Add a tweet's word counts to a JSON-encoded per-city tally and print it.

    NOTE(review): this file defines ``processWordCount`` a second time further
    down with a different signature; if both sit at module level, the later
    definition replaces this one.

    Args:
        jsonFile: JSON text that decodes to a ``city -> {word: count}``
            object (it is passed to ``json.loads``, so despite the name it
            is a string, not a file).
        tweet_text: Raw tweet text, split into words by ``tweetTokenizer``.
        tweet_geo: Indexable; its index-1 and index-0 values are passed, in
            that order, to ``getCity.getCity``.

    Returns:
        None. The updated word counts for the resolved city are printed.
    """
    wordList = tweetTokenizer(tweet_text)
    city = getCity.getCity(tweet_geo[1], tweet_geo[0])
    json1_data = json.loads(jsonFile)

    # Create the city's bucket on demand so an unseen city does not raise
    # KeyError.
    city_counts = json1_data.setdefault(city, {})
    for word in wordList:
        # Every occurrence counts, including the first one (a new word must
        # end up at 1, not 0).
        city_counts[word] = city_counts.get(word, 0) + 1

    # Parenthesised single-argument form prints the same text under the
    # Python 2 print statement and the Python 3 print function.
    print(json1_data[city])
def processWordCount(tweet_text,tweet_geo): wordList = tweetTokenizer(tweet_text) city = getCity.getCity(tweet_geo[0],tweet_geo[1]) print city, tweet_geo[1], tweet_geo[0] json1_data = MongoToJson() for word in wordList: if city not in json1_data: json1_data[city] = {} if word not in json1_data[city].keys(): json1_data[city][word] = 0 json1_data[city][word] += 1 #db.WordCount.remove() db.WordCount2.update({},json1_data)