def main():
    """Run the seeding steps in order: corpora, words table, per-city data, total.

    Every step is a call into ``data`` / ``feature_selection``; nothing is
    returned.
    """
    data.city_corpus_dict()
    # Seeds the words table with city, word, prob(w/city).
    data.seed_words_table()
    data.city_tweet_corpus_dict()

    for region in cities:
        data.create_region_tweet_count(region)
        feature_selection.populate_db_with_features(region)

    # NOTE(review): the original formatting was lost, so it is not certain
    # whether this call belonged inside the loop above or after it; it is
    # placed after the loop here -- confirm against history.
    data.create_tweet_total_count()
def get_tweet_word_counts(word, city):
    """Return the 2x2 word-by-city contingency counts for ``word`` and ``city``.

    In each returned key the first digit says whether the word is present (1)
    or absent (0); the second digit says whether the count belongs to ``city``
    (1) or to everything outside it (0).

    NOTE(review): a second definition with the same name follows this one in
    the module and replaces it at import time; consider deleting one of them.

    Args:
        word: Key looked up in each city's word-count mapping (missing words
            count as 0).
        city: Object exposing a ``name`` attribute; it is also passed to
            ``data.create_region_tweet_count``.

    Returns:
        dict: float values under the keys ``'N10'``, ``'N11'``, ``'N01'`` and
        ``'N00'``.

    Raises:
        KeyError: If ``city.name``, or the name of any entry in ``cities``, is
            absent from the mapping returned by
            ``data.city_tweet_corpus_dict()``.
    """
    # presumably maps city name -> {word: number of tweets containing it};
    # verify against data.city_tweet_corpus_dict.
    city_tweet_corpus_dict = data.city_tweet_corpus_dict()

    # Word present, inside the city.
    N11 = city_tweet_corpus_dict[city.name].get(word, 0)

    # Everything inside the city, then the part of it without the word.
    Ndot_1 = data.create_region_tweet_count(city)
    N01 = Ndot_1 - N11

    # Everything outside the city.
    N = data.create_tweet_total_count()
    Ndot_0 = N - Ndot_1

    # Word present across every city listed in the module-level ``cities``.
    N1_dot = sum(
        city_tweet_corpus_dict[c.name].get(word, 0) for c in cities
    )

    # Word present outside the city; the remainder outside is word-absent.
    N10 = N1_dot - N11
    N00 = Ndot_0 - N10

    return {
        'N10': float(N10),
        'N11': float(N11),
        'N01': float(N01),
        'N00': float(N00),
    }
def get_tweet_word_counts(word, city):
    """Return the 2x2 word-by-city contingency counts for ``word`` and ``city``.

    Key naming: the first digit is 1 when the word is present and 0 when it is
    absent; the second digit is 1 for counts inside ``city`` and 0 for counts
    outside it.

    NOTE(review): this is the second definition of this name in the module,
    so it is the one callers actually get; the earlier one is shadowed.

    Args:
        word: Key looked up in each city's word-count mapping (missing words
            count as 0).
        city: Object exposing a ``name`` attribute; it is also passed to
            ``data.create_region_tweet_count``.

    Returns:
        dict: float values under the keys ``'N10'``, ``'N11'``, ``'N01'`` and
        ``'N00'``.

    Raises:
        KeyError: If ``city.name``, or the name of any entry in ``cities``, is
            absent from the mapping returned by
            ``data.city_tweet_corpus_dict()``.
    """
    # presumably maps city name -> {word: number of tweets containing it};
    # verify against data.city_tweet_corpus_dict.
    city_tweet_corpus_dict = data.city_tweet_corpus_dict()

    # Word present, inside the city.
    N11 = city_tweet_corpus_dict[city.name].get(word, 0)

    # City total, and the share of it that lacks the word.
    Ndot_1 = data.create_region_tweet_count(city)
    N01 = Ndot_1 - N11

    # Total outside the city.
    N = data.create_tweet_total_count()
    Ndot_0 = N - Ndot_1

    # Word-present total over every city in the module-level ``cities``.
    N1_dot = sum(
        city_tweet_corpus_dict[c.name].get(word, 0) for c in cities
    )

    # Word present outside the city; what is left outside is word-absent.
    N10 = N1_dot - N11
    N00 = Ndot_0 - N10

    return {
        'N10': float(N10),
        'N11': float(N11),
        'N01': float(N01),
        'N00': float(N00),
    }