def main(): data.city_corpus_dict() #seeds words table with city, word, prob(w/city) data.seed_words_table() data.city_tweet_corpus_dict() for city in cities: data.create_region_tweet_count(city) feature_selection.populate_db_with_features(city) data.create_tweet_total_count()
def list_features(city_name):
    """Render city_words.html for the city whose name equals *city_name*.

    Looks the city up in data.cities; when found, gathers its hardcoded
    feature list, coordinates, tweet count and corpus word count for the
    template.  Implicitly returns None when no city matches, preserving
    the original not-found behavior.
    """
    cities = data.cities
    city = None
    feature_list = None
    # Iterate the city objects directly and stop at the first name match:
    # the original scanned by index and kept looping (and re-fetching
    # features) even after a hit.
    for candidate in cities:
        if candidate.name == city_name:
            feature_list = feature_selection.get_hardcoded_features(candidate)
            city = candidate
            break
    if city:
        latitude = city.lat
        longitude = city.lon
        city_name = city.name
        city_tweet_count = data.create_region_tweet_count(city)
        city_word_count = data.find_leng_city_corpus(city)
        return render_template("city_words.html",
                               features=feature_list,
                               city_name=city_name,
                               latitude=latitude,
                               longitude=longitude,
                               city_tweet_count=city_tweet_count,
                               city_word_count=city_word_count)
def classify_text():
    """Classify the POSTed tweet text and render the ranked-cities map page.

    Reads 'tweet' from the request form, ranks cities for it, and collects
    per-city corpus sizes, tweet counts and matched feature strings for the
    template.  The timing instrumentation (prints to stdout) is kept from
    the original.
    """
    tweet = request.form['tweet']

    start = datetime.datetime.now()
    rankings = data.create_ranking(tweet)
    end = datetime.datetime.now()
    # Single %-formatted argument: print(...) behaves identically under
    # Python 2 (original print statements) and Python 3.
    print('getting city rankings takes: %s' % (end - start))

    start = datetime.datetime.now()
    feature_strings_dict = {}
    city_corpus_leng_dict = {}
    city_tweet_count_dict = {}
    for city in cities:
        city_corpus_leng_dict[city.name] = data.find_leng_city_corpus(city)
        city_tweet_count_dict[city.name] = data.create_region_tweet_count(city)
        feature_strings_dict[city.name] = \
            feature_selection.included_feature_strings(city, tweet)
    end = datetime.datetime.now()
    print('getting top 5 words takes: %s' % (end - start))

    start = datetime.datetime.now()
    # NOTE(review): word_count_dict is never used below; kept (with its
    # timing) so the stdout instrumentation output stays unchanged.
    cty_corpus_dict = data.city_corpus_dict()
    word_count_dict = cty_corpus_dict[rankings[0][0].name]
    end = datetime.datetime.now()
    print('getting bogus word count dict takes: %s' % (end - start))

    start = datetime.datetime.now()
    # Ranked city names, best match first (comprehension replaces the
    # original range(len(...)) append loop).
    names = [ranking[0].name for ranking in rankings]
    end = datetime.datetime.now()
    print('generating lists takes: %s' % (end - start))

    return render_template("map.html",
                           tweet=tweet,
                           city_tweet_count_dict=city_tweet_count_dict,
                           names=names,
                           city_corpus_leng_dict=city_corpus_leng_dict,
                           feature_strings_dict=feature_strings_dict,
                           rankings=rankings)
def get_tweet_word_counts(word, city):
    """Build the 2x2 contingency-table counts for *word* vs. *city*.

    N11: tweets in `city` containing `word`
    N01: tweets in `city` not containing `word`
    N10: tweets outside `city` containing `word`
    N00: tweets outside `city` not containing `word`

    Returns a dict of the four counts as floats (presumably so callers can
    do ratio arithmetic without integer division — TODO confirm).
    """
    city_tweet_corpus_dict = data.city_tweet_corpus_dict()
    word_tweet_count_dic = city_tweet_corpus_dict[city.name]
    # Tweets in this city that contain the word.
    N11 = word_tweet_count_dic.get(word, 0)
    # Total tweets attributed to this city.
    Ndot_1 = data.create_region_tweet_count(city)
    N01 = Ndot_1 - N11
    # Grand total of tweets across all cities.
    N = data.create_tweet_total_count()
    Ndot_0 = N - Ndot_1
    # Tweets containing the word across every city (sum() replaces the
    # original manual accumulation loop).
    N1_dot = sum(city_tweet_corpus_dict[c.name].get(word, 0) for c in cities)
    N10 = N1_dot - N11
    N00 = Ndot_0 - N10
    # NOTE(review): the original also computed N0_dot = N00 + N01 but never
    # used or returned it; dropped as dead code.
    return {'N10': float(N10),
            'N11': float(N11),
            'N01': float(N01),
            'N00': float(N00)}
def list_features(city_name):
    """Render the city_words.html page for the city named *city_name*.

    Returns None implicitly when no city matches.

    NOTE(review): this redefines list_features from earlier in the file;
    in Python the later definition shadows the earlier one.
    """
    all_cities = data.cities
    match = None
    # Scan every city; as in the original, a later duplicate name would
    # override an earlier match (no early exit).
    for candidate in all_cities:
        if candidate.name == city_name:
            feature_list = feature_selection.get_hardcoded_features(candidate)
            match = candidate
    if match:
        return render_template("city_words.html",
                               features=feature_list,
                               city_name=match.name,
                               latitude=match.lat,
                               longitude=match.lon,
                               city_tweet_count=data.create_region_tweet_count(match),
                               city_word_count=data.find_leng_city_corpus(match))
def classify_text(): tweet = request.form['tweet'] start = datetime.datetime.now() rankings = data.create_ranking(tweet) end = datetime.datetime.now() print 'getting city rankings takes: %s' % (end - start) start = datetime.datetime.now() feature_strings_dict = {} city_corpus_leng_dict = {} city_tweet_count_dict = {} for city in cities: corpus_leng = data.find_leng_city_corpus(city) city_corpus_leng_dict[city.name] = corpus_leng city_tweet_count = data.create_region_tweet_count(city) city_tweet_count_dict[city.name] = city_tweet_count feature_strings = feature_selection.included_feature_strings(city, tweet) feature_strings_dict[city.name] = feature_strings end = datetime.datetime.now() print 'getting top 5 words takes: %s' % (end - start) start = datetime.datetime.now() cty_corpus_dict = data.city_corpus_dict() word_count_dict = cty_corpus_dict[rankings[0][0].name] end = datetime.datetime.now() print 'getting bogus word count dict takes: %s' % (end - start) start = datetime.datetime.now() names = [] for i in range(0, len(rankings)): city_name = rankings[i][0].name names.append(city_name) end = datetime.datetime.now() print 'generating lists takes: %s' % (end - start) return render_template("map.html", tweet=tweet, city_tweet_count_dict=city_tweet_count_dict, names=names, city_corpus_leng_dict=city_corpus_leng_dict, feature_strings_dict=feature_strings_dict, rankings=rankings)
def get_tweet_word_counts(word, city):
    """Return the word/city contingency counts N11, N01, N10, N00 as floats.

    NOTE(review): duplicate of an earlier get_tweet_word_counts definition;
    this later one shadows it.
    """
    corpus_by_city = data.city_tweet_corpus_dict()
    in_city_counts = corpus_by_city[city.name]

    n11 = in_city_counts.get(word, 0)              # in city, with word
    city_total = data.create_region_tweet_count(city)
    n01 = city_total - n11                         # in city, without word
    grand_total = data.create_tweet_total_count()
    other_total = grand_total - city_total         # tweets outside city

    word_total = 0                                 # tweets with word, any city
    for other in cities:
        word_total += corpus_by_city[other.name].get(word, 0)

    n10 = word_total - n11                         # outside city, with word
    n00 = other_total - n10                        # outside city, without word
    n0_dot = n00 + n01  # computed but unused in the original too
    return {'N10': float(n10),
            'N11': float(n11),
            'N01': float(n01),
            'N00': float(n00)}