def import_rent_per_sqft_by_zip():
    """Import Zillow rent-per-square-foot figures into ``zip_codes_rent``.

    Reads ``Zip_ZriPerSqft_AllHomes.csv`` from ``dataset_dirs.ZIP_INCOME_DIR``
    and inserts one ``(zip_code, state_id, metro_id, rent)`` row for every CSV
    row that passes both filters:

    * its ``State`` is a key of ``const.metro_areas_by_state``, and
    * its zip code already has a matching row in the ``zip_codes`` table.

    The rent is the value of the ``2015-10`` column.

    Returns:
        None. The function returns early, touching nothing, when
        ``util.csv_to_dict_list`` returns ``None``.
    """
    filename = dataset_dirs.ZIP_INCOME_DIR + "Zip_ZriPerSqft_AllHomes.csv"
    rows = util.csv_to_dict_list(filename)
    if rows is None:
        # Something went wrong while reading the CSV.
        return

    conn = sqlite3.connect(const.DB_FILENAME)
    try:
        c = conn.cursor()
        try:
            rents = []
            for row in rows:
                state = row["State"]
                if state not in const.metro_areas_by_state:
                    # Ignore states we don't have Yelp data for.
                    continue

                state_id = dao.get_id_of_name(c, "states", state)
                zip_code = int(row["RegionName"])
                metro_id = dao.get_id_of_name(
                    c, "metro_areas", const.metro_areas_by_state[state],
                    state_id=state_id)

                known_zip = dao.get_matching(
                    c, "zip_codes", ["zip_code"],
                    {"zip_code": zip_code,
                     "state_id": state_id,
                     "metro_id": metro_id})
                if not known_zip:
                    # Zip code we don't have Yelp data for, so we don't care.
                    continue

                # ZRI - Zillow Rent Index
                # More info at http://www.zillow.com/research/data/#rental-data
                # NOTE(review): the value is stored exactly as read from the
                # CSV (presumably a string, possibly empty) - confirm that is
                # what consumers of zip_codes_rent expect.
                rent = row["2015-10"]
                rents.append([zip_code, state_id, metro_id, rent])

            c.executemany(
                "INSERT INTO zip_codes_rent(zip_code, state_id, metro_id, rent) "
                "VALUES (?,?,?,?)",
                rents)
            conn.commit()
        finally:
            c.close()
    finally:
        # Release the database handle even when a lookup or the insert fails.
        conn.close()
def get_id_of_name(c, table, name, id_field='id', name_field='name',
                   add_nonexisting=True):
    """Forward a name-to-id lookup to ``dao.get_id_of_name``.

    Every argument is passed through unchanged and the result of the ``dao``
    call is returned as-is.

    Args:
        c: Database cursor handed to the ``dao`` layer.
        table: Name of the table to look in.
        name: Name whose id is wanted.
        id_field: Forwarded as ``id_field`` (default ``'id'``).
        name_field: Forwarded as ``name_field`` (default ``'name'``).
        add_nonexisting: Forwarded as ``add_nonexisting`` (default ``True``);
            presumably controls whether a missing name is inserted - see
            ``dao.get_id_of_name`` for the exact semantics.

    Returns:
        Whatever ``dao.get_id_of_name`` returns.
    """
    lookup_options = {
        'id_field': id_field,
        'name_field': name_field,
        'add_nonexisting': add_nonexisting,
    }
    return dao.get_id_of_name(c, table, name, **lookup_options)
def import_income_by_zip():
    """Import per-zip income-level shares and totals from ``13zpallagi.csv``.

    Reads the CSV from ``dataset_dirs.ZIP_INCOME_DIR``. Rows are kept only
    when their ``STATE`` is a key of ``const.metro_areas_by_state`` and their
    zip code already has a matching row in the ``zip_codes`` table. For each
    kept row the ``agi_stub`` value is translated through
    ``util.get_level_mappings`` and the ``N1`` value is added to that
    (zip code, level) bucket.

    Two tables are then filled:

    * ``zip_codes_income_levels`` - one row per (zip code, level) holding the
      bucket's share of the zip code's total, as a percentage (``0.0`` when
      the total is zero).
    * ``zip_codes_population`` - one row per zip code holding the sum of all
      its buckets.

    Returns:
        None. The function returns early, touching nothing, when
        ``util.csv_to_dict_list`` returns ``None``.
    """
    filename = dataset_dirs.ZIP_INCOME_DIR + "13zpallagi.csv"
    rows = util.csv_to_dict_list(filename)
    if rows is None:
        # Something went wrong while reading the CSV.
        return

    conn = sqlite3.connect(const.DB_FILENAME)
    try:
        c = conn.cursor()
        try:
            # counts[zip_code][level] -> summed N1 for that bucket.
            counts = {}
            level_mappings = util.get_level_mappings(c)

            for row in rows:
                state = row['STATE']
                if state not in const.metro_areas_by_state:
                    # Ignore states we don't have Yelp data for.
                    continue

                state_id = dao.get_id_of_name(c, 'states', state)
                zip_code = int(row['zipcode'])
                metro_id = dao.get_id_of_name(
                    c, 'metro_areas', const.metro_areas_by_state[state],
                    state_id=state_id)

                known_zip = dao.get_matching(
                    c, 'zip_codes', ['zip_code'],
                    {'zip_code': zip_code,
                     'state_id': state_id,
                     'metro_id': metro_id})
                if not known_zip:
                    # Zip code we don't have Yelp data for, so we don't care.
                    # This dataset contains zip codes that don't exist, too,
                    # for some reason...
                    continue

                per_level = counts.setdefault(zip_code, {})
                level = level_mappings[int(row['agi_stub'])]
                # N1 may be written with a decimal point, hence the float()
                # round-trip before truncating to int.
                count = int(float(row['N1']))
                per_level[level] = per_level.get(level, 0) + count

            values = []
            total_population_values = []
            for zip_code, per_level in counts.items():
                total_count = sum(per_level.values())
                total_population_values.append([zip_code, total_count])
                for level, count in per_level.items():
                    # float() keeps the division exact under Python 2.
                    percentage = (0.0 if total_count == 0
                                  else 100 * count / float(total_count))
                    values.append([zip_code, level, percentage])

            c.executemany(
                'INSERT INTO zip_codes_income_levels '
                '(zip_code, level, percentage) VALUES (?,?,?)',
                values)
            c.executemany(
                'INSERT INTO zip_codes_population (zip_code, population) '
                'VALUES (?,?)',
                total_population_values)
            conn.commit()
        finally:
            c.close()
    finally:
        # Release the database handle even when a lookup or an insert fails.
        conn.close()
def top_zip_codes(metro_area, category):
    """Render the map view for the three best-scoring zip codes.

    Builds a feature list from the submitted form plus ``category``, asks
    ``dao.get_top_scoring`` for the top three zip codes of ``metro_area`` in
    that category, attaches the restaurants of each zip code, and renders
    ``mapview.html`` with the result serialised as JSON in ``information``.

    Args:
        metro_area: Name of the metro area; must already exist in the
            ``metro_areas`` table.
        category: Restaurant category name; must be an element of
            ``const.RESTAURANT_CATEGORIES`` and already exist in the
            ``restaurant_categories`` table.

    Returns:
        The result of ``render_template('mapview.html', ...)``.

    Any failure (unknown category or metro area, malformed form data,
    database error) is printed and answered with ``abort(500)``.
    """
    try:
        c = get_db().cursor()
        try:
            # Everything after the cursor is opened sits under this finally,
            # so the cursor is released even when the form data is malformed.
            feature_list = _build_feature_list(request.form, category)
            print(feature_list)

            category_id = dao.get_id_of_name(
                c, 'restaurant_categories', category, add_nonexisting=False)
            metro_id = dao.get_id_of_name(
                c, 'metro_areas', metro_area, add_nonexisting=False)
            # The abort() calls below raise inside the outer try; the handler
            # at the bottom prints the exception and aborts with 500 again.
            if category_id is None:
                print("Category \"%s\" not found in DB!" % category)
                abort(500)
            if metro_id is None:
                print("Metro area \"%s\" not found in DB!" % metro_area)
                abort(500)

            fields = ['zip_code']
            constraints = {
                'metro_id': metro_id,
                'restaurant_category_id': category_id,
                'feature': feature_list,
            }
            top3 = [
                {'zip_code': t[0]}
                for t in dao.get_top_scoring(
                    c, fields=fields, constraints=constraints, count=3)
            ]

            # According to the zip code, collect all the restaurant
            # information we need, keyed by const.RESTAURANTS_FIELDS.
            for t in top3:
                restaurants = [
                    {field: r[i]
                     for i, field in enumerate(const.RESTAURANTS_FIELDS)}
                    for r in dao.list_restaurants_in_zip(c, t['zip_code'])
                ]
                # TODO: add other variables of interest to each restaurant.
                t['restaurants'] = restaurants

            result = {
                'metro_area': metro_area,
                'category': category,
                'top3': top3,
            }
            information = json.dumps(result)
        finally:
            c.close()
    except Exception as e:
        print(e)
        abort(500)
    return render_template('mapview.html', information=information)


def _build_feature_list(form, category):
    """Encode the submitted restaurant attributes as a flat feature list.

    Layout of the returned list:

    * slots 0-18: one 0/1 flag per known option of the multi-select fields;
      ``Ambience`` flags start at slot 0, ``Good_for`` at slot 9 and
      ``Parking`` at slot 15. Submitted values that are not listed in
      ``const.FEATURES`` for their group are ignored.
    * next 7 slots: ``int()`` of the ``Outdoor_Seating``, ``Waiter_Service``,
      ``Accept_Credit_Cards``, ``Take_out``, ``Reservations``, ``Delivery``
      and ``Price_Range`` form fields, in that order.
    * last slot: index of ``category`` in ``const.RESTAURANT_CATEGORIES``.

    Args:
        form: The request's form data; must support ``getlist`` and item
            access.
        category: Restaurant category name.

    Returns:
        The feature list described above.
    """
    feature_list = [0] * 19

    # (form field / const.FEATURES key, first slot of that group).
    multi_select_groups = (('Ambience', 0), ('Good_for', 9), ('Parking', 15))
    for group, offset in multi_select_groups:
        known_options = const.FEATURES[group]
        for selected in form.getlist(group):
            if selected in known_options:
                feature_list[known_options.index(selected) + offset] = 1

    single_value_fields = (
        'Outdoor_Seating',
        'Waiter_Service',
        'Accept_Credit_Cards',
        'Take_out',
        'Reservations',
        'Delivery',
        'Price_Range',
    )
    for field in single_value_fields:
        feature_list.append(int(form[field]))

    feature_list.append(const.RESTAURANT_CATEGORIES.index(category))
    return feature_list