def import_rent_per_sqft_by_zip():
    """Import Zillow rent-per-square-foot values into ``zip_codes_rent``.

    Reads ``Zip_ZriPerSqft_AllHomes.csv`` from ``dataset_dirs.ZIP_INCOME_DIR``
    and, for every row whose state appears in ``const.metro_areas_by_state``
    and whose zip code already exists in the ``zip_codes`` table, stores the
    value of the ``"2015-10"`` column as that zip code's rent.

    All rows are inserted in one batch and committed together. If anything
    raises before the commit, nothing is committed; the cursor and the
    connection are closed either way.

    Returns:
        None. Returns early without touching the database when the CSV
        could not be loaded (``util.csv_to_dict_list`` returned ``None``).
    """
    filename = dataset_dirs.ZIP_INCOME_DIR + "Zip_ZriPerSqft_AllHomes.csv"
    rows = util.csv_to_dict_list(filename)
    if rows is None:
        # Something went wrong
        return

    conn = sqlite3.connect(const.DB_FILENAME)
    c = conn.cursor()
    try:
        rents = []
        for row in rows:
            state = row["State"]
            if state not in const.metro_areas_by_state:
                # Ignore states we don't have Yelp data for
                continue

            state_id = dao.get_id_of_name(c, "states", state)
            zip_code = int(row["RegionName"])
            metro_id = dao.get_id_of_name(
                c,
                "metro_areas",
                const.metro_areas_by_state[state],
                state_id=state_id,
            )
            known_zip = dao.get_matching(
                c,
                "zip_codes",
                ["zip_code"],
                {"zip_code": zip_code, "state_id": state_id, "metro_id": metro_id},
            )
            if not known_zip:
                # Zip code we don't have Yelp data for, so we don't care.
                continue

            # ZRI - Zillow Rent Index
            # More info at http://www.zillow.com/research/data/#rental-data
            # NOTE(review): the value is stored exactly as read from the CSV
            # (no numeric conversion) - confirm blank cells are acceptable.
            rent = row["2015-10"]
            rents.append([zip_code, state_id, metro_id, rent])

        c.executemany(
            "INSERT INTO zip_codes_rent(zip_code, state_id, metro_id, rent) VALUES (?,?,?,?)",
            rents,
        )
        conn.commit()
    finally:
        # Release the cursor and the connection even when a row fails to
        # parse or a query raises; uncommitted work is discarded on close.
        c.close()
        conn.close()
def import_income_by_zip():
    """Import per-zip income-level percentages and totals from ``13zpallagi.csv``.

    For every row whose state appears in ``const.metro_areas_by_state`` and
    whose zip code already exists in the ``zip_codes`` table, the ``N1`` count
    is accumulated per (zip code, income level). The raw ``agi_stub`` value is
    translated to a level through ``util.get_level_mappings``.

    Two tables are then filled in one transaction:

    * ``zip_codes_income_levels`` - one row per (zip code, level) holding the
      share of that zip code's total count, as a percentage (0.0 when the
      total is zero).
    * ``zip_codes_population`` - one row per zip code holding the total count
      summed over all of its levels.

    If anything raises before the commit, nothing is committed; the cursor
    and the connection are closed either way.

    Returns:
        None. Returns early without touching the database when the CSV
        could not be loaded (``util.csv_to_dict_list`` returned ``None``).
    """
    filename = dataset_dirs.ZIP_INCOME_DIR + "13zpallagi.csv"
    rows = util.csv_to_dict_list(filename)
    if rows is None:
        # Something went wrong
        return

    conn = sqlite3.connect(const.DB_FILENAME)
    c = conn.cursor()
    try:
        # zip_code -> {level -> accumulated N1 count}
        counts = {}
        level_mappings = util.get_level_mappings(c)
        for row in rows:
            state = row['STATE']
            if state not in const.metro_areas_by_state:
                # Ignore states we don't have Yelp data for
                continue

            state_id = dao.get_id_of_name(c, 'states', state)
            zip_code = int(row['zipcode'])
            metro_id = dao.get_id_of_name(
                c,
                'metro_areas',
                const.metro_areas_by_state[state],
                state_id=state_id,
            )
            known_zip = dao.get_matching(
                c,
                'zip_codes',
                ['zip_code'],
                {'zip_code': zip_code, 'state_id': state_id, 'metro_id': metro_id},
            )
            if not known_zip:
                # Zip code we don't have Yelp data for, so we don't care. This
                # dataset contains zip codes that don't exist, too, for some
                # reason...
                continue

            levels = counts.setdefault(zip_code, {})
            level = level_mappings[int(row['agi_stub'])]
            # N1 is parsed via float() first, so values written like "120.0"
            # are accepted as well as plain integers.
            count = int(float(row['N1']))
            levels[level] = levels.get(level, 0) + count

        values = []
        total_population_values = []
        for zip_code, levels in counts.items():
            total_count = sum(levels.values())
            total_population_values.append([zip_code, total_count])
            for level, count in levels.items():
                # Guard against a zip code whose counts are all zero.
                percentage = (0.0 if total_count == 0
                              else (100 * count / float(total_count)))
                values.append([zip_code, level, percentage])

        c.executemany(
            'INSERT INTO zip_codes_income_levels (zip_code, level, percentage) VALUES (?,?,?)',
            values,
        )
        c.executemany(
            'INSERT INTO zip_codes_population (zip_code, population) VALUES (?,?)',
            total_population_values,
        )
        conn.commit()
    finally:
        # Release the cursor and the connection even when a row fails to
        # parse or a query raises; uncommitted work is discarded on close.
        c.close()
        conn.close()