def extract_counties(db): counties= db.execute('select county_fips_code, state_fips_code, name as county_name, state.abbr as state_abbr from county, state where state_fips_code=fips_code') for county in counties: county_fips, state_fips, county_name, state_abbr = county destination_dir='extracts/county/%s' % state_abbr mkdir_p(destination_dir) outfile_name = destination_dir +'/' + clean_filename(county_name) + '.csv' print "writing county %s(%s) to %s"% (county_name, state_abbr, outfile_name) with open(outfile_name, 'w') as outfile: writer=csv.writer(outfile) hmda_records=db.execute('select * from hmda where state_code= %s and county_code = %s' % (state_fips, county_fips)) writer.writerow(HMDA_COLUMNS) for row in hmda_records: writer.writerow(row)
# NOTE(review): the statements up to `if __name__ == "__main__":` are the tail
# of a definition whose header is not visible here.
#
# Crosswalk load: rows are read from `csvfile` with the 'excel-tab' dialect.
# The first row supplies the column names (stripped of surrounding whitespace;
# `reader.next()` is the Python 2 iterator method) and every later row is
# turned into a dict via csv_row_to_dict. The 'fipscd' value is split into a
# state FIPS code (characters 0-1) and a county FIPS code (characters 2-4),
# both converted to int. When the row's 'CBSA' value is truthy, the county row
# with those two codes gets its cbsa_code set to int(row['CBSA']).
#
# Script entry point: creates the 'tmp' directory, creates the database from
# args.conn_str, downloads and unzips the gazetteer archive, inserts its data,
# removes the extracted text file, then downloads and inserts the crosswalk.
# NOTE(review): `args` is not defined anywhere in the visible code; presumably
# it comes from command-line parsing elsewhere -- confirm.
reader = csv.reader(csvfile, dialect='excel-tab') headers = map(string.strip, reader.next()) for row in reader: row = csv_row_to_dict(headers, row) geoid = row['fipscd'] state_fips = int(geoid[0:2]) county_fips = int(geoid[2:5]) if row['CBSA']: conn.execute(county.update(). where(county.c.state_fips_code==state_fips). where(county.c.county_fips_code==county_fips). values(cbsa_code=int(row['CBSA']))) if __name__ == "__main__": mkdir_p('tmp') gaz_zip_file = 'tmp/Gaz_counties_national.zip' gaz_txt_file = 'Gaz_counties_national.txt' crosswalk_file = 'tmp/crosswalk.txt' create_database(args.conn_str) download_gazetteer(gaz_zip_file) unzip_gazetteer(gaz_zip_file, gaz_txt_file) insert_gaz_data(gaz_txt_file) os.remove(gaz_txt_file) download_crosswalk(crosswalk_file) insert_crosswalk_data(crosswalk_file)
writer=csv.writer(outfile) hmda_records=db.execute('select * from hmda where state_code= %s and county_code = %s' % (state_fips, county_fips)) writer.writerow(HMDA_COLUMNS) for row in hmda_records: writer.writerow(row) def extract_msas(db): for cbsa in db.cbsa.all(): outfile_name=clean_filename(cbsa.name) print "writing MSA %s to %s.csv" % (cbsa.name, outfile_name) with open('extracts/msa/%s.csv' % outfile_name, 'w') as outfile: writer=csv.writer(outfile) hmda_records=db.execute('select * from hmda where msa_md = %s' % cbsa.cbsa_code) writer.writerow(HMDA_COLUMNS) for row in hmda_records: writer.writerow(row) if __name__ == '__main__': parser = argparse.ArgumentParser(description='extract CSV for every MSA, county, and State') parser.add_argument('conn_str', help='connection string for the database') args = parser.parse_args() db = sqlsoup.SQLSoup(args.conn_str) mkdir_p('extracts/county') extract_counties(db) mkdir_p('extracts/msa') extract_msas(db)