def create_jobs(session):
    """Rebuild the county, district and state job-stats tables from JOBS_DATASET.

    All existing CountyJobStats, DistrictJobStats and StateJobStats rows are
    deleted, then one record is built per CSV row. Each record is committed
    on its own so that a single bad row does not abort the whole import;
    a failed commit is rolled back, reported, and the import continues.

    Args:
        session: Database session providing ``query``, ``add``, ``commit``
            and ``rollback`` (presumably a SQLAlchemy ``Session``).
    """
    js_created = 0
    county_polling_data = pd.read_csv(JOBS_DATASET, encoding="ISO-8859-1")

    # Start from a clean slate: the import fully replaces previous stats.
    session.query(CountyJobStats).delete()
    session.query(DistrictJobStats).delete()
    session.query(StateJobStats).delete()

    # geoType values that describe a legislative district, mapped to the
    # district type used when building the district shortcode.
    district_types = {
        'sldl': DistrictType.STATE_HOUSE,
        'sldu': DistrictType.STATE_SENATE,
        'cd': DistrictType.CONGRESSIONAL,
    }

    for _, row in county_polling_data.iterrows():
        # stateAbbr,geoType,name,geoid,sourceURL
        state_abbr = row['stateAbbr']
        geo_type = row['geoType']
        region_name = _clean_region_name(row['name'])
        # Reset per row so the error report below never references an
        # unbound name or a stale value left over from an earlier row.
        district_num = None

        if geo_type == 'county':
            county_shortcode = CountyFragment.to_shortcode(
                state_abbr, region_name)
            js = CountyJobStats(county_shortcode=county_shortcode)
        elif geo_type in district_types:
            district_num = District.to_district_num(state_abbr, region_name)
            district_shortcode = District.to_shortcode(
                state_abbr, district_types[geo_type], district_num)
            js = DistrictJobStats(district_shortcode=district_shortcode)
        elif geo_type == 'state':
            js = StateJobStats(state_abbr=state_abbr)
        else:
            # No model corresponds to this geoType; there is nothing to
            # populate, so report it and move on instead of crashing.
            print("Skipping row with unknown geoType: ",
                  state_abbr, geo_type, region_name)
            continue

        for csv_key, sql_fieldname in JOBS_STATS.items():
            setattr(js, sql_fieldname, clean_float(row[csv_key]))

        session.add(js)
        try:
            session.commit()
        except Exception as e:
            # A failed commit leaves the session unusable until it is
            # rolled back; rolling back also discards the offending record
            # so it cannot break the commits of subsequent rows.
            session.rollback()
            print("OOPS: ", state_abbr, district_num, region_name, e)
            continue

        # Only rows that were actually committed are counted.
        js_created += 1
        print("Job stats records created: %d" % js_created, end="\r")
    # Terminate the carriage-return progress line.
    print()
def create_counties_and_districts(session, folder, keys, district_type):
    """Create District and CountyFragment records from per-state CSV files.

    Every file in ``folder`` is read as a CSV; the file's base name (without
    extension) is used as the state abbreviation. For each usable row a
    CountyFragment is added, and the District it belongs to is added first
    if no matching district exists yet. Everything is committed once at the
    end. Progress is printed to stdout; nothing is returned.

    Args:
        session: Database session providing ``query``, ``add`` and
            ``commit`` (presumably a SQLAlchemy ``Session``).
        folder: Directory containing the per-state CSV files.
        keys: 4-tuple of CSV column names, in order: county name,
            district number, county population, percent of whole.
        district_type: District type assigned to created districts and used
            to build district shortcodes; its ``.value`` appears in the
            progress output.
    """
    # The column names are the same for every file, so unpack them once.
    county_name_key, district_num_key, county_pop_key, percent_key = keys
    districts_created = 0
    counties_created = 0

    def report_progress():
        # Single-line progress indicator, overwritten in place via "\r".
        print(
            "%s District records created: %d County fragment records created: %d"
            % (district_type.value, districts_created, counties_created),
            end="\r")

    for filename in os.listdir(folder):
        state_abbr = os.path.splitext(filename)[0]
        file_path = os.path.join(folder, filename)
        csv_data = pd.read_csv(file_path, encoding="ISO-8859-1")

        for _, row in csv_data.iterrows():
            raw_district_num = row.get(district_num_key)

            # Exclude floterial districts in New Hampshire that have no
            # associated geographical area
            # (https://en.wikipedia.org/wiki/Floterial_district)
            if isinstance(raw_district_num, str) and "(F)" in raw_district_num:
                continue

            district_num = District.to_district_num(state_abbr,
                                                    raw_district_num)
            county_fullname = row.get(county_name_key)
            county_population = row.get(county_pop_key)
            county_percent_of_whole = row.get(percent_key)

            # pandas represents empty cells as NaN, which is truthy, so a
            # plain falsiness test would let blank county names through.
            if (district_num is None or pd.isna(county_fullname)
                    or not county_fullname):
                continue

            # Include the district type in the lookup: a state-house and a
            # state-senate (or congressional) district can share the same
            # state and number, and must not be mistaken for one another.
            district = session.query(District).filter_by(
                state=state_abbr,
                district_type=district_type,
                district_number=district_num).first()
            district_shortcode = District.to_shortcode(state_abbr,
                                                       district_type,
                                                       district_num)
            if not district:
                district = District(shortcode=district_shortcode,
                                    state=state_abbr,
                                    district_type=district_type,
                                    district_number=district_num)
                districts_created += 1
                report_progress()
                session.add(district)

            county_shortcode = CountyFragment.to_shortcode(
                state_abbr, county_fullname)
            cf = CountyFragment(shortcode=county_shortcode,
                                district_shortcode=district_shortcode,
                                fullname=county_fullname,
                                population=county_population,
                                percent_of_whole=county_percent_of_whole)
            counties_created += 1
            report_progress()
            session.add(cf)

    # Terminate the carriage-return progress line, then persist everything.
    print()
    session.commit()