Exemple #1
0
def create_jobs(session):
    """Rebuild the job-stats tables from the CSV at ``JOBS_DATASET``.

    All existing ``CountyJobStats``, ``DistrictJobStats`` and
    ``StateJobStats`` rows are deleted, then one record is created per CSV
    row according to its ``geoType`` column:

    * ``county``            -> ``CountyJobStats``
    * ``sldl``/``sldu``/``cd`` -> ``DistrictJobStats`` (state house, state
      senate, congressional respectively)
    * ``state``             -> ``StateJobStats``

    Every ``JOBS_STATS`` entry maps a CSV column to the model attribute
    that receives its ``clean_float``-ed value. Each record is committed
    individually so that one bad row does not abort the whole import;
    failures are reported on stdout and the row is skipped.

    Args:
        session: the database session used for the deletes, adds and
            commits (presumably a SQLAlchemy ``Session`` -- it must provide
            ``query``, ``add``, ``commit`` and ``rollback``).
    """
    js_created = 0
    jobs_data = pd.read_csv(JOBS_DATASET, encoding="ISO-8859-1")

    session.query(CountyJobStats).delete()
    session.query(DistrictJobStats).delete()
    session.query(StateJobStats).delete()
    # Make the purge durable now: a rollback triggered by a bad row below
    # must not resurrect the old records.
    session.commit()

    # geoType values that describe a legislative district, and the
    # district type used to build the shortcode for each.
    district_types = {
        'sldl': DistrictType.STATE_HOUSE,
        'sldu': DistrictType.STATE_SENATE,
        'cd': DistrictType.CONGRESSIONAL,
    }

    for _, row in jobs_data.iterrows():
        state_abbr = row['stateAbbr']
        geo_type = row['geoType']
        region_name = _clean_region_name(row['name'])
        # Reset per row so the error report below never shows a value left
        # over from a previous row (or fails because it was never bound).
        district_num = None

        if geo_type == 'county':
            county_shortcode = CountyFragment.to_shortcode(
                state_abbr, region_name)
            js = CountyJobStats(county_shortcode=county_shortcode)
        elif geo_type in district_types:
            district_num = District.to_district_num(state_abbr, region_name)
            district_shortcode = District.to_shortcode(
                state_abbr, district_types[geo_type], district_num)
            js = DistrictJobStats(district_shortcode=district_shortcode)
        elif geo_type == 'state':
            js = StateJobStats(state_abbr=state_abbr)
        else:
            # Unknown geography: there is no model to populate, so skip the
            # row instead of crashing on ``setattr(None, ...)``.
            print("OOPS: unknown geoType", geo_type, state_abbr, region_name)
            continue

        for csv_key, sql_fieldname in JOBS_STATS.items():
            setattr(js, sql_fieldname, clean_float(row[csv_key]))

        session.add(js)
        try:
            session.commit()
        except Exception as e:
            # Best-effort import: report the row and carry on. The rollback
            # is required, otherwise the session stays in a failed state
            # and every subsequent commit would fail as well.
            session.rollback()
            print("OOPS: ", state_abbr, district_num, region_name, e)
        else:
            js_created += 1
        print("Job stats records created: %d" % js_created, end="\r")

    print()
def create_counties_and_districts(session, folder, keys, district_type):
    """Create District and CountyFragment records from per-state CSV files.

    Every entry in ``folder`` is read as a CSV file; its base name (without
    extension) is used as the state abbreviation. For each row a
    ``CountyFragment`` is added to the session, and a ``District`` is added
    the first time a given (state, district type, district number)
    combination is seen. Rows without a usable district number or county
    name are skipped. Everything is committed once, at the end.

    Args:
        session: database session providing ``query``, ``add`` and
            ``commit``.
        folder: directory containing one CSV file per state.
        keys: 4-tuple of CSV column names, in the order
            (county name, district number, county population,
            percent of whole).
        district_type: the type of district described by the files; stored
            on each new ``District`` and used (via ``.value``) in the
            progress output.
    """
    districts_created = 0
    counties_created = 0

    # The column names are the same for every file, so unpack them once.
    (county_name_key, district_num_key, county_pop_key, percent_key) = keys

    progress_fmt = (
        "%s District records created: %d County fragment records created: %d")

    for filename in os.listdir(folder):
        state_abbr = os.path.splitext(filename)[0]
        file_path = os.path.join(folder, filename)
        csv_data = pd.read_csv(file_path, encoding="ISO-8859-1")

        for _, row in csv_data.iterrows():
            raw_district_num = row.get(district_num_key)

            # Exclude floterial districts in New Hampshire that have no
            # associated geographical area
            # (https://en.wikipedia.org/wiki/Floterial_district)
            if isinstance(raw_district_num, str) and "(F)" in raw_district_num:
                continue

            district_num = District.to_district_num(state_abbr,
                                                    raw_district_num)
            county_fullname = row.get(county_name_key)
            county_population = row.get(county_pop_key)
            county_percent_of_whole = row.get(percent_key)

            if district_num is None or not county_fullname:
                continue

            # Include the district type in the lookup: district numbers
            # repeat across district types within a state, and matching a
            # district of another type would wrongly suppress creating
            # this one.
            district = session.query(District).filter_by(
                state=state_abbr,
                district_type=district_type,
                district_number=district_num).first()
            district_shortcode = District.to_shortcode(state_abbr,
                                                       district_type,
                                                       district_num)
            if not district:
                district = District(shortcode=district_shortcode,
                                    state=state_abbr,
                                    district_type=district_type,
                                    district_number=district_num)
                districts_created += 1
                print(
                    progress_fmt %
                    (district_type.value, districts_created, counties_created),
                    end="\r")
                session.add(district)

            county_shortcode = CountyFragment.to_shortcode(
                state_abbr, county_fullname)
            cf = CountyFragment(shortcode=county_shortcode,
                                district_shortcode=district_shortcode,
                                fullname=county_fullname,
                                population=county_population,
                                percent_of_whole=county_percent_of_whole)
            counties_created += 1
            print(
                progress_fmt %
                (district_type.value, districts_created, counties_created),
                end="\r")
            session.add(cf)

    print()
    session.commit()