コード例 #1
0
def make_division_name(state, district):
    """Return the display name for a congressional division.

    At-large states with district 1 are referred to by the bare state name;
    everything else gets "<State>'s <nth> congressional district".
    """
    state_name = states.lookup(state).name
    if state in AT_LARGE_DISTRICTS and district == 1:
        return state_name
    return "{state}'s {district} congressional district".format(
        state=state_name,
        district=ordinalize(district))
コード例 #2
0
def get_counties_in_state(census_request,
                          state_fips_code,
                          max_number_of_counties=math.inf,
                          specific_counties_only=None):
    """Fetch the counties of one state from the census SF1 API.

    :param census_request: census client exposing ``sf1.get``
    :param state_fips_code: two-digit state FIPS code
    :param max_number_of_counties: cap on how many counties to return
        (default ``math.inf`` means "all")
    :param specific_counties_only: optional iterable of county names
        (without the " County" suffix); unmatched names produce None entries
    :return: list of county dicts as returned by the API
    """
    requested_counties = census_request.sf1.get(
        fields='NAME',
        geo={
            'for': 'county:*',
            'in': 'state:{0}'.format(state_fips_code)
        })

    # Strip the trailing ", <State Name>" the API appends to county names.
    requested_state = states.lookup(state_fips_code)
    state_name = requested_state.name
    for requested_county in requested_counties:
        county_name = requested_county['NAME'].replace(
            ', {0}'.format(state_name), '')
        requested_county['NAME'] = county_name

    if specific_counties_only is not None:
        list_of_specific_counties = []
        for specific_county in specific_counties_only:
            matching_county = next(
                (item for item in requested_counties
                 if item['NAME'] == '{0} County'.format(specific_county)),
                None)
            list_of_specific_counties.append(matching_county)
        requested_counties = list_of_specific_counties

    # BUG FIX: the truncation used to live inside the
    # `max_number_of_counties == math.inf` branch, where it sliced to the
    # full length (a no-op) — a finite cap passed by the caller was never
    # applied. Truncate only when a finite cap was given.
    if max_number_of_counties != math.inf:
        requested_counties = requested_counties[:max_number_of_counties]

    return requested_counties
コード例 #3
0
    def __call__(self, form, field):
        """Validate that *field* holds a valid US phone number whose area
        code corresponds to the state selected on *form*.

        Raises ValidationError when parsing fails, the number is invalid,
        or the area-code state does not match the form's state.
        """
        # Get the selected state
        form_state = State(int(form.state.data))

        # Parse the phone number into constituent parts
        try:
            phone = phonenumbers.parse(field.data, 'US')
        except phonenumbers.NumberParseException:
            raise ValidationError(self.ERROR_MSG)

        # Check if parsed number is a valid pattern for region (US)
        if not phonenumbers.is_valid_number(phone):
            raise ValidationError(self.ERROR_MSG)

        # Check area prefix is valid for US states, and matches the selected state
        longest_prefix = 4  # Only return results for state name description (1XXX) (excl. city names)
        _state = _prefix_description_for_number(US_PHONE_GEODATA, longest_prefix, phone, 'en')
        try:
            # NOTE(review): asserts are used for control flow here; they are
            # stripped under `python -O`, so these checks would silently pass
            # in optimized runs — consider explicit `if ...: raise`.
            assert _state
            lookup_state = states.lookup(_state)
            assert lookup_state
            state = State[lookup_state.abbr]  # Raises KeyError if state abbreviation not in local states
            assert state == form_state  # Check that specified state same as parsed state
        except (AssertionError, KeyError):
            raise ValidationError('Invalid area code prefix')
コード例 #4
0
ファイル: household.py プロジェクト: bertomartin/us_census
    def read(self, geo, params):
        '''
        Queries Census API using the query variables filtered in self.api_variable
        :param geo: geography filters. e.g. {'state': 'OH', 'county': '*'} use '*' for 'all', which would return all
        as individual rows.
        :type geo: dict
        :param params: household parameters. e.g. {'type': 'husband_wife', 'has_children': True, 'children_age':
        'under_6', 'race': ['black', 'asian']}
        :type params: dict
        :return: DataFrame of results from query
        :rtype: pandas.DataFrame
        '''
        self.geo = geo
        self.params = params
        self.filter_api_variable()

        logger.info('Looking up the following variables\n%s' % self.api_variables)

        rows = self.query_census(self.api_variables['row_id'].tolist())
        dataframe = pandas.DataFrame(rows)

        if not dataframe.empty:
            if 'state' in self.geo:
                # Translate state FIPS codes into USPS abbreviations.
                dataframe['state'] = dataframe['state'].apply(
                    lambda state_fips: states.lookup(state_fips).abbr)
            if 'county' in self.geo:
                dataframe['county'] = dataframe['county'].astype(float)

        # horizontal sum of queried tables
        table_columns = dataframe.filter(regex=('P038.*')).astype(float)
        dataframe['households'] = table_columns.sum(axis=1)

        cols_keep = ['households'] + list(self.geo.keys())

        return dataframe[cols_keep]
コード例 #5
0
ファイル: population.py プロジェクト: bertomartin/us_census
    def read(self, geo, params):
        '''
        Queries Census API using the query variables filtered in self.api_variable
        :param geo: geography filters. e.g. {'state': 'OH', 'county': '*'} use '*' for 'all', which would return all
        as individual rows.
        :type geo: dict
        :param params: population parameters. a dictionary of only 'sex', 'age', 'race' allowed. e.g. {'sex': 'male',
        'age': range(20, 25), 'race': ['asian', 'white']}
        :type params: dict
        :return: DataFrame of results from query
        :rtype: pandas.DataFrame
        '''
        self.geo = geo
        self.params = params
        self.filter_api_variable()

        logger.info('Looking up the following variables\n%s' % self.api_variables)

        rows = self.query_census(self.api_variables['row_id'].tolist())
        dataframe = pandas.DataFrame(rows)

        if not dataframe.empty:
            if 'state' in self.geo:
                # Translate state FIPS codes into USPS abbreviations.
                dataframe['state'] = dataframe['state'].apply(
                    lambda state_fips: states.lookup(state_fips).abbr)
            if 'county' in self.geo:
                dataframe['county'] = dataframe['county'].astype(float)

        # horizontal sum of queried tables
        table_columns = dataframe.filter(regex=('PCT.*')).astype(float)
        dataframe['population'] = table_columns.sum(axis=1)

        cols_keep = ['population'] + list(self.geo.keys())

        return dataframe[cols_keep]
コード例 #6
0
def getCountiesInState(stateFIPSCode,
                       maxNumberOfCounties=math.inf,
                       specificCountiesOnly=None):
    """Fetch the counties of one state from the census SF1 API.

    :param stateFIPSCode: two-digit state FIPS code
    :param maxNumberOfCounties: cap on how many counties to return
        (default ``math.inf`` means "all")
    :param specificCountiesOnly: optional iterable of county names (without
        the " County" suffix); unmatched names produce None entries
    :return: list of county dicts as returned by the API
    """
    requestedCounties = censusRequest.sf1.get(
        fields=('NAME'),
        geo={
            'for': 'county:*',
            'in': 'state:{0}'.format(stateFIPSCode)
        })

    # clean up county names after API update
    ## remove ", StateName"
    requestedState = states.lookup(stateFIPSCode)
    stateName = requestedState.name
    for requestedCounty in requestedCounties:
        countyName = requestedCounty['NAME'].replace(', {0}'.format(stateName),
                                                     '')
        requestedCounty['NAME'] = countyName

    # PEP 8: identity comparison for None instead of `!= None`.
    if specificCountiesOnly is not None:
        listOfSpecificCounties = []
        for specificCounty in specificCountiesOnly:
            matchingCounty = next(
                (item for item in requestedCounties
                 if item['NAME'] == '{0} County'.format(specificCounty)), None)
            listOfSpecificCounties.append(matchingCounty)
        requestedCounties = listOfSpecificCounties

    # BUG FIX: the truncation was previously nested inside the
    # `maxNumberOfCounties == math.inf` branch, where it sliced to the full
    # length (a no-op) — a finite cap was silently ignored. Truncate only
    # when a finite cap was given.
    if maxNumberOfCounties != math.inf:
        requestedCounties = requestedCounties[:maxNumberOfCounties]

    return requestedCounties
コード例 #7
0
ファイル: dataload.py プロジェクト: sebajara/covid19
def covidtracking_ustates():
    """Load covidtracking state-level daily data and 2019 census populations.

    Returns a tuple ``(states_df, states_info)`` where ``states_df`` holds
    per-state daily counts and ``states_info`` maps each state to its
    estimated 2019 population.
    """
    # -------- Import: pre-scraped frames (see data/covidtracking_update.py)
    with open("../data/covidtracking/covidtracking_dfs.pickle", "rb") as file:
        covidtracking_dfs = pickle.load(file)
    state_census = pd.read_csv(
        '../data/usa_census/SCPRC-EST2019-18+POP-RES.csv')

    # -------- Cleaning: parse the integer-coded dates into datetimes
    for table in ['states_daily']:  # see covidtracking_dfs.keys()
        covidtracking_dfs[table]['date'] = pd.to_datetime(
            covidtracking_dfs[table]['date'], format="%Y%m%d")

    # Keep only the columns used downstream for now.
    columns = ['state', 'date', 'negative', 'positive', 'death']
    states_df = covidtracking_dfs['states_daily'].loc[:, columns]

    # Attach the estimated 2019 population to every state seen in the data.
    state_abrs = states_df['state'].unique()
    state_names = [states.lookup(a).name for a in list(state_abrs)]
    states_info = pd.DataFrame(dict(abbreviation=state_abrs, name=state_names))
    states_info = states_info.merge(
        state_census.loc[:, ['NAME', 'POPESTIMATE2019']],
        right_on='NAME',
        left_on='name',
        how='left').sort_values('POPESTIMATE2019')
    states_info.drop(columns=['NAME'], inplace=True)
    return (states_df, states_info)
コード例 #8
0
def find_state(text):
    """Try to extract a US state from free-form location text.

    Returns ``['United States', 'USA', <matched token>, <state abbr>]`` on
    success; falls through (returning None) when nothing matches.
    """
    # Full state names appearing anywhere in the text.
    for pattern in state_names:
        if search(pattern, text, IGNORECASE):
            return ['United States', 'USA', pattern, lookup(str(pattern)).abbr]

    # A known city name at the start of the text maps directly to its state.
    matches = match(city_to_state_pattern, text.lower())
    if matches:
        k = matches.group(0)
        tokens = [city_to_state_dict.get(k.title(), np.nan)]
    else:
        # BUG FIX: regex patterns are now raw strings — "\s" and "\w" in
        # plain string literals are invalid escape sequences (SyntaxWarning
        # on modern Python). Words colliding with state abbreviations
        # ('in', 'la', 'me', 'oh', 'or') are dropped.
        tokens = [j for j in split(r"\s|,", text)
                  if j not in ['in', 'la', 'me', 'oh', 'or']]

    for i in tokens:
        if match(r'\w+', str(i)):
            if lookup(str(i)):
                return ['United States', 'USA', i, lookup(str(i)).abbr]
コード例 #9
0
ファイル: household.py プロジェクト: bertomartin/us_census
    def query_census(self, symbols):
        '''
        Queries US census using the census python API
        :param symbols: variables from http://api.census.gov/data/2010/sf1/variables.html to query
        :type symbols:
        :return: list[str]
        :rtype: pandas.DataFrame
        '''
        geo = self.geo

        # Resolve the state filter to a FIPS code; the '*' wildcard is kept.
        state_fips = '*'
        if 'state' in geo and geo['state'] != '*':
            state_fips = states.lookup(geo['state']).fips

        geo_keys = set(geo.keys())

        logger.info('Querying using filters: ' + str(geo))

        # Dispatch on the exact set of geography keys supplied; unknown
        # combinations fall through (returning None), as before.
        api = self.census_api

        if geo_keys == {'state'}:
            return api.state(symbols, state_fips)

        if geo_keys == {'state', 'county'}:
            return api.state_county(symbols, state_fips, geo['county'])

        if geo_keys == {'state', 'county', 'subdivision'}:
            return api.state_county_subdivision(symbols, state_fips,
                                                geo['county'],
                                                geo['subdivision'])

        if geo_keys == {'state', 'county', 'tract'}:
            return api.state_county_tract(symbols, state_fips, geo['county'],
                                          geo['tract'])

        if geo_keys == {'state', 'place'}:
            return api.state_place(symbols, state_fips, geo['place'])

        if geo_keys == {'state', 'district'}:
            return api.state_district(symbols, state_fips, geo['district'])

        if geo_keys == {'state', 'msa'}:
            return api.state_msa(symbols, state_fips, geo['msa'])

        if geo_keys == {'state', 'csa'}:
            return api.state_csa(symbols, state_fips, geo['csa'])

        if geo_keys == {'state', 'district', 'place'}:
            return api.state_district_place(symbols, state_fips,
                                            geo['district'], geo['place'])

        if geo_keys == {'state', 'zipcode'}:
            return api.state_zipcode(symbols, state_fips, geo['zipcode'])
コード例 #10
0
def main(state_abbreviation):
    """Run the districting pipeline for one state abbreviation:
    download/format block data, then create and save district GeoJSON."""
    import os

    # SECURITY: a Census API key was previously hard-coded here; it remains
    # only as a backward-compatible fallback. Prefer supplying it via the
    # CENSUS_API_KEY environment variable, and rotate the exposed key.
    api_key = os.environ.get("CENSUS_API_KEY",
                             "78ae8c422513eb7551e52f2adf65ee6b51847b9d")
    state_info = states.lookup(state_abbreviation)

    get_block_data(api_key, state_info)
    format_block_data(state_info)

    num_congressional_districts = get_num_congressional_districts(
        api_key, state_info)
    districts = create_districts(state_info, num_congressional_districts, 0.03)
    save_geojson(districts, state_info)
コード例 #11
0
def get_census_data():
    """Fetch ACS5 congressional-district population data and tidy it.

    Returns a DataFrame with population columns plus state name and
    formatted district name.
    """
    cong = C.acs5.state_congressional_district(list(CODES.keys()), "*", "*")
    cong = pd.DataFrame(cong)
    # Converting FIPS codes to state names.
    # (FIX: the original computed this identical column twice.)
    cong['Name'] = cong['state'].apply(lambda x: str(states.lookup(x)))
    # Renaming code columns
    cong.rename(columns=CODES, inplace=True)
    # Dropping obsolete congressional districts and Puerto Rico
    cong = cong[cong['congressional district'] != 'ZZ']
    cong = cong[cong['Name'] != 'Puerto Rico']
    # String cleaning with helper function
    cong['Namelsad'] = \
        cong['congressional district'].apply(generate_inputs.format_district)
    cong.sort_values(by=['Name', 'congressional district'], inplace=True)
    df = cong[[
        'Mexican-American Population', 'Latino Population', 'Total Population',
        'Name', 'Namelsad'
    ]]

    return df
コード例 #12
0
def _special_case_nameattr_equivalence(nameattr_a: x509.NameAttribute, nameattr_b: x509.NameAttribute):
    """
    Return true if the two name attributes are equivalent for some special case. Assumes both nameattr have same oid
    :param nameattr_a:
    :param nameattr_b:
    :return:
    """
    # If both are US states, long and short forms are equivalent (CA == California).
    if nameattr_a.oid == NameOID.STATE_OR_PROVINCE_NAME:
        state_a = states.lookup(nameattr_a.value)
        # BUG FIX: states.lookup returns None for unrecognized values, so two
        # non-state values both resolved to None and None == None wrongly
        # reported equivalence. Require a real match on side A.
        if state_a is not None and state_a == states.lookup(nameattr_b.value):
            return True
    # add in more special cases here
    return False
コード例 #13
0
def main():
    """Load city data for every state listed in state_abbrs.txt and export it."""
    with open('output_archive/state_abbrs.txt') as fin:
        state_abbrs = [x.strip() for x in fin.readlines()]

    # PERF FIX: the FIPS table was re-read and re-parsed from disk on every
    # loop iteration; read it once and filter per state instead.
    fips_table = pd.read_csv('output_archive/fips_codes.txt',
                             sep='\t',
                             encoding='utf-16')

    output = []
    for state in state_abbrs:
        print(state)
        state_fips_code = states.lookup(state).fips
        state_fips = fips_table[fips_table['State Code (FIPS)'] == int(
            state_fips_code)]
        output.append(
            load_cities(get_city_list(state_fips), state, state_fips_code,
                        state_fips))
    export_output(output)
コード例 #14
0
ファイル: __init__.py プロジェクト: ZippeyKeys12/NaNoGenMo19
    def generate_details(self, **kwargs) -> dict:
        """Randomly generate a character-profile dict: genre, faked identity,
        family details, home state, astrological signs, and inspiration.

        NOTE: under a fixed random seed the output depends on the exact
        order of `random` calls below, so statements must not be reordered.
        """
        details: Dict[str, Any] = {'genre': random.choice(['fantasy'])}

        # Merge in a faker-generated profile (provides 'sex', 'residence',
        # 'birthdate', among others).
        details.update(self.fake.profile(sex=None))

        details['pronouns'] = get_pronouns(details['sex'])

        # Probabilities presumably modeled on demographic figures — TODO confirm.
        details['is_gay'] = random.random() < .0195
        details['is_married'] = random.random() < .43
        details['has_kids'] = random.random() < .74

        if details['is_married']:
            if details['is_gay']:
                details['spouse'] = {
                    'F': 'wife',
                    'M': 'husband'
                }[details['sex']]
            else:
                details['spouse'] = {
                    'F': 'husband',
                    'M': 'wife'
                }[details['sex']]

        # Re-roll the faked address until it contains a recognizable state.
        addr = self.state_pattern.search(details['residence'])
        while not addr:
            details['residence'] = self.fake.address()

            addr = self.state_pattern.search(details['residence'])

        details['state'] = states.lookup(addr.group(1)).name

        details['signs'] = {
            'astrological': get_astrological_sign(details['birthdate']),
            'zodiac': get_zodiac_sign(details['birthdate'])
        }

        # 30% chance the character is "inspired by" a same- or opposite-sex
        # relative; otherwise no inspiration.
        if random.random() < .3:
            details['inspired_by'] = {'sex': random.choice(['F', 'M'])}

            details['inspired_by']['relation'] = random.choice({
                'F': ['mother', 'sister', 'daughter'],
                'M': ['father', 'brother', 'son']
            }[details['inspired_by']['sex']])
        else:
            details['inspired_by'] = None

        return details
コード例 #15
0
def generate_2018_fips_df(path: str = FIPS_2018_URL) -> pd.DataFrame:
    """Downloads raw FIPS data from the 2018 source and transforms to the proper format."""
    raw = pd.read_excel(path,
                        dtype=str,
                        engine="openpyxl",
                        skiprows=range(4))
    raw = raw.drop([FIPS_2018_SUMMARY_COL], axis="columns")

    # Keep county-level rows only: every sub-county identifier must be zero
    # and the county code must be non-zero (state rows carry "000").
    is_county = ((raw[FIPS_2018_COUNTY_SUBDIVISION_COL] == "00000")
                 & (raw[FIPS_2018_PLACE_COL] == "00000")
                 & (raw[FIPS_2018_CITY_COL] == "00000")
                 & (raw[FIPS_2018_COUNTY_COL] != "000"))
    fips_df = raw[is_county].reset_index(drop=True)

    # Sub-county identifier columns are no longer needed.
    fips_df = fips_df.drop(
        [
            FIPS_2018_COUNTY_SUBDIVISION_COL, FIPS_2018_PLACE_COL,
            FIPS_2018_CITY_COL
        ],
        axis="columns",
    )

    # Normalize column names to the project's conventions.
    fips_df = fips_df.rename(columns={
        FIPS_2018_STATE_COL: STATE_CODE_COL,
        FIPS_2018_COUNTY_COL: COUNTY_CODE_COL,
        FIPS_2018_AREA_NAME_COL: COUNTY_NAME_COL,
    })

    # Prepend a column mapping each state FIPS code to its abbreviation.
    abbrevs = fips_df.state_code.apply(
        lambda code: states.lookup(code, field="fips").abbr)
    fips_df.insert(loc=0, column=STATE_ABBREV_COL, value=abbrevs)

    # Full county fips = state code + county code.
    fips_df[FIPS_COL] = fips_df[STATE_CODE_COL] + fips_df[COUNTY_CODE_COL]

    return fips_df
コード例 #16
0
def transform_population_df(pops_df: pd.DataFrame) -> pd.DataFrame:
    """Transforms the population data into a more usable format.

    Adds fips information and unpivots the year columns into a single year column."""
    # The first column arrives unnamed; it holds the location string.
    pops_df = pops_df.rename(columns={pops_df.columns[0]: TEMP_LOCATION_COL})

    # Keep only the 2010-2019 estimate columns.
    pops_df = pops_df.drop(["Census", "Estimates Base"], axis="columns")

    # County rows are prefixed with "."; select them and strip the prefix.
    county_mask = pops_df[TEMP_LOCATION_COL].str.startswith(".")
    pops_df = pops_df[county_mask]
    pops_df[TEMP_LOCATION_COL] = pops_df[TEMP_LOCATION_COL].str[1:]

    # Split "county name, state name" into two separate columns.
    split_loc = pops_df.pop(TEMP_LOCATION_COL).str.split(",", expand=True)
    pops_df[TEMP_COUNTY_NAME_COL] = split_loc[0].str.strip()
    pops_df[TEMP_STATE_NAME_COL] = split_loc[1].str.strip()

    # Derive a "US_<abbr>" state code to group by.
    pops_df[TEMP_STATE_CODE_COL] = pops_df[TEMP_STATE_NAME_COL].apply(
        lambda state_name: "US_" + states.lookup(state_name, field="name"
                                                 ).abbr)

    # Attach county fips codes one state at a time.
    pops_df = pops_df.groupby([TEMP_STATE_CODE_COL
                               ]).apply(add_fips_to_state_df)

    # Every row must be uniquely identified by its fips.
    if not pops_df[FIPS_COL].is_unique:
        duplicate_rows = pops_df[pops_df[FIPS_COL].duplicated(keep=False)]
        raise ValueError(
            f"Dataframe contains duplicate fips:\n{duplicate_rows}", )
    pops_df = pops_df.drop(
        [TEMP_COUNTY_NAME_COL, TEMP_STATE_NAME_COL, TEMP_STATE_CODE_COL],
        axis="columns")

    # Wide year columns -> long (fips, year, population) rows.
    pops_df = pops_df.melt(id_vars=[FIPS_COL],
                           var_name=YEAR_COL,
                           value_name=POPULATION_COL)
    pops_df[YEAR_COL] = pops_df[YEAR_COL].astype(int)

    return pops_df
コード例 #17
0
ファイル: geoloc.py プロジェクト: poliquin/geoloc
def build_search(state, place):
    """Combine and normalize place name and state into search.

    Returns ``(state_abbr_or_None, search_string)``.
    """
    state, place = state.strip(), place.strip().lower()
    if state == '':
        return None, place

    place = re.sub(r'^close to\s*', '', place)

    state_rec = states.lookup(state)
    if state_rec is None:
        # ROBUSTNESS FIX: states.lookup returns None for unrecognized
        # values, which previously crashed with AttributeError below.
        # Treat it like the missing-state case.
        return None, place

    if place.endswith(state_rec.name.lower()):
        # search string contains full state name already
        return state_rec.abbr, place
    elif place.endswith(', ' + state_rec.abbr.lower()):
        # search string contains state abbreviation already
        return state_rec.abbr, place
    else:
        # add state abbreviation to search string
        place = place.strip(',')
        return state_rec.abbr, place + ', ' + state_rec.abbr.lower()
コード例 #18
0
ファイル: __init__.py プロジェクト: chrisroat/perfectunion
def data():
    """Flask endpoint: return state/district info (and one random comment,
    if any) for the `fips` query parameter as JSON."""
    fips = flask.request.args.get('fips')
    if not fips:
        flask.abort(400)

    # First two digits identify the state, the rest the district.
    fips = fips.zfill(4)
    state_fips, district_fips = fips[:2], fips[2:]

    state = states.lookup(state_fips)
    result = {
        'state_name': state.name,
        'state_abbr': state.abbr,
        'district': district_name(district_fips)
    }

    # Pick one random matching comment.
    comment_data = (CommentData.query
                    .filter(CommentData.state_fips == state_fips)
                    .filter(CommentData.district_fips == district_fips)
                    .order_by(func.random())  # NOTE: may be slow for large tables
                    .first())

    if comment_data:
        result.update({
            'fcc_link':
            'https://www.fcc.gov/ecfs/filing/{}'.format(comment_data.id),
            'name':
            comment_data.name.title(),
            'city':
            comment_data.city.title(),
            'comment':
            comment_data.comment.replace('\n', '<br>'),
        })

    return flask.jsonify(result)
コード例 #19
0
    def __init__(self, census_json_data):
        """Extract tract identifiers and per-race population figures from
        one census JSON record."""
        self.state = states.lookup(census_json_data['state'])
        self.county = census_json_data['county']
        self.tract = census_json_data['tract']

        total_name = (CensusTractRacePopulation.CENSUS_VARIABLE_TOTAL_POPULATION +
                      CensusTractRacePopulation.VARIABLE_SUFFIX_ESTIMATE)
        self.population_total_est = int(census_json_data[total_name])

        self.population_by_race_est = {}
        self.population_by_race_pctg = {}

        for prefix in CensusTractRacePopulation.get_all_races():
            est_name = prefix + CensusTractRacePopulation.VARIABLE_SUFFIX_ESTIMATE
            pctg_name = prefix + CensusTractRacePopulation.VARIABLE_SUFFIX_ESTIMATE_PERCENT

            self.population_by_race_est[prefix] = int(census_json_data[est_name])

            # Clamp negative percentages to 0.0 (mirrors original behavior;
            # presumably negative values are API sentinels — TODO confirm).
            pctg = census_json_data[pctg_name]
            self.population_by_race_pctg[prefix] = pctg if pctg > 0.0 else 0.0
コード例 #20
0
def make_division_name(state, district):
    """Return the display name for a state's congressional district."""
    if state in AT_LARGE_DISTRICTS and district == 1:
        # At-large: the single district goes by the bare state name.
        return states.lookup(state).name
    template = "{state}'s {district} congressional district"
    return template.format(state=states.lookup(state).name,
                           district=ordinalize(district))
コード例 #21
0
ファイル: __init__.py プロジェクト: chrisroat/perfectunion
 def title(state_fips, district_fips, count):
     """Build a plot title of the form "<State>'s <District> District"
     followed by the comment count on a second line."""
     state_name = states.lookup(state_fips).name
     district = district_name(district_fips)
     return '%s\'s %s District\n%d comments' % (state_name, district, count)
コード例 #22
0
    districtGeometries = EsriDumper(
        url='https://tigerweb.geo.census.gov/arcgis/rest/services/Generalized_ACS2017/Legislative/MapServer/5',
        extra_query_args={'where': 'STATE=\'{0}\''.format(stateFIPSCode)})
    # https://github.com/openaddresses/pyesridump

    existingDistricts = []
    for districtGeometry in districtGeometries:
        geoJSONGeometry = districtGeometry['geometry']
        districtNumber = districtGeometry['properties']['BASENAME']
        existingDistrict = ExistingDistrict(districtNumber=districtNumber, geoJSONGeometry=geoJSONGeometry)
        existingDistricts.append(existingDistrict)

    return existingDistricts


# --- Script driver ---------------------------------------------------------
# Pull the current federal congressional district geometries for one state
# and persist them via the project's save helpers.
stateAbbreviation = 'MI'
stateInfo = states.lookup(stateAbbreviation)
censusYear = 2010
descriptionToWorkWith = 'All'

allCongressionalDistrictGeosInState = getAllGeoDataForFederalCongressionalDistricts(stateFIPSCode=stateInfo.fips)
# save county data to file
saveDataToFileWithDescription(data=allCongressionalDistrictGeosInState,
                              censusYear=censusYear,
                              stateName=stateInfo.name,
                              descriptionOfInfo='{0}CurrentFederalCongressionalDistricts'.format(descriptionToWorkWith))
saveGeoJSONToDirectoryWithDescription(geographyList=allCongressionalDistrictGeosInState,
                                      censusYear=censusYear,
                                      stateName=stateInfo.name,
                                      descriptionOfInfo='CurrentFederalCongressionalDistricts')
コード例 #23
0
# Script fragment: resolve each CSV row's state/zip into
# (state_fips, district_fips). Counters track rows that cannot be resolved.
zero_zip = 0
no_fips = 0
bad_state = 0

with open(infile) as fin, open(outfile, 'w') as fout:
  reader = csv.DictReader(fin)
  writer = csv.writer(fout)

  num_rows = 0
  for row in reader:
    num_rows += 1
    # Progress indicator every 20k rows.
    if not num_rows % 20000:
      print(num_rows)

    state = row['state']
    state_lookup = states.lookup(state)

    # These states (plus DC/PR) have a single at-large district, so the
    # district fips is always '00' and no zip lookup is needed.
    if state_lookup in [states.AK, states.DC, states.DE, states.MT, states.ND, states.PR, states.SD, states.VT, states.WY]:
      state_fips, district_fips = state_lookup.fips, '00'
    else:
      zip_code = row['zip_code'].zfill(5)
      if zip_code == '00000':
        zero_zip += 1
        continue

      fips = ZIP2FIPS.get(zip_code)
      if fips:
        state_fips, district_fips = fips
      else:
        no_fips += 1
        continue
    # NOTE(review): this chunk appears truncated — `writer` and `bad_state`
    # are set up but never used in the visible code; confirm against the
    # full file.
コード例 #24
0
def chooseState():
    """Interactive entry point: run districting for every state using the
    2010 House apportionments, or for a single user-chosen state.

    NOTE: Python 2 only (print statement, raw_input). Relies on
    module-level `debugMode` and `processState`.
    """

    if debugMode:
        processState('NH', 5)  # Testing mode does NH with 5 districts

    else:

        # User input: choose to run all states or custom single state
        modeChoice = raw_input(
            'How would you like to run?\n1) Run all states using 2010 House apportionments\n2) Choose a single state\n'
        )

        # Run all states mode
        if modeChoice == '1':

            # 2010 apportionments of US House districts
            districtCounts = (('AL', 7), ('AK', 1), ('AZ', 9), ('AR', 4),
                              ('CA', 53), ('CO', 7), ('CT', 5), ('DE', 1),
                              ('FL', 27), ('GA', 14), ('HI', 2), ('ID', 2),
                              ('IL', 18), ('IN', 9), ('IA', 4), ('KS', 4),
                              ('KY', 6), ('LA', 6), ('ME', 2), ('MD', 8),
                              ('MA', 9), ('MI', 14), ('MN', 8), ('MS', 4),
                              ('MO', 8), ('MT', 1), ('NE', 3), ('NV',
                                                                4), ('NH', 2),
                              ('NJ', 12), ('NM', 3), ('NY', 27), ('NC', 13),
                              ('ND', 1), ('OH', 16), ('OK', 5), ('OR', 5),
                              ('PA', 18), ('RI', 2), ('SC', 7), ('SD',
                                                                 1), ('TN', 9),
                              ('TX', 36), ('UT', 4), ('VT', 1), ('VA', 11),
                              ('WA', 10), ('WV', 3), ('WI', 8), ('WY', 1))

            # Run processState on each state, skipping states with 1 district
            for state in districtCounts:
                if state[1] > 1:
                    processState(*state)
                else:
                    print "Skipping single-district state"

        # Run single state mode
        elif modeChoice == '2':

            # User input: state to process
            state = raw_input(
                'Which state would you like to process? (Two letter abbreviation) '
            )
            if not states.lookup(state):
                print "Invalid state entered!"
                exit()

            # User input: number of districts to create
            maxDistricts = raw_input('Number of districts to create? ')
            try:
                int(maxDistricts)
            except ValueError:
                print "Invalid number of districts chosen!"
                exit()
            else:
                maxDistricts = int(maxDistricts)

            # Run processState on selected state with selected number of districts
            processState(state, maxDistricts)

        else:
            print "Invalid run mode chosen!"
            exit()
コード例 #25
0
def processState(state, maxDistricts):
    """Greedy density-based districting for one state.

    Downloads the state's tract geometry and gazetteer files if missing,
    builds an adjacency matrix, then grows `maxDistricts` districts by
    repeatedly annexing the densest unassigned neighboring tract, and
    finally plots a choropleth of the assignment.

    NOTE: Python 2 only (print statements, urllib.urlretrieve). Relies on
    module-level config: cellularUnit, contiguityType, holeFiller.
    """
    def getNeighbors(d):
        # Unassigned neighbors of every tract currently in district list `d`.
        allNeighbors = [adjacencyMatrix.neighbors[x] for x in d]
        allNeighbors = [
            m for n in allNeighbors for m in n if m not in assignedList
        ]
        return list(set(allNeighbors))

    fips = states.lookup(state).fips
    print "Beginning districting on %s (FIPS %s) with %s districts" % (
        state, fips, maxDistricts)

    # Check if geometry file exists; download if not
    geomDir = 'data-raw/%s/geometry/' % cellularUnit
    geomFile = ('%stl_2010_%s_tract10.shp') % (geomDir, fips)

    if os.path.isfile(geomFile):
        print "Found geometry file"
    else:
        if not os.path.exists(geomDir): os.makedirs(geomDir)
        print "Acquiring geometry file from Census"
        urllib.urlretrieve(
            states.lookup(state).shapefile_urls('tract'),
            ('%s%s.zip') % (geomDir, state))
        with zipfile.ZipFile(('%s%s.zip') % (geomDir, state)) as zip:
            zip.extractall(geomDir)
        os.remove(('%s%s.zip') % (geomDir, state))

    # Check if gazeteer file exists; download if not
    gazDir = 'data-raw/%s/gazetteer/' % cellularUnit
    gazFile = ('%scensus_tracts_list_%s.txt') % (gazDir, fips)

    if os.path.isfile(gazFile):
        print "Found gazetteer file"
    else:
        if not os.path.exists(gazDir): os.makedirs(gazDir)
        print "Acquiring gazeteer file from Census"
        urllib.urlretrieve((
            'http://www2.census.gov/geo/docs/maps-data/data/gazetteer/census_tracts_list_%s.txt'
        ) % (fips), ('%scensus_tracts_list_%s.txt') % (gazDir, fips))

    # Read in the geometry file with GeoPandas
    try:
        geometry = gpd.read_file(geomFile)
        geometry = geometry[[
            'GEOID10', 'geometry'
        ]]  # Lose everything except FIPS code and geometry
        print "Successfully read geometry file"
    except:
        # NOTE(review): bare except hides the real error; consider narrowing.
        print "Problem reading geometry file"
        exit()

    # Read in the gazeteer file with Pandas
    try:
        gazetteer = pd.read_table(gazFile, dtype={'GEOID': 'object'})
        gazetteer = gazetteer[[
            'GEOID', 'POP10', 'ALAND'
        ]]  # Lose everything except FIPS code, pop, land area
        gazetteer['density'] = gazetteer['POP10'] / gazetteer[
            'ALAND']  # Compute density across all tracts
        print "Successfully read gazetteer file"
    except:
        print "Problem reading gazetteer file"
        exit()

    # Create a joined data frame
    dataFrame = geometry.merge(gazetteer, left_on='GEOID10', right_on='GEOID')
    dataFrame['district'] = 0  # New blank variable for district assignment

    if holeFiller:
        fullShape = dataFrame.unary_union

    # Create an adjanceny matrix using pysal
    if contiguityType == 'rook':
        adjacencyFunction = Contiguity.Rook.from_dataframe
    elif contiguityType == 'queen':
        adjacencyFunction = Contiguity.Queen.from_dataframe
    else:
        print 'Invalid contiguity type set!'
        exit()

    adjacencyMatrix = adjacencyFunction(geometry)
    print "Adjacency matrix built"

    popThreshold = dataFrame['POP10'].sum(
    ) / maxDistricts  # How many people should be in each district

    assignedList = []  # List to hold indices all assigned tracts, for speed

    for d in range(1, maxDistricts + 1):

        districtPop = 0
        districtMembers = [
        ]  # List to hold indices of tracts assigned to this district, for speed
        seed = dataFrame[dataFrame['district'] ==
                         0]['density'].idxmax()  # Find the densest unassigned

        districtPop = districtPop + dataFrame['POP10'][seed]
        # NOTE(review): DataFrame.set_value was removed in pandas 1.0;
        # migrating this script to Python 3 would also require .at[].
        dataFrame.set_value(seed, 'district', d)
        districtMembers.append(seed)
        assignedList.append(seed)

        print "Beginning district %d, seeding with %s %s, running population %d" % (
            d, cellularUnit, dataFrame['GEOID10'][seed], districtPop)

        # Grow the district until it reaches the target population.
        while districtPop < popThreshold:
            possibleNeighbors = getNeighbors(districtMembers)
            if len(possibleNeighbors) == 0:
                print "No possible neighbors to add!"
                break
            bestNeighbor = dataFrame.iloc[possibleNeighbors]['density'].idxmax(
            )

            districtPop = districtPop + dataFrame['POP10'][bestNeighbor]
            dataFrame.set_value(bestNeighbor, 'district', d)
            districtMembers.append(bestNeighbor)
            assignedList.append(bestNeighbor)

            print "Adding %s, running population %d" % (
                dataFrame['GEOID10'][bestNeighbor], districtPop)

            if holeFiller:
                fullShape = fullShape.difference(
                    dataFrame.iloc[bestNeighbor].geometry)
                if fullShape.geom_type == 'MultiPolygon':
                    print "A hole or exclave has been created!"

                    for part in fullShape:

                        partTracts = dataFrame[dataFrame.geometry.within(
                            part)].index.tolist()

                        partPop = dataFrame.iloc[partTracts].POP10.sum()

                        # Absorb the hole/exclave if it still fits within
                        # this district's population budget.
                        if partPop < popThreshold - districtPop:
                            dataFrame.loc[partTracts, "district"] = d
                            districtPop = districtPop + partPop
                            assignedList.extend(partTracts)
                            districtMembers.extend(partTracts)

                            fullShape = fullShape.difference(part)

                            print "Filled a hole or exclave"

    # Build a choropleth map
    p = dataFrame.plot(column='district', categorical=True, legend=True)
    plt.show()
コード例 #26
0
 def label(self):
     """Return the human-readable label for this state via the `us` lookup."""
     resolved = states.lookup(self.name)
     return str(resolved)
コード例 #27
0
ファイル: apis.py プロジェクト: CalebAtHeadstorm/nightingale
def get_state_abbr(state_name):
    """Return the USPS abbreviation for *state_name*.

    Raises:
        NameError: if ``us.states.lookup`` finds no matching state.
    """
    match = states.lookup(state_name)
    # Guard clause: fail fast when the lookup yields nothing.
    if match is None:
        raise NameError(f'No state found for {state_name}.')
    return match.abbr
コード例 #28
0
## Import the Census API client
from census import Census
## Import library to decode FIPS codes to place names
from us import states

## Paste your API key in the line below
api_key = "PASTE_YOUR_API_KEY_HERE"

c = Census(api_key)
## ACS 5-year query for one variable, restricted to Maryland by FIPS code.
## NOTE(review): B25034_010E's meaning should be confirmed against the ACS
## variable dictionary before relying on it.
c.acs5.get(('NAME', 'B25034_010E'), {'for': 'state:{}'.format(states.MD.fips)})

## FIPS 36 = New York
print(states.lookup('36').abbr)
print(c.acs5.tables())
コード例 #29
0
class PhiladelphiaVaccine(TableauDashboard):
    """Scrape COVID-19 vaccination counts for Philadelphia County from the
    city's public Tableau dashboard.

    Fetches the "Residents Percentage New" and "Residents Percentage Full"
    worksheets (dose-initiated vs. fully-vaccinated residents) and normalizes
    them into the project's long data format.
    """

    # FIPS code for Pennsylvania, resolved by name via the `us` package.
    state_fips = int(states.lookup("Pennsylvania").fips)
    has_location = True
    location_type = "county"
    provider = "county"
    source = (
        "https://www.phila.gov/programs/coronavirus-disease-2019-covid-19/data/vaccine/"
    )
    source_name = "Philadelphia Department of Public Health"
    baseurl = "https://healthviz.phila.gov/t/PublicHealth/"
    viewPath = "COVIDVaccineDashboard/COVID_Vaccine"
    # Worksheet-name template; dose_type is "New" or "Full" (see fetch()).
    data_tableau_table = "Residents Percentage {dose_type}"
    # Dashboard measure label -> standard CMU variable.
    # NOTE: the trailing space in the first key matches the dashboard's own
    # label exactly (it is repeated in normalize()'s query) — do not "fix" it.
    variables = {
        "Residents Receiving At Least 1 Dose* ":
        variables.INITIATING_VACCINATIONS_ALL,
        "Fully Vaccinated Residents*": variables.FULLY_VACCINATED_ALL,
    }

    def fetch(self) -> dict:
        """Return a dict mapping dose type ("New"/"Full") to its worksheet.

        NOTE(review): return annotation corrected from ``pd.DataFrame`` —
        this method builds and returns a dict of DataFrames, one per dose
        type, which is what normalize() consumes.
        """
        # create a dict of the 2 dose type tables
        # which are titled "Residents Percentage New" and "... Full"
        return {
            dose_type: self.get_tableau_view(
                dose_type=dose_type)[self.data_tableau_table.format(
                    dose_type=dose_type)]
            for dose_type in ["New", "Full"]
        }

    def normalize(self, data: dict) -> pd.DataFrame:
        """Reshape the raw worksheets from fetch() into the standard format.

        Args:
            data: dict of dose type ("New"/"Full") -> raw worksheet DataFrame
                (annotation corrected from ``pd.DataFrame``; see fetch()).

        Returns:
            Long-format DataFrame with CMU variable columns attached.
        """
        dataframes = []
        for dose_type in ["New", "Full"]:
            # Keep only the measure name/value columns, restrict to the two
            # measures we track, coerce "1,234"-style strings to numbers, and
            # tag every row with location 42101 (Philadelphia County's FIPS).
            dose_data = (data[dose_type].rename(
                columns={
                    "Measure Values-alias": "value",
                    "Measure Names-alias": "variable",
                }
            ).loc[:, ["value", "variable"]].query(
                "variable in"
                "['Residents Receiving At Least 1 Dose* ', 'Fully Vaccinated Residents*']"
            ).assign(
                location=42101,
                value=lambda x: pd.to_numeric(x["value"].str.replace(",", "")),
                vintage=self._retrieve_vintage(),
            ).pipe(
                self._rename_or_add_date_and_location,
                location_column="location",
                timezone="US/Eastern",
            ))
            dataframes.append(dose_data)

        data = (self.extract_CMU(
            df=pd.concat(dataframes), cmu=self.variables).drop(
                columns={"variable"}).reset_index(drop=True))
        # break scraper if both init and completed variables are not included in data
        vars = {"total_vaccine_initiated", "total_vaccine_completed"}
        assert vars <= set(data["category"])
        return data

    # could not find a way to select the "Demographics New" dashboard tab in the usual manner,
    # so edit request body to manually select Demographic tab/sheets
    # this is the default function with only form_data["sheet_id"] altered
    def get_tableau_view(self, dose_type, url=None):
        """Bootstrap a Tableau session and scrape every visible worksheet.

        Per the comment above, this is the default base implementation with
        only ``form_data["sheet_id"]`` altered, forcing the
        "Demographics {dose_type}" sheet that normal navigation cannot reach.
        The ``url`` parameter is unused here — presumably kept for signature
        compatibility with the base class; TODO confirm.

        Returns:
            dict of worksheet name -> DataFrame of that sheet's columns.
        """
        def onAlias(it, value, cstring):
            # Non-negative indices index the column's own value list; negative
            # ones index (1-based, from the end of the sign) into the shared
            # cstring data dictionary.
            return value[it] if (it >= 0) else cstring["dataValues"][abs(it) -
                                                                     1]

        req = requests_retry_session()
        fullURL = self.baseurl + "/views/" + self.viewPath
        reqg = req.get(
            fullURL,
            params={
                ":language": "en",
                ":display_count": "y",
                ":origin": "viz_share_link",
                ":embed": "y",
                ":showVizHome": "n",
                ":jsdebug": "y",
                ":apiID": "host4",
                "#navType": "1",
                "navSrc": "Parse",
            },
            headers={"Accept": "text/javascript"},
        )
        # The embedded <textarea id="tsConfigContainer"> holds the JSON session
        # config needed to build the bootstrapSession data URL.
        soup = BeautifulSoup(reqg.text, "html.parser")
        tableauTag = soup.find("textarea", {"id": "tsConfigContainer"})
        tableauData = json.loads(tableauTag.text)
        parsed_url = urllib.parse.urlparse(fullURL)
        dataUrl = f'{parsed_url.scheme}://{parsed_url.hostname}{tableauData["vizql_root"]}/bootstrapSession/sessions/{tableauData["sessionid"]}'

        # copy over some additional headers from tableauData
        form_data = {}
        form_map = {
            "sheetId": "sheet_id",
            "showParams": "showParams",
            "stickySessionKey": "stickySessionKey",
        }
        for k, v in form_map.items():
            if k in tableauData:
                form_data[v] = tableauData[k]

        # set sheet manually to access the subsheets we need
        form_data["sheet_id"] = f"Demographics {dose_type}"
        resp = req.post(
            dataUrl,
            data=form_data,
            headers={"Accept": "text/javascript"},
        )
        # Parse the response.
        # The response contains multiple chunks of the form
        # `<size>;<json>` where `<size>` is the number of bytes in `<json>`
        resp_text = resp.text
        data = []
        while len(resp_text) != 0:
            size, rest = resp_text.split(";", 1)
            chunck = json.loads(rest[:int(size)])
            data.append(chunck)
            resp_text = rest[int(size):]

        # The following section (to the end of the method) uses code from
        # https://stackoverflow.com/questions/64094560/how-do-i-scrape-tableau-data-from-website-into-r
        presModel = data[1]["secondaryInfo"]["presModelMap"]
        metricInfo = presModel["vizData"]["presModelHolder"]
        metricInfo = metricInfo["genPresModelMapPresModel"]["presModelMap"]
        data = presModel["dataDictionary"]["presModelHolder"]
        data = data["genDataDictionaryPresModel"]["dataSegments"]["0"][
            "dataColumns"]

        scrapedData = {}

        # Rebuild each worksheet: resolve every column's value/alias indices
        # against the typed data dictionary, then assemble a DataFrame.
        for metric in metricInfo:
            metricsDict = metricInfo[metric]["presModelHolder"][
                "genVizDataPresModel"]
            columnsData = metricsDict["paneColumnsData"]

            result = [{
                "fieldCaption":
                t.get("fieldCaption", ""),
                "valueIndices":
                columnsData["paneColumnsList"][t["paneIndices"][0]]
                ["vizPaneColumns"][t["columnIndices"][0]]["valueIndices"],
                "aliasIndices":
                columnsData["paneColumnsList"][t["paneIndices"][0]]
                ["vizPaneColumns"][t["columnIndices"][0]]["aliasIndices"],
                "dataType":
                t.get("dataType"),
                "paneIndices":
                t["paneIndices"][0],
                "columnIndices":
                t["columnIndices"][0],
            } for t in columnsData["vizDataColumns"] if t.get("fieldCaption")]
            frameData = {}
            cstring = [t for t in data if t["dataType"] == "cstring"][0]
            for t in data:
                for index in result:
                    if t["dataType"] == index["dataType"]:
                        if len(index["valueIndices"]) > 0:
                            frameData[f'{index["fieldCaption"]}-value'] = [
                                t["dataValues"][abs(it)]
                                for it in index["valueIndices"]
                            ]
                        if len(index["aliasIndices"]) > 0:
                            frameData[f'{index["fieldCaption"]}-alias'] = [
                                onAlias(it, t["dataValues"], cstring)
                                for it in index["aliasIndices"]
                            ]

            # Columns can have unequal lengths; building by index and
            # transposing pads the short ones with 0.
            df = pd.DataFrame.from_dict(frameData, orient="index").fillna(0).T

            scrapedData[metric] = df

        return scrapedData
コード例 #30
0
ファイル: place.py プロジェクト: districtr/districtr-api
 def lookup_state(self, data):
     """Normalize ``data["state"]`` to the state's full name.

     Mutates *data* in place and returns it.
     """
     data["state"] = states.lookup(data["state"]).name
     return data
# Read the Census API key (first line of API.txt) and build the client.
# A context manager is used so the key file is closed promptly instead of
# leaking an open handle for the life of the process, as the original
# bare open(...) did.
with open('API.txt') as _api_key_file:
    census_client = Census(_api_key_file.readline().strip())


# census package uses old endpoints for years before 2015;
# this function is monkeypatched onto the client as a workaround.
def _switch_endpoints(year):
    """Point the ACS5 client at the current api.census.gov URL layout.

    The templates keep their ``%s`` placeholders — presumably the census
    package interpolates year/dataset/variable later; TODO confirm.
    NOTE(review): the ``year`` argument is unused; the signature is kept
    unchanged for compatibility with existing callers.
    """
    base = 'https://api.census.gov/data/%s/acs/%s'
    census_client.acs5.endpoint_url = base
    census_client.acs5.definitions_url = base + '/variables.json'
    census_client.acs5.definition_url = base + '/variables/%s.json'
    census_client.acs5.groups_url = base + '/groups.json'


# Attach the endpoint workaround to the ACS5 client instance.
# NOTE(review): plain attribute assignment does not bind `self`, so callers
# must invoke census_client.acs5._switch_endpoints(year) with an explicit year.
census_client.acs5._switch_endpoints = _switch_endpoints

# Resolve Texas once — presumably used for FIPS-based queries further down
# the file (not visible here); TODO confirm.
state = states.lookup('Texas')

# ACS 5-year variable codes for the measures named by each constant.
total_population = 'B01001_001E'
household_income = 'B19001_001E'
median_home_value = 'B25077_001E'
median_income_value = 'B06011_001E'

# all data below is only for "in labor force"
male_below_poverty = 'B17005_004E'
unemployed_male_below_poverty = 'B17005_006E'
female_below_poverty = 'B17005_009E'
unemployed_female_below_poverty = 'B17005_011E'
male_above_poverty = 'B17005_015E'
unemployed_male_above_poverty = 'B17005_017E'
female_above_poverty = 'B17005_020E'
unemployed_female_above_poverty = 'B17005_022E'
コード例 #32
0
def fromStateToAbbr(state):
    """Return the USPS abbreviation for *state*, or NaN when lookup fails.

    Unlike a raising variant, this maps unmatched names to ``np.nan`` so it
    can be used safely in vectorized/apply-style pipelines.
    """
    match = states.lookup(state)
    return match.abbr if match is not None else np.nan