Exemple #1
0
    def _call_census(state_fip, var_ids, src, year, tabletype, api_key):
        """
        Download the requested variables for every county of a state and for
        the state as a whole.

        Args:
            state_fip: value used for the "state" level of both geographies
            var_ids: variable ids forwarded to ``censusdata.download``
            src: data source forwarded to ``censusdata.download``
            year: year forwarded to ``censusdata.download`` and echoed back
            tabletype: table type forwarded to ``censusdata.download``
            api_key: API key forwarded as the ``key`` argument

        Returns:
            tuple: ``(county_data, state_data, year)``
        """
        # Options that are identical for both requests.
        shared_options = {"key": api_key, "tabletype": tabletype}

        # All counties within the state.
        all_counties = censusdata.censusgeo(
            [("state", state_fip), ("county", "*")])
        county_data = censusdata.download(
            src, year, all_counties, var_ids, **shared_options)

        # The state itself.
        whole_state = censusdata.censusgeo([("state", state_fip)])
        state_data = censusdata.download(
            src, year, whole_state, var_ids, **shared_options)

        return county_data, state_data, year
def DownloadTable(year, state_num, fields, counties, geo="County"):
    """Download ACS 5-year estimates for a state's counties as a pandas DataFrame.

    Args:
        year (int): input year
        state_num (str): state FIPS number
        fields (list): list of field IDs for ACS data ("GEO_ID" is always
            requested in addition)
        counties (list or str): either a list of county FIPS numbers (each is
            zero-padded to three digits here) or the literal string
            "'All counties'" (inner single quotes included) to query every
            county of the state in a single request
        geo (str): Geography: County, Tract, or Block group

    Returns:
        pandas.DataFrame: indexed by "GEOID", with a leading "Geography" column
        holding the original download index, followed by every other column
        renamed to "<column>_<year>".

    Raises:
        ValueError: if ``geo`` is not one of the supported geography levels.
    """
    def GetGeoArgs(geo):
        """Return the extra geography tuples needed below the county level."""
        below_county = {
            "County": [],
            "Tract": [("tract", "*")],
            "Block group": [("block group", "*")],
        }
        if geo not in below_county:
            # Previously an unknown level surfaced as an UnboundLocalError.
            raise ValueError(
                "geo must be 'County', 'Tract' or 'Block group', got %r" %
                (geo, ))
        return below_county[geo]

    # Validate the geography once, before any request is made.
    geo_args = GetGeoArgs(geo)
    requested = ["GEO_ID"] + list(fields)

    # isinstance guard: comparing an array-like to a string is ambiguous.
    if isinstance(counties, str) and counties == "'All counties'":
        acs_df = cd.download(
            "acs5", year,
            cd.censusgeo([("state", state_num), ("county", "*")] + geo_args),
            requested)
    else:
        county_frames = [
            cd.download(
                "acs5", year,
                cd.censusgeo([("state", state_num),
                              ("county", str(county).zfill(3))] + geo_args),
                requested) for county in counties
        ]
        if county_frames:
            # DataFrame.append was removed in pandas 2.0; concatenate once.
            acs_df = pd.concat(county_frames)
        else:
            acs_df = pd.DataFrame(columns=requested)

    # Keep the download index as a regular column before re-indexing.
    acs_df["Geography"] = acs_df.index.to_series()
    acs_df = acs_df.rename(columns={"GEO_ID": "GEOID"}).set_index("GEOID")

    # Tag every data column with the year; "Geography" keeps its name.
    data_cols = [c for c in acs_df.columns if c != "Geography"]
    acs_df = acs_df.rename(
        columns={c: c + "_" + str(year) for c in data_cols})

    out_cols = ["Geography"
                ] + [c for c in acs_df.columns if c != "Geography"]
    return acs_df[out_cols]
	def test_download_acs5_2016(self):
		"""Compare 2016 ACS 5-year downloads with known values for a place, a set of counties, a block group, a metro area and a state profile."""
		year = 2016
		three_vars = ('B01001_001E', 'B01002_001E', 'B19013_001E')
		two_vars = ('B01001_001E', 'B19013_001E')

		# Place: Oakland, California.
		oakland = [('state', '06'), ('place', '53000')]
		downloaded = censusdata.download('acs5', year, censusdata.censusgeo(oakland), list(three_vars))
		expected = pd.DataFrame(
			{'B01001_001E': 412040, 'B01002_001E': 36.2, 'B19013_001E': 57778},
			[censusdata.censusgeo(oakland, 'Oakland city, California')])
		assert_frame_equal(downloaded, expected)

		# Every county in state 15, in the row order of the expected frame.
		hawaii_counties = [
			('001', 'Hawaii County, Hawaii'),
			('003', 'Honolulu County, Hawaii'),
			('005', 'Kalawao County, Hawaii'),
			('007', 'Kauai County, Hawaii'),
			('009', 'Maui County, Hawaii'),
		]
		downloaded = censusdata.download('acs5', year, censusdata.censusgeo([('state', '15'), ('county', '*')]), list(three_vars))
		expected = pd.DataFrame(
			{
				'B01001_001E': [193680, 986999, 91, 70447, 162456],
				'B01002_001E': [41.8, 37.4, 56.5, 42.0, 40.5],
				'B19013_001E': [53936, 77161, 65625, 68224, 68777],
			},
			[censusdata.censusgeo([('state', '15'), ('county', code)], label) for code, label in hawaii_counties])
		assert_frame_equal(downloaded, expected)

		# A single block group.
		block_group = [('state', '17'), ('county', '031'), ('tract', '350100'), ('block group', '2')]
		downloaded = censusdata.download('acs5', year, censusdata.censusgeo(block_group), list(two_vars))
		expected = pd.DataFrame(
			{'B01001_001E': 1374, 'B19013_001E': 44044},
			[censusdata.censusgeo(block_group, 'Block Group 2, Census Tract 3501, Cook County, Illinois')])
		assert_frame_equal(downloaded, expected)

		# A metropolitan statistical area.
		metro = [('metropolitan statistical area/micropolitan statistical area', '16980')]
		downloaded = censusdata.download('acs5', year, censusdata.censusgeo(metro), list(two_vars))
		expected = pd.DataFrame(
			{'B01001_001E': 9528396, 'B19013_001E': 63327},
			[censusdata.censusgeo(metro, 'Chicago-Naperville-Elgin, IL-IN-WI Metro Area')])
		assert_frame_equal(downloaded, expected)

		# State-level data profile table.
		california = [('state', '06')]
		downloaded = censusdata.download('acs5', year, censusdata.censusgeo(california), ['DP03_0021PE'], tabletype='profile')
		expected = pd.DataFrame({'DP03_0021PE': 5.2}, [censusdata.censusgeo(california, 'California')])
		assert_frame_equal(downloaded, expected)
	def test_download_acs5_2015(self):
		"""Compare 2015 ACS 5-year downloads with known values for a place, a set of counties, a block group, a metro area and a state profile."""
		year = 2015
		three_vars = ('B01001_001E', 'B01002_001E', 'B19013_001E')
		two_vars = ('B01001_001E', 'B19013_001E')

		# Place: Oakland, California.
		oakland = [('state', '06'), ('place', '53000')]
		downloaded = censusdata.download('acs5', year, censusdata.censusgeo(oakland), list(three_vars))
		expected = pd.DataFrame(
			{'B01001_001E': 408073, 'B01002_001E': 36.3, 'B19013_001E': 54618},
			[censusdata.censusgeo(oakland, 'Oakland city, California')])
		assert_frame_equal(downloaded, expected)

		# Every county in state 15, in the row order of the expected frame.
		hawaii_counties = [
			('001', 'Hawaii County, Hawaii'),
			('003', 'Honolulu County, Hawaii'),
			('005', 'Kalawao County, Hawaii'),
			('007', 'Kauai County, Hawaii'),
			('009', 'Maui County, Hawaii'),
		]
		downloaded = censusdata.download('acs5', year, censusdata.censusgeo([('state', '15'), ('county', '*')]), list(three_vars))
		expected = pd.DataFrame(
			{
				'B01001_001E': [191482, 984178, 85, 69691, 160863],
				'B01002_001E': [41.1, 36.9, 51.9, 41.6, 40],
				'B19013_001E': [52108, 74460, 66250, 65101, 66476],
			},
			[censusdata.censusgeo([('state', '15'), ('county', code)], label) for code, label in hawaii_counties])
		assert_frame_equal(downloaded, expected)

		# A single block group.
		block_group = [('state', '17'), ('county', '031'), ('tract', '350100'), ('block group', '2')]
		downloaded = censusdata.download('acs5', year, censusdata.censusgeo(block_group), list(two_vars))
		expected = pd.DataFrame(
			{'B01001_001E': 1293, 'B19013_001E': 49375},
			[censusdata.censusgeo(block_group, 'Block Group 2, Census Tract 3501, Cook County, Illinois')])
		assert_frame_equal(downloaded, expected)

		# A metropolitan statistical area.
		metro = [('metropolitan statistical area/micropolitan statistical area', '16980')]
		downloaded = censusdata.download('acs5', year, censusdata.censusgeo(metro), list(two_vars))
		expected = pd.DataFrame(
			{'B01001_001E': 9534008, 'B19013_001E': 61828},
			[censusdata.censusgeo(metro, 'Chicago-Naperville-Elgin, IL-IN-WI Metro Area')])
		assert_frame_equal(downloaded, expected)

		# State-level data profile table.
		california = [('state', '06')]
		downloaded = censusdata.download('acs5', year, censusdata.censusgeo(california), ['DP03_0021PE'], tabletype='profile')
		expected = pd.DataFrame({'DP03_0021PE': 5.2}, [censusdata.censusgeo(california, 'California')])
		assert_frame_equal(downloaded, expected)
	def test_download_acs5_2017(self):
		"""Compare 2017 ACS 5-year downloads with known values for a place, a set of counties, a block group, a metro area and a state profile."""
		year = 2017
		three_vars = ('B01001_001E', 'B01002_001E', 'B19013_001E')
		two_vars = ('B01001_001E', 'B19013_001E')

		# Place: Oakland, California.
		oakland = [('state', '06'), ('place', '53000')]
		downloaded = censusdata.download('acs5', year, censusdata.censusgeo(oakland), list(three_vars))
		expected = pd.DataFrame(
			{'B01001_001E': 417442, 'B01002_001E': 36.4, 'B19013_001E': 63251},
			[censusdata.censusgeo(oakland, 'Oakland city, California')])
		assert_frame_equal(downloaded, expected)

		# Every county in state 15; the expected row order for this year is
		# not sorted by county code.
		hawaii_counties = [
			('001', 'Hawaii County, Hawaii'),
			('003', 'Honolulu County, Hawaii'),
			('009', 'Maui County, Hawaii'),
			('007', 'Kauai County, Hawaii'),
			('005', 'Kalawao County, Hawaii'),
		]
		downloaded = censusdata.download('acs5', year, censusdata.censusgeo([('state', '15'), ('county', '*')]), list(three_vars))
		expected = pd.DataFrame(
			{
				'B01001_001E': [196325, 990060, 164094, 71093, 86],
				'B01002_001E': [42.1, 37.6, 40.9, 42.1, 57.6],
				'B19013_001E': [56395, 80078, 72762, 72330, 61750],
			},
			[censusdata.censusgeo([('state', '15'), ('county', code)], label) for code, label in hawaii_counties])
		assert_frame_equal(downloaded, expected)

		# A single block group.
		block_group = [('state', '17'), ('county', '031'), ('tract', '350100'), ('block group', '2')]
		downloaded = censusdata.download('acs5', year, censusdata.censusgeo(block_group), list(two_vars))
		expected = pd.DataFrame(
			{'B01001_001E': 1476, 'B19013_001E': 41638},
			[censusdata.censusgeo(block_group, 'Block Group 2, Census Tract 3501, Cook County, Illinois')])
		assert_frame_equal(downloaded, expected)

		# A metropolitan statistical area.
		metro = [('metropolitan statistical area/micropolitan statistical area', '16980')]
		downloaded = censusdata.download('acs5', year, censusdata.censusgeo(metro), list(two_vars))
		expected = pd.DataFrame(
			{'B01001_001E': 9549229, 'B19013_001E': 65757},
			[censusdata.censusgeo(metro, 'Chicago-Naperville-Elgin, IL-IN-WI Metro Area')])
		assert_frame_equal(downloaded, expected)

		# State-level data profile table.
		california = [('state', '06')]
		downloaded = censusdata.download('acs5', year, censusdata.censusgeo(california), ['DP03_0021PE'], tabletype='profile')
		expected = pd.DataFrame({'DP03_0021PE': 5.2}, [censusdata.censusgeo(california, 'California')])
		assert_frame_equal(downloaded, expected)
def create_dataframe():
    """Download 2015 ACS 5-year workforce and commuting counts for every
    county of state FIPS 36 (New York) and derive two ratio columns.

    NOTE(review): as visible here the function builds ``new_indices`` and
    ``county_names`` but neither uses nor returns them, and there is no
    return statement -- confirm whether the tail of the function is missing.
    """
    # (census variable id, descriptive column name), in download order.
    # The ids refer to specific census tables that were looked up beforehand.
    variables = [
        ('B23025_002E', 'total_workforce'),
        ('B23025_005E', 'total_unemployed'),
        ('B08301_001E', 'total_transpo'),
        ('B08301_003E', 'drove_alone'),
        ('B08301_004E', 'carpooled'),
        ('B08301_010E', 'total_public_transpo'),
        ('B08301_011E', 'bus'),
        ('B08301_012E', 'streetcar'),
        ('B08301_013E', 'subway'),
        ('B08301_014E', 'railroad'),
        ('B08301_015E', 'ferry'),
        ('B08301_016E', 'taxi'),
        ('B08301_017E', 'motorcycle'),
        ('B08301_018E', 'bike'),
        ('B08301_019E', 'walked'),
        ('B08301_020E', 'other'),
        ('B08301_021E', 'wfh'),
    ]
    census_ids = [census_id for census_id, _ in variables]
    column_names = [label for _, label in variables]

    # '*' selects every county of the state.
    all_counties = censusdata.censusgeo([('state', '36'), ('county', '*')])
    df = pd.DataFrame(
        censusdata.download('acs5', 2015, all_counties, census_ids))
    df.columns = column_names

    def unemployed_share(row):
        # Fraction of the workforce that is unemployed.
        return row['total_unemployed'] / row['total_workforce']

    def public_transit_share(row):
        # Fraction of commuters using public transportation.
        return row['total_public_transpo'] / row['total_transpo']

    df['percent_unemployed'] = df.apply(unemployed_share, axis=1)
    df['percent_public_transpo'] = df.apply(public_transit_share, axis=1)

    geographies = df.index.tolist()
    # Concatenate the state and county codes of each geography.
    new_indices = [g.geo[0][1] + g.geo[1][1] for g in geographies]
    # The part of the geography name before the first comma.
    county_names = [g.name.split(',')[0] for g in geographies]
Exemple #7
0
def download_data(vars):
    """
    Download the selected variables for every geography of the configured
    level and return them as one table.

    The geography level name, data source, year, table type and API key are
    not parameters: they are read from names defined outside this function
    (``geo_level_name``, ``data_source``, ``year``, ``tabletype``,
    ``API_KEY``).

    :param:
        vars (string): a file name that holds 3-tuples of the variables,
            (in the format returned by censusdata.search()),
            where first is the variable id, and second is the variable header.
    :return:
        a pandas.DataFrame object
    """
    level = geoLevel(geo_level_name)
    print(f"Getting {level.name} level geographies...")
    geographies = get_censusgeos(level)
    variable_ids, headers = get_variables(vars)

    print("Downloading selected variables for these geographies...")
    # One request per geography; tqdm shows progress over the list.
    frames = [
        censusdata.download(data_source, year, geo, variable_ids,
                            tabletype=tabletype, key=API_KEY)
        for geo in tqdm(geographies)
    ]

    combined = pd.concat(frames)
    combined.columns = headers
    return fix_index(combined)
Exemple #8
0
def gather_census():
    """Download ACS 5-year block-group data for state 17, county 031 for 2013-2018.

    One request is made per year covering every variable (the variable list
    plus "GEO_ID" is well below the 50-variable request limit), instead of
    one request per variable per year followed by inner merges on
    ("GEO_ID", "Year").

    Returns:
        pandas.DataFrame: one row per block group and year, with the columns
        in the same order the merge-based version produced (first variable,
        "GEO_ID", "Year", then the remaining variables), renamed to
        descriptive labels. The first rows are also printed.
    """
    years = [2013, 2014, 2015, 2016, 2017, 2018]
    tables = ['B02001_001E', 'B01002_001E', 'B02001_002E', 'B02001_003E',
              'B19013_001E', 'B25001_001E', 'B25002_003E', 'B25018_001E',
              'B25035_001E', 'B25064_001E', 'B25010_001E', 'B23007_002E',
              'B23007_001E']
    labels = {
        "B02001_001E": "Total Population", "B01002_001E": "Median Age",
        "B02001_002E": "Total White", "B02001_003E": "Total Black",
        "B19013_001E": "Median HH Income", "B25001_001E": "Total Housing Units",
        "B25002_003E": "Total Vacant Units", "B25018_001E": "Median Number of Rooms",
        "B25035_001E": "Median Year Built", "B25064_001E": "Median Gross Rent",
        "B25010_001E": "Mean HH Size", "B23007_002E": "HH with Children",
        "B23007_001E": "Total HH",
    }
    block_groups = [("state", "17"), ("county", "031"), ("block group", "*")]

    yearly_frames = []
    for y in years:
        frame = pd.DataFrame(censusdata.download(
            "acs5", y, censusdata.censusgeo(block_groups),
            tables + ["GEO_ID"])).reset_index(drop=True)
        frame['Year'] = y
        yearly_frames.append(frame)

    # DataFrame.append was removed in pandas 2.0; concatenate once instead.
    df = pd.concat(yearly_frames, ignore_index=True)
    # Restore the column layout of the previous merge-based implementation.
    df = df[[tables[0], "GEO_ID", "Year"] + tables[1:]]
    df = df.rename(columns=labels)
    print(df.head())
    return df
def read_acs_data():
    '''
    Download six 2018 ACS 5-year variables for every state via the
    censusdata api and reduce them to a per-state summary.

    Returns a DataFrame with the columns "State", "Median Income",
    "black_pct", "white_pct" and "age" (the mean of the "White_Age" and
    "Black_Age" columns).
    '''
    census_tables = {
        'B02001_001E': 'Race Total',
        'B02001_002E': 'White',
        'B02001_003E': 'Black',
        'B19013_001E': 'Median Income',
        'B01002A_001E': "White_Age",
        'B01002B_001E': "Black_Age"
    }

    def state_label(geo):
        # Everything before the first ":" of the geography's string form.
        text = str(geo)
        return text[:text.index(":")]

    all_states = censusdata.censusgeo([('state', '*')])
    acs_df = censusdata.download('acs5', 2018, all_states,
                                 list(census_tables))
    acs_df = acs_df.rename(columns=census_tables)

    race_total = acs_df["Race Total"]
    acs_df["black_pct"] = acs_df["Black"] / race_total
    acs_df["white_pct"] = acs_df["White"] / race_total
    acs_df["age"] = (acs_df["White_Age"] + acs_df["Black_Age"]) / 2

    # Move the geography objects out of the index so they can be parsed.
    acs_df = acs_df.reset_index()
    acs_df["State"] = acs_df["index"].apply(state_label)

    summary_columns = [
        "State", "Median Income", "black_pct", "white_pct", "age"
    ]
    return acs_df[summary_columns]
def make_api_call(varlist, survey, year, geo, tabletype="detail"):
    ''' Takes list of variables, survey to query, year to query,
        and list of tuples specifying geography
        Returns: data frame with resulting data (chunks merged on the
        "index" column produced by reset_index)

        Requests are best-effort: a failing chunk is reported on stdout and
        skipped, so the result then lacks that chunk's variables.

        Raises: RuntimeError if every chunk failed and there is nothing to
        merge.
    '''

    dfs = []

    # API accepts maximum 50 variable requests at a time
    start = 0
    for stop in range(49, len(varlist) + 50, 50):
        chunk = list(islice(varlist, start, stop))
        # Advance the window whether or not the request succeeds. Previously
        # a failed chunk left the start in place, so the next request covered
        # two chunks at once and exceeded the per-request limit.
        start = stop

        try:
            dfs.append(
                census.download(survey,
                                year,
                                census.censusgeo(geo),
                                chunk,
                                tabletype=tabletype).reset_index())

        except Exception as e:
            print("API call failed")
            print(e)

    if not dfs:
        # reduce() on an empty list would raise an opaque TypeError.
        raise RuntimeError(
            "make_api_call: every API request failed; no data to return")

    return reduce(lambda x, y: pd.merge(x, y, on="index"), dfs)
def create_dataframe():
    """Build a per-county table of public-transportation use for state
    FIPS 36 from the 2015 ACS 5-year data.

    Returns a DataFrame indexed by the concatenated state and county codes,
    with the columns 'total_transpo', 'total_public_transpo',
    'percent_public_transpo' (rounded to two decimals) and 'county_name'.
    """
    # '*' selects every county of the state.
    all_counties = censusdata.censusgeo([('state', '36'), ('county', '*')])
    df = pd.DataFrame(
        censusdata.download('acs5', 2015, all_counties,
                            ['B08301_001E', 'B08301_010E']))
    df.columns = ['total_transpo', 'total_public_transpo']

    def public_transit_percent(row):
        # Share of public transportation, as a percentage.
        share = row['total_public_transpo'] / row['total_transpo']
        return round(share * 100, 2)

    df['percent_public_transpo'] = df.apply(public_transit_percent, axis=1)

    # Replace the geography-object index with plain code strings and keep
    # the part of each geography name before the first comma.
    geographies = df.index.tolist()
    fips_ids = [g.geo[0][1] + g.geo[1][1] for g in geographies]
    names = [g.name.split(',')[0] for g in geographies]

    df.index = fips_ids
    df['county_name'] = names

    return df
Exemple #12
0
def fetch_acs(year, acs_dataset, state, county, table, columns):
    """
    Internal function to hit the CENSUS api and extract a pandas DataFrame for
    the requested Table, State, County

    Args:
        year (int): year of interest
        acs_dataset (str): Census data source: 'acs1' for ACS 1-year estimates, 'acs5' for ACS 5-year estimates,
            'acs3' for ACS 3-year estimates, 'acsse' for ACS 1-year supplemental estimates, 'sf1' for SF1 data.
        state (str): state identifier, passed unchanged as the "state" geography value.
            NOTE(review): this was documented as a two letter abbreviation, but
            the Census API geography normally takes a FIPS code -- confirm with callers.
        county (str): three digit FIPS code as a string
        table (str): string code for the Census table of interest ex: "B03002"
        columns (dict): key, value pairs of Census table columns and rename
            (ex: {"002E": "Total_Non_Hisp", "012E": "Total_Hispanic"})

    Returns:
        pandas.DataFrame: Data frame with columns renamed to the values of
            ``columns``, and row index of censusgeo objects representing
            Census geographies.
    """
    # Build the full variable id -> new name mapping directly. The previous
    # version rebuilt it by splitting each variable id on "_", which picked
    # the wrong piece whenever the table code or a column key contained an
    # underscore.
    rename = {f"{table}_{suffix}": label for suffix, label in columns.items()}
    variables = list(rename)
    # Set the geography object
    geo = census.censusgeo([("state", state), ("county", county)])
    # Fetch data
    data = census.download(src=acs_dataset, year=year, geo=geo, var=variables)
    # Rename columns
    return data.rename(columns=rename)
def get_data(fips_dict, table_code, start_shell, end_shell, name):
    """Download 2013 ACS 5-year data for each county in ``fips_dict`` and
    write the combined table to ``raw_<name>.csv``.

    Args:
        fips_dict (dict): values are mappings with 'state' and 'county'
            entries that identify one county each.
        table_code, start_shell, end_shell: forwarded to ``createShells`` to
            build the list of variables to request.
        name (str): suffix used in the output file name.
    """
    shells = createShells(table_code, start_shell, end_shell)
    county_frames = [
        censusdata.download(
            'acs5', 2013,
            censusdata.censusgeo([('state', entry['state']),
                                  ('county', entry['county'])]), shells)
        for entry in fips_dict.values()
    ]
    # DataFrame.append was removed in pandas 2.0; concatenate once instead.
    # An empty input still produces an (empty) output file, as before.
    raw_data = pd.concat(county_frames) if county_frames else pd.DataFrame()
    raw_data.to_csv('raw_' + name + '.csv')
Exemple #14
0
def make_dists(acs_tables):
    """Download the given ACS 5-year (2015) variables for every congressional
    district of the 50 state FIPS codes listed below.

    Args:
        acs_tables (dict): its values are the variable ids to request.

    Returns:
        pandas.DataFrame: the per-state downloads concatenated in FIPS order.
    """
    tables = list(acs_tables.values())

    #censusdata.printtable(censusdata.censustable('acs5', '2015', 'B16009'))

    state_FIPS = [
        1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 15, 16, 17, 18, 19, 20, 21, 22, 23,
        24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
        42, 44, 45, 46, 47, 48, 49, 50, 51, 53, 54, 55, 56
    ]

    state_df = [
        censusdata.download(
            'acs5', '2015',
            # State FIPS codes are two-digit strings ("01", not "1"), so the
            # single-digit codes must be zero-padded.
            censusdata.censusgeo([('state', str(st).zfill(2)),
                                  ('congressional district', '*')]),
            tables) for st in state_FIPS
    ]

    # Concatenate whatever was downloaded rather than assuming exactly 50.
    return pd.concat(state_df)
def main():
    """Walk through a CensusData example: search for variables, print two
    tables, download block-group data for state 17 / county 031, derive two
    percentage columns, print summaries and export the result to CSV.
    """
    pd.set_option('display.expand_frame_repr', False)
    pd.set_option('display.precision', 2)

    # To download we must first identify the tables containing the variables
    # of interest. Use the ACS documentation, in particular Table Shells
    # (https://www.census.gov/programs-surveys/acs/technical-documentation/summary-file-documentation.html).
    # censusdata.search finds given text patterns; the slices limit the
    # output to the relevant variables (the results are not kept).
    censusdata.search('acs5', 2015, 'label', 'unemploy')[160:170]
    censusdata.search('acs5', 2015, 'concept', 'education')[730:790]

    # censusdata.printtable shows the variables of a table.
    for table_id in ('B23025', 'B15003'):
        censusdata.printtable(censusdata.censustable('acs5', 2015, table_id))

    # Next, identify the geographies: block groups in Cook County, IL.
    #   1. state FIPS, found with
    #      censusdata.geographies(censusdata.censusgeo([('state','*')]), 'acs5', 2015)  -> IL is 17
    #   2. county id, found with
    #      censusdata.geographies(censusdata.censusgeo([('state','17'), ('county', '*')]), 'acs5', 2015)  -> Cook is 031
    block_groups = censusdata.censusgeo(
        [('state', '17'), ('county', '031'), ('block group', '*')])

    # B15003_001E .. B15003_016E, in that order.
    education_vars = ['B15003_%03dE' % n for n in range(1, 17)]
    requested = ['B23025_003E', 'B23025_005E'] + education_vars

    # Download the data and compute the percent unemployed and the percent
    # with no high school degree.
    cook_cnty = censusdata.download('acs5', 2015, block_groups, requested)
    cook_cnty['percent_unemployed'] = (
        cook_cnty['B23025_005E'] / cook_cnty['B23025_003E'] * 100)

    # Add up B15003_002E .. B15003_016E, then divide by B15003_001E.
    no_hs_columns = education_vars[1:]
    no_hs_total = cook_cnty[no_hs_columns[0]]
    for column in no_hs_columns[1:]:
        no_hs_total = no_hs_total + cook_cnty[column]
    cook_cnty['percent_nohs'] = no_hs_total / cook_cnty['B15003_001E'] * 100

    cook_cnty = cook_cnty[['percent_unemployed', 'percent_nohs']]
    print(cook_cnty.describe())

    # The 30 block groups with the highest rate of unemployment, together
    # with their percent with no high school degree.
    ranked = cook_cnty.sort_values('percent_unemployed', ascending=False)
    print(ranked.head(30))

    # Correlation between the two columns.
    print(cook_cnty.corr())

    censusdata.exportcsv('cook_data.csv', cook_cnty)
 def test_download_acs1_2015(self):
     """Check the 2015 ACS 1-year value of B19013_001E for state 17."""
     illinois = [('state', '17')]
     downloaded = censusdata.download(
         'acs1', 2015, censusdata.censusgeo(illinois), ['B19013_001E'])
     expected = pd.DataFrame(
         {'B19013_001E': 59588},
         [censusdata.censusgeo(illinois, 'Illinois')])
     assert_frame_equal(downloaded, expected)
 def test_download_sf1_2010(self):
     """Check the 2010 SF1 value of P001001 for place 14000 in state 17."""
     place = [('state', '17'), ('place', '14000')]
     downloaded = censusdata.download(
         'sf1', 2010, censusdata.censusgeo(place), ['P001001'])
     # The expected index entry is built without a geography name.
     expected = pd.DataFrame(
         {'P001001': 2695598}, [censusdata.censusgeo(place)])
     assert_frame_equal(downloaded, expected)
def create_census_datapull(table_name, items, year):
    """Download ACS 1-year data for every county for the given table items.

    Each item is converted to a string and turned into a variable id by
    ``create_census_value(table_name, item)``; the resulting list is
    requested for the given ``year``.

    Returns whatever ``censusdata.download`` returns for that request.
    """
    variable_ids = [
        create_census_value(table_name, str(item)) for item in items
    ]
    every_county = censusdata.censusgeo([('county', '*')])
    return censusdata.download('acs1', year, every_county, variable_ids)
 def test_download_acsse(self):
     """Check K202801_006E for state 17 in the ACS supplemental estimates, 2014-2017."""
     nocomputer = {2014: 731135, 2015: 658047, 2016: 522736, 2017: 464053}
     illinois = [('state', '17')]
     # Sorted keys give the years 2014 through 2017 in ascending order.
     for year in sorted(nocomputer):
         downloaded = censusdata.download(
             'acsse', year, censusdata.censusgeo(illinois), ['K202801_006E'])
         expected = pd.DataFrame(
             {'K202801_006E': nocomputer[year]},
             [censusdata.censusgeo(illinois, 'Illinois')])
         assert_frame_equal(downloaded, expected)
 def test_download_acs3_detail(self):
     """Check B19013_001E for state 17 in the ACS 3-year detail tables."""
     medhhinc = {2012: 55231, 2013: 55799}
     illinois = [('state', '17')]
     for year, expected_value in medhhinc.items():
         downloaded = censusdata.download(
             'acs3', year, censusdata.censusgeo(illinois), ['B19013_001E'])
         expected = pd.DataFrame(
             {'B19013_001E': expected_value},
             [censusdata.censusgeo(illinois, 'Illinois')])
         assert_frame_equal(downloaded, expected)
 def test_download_acs1_201214(self):
     """Check B19013_001E for state 17 in the ACS 1-year data, 2012-2014."""
     medhhinc = {2012: 55137, 2013: 56210, 2014: 57444}
     illinois = [('state', '17')]
     # Sorted keys give the years 2012 through 2014 in ascending order.
     for year in sorted(medhhinc):
         downloaded = censusdata.download(
             'acs1', year, censusdata.censusgeo(illinois), ['B19013_001E'])
         expected = pd.DataFrame(
             {'B19013_001E': medhhinc[year]},
             [censusdata.censusgeo(illinois, 'Illinois')])
         assert_frame_equal(downloaded, expected)
Exemple #22
0
def get_acs_5_data(year, state, data_aliases, api_key=None):
    '''
    Get American Community Survey 5-year data at tract level, one request
    per county of the state

    Inputs:
        year (integer): year from which to pull data
        state (string): encoding of state for which to pull data
        data_aliases (dictionary; keys and values both strings): mapping of
            encoded data columns to pull from ACS with their descriptive names.
            Note that these descriptive names will be the headers of the output
            DataFrame. See below links for 2018 column encodings:
            https://api.census.gov/data/2018/acs/acs5/variables.html
            https://api.census.gov/data/2018/acs/acs1/variables.html
            https://api.census.gov/data/2018/acs/acsse/variables.html
        api_key (string, optional): Census API key. When omitted, the key
            that was previously hard-coded here is used, so existing callers
            keep working.

    (For more information on Census geographies, please visit this link:
        https://www.census.gov/data/developers/geography.html)

    Output:
        A pandas dataframe with ACS data: the aliased columns, a 'year'
        column and an 'index' column holding the geography of each row
    '''
    if api_key is None:
        # NOTE(review): a credential is committed in source. It is kept only
        # as a backward-compatible default; rotate it and pass `api_key`.
        api_key = "e62f1cebce1c8d3afece25fc491fbec7271a588b"

    # Column layout of the result: aliases first, then 'year'.
    if data_aliases:
        base_columns = list(data_aliases.values())
    else:
        # NOTE(review): `data_columns` is not defined in this function; this
        # branch presumably relies on a module-level name -- confirm.
        base_columns = list(data_columns)
    if 'year' not in base_columns:
        base_columns.append('year')

    # Get Census data and load into dataframe
    geographies = censusdata.geographies(
        censusdata.censusgeo([('state', state), ('county', '*')]), 'acs5',
        year)

    county_frames = []
    for county_geo in geographies.values():
        # params() yields ((level, state), (level, county)) for a county.
        ((_, _), (_, county_code)) = county_geo.params()

        df = censusdata.download(
            "acs5",
            year,
            censusdata.censusgeo([("state", state), ("county", county_code),
                                  ("tract", "*")]),
            list(data_aliases.keys()),
            key=api_key).reset_index()

        df = df.rename(columns=data_aliases)
        df['year'] = year
        county_frames.append(df)

    if county_frames:
        # DataFrame.append was removed in pandas 2.0; concatenate once.
        results_df = pd.concat(county_frames, ignore_index=True)
        # Keep the previous column order: base columns first, extras after.
        extras = [c for c in results_df.columns if c not in base_columns]
        results_df = results_df.reindex(columns=base_columns + extras)
    else:
        results_df = pd.DataFrame(columns=base_columns)

    return results_df.infer_objects()
def block_pull(state_id, variable_list):
    """Download 2018 ACS 5-year block-group data for every county of a state.

    Args:
        state_id: state identifier used for the "state" geography level and
            passed to ``county_list`` to obtain the county codes.
        variable_list (list): variable ids to request.

    Returns:
        pandas.DataFrame: the per-county downloads concatenated in the order
        returned by ``county_list``; an empty DataFrame when there are no
        counties (previously that case failed on an unbound variable).
    """
    county_frames = [
        censusdata.download(
            'acs5', 2018,
            censusdata.censusgeo([('state', state_id), ('county', county),
                                  ('tract', '*'), ('block group', '*')]),
            variable_list) for county in county_list(state_id)
    ]
    if not county_frames:
        return pd.DataFrame()
    # A single concat avoids re-copying the accumulated data per county.
    return pd.concat(county_frames)
def DownloadTable(year, fields, counties="'All counties'", geo="County",
                  state_num="47"):
    """Returns a pandas dataframe containing population estimates from a list of fields, for a certain year and geography

    Parameters:
        year (int): input year
        fields (list): list of field IDs for ACS data ("GEO_ID" is always
            requested in addition)
        counties (list or str): either a list of county FIPS numbers (each is
            zero-padded to three digits here) or the literal string
            "'All counties'" (inner single quotes included) to query every
            county of the state in a single request
        geo (str): Geography: County, Tract, or Block group
        state_num (str): state FIPS number; defaults to "47", the value that
            was previously hard-coded

    Raises:
        ValueError: if ``geo`` is not one of the supported geography levels.
    """
    def GetGeoArgs(geo):
        """Return the extra geography tuples needed below the county level."""
        below_county = {
            "County": [],
            "Tract": [("tract", "*")],
            "Block group": [("block group", "*")],
        }
        if geo not in below_county:
            # Previously an unknown level surfaced as an UnboundLocalError.
            raise ValueError(
                "geo must be 'County', 'Tract' or 'Block group', got %r" %
                (geo, ))
        return below_county[geo]

    # Validate the geography once, before any request is made.
    geo_args = GetGeoArgs(geo)
    requested = ["GEO_ID"] + list(fields)

    # isinstance guard: comparing an array-like to a string is ambiguous.
    if isinstance(counties, str) and counties == "'All counties'":
        return cd.download(
            "acs5", year,
            cd.censusgeo([("state", state_num), ("county", "*")] + geo_args),
            requested)

    county_frames = [
        cd.download(
            "acs5", year,
            cd.censusgeo([("state", state_num),
                          ("county", str(county).zfill(3))] + geo_args),
            requested) for county in counties
    ]
    if not county_frames:
        return pd.DataFrame(columns=requested)
    # DataFrame.append was removed in pandas 2.0; concatenate once instead.
    return pd.concat(county_frames)
 def test_download_acs3_profile(self):
     """Check DP03_0115PE for state 17 in the ACS 3-year profile tables."""
     insured = {2012: 78.3, 2013: 78.5}
     illinois = [('state', '17')]
     for year, expected_value in insured.items():
         downloaded = censusdata.download(
             'acs3', year, censusdata.censusgeo(illinois), ['DP03_0115PE'],
             tabletype='profile')
         expected = pd.DataFrame(
             {'DP03_0115PE': expected_value},
             [censusdata.censusgeo(illinois, 'Illinois')])
         assert_frame_equal(downloaded, expected)
Exemple #26
0
def getAllCounties():
    """Download 2018 ACS 5-year subject and detail data for every county of
    every state and return them as one table.

    The variables requested are the keys of ``subject_tables`` and
    ``detail_tables``, and the final column names come from ``columns``;
    all three are defined outside this function.

    Returns:
        pandas.DataFrame: detail and subject data merged per state and
        concatenated over all states, with ``columns`` as column labels.
    """
    states = censusdata.geographies(censusdata.censusgeo([('state', '*')]),
                                    'acs5', 2018)

    per_state = []

    # For every state, get all counties
    for state, state_geo in states.items():
        print("getting: ", state)
        state_fips = state_geo.geo[0][1]
        # The separate county lookup whose result was never used has been
        # dropped; it cost one extra request per state.
        county_spec = [('state', state_fips), ('county', '*')]

        subject_data = censusdata.download(
            'acs5', 2018, censusdata.censusgeo(county_spec),
            list(subject_tables.keys()),
            tabletype='subject').reset_index()
        detail_data = censusdata.download(
            'acs5', 2018, censusdata.censusgeo(county_spec),
            list(detail_tables.keys())).reset_index()

        # Get correct fips for index
        subject_data['index'] = subject_data['index'].apply(getFips)
        detail_data['index'] = detail_data['index'].apply(getFips)

        # Join the tables and add to master table
        per_state.append(detail_data.merge(subject_data))

    # Concatenate once instead of growing the frame inside the loop.
    all_states = pd.concat(per_state) if per_state else pd.DataFrame()

    # Set column names to human readable names. Plain assignment replaces
    # set_axis(..., inplace=True), whose `inplace` argument was removed in
    # pandas 2.0.
    all_states.columns = columns
    return all_states
Exemple #27
0
def get_state_data(state_fips):
    """Download 2010 SF1 block-level data for one state.

    ``state_fips`` is converted to a string and zero-padded to two digits.
    The request covers every county, tract and block of the state and asks
    for the STATE, COUNTY, TRACT and BLOCK fields plus P001001.
    """
    hierarchy = [("state", str(state_fips).zfill(2))]
    # Wildcard every level below the state.
    hierarchy += [(level, "*") for level in ("county", "tract", "block")]
    requested = ["STATE", "COUNTY", "TRACT", "BLOCK", "P001001"]
    return censusdata.download(
        "sf1", 2010, censusdata.censusgeo(hierarchy), requested)
Exemple #28
0
def get_census_data():
    ''' Pull down 2017 ACS 5-year data for every block group of state 17,
      county 031 from tables B11001, B03002, B15003 and B19013, and derive
      per-block-group shares.

      Returns (and prints the head of) a DataFrame with the columns 'bg',
      'pct_1parent', 'pct_alone', 'pct_white', 'pct_black', 'pct_hisp',
      'B15003_001E', 'pct_nohs', 'pct_BA' and 'medinc'. '''

    # construct lists of variables for the API query
    hhold_vars = ['B11001_%03dE' % i for i in range(1, 10)]
    race_vars = ['B03002_%03dE' % i for i in range(1, 21)]
    edu_vars = ['B15003_%03dE' % i for i in range(1, 26)]

    block_groups = [('state', '17'), ('county', '031'), ('block group', '*')]

    # the query is made in two requests: household + race variables first,
    # then education variables + B19013_001E
    data = census.download('acs5', 2017, census.censusgeo(block_groups),
                           hhold_vars + race_vars).reset_index()
    edu = census.download('acs5', 2017, census.censusgeo(block_groups),
                          edu_vars + ['B19013_001E']).reset_index()

    # column ranges are selected by position via constants defined elsewhere
    edu_total = edu['B15003_001E']
    edu['pct_nohs'] = edu.iloc[:, HSBEGIN:HSEND].sum(axis=1) / edu_total
    edu['pct_BA'] = edu.iloc[:, BABEGIN:EDUEND].sum(axis=1) / edu_total

    data = pd.merge(data, edu, on='index')

    # create unique FIPS ID
    def block_group_id(geo):
        return '17031' + geo.geo[TRACTCODE][1] + geo.geo[BLKGRPCODE][1]

    data['bg'] = data['index'].apply(block_group_id)

    # compute variables of interest: (new column, numerator, denominator)
    shares = [
        ('pct_1parent', 'B11001_004E', 'B11001_001E'),
        ('pct_alone', 'B11001_008E', 'B11001_001E'),
        ('pct_white', 'B03002_003E', 'B03002_001E'),
        ('pct_black', 'B03002_004E', 'B03002_001E'),
        ('pct_hisp', 'B03002_012E', 'B03002_001E'),
    ]
    for new_column, numerator, denominator in shares:
        data[new_column] = data[numerator] / data[denominator]

    data['medinc'] = data['B19013_001E']

    output_columns = ['bg', 'pct_1parent', 'pct_alone', 'pct_white',
                      'pct_black', 'pct_hisp', 'B15003_001E', 'pct_nohs',
                      'pct_BA', 'medinc']
    rv = data[output_columns]
    print(rv.head())

    return rv
Exemple #29
0
def downloadBlockgroupData(state, year, key, tableIDs):
    """Download ACS 5-year data at the block group level.

    Args:
        state, year, key: forwarded to ``getAllCounties`` to list the
            counties; ``year`` and ``key`` are also used for each download.
        tableIDs (list): variable ids to request.

    Returns:
        pandas.DataFrame: the per-county downloads concatenated; an empty
        DataFrame when no counties are returned.
    """
    allCounties = getAllCounties(state, year, key)
    county_tables = [
        censusdata.download('acs5',
                            year,
                            # County geography extended by all block groups.
                            censusdata.censusgeo(
                                list(county_geo.geo) +
                                [('block group', '*')]),
                            tableIDs,
                            key=key)
        for county_geo in allCounties.values()
    ]
    if not county_tables:
        return pd.DataFrame()
    # DataFrame.append was removed in pandas 2.0; concatenate once instead.
    return pd.concat(county_tables)
Exemple #30
0
    def _get_acs_dfs(self, tables, **kwargs):
        """Download ``tables`` for the geography described by the keyword
        arguments.

        The keyword arguments are turned into a geography hierarchy by
        ``self._parse_hierarchy``; the download uses ``self.survey`` and
        ``self.year``, and the result is passed through
        ``self._parse_geo_index`` before being returned.
        """
        hierarchy = self._parse_hierarchy(kwargs)
        downloaded = censusdata.download(
            self.survey, self.year, censusdata.censusgeo(hierarchy), tables)
        return self._parse_geo_index(downloaded)