def _call_census(state_fip, var_ids, src, year, tabletype, api_key):
    """Download the same variables for a state's counties and for the state.

    Args:
        state_fip: Value used for the ``"state"`` geography component.
        var_ids: Variable ids passed to ``censusdata.download``.
        src: Data source, passed as the first argument of
            ``censusdata.download``.
        year: Year to query; it is also echoed back in the result.
        tabletype: Forwarded as the ``tabletype`` keyword.
        api_key: Forwarded as the ``key`` keyword.

    Returns:
        tuple: ``(county_data, state_data, year)``. ``county_data`` is the
        download for ``county: *`` inside the state, ``state_data`` the
        download for the state on its own.
    """

    def _fetch(geo_spec):
        # The two requests differ only in the geography they target.
        return censusdata.download(
            src,
            year,
            censusdata.censusgeo(geo_spec),
            var_ids,
            key=api_key,
            tabletype=tabletype,
        )

    state_level = [("state", state_fip)]
    county_data = _fetch(state_level + [("county", "*")])
    state_data = _fetch(state_level)
    return county_data, state_data, year
def DownloadTable(year, state_num, fields, counties, geo="County"):
    """Download ACS 5-year fields for one state and tag the columns by year.

    Args:
        year (int): Year to query; also appended to every field column name.
        state_num (str): State FIPS number.
        fields (list): Field IDs for ACS data.
        counties (list or str): Either a list of county FIPS numbers (each is
            converted with ``str`` and zero-padded to three digits) or the
            sentinel string ``"'All counties'"`` (inner quotes included).
        geo (str): Geography: "County", "Tract", or "Block group".

    Returns:
        pandas.DataFrame: Indexed by ``GEOID``. The first column is
        ``Geography`` (the row index of the download), followed by one
        ``<field>_<year>`` column per requested field.

    Raises:
        ValueError: If ``geo`` is not one of the supported geographies.
    """
    sub_county_levels = {
        "County": [],
        "Tract": [("tract", "*")],
        "Block group": [("block group", "*")],
    }
    # Validate before any network call; an unknown geography used to surface
    # as an UnboundLocalError from the nested helper.
    if geo not in sub_county_levels:
        raise ValueError(
            "geo must be one of {}, got {!r}".format(
                sorted(sub_county_levels), geo))
    geo_args = sub_county_levels[geo]
    variables = ["GEO_ID"] + fields

    def _download(county):
        return cd.download(
            "acs5", year,
            cd.censusgeo([("state", state_num), ("county", county)] + geo_args),
            variables)

    if counties == "'All counties'":
        acs_df = _download("*")
    else:
        # DataFrame.append was removed in pandas 2.0: gather the per-county
        # frames and concatenate once.
        frames = [_download(str(county).zfill(3)) for county in counties]
        if frames:
            acs_df = pd.concat(frames)
        else:
            acs_df = pd.DataFrame(columns=variables)

    # Keep the original (censusgeo) index as a regular column before
    # switching the index to the GEOID string.
    acs_df["Geography"] = acs_df.index.to_series()
    acs_df = acs_df.rename(columns={"GEO_ID": "GEOID"}).set_index("GEOID")

    field_cols = [c for c in acs_df.columns if c != "Geography"]
    acs_df = acs_df[["Geography"] + field_cols]
    acs_df.columns = ["Geography"] + [
        c + "_" + str(year) for c in field_cols
    ]
    return acs_df
def test_download_acs5_2016(self):
    """Compare acs5 2016 downloads with expected frames.

    Covers a place, all counties of a state, a single block group, a
    metropolitan statistical area and a profile-table variable.
    """
    geo = censusdata.censusgeo
    year = 2016
    msa_level = 'metropolitan statistical area/micropolitan statistical area'

    # Place
    oakland = [('state', '06'), ('place', '53000')]
    assert_frame_equal(
        censusdata.download('acs5', year, geo(oakland),
                            ['B01001_001E', 'B01002_001E', 'B19013_001E']),
        pd.DataFrame(
            {'B01001_001E': 412040, 'B01002_001E': 36.2,
             'B19013_001E': 57778},
            [geo(oakland, 'Oakland city, California')]))

    # All counties in state 15, in the expected row order
    hawaii_counties = [
        ('001', 'Hawaii County, Hawaii'),
        ('003', 'Honolulu County, Hawaii'),
        ('005', 'Kalawao County, Hawaii'),
        ('007', 'Kauai County, Hawaii'),
        ('009', 'Maui County, Hawaii'),
    ]
    assert_frame_equal(
        censusdata.download('acs5', year,
                            geo([('state', '15'), ('county', '*')]),
                            ['B01001_001E', 'B01002_001E', 'B19013_001E']),
        pd.DataFrame(
            {'B01001_001E': [193680, 986999, 91, 70447, 162456],
             'B01002_001E': [41.8, 37.4, 56.5, 42.0, 40.5],
             'B19013_001E': [53936, 77161, 65625, 68224, 68777]},
            [geo([('state', '15'), ('county', code)], label)
             for code, label in hawaii_counties]))

    # Single block group
    block_group = [('state', '17'), ('county', '031'),
                   ('tract', '350100'), ('block group', '2')]
    assert_frame_equal(
        censusdata.download('acs5', year, geo(block_group),
                            ['B01001_001E', 'B19013_001E']),
        pd.DataFrame(
            {'B01001_001E': 1374, 'B19013_001E': 44044},
            [geo(block_group,
                 'Block Group 2, Census Tract 3501, Cook County, Illinois')]))

    # Metropolitan statistical area
    chicago_msa = [(msa_level, '16980')]
    assert_frame_equal(
        censusdata.download('acs5', year, geo(chicago_msa),
                            ['B01001_001E', 'B19013_001E']),
        pd.DataFrame(
            {'B01001_001E': 9528396, 'B19013_001E': 63327},
            [geo(chicago_msa,
                 'Chicago-Naperville-Elgin, IL-IN-WI Metro Area')]))

    # Profile table
    california = [('state', '06')]
    assert_frame_equal(
        censusdata.download('acs5', year, geo(california), ['DP03_0021PE'],
                            tabletype='profile'),
        pd.DataFrame({'DP03_0021PE': 5.2}, [geo(california, 'California')]))
def test_download_acs5_2015(self):
    """Compare acs5 2015 downloads with expected frames.

    Covers a place, all counties of a state, a single block group, a
    metropolitan statistical area and a profile-table variable.
    """
    geo = censusdata.censusgeo
    year = 2015
    msa_level = 'metropolitan statistical area/micropolitan statistical area'

    # Place
    oakland = [('state', '06'), ('place', '53000')]
    assert_frame_equal(
        censusdata.download('acs5', year, geo(oakland),
                            ['B01001_001E', 'B01002_001E', 'B19013_001E']),
        pd.DataFrame(
            {'B01001_001E': 408073, 'B01002_001E': 36.3,
             'B19013_001E': 54618},
            [geo(oakland, 'Oakland city, California')]))

    # All counties in state 15, in the expected row order
    hawaii_counties = [
        ('001', 'Hawaii County, Hawaii'),
        ('003', 'Honolulu County, Hawaii'),
        ('005', 'Kalawao County, Hawaii'),
        ('007', 'Kauai County, Hawaii'),
        ('009', 'Maui County, Hawaii'),
    ]
    assert_frame_equal(
        censusdata.download('acs5', year,
                            geo([('state', '15'), ('county', '*')]),
                            ['B01001_001E', 'B01002_001E', 'B19013_001E']),
        pd.DataFrame(
            {'B01001_001E': [191482, 984178, 85, 69691, 160863],
             'B01002_001E': [41.1, 36.9, 51.9, 41.6, 40],
             'B19013_001E': [52108, 74460, 66250, 65101, 66476]},
            [geo([('state', '15'), ('county', code)], label)
             for code, label in hawaii_counties]))

    # Single block group
    block_group = [('state', '17'), ('county', '031'),
                   ('tract', '350100'), ('block group', '2')]
    assert_frame_equal(
        censusdata.download('acs5', year, geo(block_group),
                            ['B01001_001E', 'B19013_001E']),
        pd.DataFrame(
            {'B01001_001E': 1293, 'B19013_001E': 49375},
            [geo(block_group,
                 'Block Group 2, Census Tract 3501, Cook County, Illinois')]))

    # Metropolitan statistical area
    chicago_msa = [(msa_level, '16980')]
    assert_frame_equal(
        censusdata.download('acs5', year, geo(chicago_msa),
                            ['B01001_001E', 'B19013_001E']),
        pd.DataFrame(
            {'B01001_001E': 9534008, 'B19013_001E': 61828},
            [geo(chicago_msa,
                 'Chicago-Naperville-Elgin, IL-IN-WI Metro Area')]))

    # Profile table
    california = [('state', '06')]
    assert_frame_equal(
        censusdata.download('acs5', year, geo(california), ['DP03_0021PE'],
                            tabletype='profile'),
        pd.DataFrame({'DP03_0021PE': 5.2}, [geo(california, 'California')]))
def test_download_acs5_2017(self):
    """Compare acs5 2017 downloads with expected frames.

    Covers a place, all counties of a state, a single block group, a
    metropolitan statistical area and a profile-table variable.
    """
    geo = censusdata.censusgeo
    year = 2017
    msa_level = 'metropolitan statistical area/micropolitan statistical area'

    # Place
    oakland = [('state', '06'), ('place', '53000')]
    assert_frame_equal(
        censusdata.download('acs5', year, geo(oakland),
                            ['B01001_001E', 'B01002_001E', 'B19013_001E']),
        pd.DataFrame(
            {'B01001_001E': 417442, 'B01002_001E': 36.4,
             'B19013_001E': 63251},
            [geo(oakland, 'Oakland city, California')]))

    # All counties in state 15; the expected row order for this year is
    # 001, 003, 009, 007, 005.
    hawaii_counties = [
        ('001', 'Hawaii County, Hawaii'),
        ('003', 'Honolulu County, Hawaii'),
        ('009', 'Maui County, Hawaii'),
        ('007', 'Kauai County, Hawaii'),
        ('005', 'Kalawao County, Hawaii'),
    ]
    assert_frame_equal(
        censusdata.download('acs5', year,
                            geo([('state', '15'), ('county', '*')]),
                            ['B01001_001E', 'B01002_001E', 'B19013_001E']),
        pd.DataFrame(
            {'B01001_001E': [196325, 990060, 164094, 71093, 86],
             'B01002_001E': [42.1, 37.6, 40.9, 42.1, 57.6],
             'B19013_001E': [56395, 80078, 72762, 72330, 61750]},
            [geo([('state', '15'), ('county', code)], label)
             for code, label in hawaii_counties]))

    # Single block group
    block_group = [('state', '17'), ('county', '031'),
                   ('tract', '350100'), ('block group', '2')]
    assert_frame_equal(
        censusdata.download('acs5', year, geo(block_group),
                            ['B01001_001E', 'B19013_001E']),
        pd.DataFrame(
            {'B01001_001E': 1476, 'B19013_001E': 41638},
            [geo(block_group,
                 'Block Group 2, Census Tract 3501, Cook County, Illinois')]))

    # Metropolitan statistical area
    chicago_msa = [(msa_level, '16980')]
    assert_frame_equal(
        censusdata.download('acs5', year, geo(chicago_msa),
                            ['B01001_001E', 'B19013_001E']),
        pd.DataFrame(
            {'B01001_001E': 9549229, 'B19013_001E': 65757},
            [geo(chicago_msa,
                 'Chicago-Naperville-Elgin, IL-IN-WI Metro Area')]))

    # Profile table
    california = [('state', '06')]
    assert_frame_equal(
        censusdata.download('acs5', year, geo(california), ['DP03_0021PE'],
                            tabletype='profile'),
        pd.DataFrame({'DP03_0021PE': 5.2}, [geo(california, 'California')]))
def create_dataframe():
    """Build a county-level workforce and commuting table from 2015 ACS 5-year data.

    Downloads seventeen variables for every county (``county: *``) in state
    FIPS ``36`` (New York), gives them descriptive column names and adds two
    ratio columns.

    Returns:
        pandas.DataFrame: One row per county, indexed by the concatenated
        state and county codes taken from each row's ``censusgeo``. Columns
        are the renamed variables plus ``percent_unemployed``,
        ``percent_public_transpo`` (both plain ratios, not multiplied by
        100) and ``county_name`` (the geography name up to the first comma).
    """
    # Census variable id -> descriptive column name. The ids refer to
    # specific census tables that were looked up beforehand.
    variables = {
        'B23025_002E': 'total_workforce',
        'B23025_005E': 'total_unemployed',
        'B08301_001E': 'total_transpo',
        'B08301_003E': 'drove_alone',
        'B08301_004E': 'carpooled',
        'B08301_010E': 'total_public_transpo',
        'B08301_011E': 'bus',
        'B08301_012E': 'streetcar',
        'B08301_013E': 'subway',
        'B08301_014E': 'railroad',
        'B08301_015E': 'ferry',
        'B08301_016E': 'taxi',
        'B08301_017E': 'motorcycle',
        'B08301_018E': 'bike',
        'B08301_019E': 'walked',
        'B08301_020E': 'other',
        'B08301_021E': 'wfh',
    }

    df = censusdata.download(
        'acs5', 2015,
        censusdata.censusgeo([('state', '36'), ('county', '*')]),
        list(variables))
    df = df.rename(columns=variables)

    # Column arithmetic replaces the row-by-row apply; the values are the same.
    df['percent_unemployed'] = df['total_unemployed'] / df['total_workforce']
    df['percent_public_transpo'] = (
        df['total_public_transpo'] / df['total_transpo'])

    # Each index entry is a censusgeo: .geo holds (level, code) pairs and
    # .name the label, whose text before the first comma is the county.
    geos = df.index.tolist()
    new_indices = [g.geo[0][1] + g.geo[1][1] for g in geos]
    county_names = [g.name.split(',')[0] for g in geos]

    # These lists were previously built and then discarded, and the function
    # returned None; apply them and hand the frame back to the caller.
    df.index = new_indices
    df['county_name'] = county_names
    return df
def download_data(vars):
    """
    Download the selected variables for every geography of the configured
    level and return them as one table.

    The geography level, data source, year, table type and API key are not
    parameters: they are read from the module-level names
    ``geo_level_name``, ``data_source``, ``year``, ``tabletype`` and
    ``API_KEY``.

    :param vars: handed to ``get_variables``, which returns the variable ids
        to request and the matching column headers. (Per the earlier
        documentation this is a file name holding tuples in the format
        returned by ``censusdata.search()`` - confirm against
        ``get_variables``.)
    :return: a pandas.DataFrame with one download per geography stacked
        together, columns replaced by the headers, after ``fix_index`` has
        been applied
    """
    level = geoLevel(geo_level_name)
    print(f"Getting {level.name} level geographies...")
    geographies = get_censusgeos(level)
    var_ids, headers = get_variables(vars)

    print("Downloading selected variables for these geographies...")
    frames = [
        censusdata.download(data_source, year, geo, var_ids,
                            tabletype=tabletype, key=API_KEY)
        for geo in tqdm(geographies)
    ]

    combined = pd.concat(frames)
    combined.columns = headers
    return fix_index(combined)
def gather_census():
    """Collect 2013-2018 ACS 5-year block-group data for state 17, county 031.

    For each year all variables are requested in a single download (together
    with ``GEO_ID``), a ``Year`` column is added, and the yearly frames are
    stacked. Previously every variable was downloaded separately for every
    year and the pieces were inner-merged on ``GEO_ID``/``Year``; that
    relied on ``DataFrame.append``, which pandas 2.0 removed, and issued 78
    requests instead of 6.

    Returns:
        pandas.DataFrame: One row per block group and year. Column order
        matches the earlier merge-based layout: the first variable,
        ``GEO_ID``, ``Year``, then the remaining variables, all under their
        descriptive names. The first rows are also printed.
    """
    years = [2013, 2014, 2015, 2016, 2017, 2018]
    # Variable id -> descriptive column name, in request order.
    labels = {
        "B02001_001E": "Total Population",
        "B01002_001E": "Median Age",
        "B02001_002E": "Total White",
        "B02001_003E": "Total Black",
        "B19013_001E": "Median HH Income",
        "B25001_001E": "Total Housing Units",
        "B25002_003E": "Total Vacant Units",
        "B25018_001E": "Median Number of Rooms",
        "B25035_001E": "Median Year Built",
        "B25064_001E": "Median Gross Rent",
        "B25010_001E": "Mean HH Size",
        "B23007_002E": "HH with Children",
        "B23007_001E": "Total HH",
    }
    codes = list(labels)

    frames = []
    for year in years:
        yearly = censusdata.download(
            "acs5", year,
            censusdata.censusgeo(
                [("state", "17"), ("county", "031"), ("block group", "*")]),
            codes + ["GEO_ID"]).reset_index(drop=True)
        yearly["Year"] = year
        frames.append(yearly)

    df = pd.concat(frames, ignore_index=True)
    # Reproduce the column order the chained merges used to generate.
    df = df[[codes[0], "GEO_ID", "Year"] + codes[1:]]
    df = df.rename(columns=labels)
    print(df.head())
    return df
def read_acs_data():
    '''
    Download 2018 ACS 5-year figures for every state via the censusdata api
    and derive race shares and an age figure.

    Returns a DataFrame with the columns "State", "Median Income",
    "black_pct", "white_pct" and "age", where "age" is the plain average of
    the "White_Age" and "Black_Age" columns.
    '''
    labels = {
        'B02001_001E': 'Race Total',
        'B02001_002E': 'White',
        'B02001_003E': 'Black',
        'B19013_001E': 'Median Income',
        'B01002A_001E': "White_Age",
        'B01002B_001E': "Black_Age"
    }

    def _text_before_colon(geo):
        # The state label is whatever precedes the first ":" in the string
        # form of the index entry.
        text = str(geo)
        return text[:text.index(":")]

    all_states = censusdata.censusgeo([('state', '*')])
    acs_df = censusdata.download('acs5', 2018, all_states, list(labels))
    acs_df = acs_df.rename(columns=labels)

    race_total = acs_df["Race Total"]
    acs_df["black_pct"] = acs_df["Black"] / race_total
    acs_df["white_pct"] = acs_df["White"] / race_total
    acs_df["age"] = (acs_df["White_Age"] + acs_df["Black_Age"]) / 2

    acs_df = acs_df.reset_index()
    acs_df["State"] = acs_df["index"].apply(_text_before_colon)
    return acs_df[[
        "State", "Median Income", "black_pct", "white_pct", "age"
    ]]
def make_api_call(varlist, survey, year, geo, tabletype="detail"):
    '''
    Download a list of variables in several requests and join the results.

    Takes list of variables, survey to query, year to query, and list of
    tuples specifying geography. The variables are requested in consecutive
    chunks; each chunk's frame gets its index moved into an "index" column
    and the frames are merged on that column.

    A failed request is reported with print and skipped (best effort), so
    the result may lack the columns of that chunk.

    Returns: data frame with resulting data

    Raises: TypeError (from reduce) when no request produced a frame, which
    includes an empty varlist.
    '''
    # The API accepts at most 50 variables per request. 49 per call leaves
    # room for the NAME field that censusdata appears to add to every
    # request (assumption - confirm against the installed censusdata).
    chunk_size = 49
    dfs = []
    # Stepping by position means a failed request can no longer leave the
    # cursor behind and make the next request overlap and grow past the cap.
    for start in range(0, len(varlist), chunk_size):
        chunk = list(islice(varlist, start, start + chunk_size))
        try:
            dfs.append(
                census.download(survey, year, census.censusgeo(geo), chunk,
                                tabletype=tabletype).reset_index())
        except Exception as e:
            # Deliberately broad: report and carry on with the other chunks.
            print("API call failed")
            print(e)
    return reduce(lambda x, y: pd.merge(x, y, on="index"), dfs)
def create_dataframe():
    """Return the public-transport share per county from 2015 ACS 5-year data.

    Downloads ``B08301_001E`` and ``B08301_010E`` for every county of state
    ``36``, names them ``total_transpo`` and ``total_public_transpo``, and
    adds ``percent_public_transpo`` (their ratio times 100, rounded to two
    decimals) and ``county_name``. The index becomes the concatenated state
    and county codes.
    """
    all_counties = censusdata.censusgeo([('state', '36'), ('county', '*')])
    downloaded = censusdata.download(
        'acs5', 2015, all_counties, ['B08301_001E', 'B08301_010E'])
    df = pd.DataFrame(downloaded)
    df.columns = ['total_transpo', 'total_public_transpo']

    def _public_share(row):
        # Percentage of public transportation use for one county.
        return round(
            row['total_public_transpo'] / row['total_transpo'] * 100, 2)

    df['percent_public_transpo'] = df.apply(_public_share, axis=1)

    # Each index entry carries the (level, code) pairs in .geo and a label
    # in .name; state code + county code gives a coherent FIPS id.
    geos = df.index.tolist()
    df.index = [g.geo[0][1] + g.geo[1][1] for g in geos]
    df['county_name'] = [g.name.split(',')[0] for g in geos]
    return df
def fetch_acs(year, acs_dataset, state, county, table, columns):
    """
    Internal function to hit the CENSUS api and extract a pandas DataFrame
    for the requested Table, State, County

    Args:
        year (int): year of interest
        acs_dataset (str): Census data source: 'acs1' for ACS 1-year
            estimates, 'acs5' for ACS 5-year estimates, 'acs3' for ACS
            3-year estimates, 'acsse' for ACS 1-year supplemental estimates,
            'sf1' for SF1 data.
        state (str): value for the "state" geography component. NOTE(review):
            earlier documentation called this a two letter abbreviation, but
            it is passed unchanged to ``censusgeo`` - confirm whether a FIPS
            code is required.
        county (str): three digit FIPS code as a string
        table (str): string code for the Census table of interest
            ex: "B03002"
        columns (dict): key, value pairs of Census table columns and rename
            (ex: {"002E": "Total_Non_Hisp", "012E": "Total_Hispanic"})

    Returns:
        pandas.DataFrame: Data frame with columns renamed to the values of
        ``columns``, and row index of censusgeo objects representing Census
        geographies.
    """
    # Map full variable id -> new name directly. The mapping used to be
    # rebuilt by splitting each id on "_", which picks the wrong key as soon
    # as a column suffix itself contains an underscore.
    rename = {f"{table}_{suffix}": label for suffix, label in columns.items()}
    variables = list(rename)

    # Set the geography object
    geo = census.censusgeo([("state", state), ("county", county)])

    # Fetch data
    data = census.download(src=acs_dataset, year=year, geo=geo, var=variables)

    # Rename columns
    data.rename(columns=rename, inplace=True)
    return data
def get_data(fips_dict, table_code, start_shell, end_shell, name):
    """Download 2013 ACS 5-year data for a set of counties and save it as CSV.

    Args:
        fips_dict: Mapping whose values are dicts with ``'state'`` and
            ``'county'`` entries; one download is made per value.
        table_code, start_shell, end_shell: Passed to ``createShells`` to
            obtain the list of variables to request.
        name: Used to build the output file name ``raw_<name>.csv`` in the
            current working directory.

    Returns:
        None. The stacked downloads are written to the CSV file; an empty
        ``fips_dict`` writes an empty frame.
    """
    shells = createShells(table_code, start_shell, end_shell)

    frames = [
        censusdata.download(
            'acs5', 2013,
            censusdata.censusgeo(
                [('state', entry['state']), ('county', entry['county'])]),
            shells)
        for entry in fips_dict.values()
    ]
    # DataFrame.append was removed in pandas 2.0; concatenate once instead.
    raw_data = pd.concat(frames) if frames else pd.DataFrame()
    raw_data.to_csv('raw_' + name + '.csv')
def make_dists(acs_tables):
    """Download 2015 ACS 5-year data for every congressional district.

    Args:
        acs_tables (dict): Its values are the variable ids to request; the
            keys are not used.

    Returns:
        pandas.DataFrame: The per-state downloads (``congressional
        district: *`` within each state in ``state_FIPS``) stacked in list
        order.
    """
    tables = list(acs_tables.values())
    state_FIPS = [
        1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 15, 16, 17, 18, 19, 20, 21, 22, 23,
        24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
        41, 42, 44, 45, 46, 47, 48, 49, 50, 51, 53, 54, 55, 56
    ]
    state_df = [
        censusdata.download(
            'acs5',
            2015,  # int, as censusdata documents; was the string '2015'
            censusdata.censusgeo([
                # FIPS state codes are two characters wide: pad 1 -> "01".
                ('state', str(st).zfill(2)),
                ('congressional district', '*'),
            ]),
            tables)
        for st in state_FIPS
    ]
    # Concatenate whatever was collected rather than a hard-coded range(50).
    return pd.concat(state_df)
def main():
    """Walk through a censusdata example for Cook County, IL block groups.

    Looks up variables, prints two table layouts, downloads the 2015 ACS
    5-year block-group data, derives the percent unemployed and the percent
    without a high-school degree, prints summaries and writes
    ``cook_data.csv``.
    """
    pd.set_option('display.expand_frame_repr', False)
    pd.set_option('display.precision', 2)

    # To download we must first identify the tables containing the variables
    # of interest. Use the ACS documentation, in particular the Table Shells
    # (https://www.census.gov/programs-surveys/acs/technical-documentation/summary-file-documentation.html).
    # censusdata.search finds given text patterns; slicing limits the output
    # to the relevant variables. The results are not used further here.
    censusdata.search('acs5', 2015, 'label', 'unemploy')[160:170]
    censusdata.search('acs5', 2015, 'concept', 'education')[730:790]

    # Show the variables of each table with censusdata.printtable.
    for table_id in ('B23025', 'B15003'):
        censusdata.printtable(censusdata.censustable('acs5', 2015, table_id))

    # Next, identify the geographies: block groups in Cook County, IL.
    # 1. look for the state FIPS code:
    # print(censusdata.geographies(censusdata.censusgeo([('state','*')]), 'acs5', 2015))
    #    -> IL is 17
    # 2. list all counties within IL to find Cook:
    # print(censusdata.geographies(censusdata.censusgeo([('state','17'), ('county', '*')]), 'acs5', 2015))
    #    -> Cook is 031

    # With variables and geographies identified, download the data and
    # compute the percent unemployed and the percent with no HS degree.
    labor_vars = ['B23025_003E', 'B23025_005E']
    edu_vars = ['B15003_{:03d}E'.format(i) for i in range(1, 17)]
    cook_blocks = censusdata.censusgeo(
        [('state', '17'), ('county', '031'), ('block group', '*')])
    cook_cnty = censusdata.download(
        'acs5', 2015, cook_blocks, labor_vars + edu_vars)

    cook_cnty['percent_unemployed'] = (
        cook_cnty.B23025_005E / cook_cnty.B23025_003E * 100)

    # Add up B15003_002E .. B15003_016E left to right; B15003_001E is the
    # denominator.
    below_hs = edu_vars[1:]
    no_hs_total = cook_cnty[below_hs[0]]
    for var in below_hs[1:]:
        no_hs_total = no_hs_total + cook_cnty[var]
    cook_cnty['percent_nohs'] = no_hs_total / cook_cnty.B15003_001E * 100

    cook_cnty = cook_cnty[['percent_unemployed', 'percent_nohs']]
    print(cook_cnty.describe())

    # The 30 block groups in Cook with the highest unemployment rate, along
    # with their percent with no HS degree.
    ranked = cook_cnty.sort_values('percent_unemployed', ascending=False)
    print(ranked.head(30))

    # Show correlation
    print(cook_cnty.corr())

    censusdata.exportcsv('cook_data.csv', cook_cnty)
def test_download_acs1_2015(self):
    """acs1 2015 download of B19013_001E for state 17 matches the expected frame."""
    illinois = [('state', '17')]
    actual = censusdata.download(
        'acs1', 2015, censusdata.censusgeo(illinois), ['B19013_001E'])
    expected = pd.DataFrame(
        {'B19013_001E': 59588},
        [censusdata.censusgeo(illinois, 'Illinois')])
    assert_frame_equal(actual, expected)
def test_download_sf1_2010(self):
    """sf1 2010 download of P001001 for place 14000 in state 17 matches the expected frame."""
    chicago = [('state', '17'), ('place', '14000')]
    actual = censusdata.download(
        'sf1', 2010, censusdata.censusgeo(chicago), ['P001001'])
    # The expected index entry is built without a name.
    expected = pd.DataFrame(
        {'P001001': 2695598},
        [censusdata.censusgeo(chicago)])
    assert_frame_equal(actual, expected)
def create_census_datapull(table_name, items, year):
    """Download ACS 1-year variables of one table for all counties.

    Each entry of ``items`` is converted to ``str`` and combined with
    ``table_name`` by ``create_census_value`` to form a variable to request.
    The result of the ``acs1`` download for ``county: *`` in the given year
    is returned.
    """
    variables = [
        create_census_value(table_name, str(item)) for item in items
    ]
    all_counties = censusdata.censusgeo([('county', '*')])
    return censusdata.download('acs1', year, all_counties, variables)
def test_download_acsse(self):
    """acsse downloads of K202801_006E for state 17 match the expected value for 2014-2017."""
    expected_by_year = {2014: 731135, 2015: 658047, 2016: 522736,
                        2017: 464053}
    illinois = [('state', '17')]
    # sorted() keeps the ascending year order of the 2014..2017 range.
    for year, count in sorted(expected_by_year.items()):
        actual = censusdata.download(
            'acsse', year, censusdata.censusgeo(illinois), ['K202801_006E'])
        expected = pd.DataFrame(
            {'K202801_006E': count},
            [censusdata.censusgeo(illinois, 'Illinois')])
        assert_frame_equal(actual, expected)
def test_download_acs3_detail(self):
    """acs3 downloads of B19013_001E for state 17 match the expected value for 2012 and 2013."""
    expected_by_year = {2012: 55231, 2013: 55799}
    illinois = [('state', '17')]
    for year, medhhinc in expected_by_year.items():
        actual = censusdata.download(
            'acs3', year, censusdata.censusgeo(illinois), ['B19013_001E'])
        expected = pd.DataFrame(
            {'B19013_001E': medhhinc},
            [censusdata.censusgeo(illinois, 'Illinois')])
        assert_frame_equal(actual, expected)
def test_download_acs1_201214(self):
    """acs1 downloads of B19013_001E for state 17 match the expected value for 2012-2014."""
    expected_by_year = {2012: 55137, 2013: 56210, 2014: 57444}
    illinois = [('state', '17')]
    # sorted() keeps the ascending year order of the 2012..2014 range.
    for year, medhhinc in sorted(expected_by_year.items()):
        actual = censusdata.download(
            'acs1', year, censusdata.censusgeo(illinois), ['B19013_001E'])
        expected = pd.DataFrame(
            {'B19013_001E': medhhinc},
            [censusdata.censusgeo(illinois, 'Illinois')])
        assert_frame_equal(actual, expected)
def get_acs_5_data(year, state, data_aliases, api_key=None):
    '''
    Get American Community Survey 5-year data at tract level, one request
    per county of the given state

    Inputs:
        year (integer): year from which to pull data
        state (string): encoding of state for which to pull data
        data_aliases (dictionary; keys and values both strings): mapping of
            encoded data columns to pull from ACS with their descriptive
            names. Note that these descriptive names will be the headers of
            the output DataFrame. See below links for 2018 column encodings:
                https://api.census.gov/data/2018/acs/acs5/variables.html
                https://api.census.gov/data/2018/acs/acs1/variables.html
                https://api.census.gov/data/2018/acs/acsse/variables.html
        api_key (string, optional): Census API key. When omitted, the
            CENSUS_API_KEY environment variable is used if set, otherwise
            the key that was previously embedded in this function.

    (For more information on Census geographies, please visit this link:
        https://www.census.gov/data/developers/geography.html)

    Output:
        A pandas dataframe with ACS data: the descriptive columns, 'year',
        and an 'index' column holding each row's geography
    '''
    import os  # local import: only needed to resolve the API key

    # SECURITY: a credential is hard-coded here. It is kept only as a last
    # resort so existing callers keep working; pass api_key or set
    # CENSUS_API_KEY, then rotate and remove this literal.
    if api_key is None:
        api_key = os.environ.get(
            "CENSUS_API_KEY", "e62f1cebce1c8d3afece25fc491fbec7271a588b")

    # Empty frame that fixes the leading column order of the result
    if data_aliases:
        results_df = pd.DataFrame(columns=list(data_aliases.values()))
    else:
        # data_columns is expected to exist at module level
        results_df = pd.DataFrame(columns=data_columns)
    results_df['year'] = ""

    # Get Census data and load into dataframe
    geographies = censusdata.geographies(
        censusdata.censusgeo([('state', state), ('county', '*')]),
        'acs5', year)

    frames = [results_df]
    for county_geo in geographies.values():
        ((_, _), (_, county_code)) = county_geo.params()
        df = censusdata.download(
            "acs5", year,
            censusdata.censusgeo([("state", state),
                                  ("county", county_code),
                                  ("tract", "*")]),
            list(data_aliases.keys()),
            key=api_key).reset_index()
        df = df.rename(columns=data_aliases)
        df['year'] = year
        frames.append(df)

    # DataFrame.append was removed in pandas 2.0; concatenate once instead.
    results_df = pd.concat(frames, ignore_index=True)
    return results_df.infer_objects()
def block_pull(state_id, variable_list):
    """Download 2018 ACS 5-year block-group data for every county of a state.

    Args:
        state_id: Value for the ``"state"`` geography component; also passed
            to ``county_list`` to obtain the counties to query.
        variable_list: Variables to request in every download.

    Returns:
        pandas.DataFrame: The per-county downloads (all tracts, all block
        groups) stacked in county order. An empty county list yields an
        empty frame instead of the former ``UnboundLocalError``.
    """
    frames = [
        censusdata.download(
            'acs5', 2018,
            censusdata.censusgeo([('state', state_id), ('county', county),
                                  ('tract', '*'), ('block group', '*')]),
            variable_list)
        for county in county_list(state_id)
    ]
    if not frames:
        return pd.DataFrame()
    # One concat at the end avoids re-copying the accumulated rows per county.
    return pd.concat(frames)
def DownloadTable(year, fields, counties="'All counties'", geo="County",
                  state_num="47"):
    """Returns a pandas dataframe containing ACS 5-year estimates from a
    list of fields, for a certain year and geography

    Parameters:
        year (int): input year
        fields (list): list of field IDs for ACS data
        counties (list or str): either a list of county FIPS numbers (each
            is converted with ``str`` and zero-padded to three digits) or
            the sentinel string ``"'All counties'"`` (inner quotes included)
        geo (str): Geography: County, Tract, or Block group
        state_num (str): state FIPS number; defaults to "47", the value
            that used to be hard-coded

    Returns:
        pandas.DataFrame: the downloaded rows with a ``GEO_ID`` column
        followed by the requested fields. An empty county list gives an
        empty frame with those columns.

    Raises:
        ValueError: if ``geo`` is not one of the supported geographies
    """
    sub_county_levels = {
        "County": [],
        "Tract": [("tract", "*")],
        "Block group": [("block group", "*")],
    }
    # Validate before any network call; an unknown geography used to surface
    # as an UnboundLocalError from the nested helper.
    if geo not in sub_county_levels:
        raise ValueError(
            "geo must be one of {}, got {!r}".format(
                sorted(sub_county_levels), geo))
    geo_args = sub_county_levels[geo]
    variables = ["GEO_ID"] + fields

    def _download(county):
        return cd.download(
            "acs5", year,
            cd.censusgeo([("state", state_num), ("county", county)] + geo_args),
            variables)

    if counties == "'All counties'":
        return _download("*")

    # DataFrame.append was removed in pandas 2.0: gather the per-county
    # frames and concatenate once.
    frames = [_download(str(county).zfill(3)) for county in counties]
    if not frames:
        return pd.DataFrame(columns=variables)
    return pd.concat(frames)
def test_download_acs3_profile(self):
    """acs3 profile downloads of DP03_0115PE for state 17 match the expected value for 2012 and 2013."""
    expected_by_year = {2012: 78.3, 2013: 78.5}
    illinois = [('state', '17')]
    for year, insured in expected_by_year.items():
        actual = censusdata.download(
            'acs3', year, censusdata.censusgeo(illinois), ['DP03_0115PE'],
            tabletype='profile')
        expected = pd.DataFrame(
            {'DP03_0115PE': insured},
            [censusdata.censusgeo(illinois, 'Illinois')])
        assert_frame_equal(actual, expected)
def getAllCounties():
    """Download 2018 ACS 5-year subject and detail data for every county.

    For each state returned by ``censusdata.geographies`` the variables in
    the module-level ``subject_tables`` (table type ``subject``) and
    ``detail_tables`` are downloaded for ``county: *``. The ``index`` column
    of both frames is converted with ``getFips``, the two frames are merged
    on their shared columns, and the per-state results are stacked.

    Returns:
        pandas.DataFrame: The stacked table with its column labels replaced
        by the module-level ``columns``.
    """
    states = censusdata.geographies(
        censusdata.censusgeo([('state', '*')]), 'acs5', 2018)

    frames = []
    # For every state, get all counties
    for state, state_geo in states.items():
        print("getting: ", state)
        state_fips = state_geo.geo[0][1]
        # An unused censusdata.geographies() lookup per state was dropped
        # here; it cost one extra request and its result was never read.
        county_spec = [('state', state_fips), ('county', '*')]

        subject_data = censusdata.download(
            'acs5', 2018, censusdata.censusgeo(county_spec),
            list(subject_tables.keys()), tabletype='subject').reset_index()
        detail_data = censusdata.download(
            'acs5', 2018, censusdata.censusgeo(county_spec),
            list(detail_tables.keys())).reset_index()

        # Get correct fips for index
        subject_data['index'] = subject_data.apply(
            lambda row: getFips(row['index']), axis=1)
        detail_data['index'] = detail_data.apply(
            lambda row: getFips(row['index']), axis=1)

        # Join the tables and queue them for the master table
        frames.append(detail_data.merge(subject_data))

    # One concat at the end avoids re-copying the master table per state.
    all_states = pd.concat(frames) if frames else pd.DataFrame()

    # Set column names to human readable names. set_axis(inplace=True) was
    # removed in pandas 2.0, so rebind the returned frame.
    all_states = all_states.set_axis(columns, axis=1)
    return all_states
def get_state_data(state_fips):
    """Download the 2010 SF1 block-level table for one state.

    ``state_fips`` is converted with ``str`` and zero-padded to two
    characters. The request covers every county, tract and block of that
    state and asks for ``STATE``, ``COUNTY``, ``TRACT``, ``BLOCK`` and
    ``P001001``.
    """
    state_code = str(state_fips).zfill(2)
    hierarchy = [("state", state_code)]
    hierarchy += [(level, "*") for level in ("county", "tract", "block")]
    variables = ["STATE", "COUNTY", "TRACT", "BLOCK", "P001001"]
    return censusdata.download(
        "sf1", 2010, censusdata.censusgeo(hierarchy), variables)
def get_census_data():
    '''
    Pull down 2017 ACS 5-year data for the block groups of Cook County IL
    (state 17, county 031) and derive household, race, education and income
    measures.

    Variables requested: B11001_001E-009E, B03002_001E-020E,
    B15003_001E-025E and B19013_001E.

    Returns a DataFrame with the columns bg, pct_1parent, pct_alone,
    pct_white, pct_black, pct_hisp, B15003_001E, pct_nohs, pct_BA and
    medinc; its first rows are also printed.
    '''
    # construct lists of variables for the API query
    hhold_vars = ['B11001_00{}E'.format(i) for i in range(1, 10)]
    race_vars = ['B03002_{}E'.format(str(i).zfill(3)) for i in range(1, 21)]
    edu_vars = ['B15003_{}E'.format(str(i).zfill(3)) for i in range(1, 26)]

    cook_block_groups = [('state', '17'), ('county', '031'),
                         ('block group', '*')]

    def _pull(variables):
        # One request for all block groups in Cook County.
        return census.download(
            'acs5', 2017, census.censusgeo(cook_block_groups),
            variables).reset_index()

    # The query is made in two requests (the earlier note says the API only
    # takes a limited number of variables per call).
    data = _pull(hhold_vars + race_vars)
    edu = _pull(edu_vars + ['B19013_001E'])

    # Education shares: positional column ranges come from the module-level
    # constants HSBEGIN/HSEND and BABEGIN/EDUEND.
    edu_total = edu['B15003_001E']
    edu['pct_nohs'] = edu.iloc[:, HSBEGIN:HSEND].sum(axis=1) / edu_total
    edu['pct_BA'] = edu.iloc[:, BABEGIN:EDUEND].sum(axis=1) / edu_total

    data = pd.merge(data, edu, on='index')

    # create unique FIPS ID
    data['bg'] = data['index'].apply(
        lambda x: '17031' + x.geo[TRACTCODE][1] + x.geo[BLKGRPCODE][1])

    # compute variables of interest
    households = data['B11001_001E']
    race_total = data['B03002_001E']
    data['pct_1parent'] = data['B11001_004E'] / households
    data['pct_alone'] = data['B11001_008E'] / households
    data['pct_white'] = data['B03002_003E'] / race_total
    data['pct_black'] = data['B03002_004E'] / race_total
    data['pct_hisp'] = data['B03002_012E'] / race_total
    data['medinc'] = data['B19013_001E']

    rv = data[['bg', 'pct_1parent', 'pct_alone', 'pct_white', 'pct_black',
               'pct_hisp', 'B15003_001E', 'pct_nohs', 'pct_BA', 'medinc']]
    print(rv.head())
    return rv
def downloadBlockgroupData(state, year, key, tableIDs):
    """Download ACS 5-year data at the block group level.

    Args:
        state, year, key: Passed to ``getAllCounties`` to obtain the county
            geographies; ``year`` and ``key`` are also used for every
            download.
        tableIDs: Variables to request in every download.

    Returns:
        pandas.DataFrame: One download per county (the county's geography
        extended with ``block group: *``), stacked in iteration order. No
        counties yields an empty frame.
    """
    allCounties = getAllCounties(state, year, key)
    frames = [
        censusdata.download(
            'acs5', year,
            censusdata.censusgeo(
                list(allCounties[county].geo) + [('block group', '*')]),
            tableIDs, key=key)
        for county in allCounties
    ]
    # DataFrame.append was removed in pandas 2.0; concatenate once instead.
    return pd.concat(frames) if frames else pd.DataFrame()
def _get_acs_dfs(self, tables, **kwargs):
    """Get American Community Survey data

    The keyword arguments are turned into a geography hierarchy by
    ``self._parse_hierarchy``; ``tables`` is downloaded for that geography
    from ``self.survey`` / ``self.year`` and the result is passed through
    ``self._parse_geo_index`` before being returned.
    """
    geography = censusdata.censusgeo(self._parse_hierarchy(kwargs))
    raw = censusdata.download(self.survey, self.year, geography, tables)
    return self._parse_geo_index(raw)