def test_printtable(self):
    """Check the text censusdata.printtable writes to stdout for table B19013.

    The table is printed twice, once with default arguments and once with
    ``moe=True``, and each captured output is compared with the expected
    listing.
    """
    testtable = censusdata.censustable('acs5', 2015, 'B19013')

    def captured_output(**kwargs):
        """Return everything printtable(testtable, **kwargs) prints."""
        # Remember the stream that is active right now (a test runner may have
        # installed its own) and always put it back, even if printtable raises.
        previous_stdout = sys.stdout
        with io.StringIO() as printedtable:
            sys.stdout = printedtable
            try:
                censusdata.printtable(testtable, **kwargs)
            finally:
                sys.stdout = previous_stdout
            return printedtable.getvalue()

    # NOTE(review): assertEqual compares this text character for character, so
    # the spacing between columns (and any trailing blanks) must match what
    # printtable emits - confirm the padding against real output.
    expected = textwrap.dedent('''\
        Variable | Table | Label | Type
        -------------------------------------------------------------------------------------------------------------------
        B19013_001E | MEDIAN HOUSEHOLD INCOME IN THE | !! Estimate Median household income in the past 12 month | int
        -------------------------------------------------------------------------------------------------------------------
        ''')

    # Both calls are asserted against the same expected listing.
    self.assertEqual(captured_output(), expected)
    self.assertEqual(captured_output(moe=True), expected)
def main():
    """Download ACS 2015 5-year data for Cook County block groups and summarize it.

    Looks up the variables of interest, prints the two tables that contain
    them, downloads those variables for every block group in state 17 /
    county 031, derives the percent unemployed and the percent with no
    high-school degree, prints summary statistics, the 30 block groups with
    the highest unemployment and the correlation matrix, and finally exports
    the two derived columns to ``cook_data.csv``.
    """
    survey, year = 'acs5', 2015

    pd.set_option('display.expand_frame_repr', False)
    pd.set_option('display.precision', 2)

    # Before downloading we must identify the tables that contain the
    # variables of interest. The ACS documentation helps here, in particular
    # the Table Shells:
    # https://www.census.gov/programs-surveys/acs/technical-documentation/summary-file-documentation.html
    # censusdata.search finds variables matching a text pattern; slicing the
    # result limits it to the relevant variables.
    censusdata.search(survey, year, 'label', 'unemploy')[160:170]
    censusdata.search(survey, year, 'concept', 'education')[730:790]

    # censusdata.printtable shows the variables contained in a table.
    for table_id in ('B23025', 'B15003'):
        censusdata.printtable(censusdata.censustable(survey, year, table_id))

    # With the variables known, the geographies come next: block groups in
    # Cook County, IL.
    #   1. Look up the state code:
    #      print(censusdata.geographies(censusdata.censusgeo([('state','*')]), 'acs5', 2015))
    #      -> IL is 17
    #   2. List the counties within IL to find Cook:
    #      print(censusdata.geographies(censusdata.censusgeo([('state','17'), ('county', '*')]), 'acs5', 2015))
    #      -> Cook is 031

    # Once variables and geographies are identified, censusdata.download
    # fetches the data.
    employment_vars = ['B23025_003E', 'B23025_005E']
    education_vars = ['B15003_{:03d}E'.format(number) for number in range(1, 17)]
    block_groups = censusdata.censusgeo(
        [('state', '17'), ('county', '031'), ('block group', '*')])
    cook_cnty = censusdata.download(
        survey, year, block_groups, employment_vars + education_vars)

    # Compute the percent unemployed and the percent with no high-school degree.
    cook_cnty['percent_unemployed'] = (
        cook_cnty['B23025_005E'] / cook_cnty['B23025_003E'] * 100)

    # B15003_002E .. B15003_016E are added up in order; B15003_001E is the
    # denominator.
    no_hs_columns = education_vars[1:]
    no_hs_count = cook_cnty[no_hs_columns[0]]
    for column in no_hs_columns[1:]:
        no_hs_count = no_hs_count + cook_cnty[column]
    cook_cnty['percent_nohs'] = no_hs_count / cook_cnty['B15003_001E'] * 100

    cook_cnty = cook_cnty[['percent_unemployed', 'percent_nohs']]
    print(cook_cnty.describe())

    # The 30 block groups in Cook County with the highest unemployment rate,
    # shown together with their percent with no high-school degree.
    highest_unemployment = cook_cnty.sort_values(
        'percent_unemployed', ascending=False)
    print(highest_unemployment.head(30))

    # Correlation between the two derived measures.
    print(cook_cnty.corr())

    censusdata.exportcsv('cook_data.csv', cook_cnty)
def genTable(table):
    """Print the ACS 1-year 2018 table identified by ``table``.

    Args:
        table: Table identifier passed straight to ``censusdata.censustable``.

    Returns:
        Whatever ``censusdata.printtable`` returns for that table.
    """
    table_variables = censusdata.censustable('acs1', 2018, table)
    return censusdata.printtable(table_variables)
def main(verbose=False, data_dir='../data/'):
    """Download Illinois tract demographics (ACS 5-year 2015, table B02001).

    Downloads every ``B02001`` estimate for all census tracts in state
    ``'17'``, converts the race counts into shares of the tract total, keeps
    the rows whose county name is ``'Cook'`` and writes them to
    ``Illinois2015CensusTractsDemographics.csv`` inside ``data_dir``. It then
    fetches the 2015 tract GeoJSON for the state into the same directory via
    ``download_file``.

    Args:
        verbose: When true, also set the pandas display options and print the
            variable search results, the table listing, the selected
            variables, a sample of about ten rows and summary statistics.
        data_dir: Directory that receives both output files. A trailing path
            separator is optional.
    """
    # Local import so this function does not rely on the file's import list.
    import os

    if verbose:
        pd.set_option('display.expand_frame_repr', False)
        pd.set_option('display.precision', 2)
        print("Available race variables:")
        print(censusdata.search('acs5', 2015, 'label', 'race'))

    # Fetch the table description once and reuse it for printing and for the
    # variable list.
    race_table = censusdata.censustable('acs5', 2015, 'B02001')
    if verbose:
        print("Table to download:")
        censusdata.printtable(race_table)

    # Remove the margin-of-error variables (names ending in 'M').
    variables = [name for name in race_table if not name.endswith('M')]
    if verbose:
        print("Variables:")
        print(variables)

    illinois_demo = censusdata.download(
        'acs5', 2015,
        censusdata.censusgeo([('state', '17'), ('tract', '*')]),
        variables)
    illinois_demo = illinois_demo.rename(columns={
        'B02001_001E': 'total',
        'B02001_002E': 'white',
        'B02001_003E': 'black',
        'B02001_004E': 'native',
        'B02001_005E': 'asian',
        'B02001_006E': 'pacific',
        'B02001_007E': 'other',
        'B02001_008E': 'two_or_more',
        'B02001_009E': 'two_or_more_including_other',
        'B02001_010E': 'two_or_more_excluding_other',
    })

    # Fold both "two or more" breakdown columns into 'other'.
    illinois_demo['other'] = (
        illinois_demo['other']
        + illinois_demo['two_or_more_including_other']
        + illinois_demo['two_or_more_excluding_other'])

    race_columns = ['white', 'black', 'native', 'asian', 'pacific', 'other']
    # Take an explicit copy: the columns are modified below, and assigning
    # into a slice of the downloaded frame is ambiguous chained assignment
    # (pandas warns with SettingWithCopyWarning).
    illinois_demo = illinois_demo[['total'] + race_columns].copy()

    # Turn counts into shares of the tract total.
    total = illinois_demo['total']
    for column in race_columns:
        illinois_demo[column] = illinois_demo[column] / total

    # The index holds the censusgeo objects; move them into a column so the
    # geography fields can be extracted, then drop the column again.
    illinois_demo['censusgeo'] = illinois_demo.index
    illinois_demo = illinois_demo.reset_index(level=0, drop=True)
    illinois_demo['tract'] = illinois_demo['censusgeo'].apply(
        lambda geo: geo.geo[2][1]).astype(str)
    illinois_demo['county'] = illinois_demo['censusgeo'].apply(
        lambda geo: geo.geo[1][1])
    # Second comma-separated field of the name without its first character and
    # its last seven characters - presumably a leading blank and a trailing
    # ' County'; confirm against the censusgeo names.
    illinois_demo['county_name'] = illinois_demo['censusgeo'].apply(
        lambda geo: geo.name.split(',')[1][1:-7])
    illinois_demo = illinois_demo.drop('censusgeo', axis='columns')

    if verbose:
        print(illinois_demo.sample(frac=10 / len(illinois_demo)))
        print(illinois_demo.describe())

    illinois_demo = illinois_demo.loc[illinois_demo.county_name == 'Cook']
    # os.path.join keeps the path valid whether or not data_dir ends with a
    # separator.
    illinois_demo.to_csv(
        os.path.join(data_dir, 'Illinois2015CensusTractsDemographics.csv'))
    print("Successfully downloaded Illinois demographic data.")

    url = "https://github.com/uscensusbureau/citysdk/raw/master/v2/GeoJSON/500k/2015/17/tract.json"
    fname = 'Illinois2015CensusTracts.json'
    target = os.path.join(data_dir, fname)
    download_file(url, target)
    print("Successfully downloaded Illinois census tract shapefile.")
input_drive = 'data/raw' table_shell = os.path.join(input_drive, 'ACS2017_Table_Shells.xlsx') xl = pd.ExcelFile(table_shell) table_shell_df = xl.parse(xl.sheet_names[0]) # variables I've flagged to use use_vars = table_shell_df[table_shell_df.Use == 1] print(use_vars[['TableID', 'Stub', 'Use']]) use_vars.to_csv(os.path.join(input_drive, 'ACS_variables.csv')) variables = use_vars.TableID.tolist() # Use the census data package # Examples of functionality censusdata.search('acs5', 2017, 'label', 'unemploy') # censusdata.search('acs5', 2017, 'concept', 'education') censusdata.printtable(censusdata.censustable('acs5', 2017, 'B23025')) censusdata.geographies(censusdata.censusgeo([('state', '*')]), 'acs5', 2017) censusdata.geographies( censusdata.censusgeo([('state', '08'), ('county', '*')]), 'acs5', 2017) # doesn't seem like the C variables work, so remove them variables = [var for var in variables if 'C' not in var] variables = [var for var in variables if "B17002" not in var] # loop through all variables and merge data together count = 0 for variable in variables: print(variable) data = censusdata_pull(variable) if count == 0:
# %%
import censusdata
import pandas as pd
import sqlite3
import ssl
import re
import os

# Display options for printed DataFrames.
pd.set_option('display.expand_frame_repr', False)
pd.set_option('display.precision', 2)

# Survey, year and state used by every lookup below.
SURVEY = 'acs5'
YEAR = 2018
STATE = ('state', '12')

# %%
# Aggregate population by age
censusdata.printtable(censusdata.censustable(SURVEY, YEAR, 'B01001'))

# %%
# Geographies by state > place
censusdata.geographies(
    censusdata.censusgeo([STATE, ('place', '*')]), SURVEY, YEAR)

# %%
# Geographies by county
censusdata.geographies(
    censusdata.censusgeo([STATE, ('county', '*')]), SURVEY, YEAR)

# %%
# Geographies by county subdivision (within county 057)
censusdata.geographies(
    censusdata.censusgeo(
        [STATE, ('county', '057'), ('county subdivision', '*')]),
    SURVEY, YEAR)

# %%
import censusdata pd.set_option('display.expand_frame_repr', False) pd.set_option('display.precision', 2) # sample = censusdata.search('acs5', 2015, 'concept', 'transportation')#[160:170] # # print(len(sample)) # # for item in sample: # print(item) censusdata.printtable(censusdata.censustable('acs5', 2015, 'B08301')) # # states = censusdata.geographies(censusdata.censusgeo([('state', '*')]), 'acs5', 2015) # # # counties = censusdata.geographies(censusdata.censusgeo([('state', '36'), ('county', '*')]), 'acs5', 2015) # # #print(counties) # # # data = censusdata.download('acs5', 2015, # # censusdata.censusgeo([('state', '36'), ('county', '081'), ('block group', '*')]), # # ['B23025_001E', 'B23025_002E', 'B23025_003E', 'B23025_004E', 'B23025_005E', # # 'B23025_006E', 'B23025_007E']) # data = censusdata.download('acs5', 2015, censusdata.censusgeo([('state', '36'), ('county', '*')]),