예제 #1
0
    def test_printtable(self):
        testtable = censusdata.censustable('acs5', 2015, 'B19013')
        printedtable = io.StringIO()
        sys.stdout = printedtable
        censusdata.printtable(testtable)
        sys.stdout = sys.__stdout__
        self.assertEqual(
            printedtable.getvalue(),
            textwrap.dedent('''\
			Variable     | Table                          | Label                                                    | Type 
			-------------------------------------------------------------------------------------------------------------------
			B19013_001E  | MEDIAN HOUSEHOLD INCOME IN THE | !! Estimate Median household income in the past 12 month | int  
			-------------------------------------------------------------------------------------------------------------------
			'''))
        printedtable.close()
        printedtable = io.StringIO()
        sys.stdout = printedtable
        censusdata.printtable(testtable, moe=True)
        sys.stdout = sys.__stdout__
        self.assertEqual(
            printedtable.getvalue(),
            textwrap.dedent('''\
			Variable     | Table                          | Label                                                    | Type 
			-------------------------------------------------------------------------------------------------------------------
			B19013_001E  | MEDIAN HOUSEHOLD INCOME IN THE | !! Estimate Median household income in the past 12 month | int  
			-------------------------------------------------------------------------------------------------------------------
			'''))
        printedtable.close()
def main():
    """Download Cook County, IL block-group data and summarise two rates.

    Computes the percent unemployed and the percent without a high-school
    degree per block group, prints summary statistics, the 30 block groups
    with the highest unemployment and the correlation matrix, then exports
    both columns to ``cook_data.csv``.
    """
    pd.set_option('display.expand_frame_repr', False)
    pd.set_option('display.precision', 2)

    survey, year = 'acs5', 2015

    # Step 1: find the tables that hold the variables of interest. The ACS
    # Table Shells document them
    # (https://www.census.gov/programs-surveys/acs/technical-documentation/summary-file-documentation.html);
    # censusdata.search looks for a text pattern and the slice narrows the
    # hits to the relevant variables. The results are not used further here.
    censusdata.search(survey, year, 'label', 'unemploy')[160:170]
    censusdata.search(survey, year, 'concept', 'education')[730:790]

    # Step 2: list the variables of the two tables that were picked.
    for table_id in ('B23025', 'B15003'):
        censusdata.printtable(censusdata.censustable(survey, year, table_id))

    # Step 3: identify the geography -- block groups in Cook County, IL.
    # The FIPS codes were looked up once with the calls below
    # (Illinois is 17, Cook is 031):
    # print(censusdata.geographies(censusdata.censusgeo([('state','*')]), 'acs5', 2015))
    # print(censusdata.geographies(censusdata.censusgeo([('state','17'), ('county', '*')]), 'acs5', 2015))
    block_groups = censusdata.censusgeo(
        [('state', '17'), ('county', '031'), ('block group', '*')])

    # Step 4: download. B15003_001E .. B15003_016E are generated rather than
    # spelled out; the request order matches the hand-written list.
    labor_vars = ['B23025_003E', 'B23025_005E']
    education_vars = ['B15003_{:03d}E'.format(i) for i in range(1, 17)]
    cook_cnty = censusdata.download(
        survey, year, block_groups, labor_vars + education_vars)

    cook_cnty['percent_unemployed'] = (
        cook_cnty['B23025_005E'] / cook_cnty['B23025_003E'] * 100)

    # Add up B15003_002E .. B15003_016E left to right, then express the sum
    # as a percentage of B15003_001E.
    below_hs = cook_cnty[education_vars[1]]
    for var in education_vars[2:]:
        below_hs = below_hs + cook_cnty[var]
    cook_cnty['percent_nohs'] = below_hs / cook_cnty[education_vars[0]] * 100

    cook_cnty = cook_cnty[['percent_unemployed', 'percent_nohs']]
    print(cook_cnty.describe())

    # The 30 block groups with the highest unemployment rate, shown together
    # with their no-high-school share.
    ranked = cook_cnty.sort_values('percent_unemployed', ascending=False)
    print(ranked.head(30))

    # Correlation between the two rates.
    print(cook_cnty.corr())

    censusdata.exportcsv('cook_data.csv', cook_cnty)
예제 #3
0
def genTable(table):
    """Print the ACS 1-year 2018 table ``table``.

    Returns whatever ``censusdata.printtable`` returns for that table.
    """
    table_vars = censusdata.censustable('acs1', 2018, table)
    return censusdata.printtable(table_vars)
def main(verbose=False, data_dir='../data/'):
    """Download 2015 ACS race shares for Cook County tracts and tract shapes.

    Downloads table B02001 for every Illinois census tract, converts the
    race counts to shares of the tract total, keeps the rows whose county
    name is ``Cook`` and writes them to
    ``Illinois2015CensusTractsDemographics.csv``. The Illinois tract GeoJSON
    is then fetched into ``Illinois2015CensusTracts.json``.

    Args:
        verbose: When true, also print the variable search, the table
            listing, the selected variables, a sample of rows and summary
            statistics.
        data_dir: Directory that receives both output files. A trailing
            path separator is optional.
    """
    # Function-scope import so this function does not rely on the module's
    # import block providing os.
    import os

    # Fetched once and reused for the verbose listing and the variable list.
    race_table = censusdata.censustable('acs5', 2015, 'B02001')

    if verbose:
        pd.set_option('display.expand_frame_repr', False)
        pd.set_option('display.precision', 2)

        print("Available race variables:")
        print(censusdata.search('acs5', 2015, 'label', 'race'))
        print("Table to download:")
        censusdata.printtable(race_table)

    # Drop the margin-of-error variables (names ending in 'M').
    variables = [name for name in race_table.keys() if not name.endswith('M')]
    if verbose:
        print("Variables:")
        print(variables)

    illinois_demo = censusdata.download(
        'acs5', 2015, censusdata.censusgeo([('state', '17'), ('tract', '*')]),
        variables)

    illinois_demo = illinois_demo.rename(
        columns={
            'B02001_001E': 'total',
            'B02001_002E': 'white',
            'B02001_003E': 'black',
            'B02001_004E': 'native',
            'B02001_005E': 'asian',
            'B02001_006E': 'pacific',
            'B02001_007E': 'other',
            'B02001_008E': 'two_or_more',
            'B02001_009E': 'two_or_more_including_other',
            'B02001_010E': 'two_or_more_excluding_other',
        })

    # Fold both "two or more races" sub-categories into 'other'.
    illinois_demo['other'] = (
        illinois_demo['other']
        + illinois_demo['two_or_more_including_other']
        + illinois_demo['two_or_more_excluding_other'])

    race_columns = ['white', 'black', 'native', 'asian', 'pacific', 'other']
    # Explicit copy: the columns assigned below must land on an independent
    # frame, not on a selection of the downloaded one (avoids pandas'
    # SettingWithCopyWarning).
    illinois_demo = illinois_demo[['total'] + race_columns].copy()

    # Turn the counts into shares of the tract total.
    total = illinois_demo['total']
    for column in race_columns:
        illinois_demo[column] = illinois_demo[column] / total

    # The index holds the censusgeo objects; extract the identifiers from
    # them and replace the index with a plain integer range.
    geos = list(illinois_demo.index)
    illinois_demo = illinois_demo.reset_index(drop=True)
    illinois_demo['tract'] = [str(geo.geo[2][1]) for geo in geos]
    illinois_demo['county'] = [geo.geo[1][1] for geo in geos]
    # Second comma-separated part of the name, minus the leading space and
    # the last seven characters (presumably the " County" suffix).
    illinois_demo['county_name'] = [
        geo.name.split(',')[1][1:-7] for geo in geos
    ]

    if verbose:
        # Up to ten rows; n= also works for frames shorter than ten rows,
        # where the former frac=10/len(...) raised.
        print(illinois_demo.sample(n=min(10, len(illinois_demo))))
        print(illinois_demo.describe())

    illinois_demo = illinois_demo.loc[illinois_demo['county_name'] == 'Cook']
    # os.path.join keeps the path valid whether or not data_dir ends with a
    # separator.
    illinois_demo.to_csv(
        os.path.join(data_dir, 'Illinois2015CensusTractsDemographics.csv'))
    print("Successfully downloaded Illinois demographic data.")

    url = "https://github.com/uscensusbureau/citysdk/raw/master/v2/GeoJSON/500k/2015/17/tract.json"
    fname = 'Illinois2015CensusTracts.json'
    target = os.path.join(data_dir, fname)
    download_file(url, target)
    print("Successfully downloaded Illinois census tract shapefile.")
예제 #5
0
# Folder that holds the raw inputs and receives the derived variable list.
input_drive = 'data/raw'

# Read the first sheet of the ACS 2017 table-shell workbook.
table_shell = os.path.join(input_drive, 'ACS2017_Table_Shells.xlsx')
xl = pd.ExcelFile(table_shell)
first_sheet = xl.sheet_names[0]
table_shell_df = xl.parse(first_sheet)

# Keep only the rows flagged for use (Use == 1), show them and save them.
is_flagged = table_shell_df['Use'] == 1
use_vars = table_shell_df[is_flagged]
print(use_vars[['TableID', 'Stub', 'Use']])
use_vars.to_csv(os.path.join(input_drive, 'ACS_variables.csv'))
variables = use_vars['TableID'].tolist()

# Examples of what the censusdata package offers: text search, table
# listing and geography lookups.
censusdata.search('acs5', 2017, 'label', 'unemploy')
# censusdata.search('acs5', 2017, 'concept', 'education')
b23025_vars = censusdata.censustable('acs5', 2017, 'B23025')
censusdata.printtable(b23025_vars)
all_states = censusdata.censusgeo([('state', '*')])
censusdata.geographies(all_states, 'acs5', 2017)
state_08_counties = censusdata.censusgeo([('state', '08'), ('county', '*')])
censusdata.geographies(state_08_counties, 'acs5', 2017)

# The C variables do not seem to work, so every ID containing 'C' is
# dropped, along with every ID containing B17002.
variables = [
    var for var in variables
    if 'C' not in var and "B17002" not in var
]

# loop through all variables and merge data together
count = 0
for variable in variables:
    print(variable)
    data = censusdata_pull(variable)

    if count == 0:
예제 #6
0
# %%
import censusdata
import pandas as pd
import sqlite3
import ssl
import re
import os
# Display options: do not wrap wide frames across several lines and show
# values with a display precision of 2.
pd.set_option('display.expand_frame_repr', False)
pd.set_option('display.precision', 2)

# Each "# %%" line below starts a new notebook-style cell. Every cell ends in
# a bare expression, so its value is what the cell shows when run
# interactively.

# %%
# Aggregate Population by Age
# Prints the variables of ACS 5-year 2018 table B01001.
censusdata.printtable(censusdata.censustable('acs5', 2018, 'B01001'))

# %%
# Geographies by state>place
# Looks up every place ('*') within state 12.
censusdata.geographies(censusdata.censusgeo([('state', '12'), ('place', '*')]),
                       'acs5', 2018)

# %%
# By County
# Looks up every county ('*') within state 12.
censusdata.geographies(
    censusdata.censusgeo([('state', '12'), ('county', '*')]), 'acs5', 2018)

# %%
# By County Subdivision
# Looks up every county subdivision ('*') within county 057 of state 12.
censusdata.geographies(
    censusdata.censusgeo([('state', '12'), ('county', '057'),
                          ('county subdivision', '*')]), 'acs5', 2018)

# %%
예제 #7
0
import censusdata
# pd.set_option is called below; without this import the script stops with a
# NameError before any census call is made.
import pandas as pd

# Display options: do not wrap wide frames across several lines and show
# values with a display precision of 2.
pd.set_option('display.expand_frame_repr', False)
pd.set_option('display.precision', 2)

# Disabled exploration step: search the 'concept' field for 'transportation'
# and print every hit.
# sample = censusdata.search('acs5', 2015, 'concept', 'transportation')#[160:170]
#
# print(len(sample))
#
# for item in sample:
#     print(item)




# Print the variables of ACS 5-year 2015 table B08301.
censusdata.printtable(censusdata.censustable('acs5', 2015, 'B08301'))
#
# states = censusdata.geographies(censusdata.censusgeo([('state', '*')]), 'acs5', 2015)
#
#
# counties = censusdata.geographies(censusdata.censusgeo([('state', '36'), ('county', '*')]), 'acs5', 2015)
#
# #print(counties)
#
# # data = censusdata.download('acs5', 2015,
# #                              censusdata.censusgeo([('state', '36'), ('county', '081'), ('block group', '*')]),
# #                              ['B23025_001E', 'B23025_002E', 'B23025_003E', 'B23025_004E', 'B23025_005E',
# #                               'B23025_006E', 'B23025_007E'])
#
data = censusdata.download('acs5', 2015,
                             censusdata.censusgeo([('state', '36'), ('county', '*')]),