def get_acs_data(years, tuple_val):
    '''
    Gets ACS 5-year data at the zipcode level for several years.

    Input:
        years(list): list of years we want to get
        tuple_val(tuple): tuple with all the feature names we want to obtain
    Output:
        returns dataframe with acs data for every requested year stacked
        together, with an extra integer 'year' column; returns None when
        no years are given
    '''
    # Materialize once so any iterable (including generators) can be checked
    # for emptiness and then iterated.
    years = list(years)
    if not years:
        # Nothing to fetch: keep the historical contract of returning None.
        return None

    # NOTE(review): the API key is hard-coded in source; consider loading it
    # from configuration instead.
    c = Census("782594777378b4dae32651de41285b8430095ed4")

    frames = []
    for yr in years:
        data = c.acs5.zipcode(tuple_val, Census.ALL, year=yr)
        df_data = pd.DataFrame(data)
        df_data['year'] = int(yr)
        frames.append(df_data)

    # Concatenate once at the end instead of re-copying the accumulated
    # frame on every iteration.
    return pd.concat(frames)
def retrieve_us_data(cities_geo=None,
                     API_key="9320d66d590a2cc9bb46c24b1c4144d1bc7eccfb",
                     year=None):
    '''Retrieve variables of interest data from the US census API.

    Args:
        cities_geo (list of lists) : state and city FIPS code for each city of
            interest, for example [[17, 14000], [36, 36061]] for Chicago and
            NYC. A single city must still be wrapped in an outer list, e.g.
            [[17, 14000]]. None or an empty list requests nothing.
        API_key (string) : API_key for US census
            (default is key of Leonardo Nicoletti)
        year (int) : year of interest; None is passed through to the Census
            client unchanged (presumably selecting the client's default
            year -- confirm against the census library)

    Returns:
        list : list of census GeoDataFrames, one per entry of cities_geo and
            in the same order; an empty list when cities_geo is None or empty
    '''
    # The default cities_geo=None used to crash in the loop below; treat
    # "no cities" as "nothing to collect".
    if not cities_geo:
        return []

    # Variable codes grouped under the author's descriptive labels. The order
    # of the groups determines the order of the fields in the request.
    name = ['NAME']
    pop_metrics = ['B00001_001E', 'B01002_001E']
    us_citizen = [
        'B05001_002E', 'B05001_003E', 'B05001_004E', 'B05001_005E',
        'B05001PR_002E', 'B05001PR_003E', 'B05001PR_004E', 'B05001PR_005E'
    ]
    immigrant = ['B05001_006E', 'B05001PR_006E']
    white = ['B02001_002E']
    minority = [
        'B02001_003E', 'B02001_004E', 'B02001_005E', 'B02001_006E',
        'B02001_007E', 'B02001_008E', 'B02001_009E'
    ]
    tot_households = ['B11001_001E']
    average_household_size = ['B25010_001E']
    married_households = ['B11001_003E']
    single_parent = ['B11001_005E', 'B11001_006E']
    nonfamily_households = ['B11001_007E']
    median_household_income = ['B19013_001E']
    aggregate_household_income = ['B19025_001E']
    per_capita_income = ['B19301_001E']
    only_english = ['B16001_002E', 'B06007_002E']
    other_languages_bad_english = ['B06007_008E']
    other_languages_good_english = ['B06007_007E']
    tot_pop_in_housing = ['B25008_001E']
    owner = ['B25008_002E']
    renter = ['B25008_003E']
    median_monthly_housing_costs = ['B25105_001E']
    median_house_value = ['B25107_001E']
    aggregate_house_value = ['B25108_001E']
    total_gross_rent = ['B25063_001E']
    median_gross_rent = ['B25064_001E']
    aggregate_gross_rent = ['B25065_001E']
    tot_gross_rent_as_percent_of_income = ['B25070_001E']
    less_than_30_of_income = [
        'B25070_002E', 'B25070_003E', 'B25070_004E', 'B25070_005E',
        'B25070_006E'
    ]
    more_than_30_of_income = [
        'B25070_007E', 'B25070_008E', 'B25070_009E', 'B25070_010E'
    ]
    median_gross_rent_as_percent_of_income = ['B25071_001E']
    median_n_rooms = ['B25018_001E']
    aggregate_n_rooms = ['B25019_001E']
    tot_edu_attainment = ['B15003_001E']
    less_than_high_school = [
        'B15003_002E', 'B15003_003E', 'B15003_004E', 'B15003_005E',
        'B15003_006E', 'B15003_007E', 'B15003_008E', 'B15003_009E',
        'B15003_010E', 'B15003_011E', 'B15003_012E', 'B15003_013E',
        'B15003_014E', 'B15003_015E', 'B15003_016E'
    ]
    high_school = ['B15003_017E']
    associates_degree = ['B15003_021E']
    bachelors_degree = ['B15003_022E']
    masters_degree = ['B15003_023E']
    professional_school_degree = ['B15003_024E']
    doctorate_degree = ['B15003_025E']
    tot_employment_status = ['B23025_001E']
    employed = ['B23025_004E']
    unemployed = ['B23025_005E']

    categories_of_choice = [
        name, pop_metrics, us_citizen, immigrant, white, minority,
        tot_households, average_household_size, married_households,
        single_parent, nonfamily_households, median_household_income,
        aggregate_household_income, per_capita_income, only_english,
        other_languages_bad_english, other_languages_good_english,
        tot_pop_in_housing, owner, renter, median_monthly_housing_costs,
        median_house_value, aggregate_house_value, total_gross_rent,
        median_gross_rent, aggregate_gross_rent,
        tot_gross_rent_as_percent_of_income, less_than_30_of_income,
        more_than_30_of_income, median_gross_rent_as_percent_of_income,
        median_n_rooms, aggregate_n_rooms, tot_edu_attainment,
        less_than_high_school, high_school, associates_degree,
        bachelors_degree, masters_degree, professional_school_degree,
        doctorate_degree, tot_employment_status, employed, unemployed
    ]
    # Flatten the groups into the single tuple of field names that the
    # client call below receives.
    requested_fields = tuple(
        itertools.chain.from_iterable(categories_of_choice))

    # API key for US census and year of interest
    c = Census(API_key, year=year)

    cities_census = []
    for city in cities_geo:
        # city is [state FIPS, place FIPS]; return_geometry=True yields a
        # mapping whose 'features' entry is turned into a GeoDataFrame.
        city_census = c.acs5.state_place_blockgroup(
            requested_fields, city[0], city[1], return_geometry=True)
        city_gdf = gpd.GeoDataFrame.from_features(city_census['features'])
        cities_census.append(city_gdf)
        print("City collected successfully!")

    return cities_census
import csv
import json
import sys

from census_area import Census

from config import CENSUS_API_KEY

# Census API client and a CSV writer that emits rows on standard output.
c = Census(CENSUS_API_KEY)
writer = csv.writer(sys.stdout)

# Load the police district boundaries GeoJSON from the repository's raw/ folder.
with open('raw/cpd_district_boundaries.geojson', 'r') as f:
    police_districts = json.load(f)

# ACS5 SEX BY AGE (B01001) SUBTABLE DEFINITIONS
#
# RACE/ETHNICITY
# B - Black only
# H - White, not Hispanic
# I - Hispanic
#
# SEX/AGE
# 005 - Boys, ages 10-14
# 006 - Boys, ages 15-17
# 020 - Girls, ages 10-14
# 021 - Girls, ages 15-17

# Maps an output label to the list of ACS variable codes that belong to it
# (race/ethnicity subtable letter + sex/age row, per the legend above).
table_map = {
    'black_male': ['B01001B_005E', 'B01001B_006E'],
    'black_female': ['B01001B_020E', 'B01001B_021E'],
import json
import sys

import scrapelib
import tqdm
from scrapelib.cache import FileCache

from census_area import Census

# HTTP session handed to the Census client; errors are not raised
# (raise_errors=False). requests_per_minute=0 presumably disables
# throttling -- confirm against the scrapelib documentation.
s = scrapelib.Scraper(raise_errors=False, requests_per_minute=0)

# NOTE(review): the API key is hard-coded in source; consider loading it
# from configuration instead.
API_KEY = 'ac94ba69718a7e1da4f89c6d218b8f6b5ae9ac49'

# Geographies are read as JSON from standard input; the year comes from the
# first command-line argument.
geographies = json.load(sys.stdin)

c = Census(API_KEY, session=s, year=int(sys.argv[1]))

# ACS variable codes of interest, each mapped to an initial value of 0.
VARS = {'B03002_001E': 0, # total population,
        'B03002_003E': 0, # Not Hispanic or Latino white
        'B03002_004E': 0, # Not Hispanic or Latino black
        'B03002_006E': 0, # Not Hispanic or Latino asian
        'B03002_012E': 0, # Hispanic or Latino
        'B25120_001E': 0, # Aggregate household income in the past 12 months
        'B19001_001E': 0, # Households,
        'B05003_009E': 0, # Native Born, Male, Over 18
        'B05003_011E': 0, # Naturalized U.S. Citizen, Male, Over 18
        'B05003_020E': 0, # Native Born, female, Over 18
        'B05003_022E': 0, # Naturalized U.S. Citizen, Female, Over 18
        }

# Human-readable labels keyed by the same variable codes as VARS.
READABLE_VARS = {'geography number': None,
                 'B03002_001E': 'Total Population',
                 'B03002_003E': 'Not Hispanic or Latino Origin, Whites',
import sys
import os

from census_area import Census

# NOTE(review): this must resolve to a project-local `secrets` module, since
# the standard-library `secrets` does not define CENSUS_API_KEY -- confirm.
from secrets import CENSUS_API_KEY

# Default to total population if no environment variable set.
# See https://api.census.gov/data/2018/acs/acs5/variables.html for variable definitions.
# ACS_TABLES is a comma-separated list of variable codes.
tables = os.environ.get('ACS_TABLES', 'B01003_001E').split(',')

# NOTE(review): `csv` and `json` are used below but are not imported in the
# visible lines -- confirm they are imported elsewhere in the file.
writer = csv.writer(sys.stdout)
# Header row: the two identifying columns followed by one column per table.
writer.writerow(['community_area', 'ward', *tables])

# NOTE(review): the client is pinned to 2017 while the variable reference
# linked above is for 2018 -- confirm the definitions match.
c = Census(CENSUS_API_KEY, year=2017)

# Standard input is expected to hold a JSON array of features, each with
# 'community_area' and 'ward' properties.
for feature in json.load(sys.stdin):
    community_area = feature['properties']['community_area'].title()
    ward = feature['properties']['ward']

    table_values = []

    for table in tables:
        data_by_tract = c.acs5.geo_tract(('NAME', table), feature)

        # data_by_tract contains a three-tuple of the tract feature, properties
        # of the tract, and the proportion of the tract that overlaps with the
        # ward. Multiply the tract figure by the overlap in order to estimate
        # the portion of the value that belongs to the ward.
        count = sum(tract_data[table] * percent_overlap
                    for _, tract_data, percent_overlap in data_by_tract)
import scrapelib
import tqdm
from scrapelib.cache import FileCache

from census_area import Census

# HTTP session handed to the Census client; errors are not raised
# (raise_errors=False). requests_per_minute=0 presumably disables
# throttling -- confirm against the scrapelib documentation.
s = scrapelib.Scraper(raise_errors=False, requests_per_minute=0)

# Attach a file-backed cache rooted at 'cache' to the session.
cache = FileCache('cache')
s.cache_storage = cache
s.cache_write_only = False

# NOTE(review): the API key is hard-coded in source; consider loading it
# from configuration instead.
API_KEY = 'ac94ba69718a7e1da4f89c6d218b8f6b5ae9ac49'

# NOTE(review): `json` and `sys` are used here but are not imported in the
# visible lines -- confirm they are imported elsewhere in the file.
geographies = json.load(sys.stdin)

c = Census(API_KEY, session=s)

# ACS variable codes of interest, each mapped to an initial value of 0.
VARS = {
    'B03002_001E': 0, # total population,
    'B03002_003E': 0, # Not Hispanic or Latino white
    'B03002_004E': 0, # Not Hispanic or Latino black
    'B03002_006E': 0, # Not Hispanic or Latino asian
    'B03002_012E': 0, # Hispanic or Latino
    'B25120_001E': 0, # Aggregate household income in the past 12 months
    'B19001_001E': 0, # Households
}

# Subset of the VARS codes singled out as tract-level; how they are treated
# differently is not visible in this excerpt.
TRACT_LEVEL_VARS = {'B19001_001E', 'B25120_001E'}

# Human-readable labels keyed by variable code.
READABLE_VARS = {
    'geography number': None,
import sys
import json
import csv

from census_area import Census

from secrets import API_KEY

# Emit a two-column CSV on standard output: one row per community with the
# summed total population of the tracts returned for its geometry.
writer = csv.writer(sys.stdout)
writer.writerow(('community', 'population'))

# The GeoJSON FeatureCollection arrives on standard input.
geojson = json.load(sys.stdin)
features = geojson["features"]

c = Census(API_KEY)

fields = {'total_pop': 'B01003_001E'}

for feature in features:
    community = feature["properties"]["community"]

    requested = ('NAME', fields['total_pop'])
    tract_results = c.acs5.geo_tract(requested, feature["geometry"])

    # Element 1 of every result holds the tract's properties, which carry
    # the requested population figure.
    total_pop = 0
    for tract in tract_results:
        total_pop += int(tract[1]['B01003_001E'])

    # Remove population of Cook County Jail
    # (source: https://performance.cookcountyil.gov/reports/Sheriff-DOC)
    if community == "SOUTH LAWNDALE":
        total_pop = total_pop - 9000

    writer.writerow((community, total_pop))
# # PART I<br>DATA WRANGLING & EXPLANATORY ANALYSIS # # 1 Demographic Variables of Census Tracts # ## 1.1 Data Collection Through API # In[1]: from census_area import Census # In[2]: # Initialize API key api_key = "56ee867dc43e9c68de842ea51d8b52130c9ea382" c = Census(key=api_key) # In[3]: # Set FIPS codes of PA and Philly PA_code = 42 Philly_code = 60000 # In[8]: # Codes for demographics of interest codes = [ 'NAME', 'B01001_002E', 'B01001_026E', 'B02001_002E', 'B01002_001E', 'B06001_002E', 'B19001_001E', 'B06009_005E' ] # Request ACS data