def get_acs_data(years, tuple_val):
    '''
    Gets ACS 5-year data by zip code, issuing one request per year.

    Input:
        years(list): list of years we want to get; every entry must be
            accepted by int()
        tuple_val(tuple): tuple with all the feature names we want to obtain
    Output:
        returns dataframe with the rows of every requested year stacked in
        the order given, plus an integer 'year' column. The per-year row
        index is kept as-is (not reset). Returns None when years is empty.
    '''
    years = list(years)
    if not years:
        # Nothing to fetch, so do not create an API client at all.
        return None

    # NOTE(review): the API key is embedded in source; consider loading it
    # from configuration instead.
    c = Census("782594777378b4dae32651de41285b8430095ed4")

    frames = []
    for yr in years:
        # Census.ALL is passed as the zip-code selector.
        data = c.acs5.zipcode(tuple_val, Census.ALL, year=yr)
        df_data = pd.DataFrame(data)
        df_data['year'] = int(yr)
        frames.append(df_data)

    # Concatenate once at the end instead of re-copying the accumulated
    # frame on every iteration.
    return pd.concat(frames)
def retrieve_us_data(cities_geo=None,
                     API_key="9320d66d590a2cc9bb46c24b1c4144d1bc7eccfb",
                     year=None):
    '''Retrieve variables of interest data from the US census API.

    One acs5 block-group request (with geometry) is made per city and the
    result is converted to a GeoDataFrame.

    Args:
        cities_geo (list of lists) : state and city FIPS code for each city of
            interest, for example [[17, 14000], [36, 36061]] for Chicago and
            NYC. A single city must still be wrapped in an outer list, e.g.
            [[17, 14000]]. None (the default) means no cities.
        API_key (string) : API_key for US census (default is key of Leonardo
            Nicoletti)
        year (int) : year of interest; passed unchanged to the Census client,
            so None leaves the choice of year to the client

    Returns:
        list : list of census GeoDataFrames, one per entry of cities_geo and
            in the same order; an empty list when cities_geo is None

    '''
    # The default argument is None; treat it as "no cities" rather than
    # failing with a TypeError when iterating.
    if cities_geo is None:
        return []

    name = ['NAME']
    pop_metrics = ['B00001_001E', 'B01002_001E']

    us_citizen = [
        'B05001_002E', 'B05001_003E', 'B05001_004E', 'B05001_005E',
        'B05001PR_002E', 'B05001PR_003E', 'B05001PR_004E', 'B05001PR_005E'
    ]
    immigrant = ['B05001_006E', 'B05001PR_006E']

    white = ['B02001_002E']
    minority = [
        'B02001_003E', 'B02001_004E', 'B02001_005E', 'B02001_006E',
        'B02001_007E', 'B02001_008E', 'B02001_009E'
    ]

    tot_households = ['B11001_001E']
    average_household_size = ['B25010_001E']
    married_households = ['B11001_003E']
    single_parent = ['B11001_005E', 'B11001_006E']
    nonfamily_households = ['B11001_007E']

    median_household_income = ['B19013_001E']
    aggregate_household_income = ['B19025_001E']
    per_capita_income = ['B19301_001E']

    only_english = ['B16001_002E', 'B06007_002E']
    other_languages_bad_english = ['B06007_008E']
    other_languages_good_english = ['B06007_007E']

    tot_pop_in_housing = ['B25008_001E']
    owner = ['B25008_002E']
    renter = ['B25008_003E']
    median_monthly_housing_costs = ['B25105_001E']
    median_house_value = ['B25107_001E']
    aggregate_house_value = ['B25108_001E']
    total_gross_rent = ['B25063_001E']
    median_gross_rent = ['B25064_001E']
    aggregate_gross_rent = ['B25065_001E']
    tot_gross_rent_as_percent_of_income = ['B25070_001E']
    less_than_30_of_income = [
        'B25070_002E', 'B25070_003E', 'B25070_004E', 'B25070_005E',
        'B25070_006E'
    ]
    more_than_30_of_income = [
        'B25070_007E', 'B25070_008E', 'B25070_009E', 'B25070_010E'
    ]
    median_gross_rent_as_percent_of_income = ['B25071_001E']
    median_n_rooms = ['B25018_001E']
    aggregate_n_rooms = ['B25019_001E']

    tot_edu_attainment = ['B15003_001E']
    less_than_high_school = [
        'B15003_002E', 'B15003_003E', 'B15003_004E', 'B15003_005E',
        'B15003_006E', 'B15003_007E', 'B15003_008E', 'B15003_009E',
        'B15003_010E', 'B15003_011E', 'B15003_012E', 'B15003_013E',
        'B15003_014E', 'B15003_015E', 'B15003_016E'
    ]
    high_school = ['B15003_017E']
    associates_degree = ['B15003_021E']
    bachelors_degree = ['B15003_022E']
    masters_degree = ['B15003_023E']
    professional_school_degree = ['B15003_024E']
    doctorate_degree = ['B15003_025E']

    tot_employment_status = ['B23025_001E']
    employed = ['B23025_004E']
    unemployed = ['B23025_005E']

    categories_of_choice = [
        name, pop_metrics, us_citizen, immigrant, white, minority,
        tot_households, average_household_size, married_households,
        single_parent, nonfamily_households, median_household_income,
        aggregate_household_income, per_capita_income, only_english,
        other_languages_bad_english, other_languages_good_english,
        tot_pop_in_housing, owner, renter, median_monthly_housing_costs,
        median_house_value, aggregate_house_value, total_gross_rent,
        median_gross_rent, aggregate_gross_rent,
        tot_gross_rent_as_percent_of_income, less_than_30_of_income,
        more_than_30_of_income, median_gross_rent_as_percent_of_income,
        median_n_rooms, aggregate_n_rooms, tot_edu_attainment,
        less_than_high_school, high_school, associates_degree,
        bachelors_degree, masters_degree, professional_school_degree,
        doctorate_degree, tot_employment_status, employed, unemployed
    ]

    # Flatten the groups into the single tuple of variable codes sent with
    # every request; built once because it does not change between cities.
    requested_fields = tuple(
        itertools.chain.from_iterable(categories_of_choice))

    # API key for US census and year of interest
    c = Census(API_key, year=year)

    cities_census = []

    for city in cities_geo:
        # city[0] is the state FIPS code, city[1] the place FIPS code.
        city_census = c.acs5.state_place_blockgroup(
            requested_fields,
            city[0],
            city[1],
            return_geometry=True)
        city_gdf = gpd.GeoDataFrame.from_features(city_census['features'])
        cities_census.append(city_gdf)
        print("City collected successfully!")

    return cities_census
# Example #3
0
import csv
import json
import sys

from census_area import Census

from config import CENSUS_API_KEY

# Census API client, authenticated with the key imported from config.
c = Census(CENSUS_API_KEY)

# Output is written as CSV to standard output.
writer = csv.writer(sys.stdout)

# GeoJSON is UTF-8 by specification, so decode it explicitly instead of
# relying on the platform's default text encoding.
with open('raw/cpd_district_boundaries.geojson', 'r', encoding='utf-8') as f:
    police_districts = json.load(f)

# ACS5 SEX BY AGE (B01001) SUBTABLE DEFINITIONS
#
# RACE/ETHNICITY
# B - Black only
# H - White, not Hispanic
# I - Hispanic
#
# SEX/AGE
# 005 - Boys, ages 10-14
# 006 - Boys, ages 15-17
# 020 - Girls, ages 10-14
# 021 - Girls, ages 15-17

table_map = {
    'black_male': ['B01001B_005E', 'B01001B_006E'],
    'black_female': ['B01001B_020E', 'B01001B_021E'],
# Example #4
0
import json
import sys

import scrapelib
import tqdm
from scrapelib.cache import FileCache
from census_area import Census

# HTTP session that is handed to the Census client below.
# NOTE(review): raise_errors=False / requests_per_minute=0 presumably turn off
# scrapelib's error raising and rate limiting — confirm against scrapelib docs.
s = scrapelib.Scraper(raise_errors=False, requests_per_minute=0)

# NOTE(review): API key is hard-coded in source; consider moving it to
# configuration.
API_KEY = 'ac94ba69718a7e1da4f89c6d218b8f6b5ae9ac49'

# Geographies are read as JSON from stdin; the first command-line argument
# is converted to int and used as the Census year.
geographies = json.load(sys.stdin)
c = Census(API_KEY, session=s, year=int(sys.argv[1]))

# ACS variable codes of interest; every code maps to an initial value of 0.
VARS = dict.fromkeys(
    (
        'B03002_001E',  # total population
        'B03002_003E',  # Not Hispanic or Latino white
        'B03002_004E',  # Not Hispanic or Latino black
        'B03002_006E',  # Not Hispanic or Latino asian
        'B03002_012E',  # Hispanic or Latino
        'B25120_001E',  # Aggregate household income in the past 12 months
        'B19001_001E',  # Households
        'B05003_009E',  # Native Born, Male, Over 18
        'B05003_011E',  # Naturalized U.S. Citizen, Male, Over 18
        'B05003_020E',  # Native Born, Female, Over 18
        'B05003_022E',  # Naturalized U.S. Citizen, Female, Over 18
    ),
    0,
)

READABLE_VARS = {'geography number': None,
                 'B03002_001E': 'Total Population',
                 'B03002_003E': 'Not Hispanic or Latino Origin, Whites',
import sys
import os

from census_area import Census

from secrets import CENSUS_API_KEY


# Comma-separated ACS variable codes are taken from the ACS_TABLES environment
# variable; when it is unset only B01003_001E (total population) is used.
# Variable definitions: https://api.census.gov/data/2018/acs/acs5/variables.html
tables = os.getenv('ACS_TABLES', 'B01003_001E').split(',')

# CSV goes to stdout; the header is the two identifying columns followed by
# one column per requested code.
writer = csv.writer(sys.stdout)
writer.writerow(['community_area', 'ward'] + tables)

c = Census(CENSUS_API_KEY, year=2017)

for feature in json.load(sys.stdin):
    community_area = feature['properties']['community_area'].title()
    ward = feature['properties']['ward']

    table_values = []

    for table in tables:
        data_by_tract = c.acs5.geo_tract(('NAME', table), feature)

        # data_by_tract contains a three-tuple of the tract feature, properties
        # of the tract, and the proportion of the tract that overlaps with the
        # ward. Multiply the tract figure by the overlap in order to estimate
        # the portion of the value that belongs to the ward.
        count = sum(tract_data[table] * percent_overlap for _, tract_data, percent_overlap in data_by_tract)
# Example #6
0
import scrapelib
import tqdm
from scrapelib.cache import FileCache
from census_area import Census

# HTTP session that is handed to the Census client below.
# NOTE(review): raise_errors=False / requests_per_minute=0 presumably turn off
# scrapelib's error raising and rate limiting — confirm against scrapelib docs.
s = scrapelib.Scraper(raise_errors=False, requests_per_minute=0)
# File-backed cache constructed with the path 'cache'.
cache = FileCache('cache')

# Attach the cache to the session.
# NOTE(review): cache_write_only=False presumably lets cached responses be
# read back as well as written — confirm against scrapelib docs.
s.cache_storage = cache
s.cache_write_only = False

# NOTE(review): API key is hard-coded in source; consider moving it to
# configuration.
API_KEY = 'ac94ba69718a7e1da4f89c6d218b8f6b5ae9ac49'

# Geographies are read as JSON from stdin; no year is passed to the client.
geographies = json.load(sys.stdin)
c = Census(API_KEY, session=s)

# ACS variable codes of interest; every code maps to an initial value of 0.
VARS = {
    code: 0
    for code in (
        'B03002_001E',  # total population
        'B03002_003E',  # Not Hispanic or Latino white
        'B03002_004E',  # Not Hispanic or Latino black
        'B03002_006E',  # Not Hispanic or Latino asian
        'B03002_012E',  # Hispanic or Latino
        'B25120_001E',  # Aggregate household income in the past 12 months
        'B19001_001E',  # Households
    )
}

# The subset of the codes above that is singled out as tract-level.
TRACT_LEVEL_VARS = {
    'B19001_001E',
    'B25120_001E',
}

READABLE_VARS = {
    'geography number': None,
import sys
import json
import csv

from census_area import Census

from secrets import API_KEY

# CSV output goes to stdout, beginning with the header row.
writer = csv.writer(sys.stdout)
writer.writerow(('community', 'population'))

# The JSON document on stdin must carry a top-level "features" list.
geojson = json.load(sys.stdin)
features = geojson["features"]

c = Census(API_KEY)

fields = {'total_pop': 'B01003_001E'}

for feature in features:
    community = feature["properties"]["community"]

    # Query the tracts for this feature's geometry; the second element of
    # each returned entry holds the requested values keyed by variable code.
    requested = ('NAME', fields['total_pop'])
    tracts = c.acs5.geo_tract(requested, feature["geometry"])
    total_pop = sum(int(entry[1]['B01003_001E']) for entry in tracts)

    # Remove population of Cook County Jail
    # (source: https://performance.cookcountyil.gov/reports/Sheriff-DOC)
    if community == "SOUTH LAWNDALE":
        total_pop = total_pop - 9000

    writer.writerow((community, total_pop))
# # PART I<br>DATA WRANGLING & EXPLANATORY ANALYSIS

# # 1 Demographic Variables of Census Tracts

# ## 1.1 Data Collection Through API

# In[1]:

from census_area import Census

# In[2]:

# Initialize API key and create the Census client with it.
# NOTE(review): the key is hard-coded in the notebook export; consider
# loading it from configuration instead.
api_key = "56ee867dc43e9c68de842ea51d8b52130c9ea382"
c = Census(key=api_key)

# In[3]:

# Set FIPS codes of PA and Philly
# (PA_code is the state code, Philly_code the code for Philadelphia.)
PA_code = 42
Philly_code = 60000

# In[8]:

# Codes for demographics of interest: 'NAME' followed by ACS variable codes.
# NOTE(review): the meaning of each code is not documented here — look them
# up in the ACS variable list before relying on them.
codes = [
    'NAME', 'B01001_002E', 'B01001_026E', 'B02001_002E', 'B01002_001E',
    'B06001_002E', 'B19001_001E', 'B06009_005E'
]
# Request ACS data