def __init__(self, key, controls_csv, tazset=None, puma_data_dir=None, fips_file=None, write_households_csv=None, write_persons_csv=None, write_append=False, start_hhid=1, start_persid=1): pd.options.display.width = 200 pd.options.display.float_format = '{:,.3f}'.format pd.options.display.max_columns = 30 # start ids here self.start_hhid = start_hhid self.start_persid = start_persid # Starter.__init__(self, key, '06', '075') self.c = Census(key, base_url=puma_data_dir, fips_url=fips_file) self.hh_csvfile = None if write_households_csv: self.hh_csvfile = open(write_households_csv, 'a' if write_append else 'w') self.per_csvfile = None if write_persons_csv: self.per_csvfile = open(write_persons_csv, 'a' if write_append else 'w') # if appending, no header if write_append: self.wrote_hh_header = True self.wrote_pers_header = True # Read the control file print "\n\nReading the control file [%s]" % controls_csv self.controls = pd.read_csv(controls_csv, index_col=False) # Limit to only the specified TAZs if tazset and len(tazset) > 0: print "Using only TAZs in %s" % str(tazset) self.controls = self.controls[self.controls.SFTAZ.isin(tazset)] self.tazToPUMA2010 = pd.read_csv( r"Q:\Model Development\Population Synthesizer\4. Geographic Work\Census 2010 PUMAs\TAZ2454_to_Census2010PUMAs.csv", index_col=0, converters={'PUMA2010': str}) self.state = '06' # for caching - indexed by puma self.h_pums = {} self.jd_households = {} self.p_pums = {} self.jd_persons = {}
def get_acs_data(county, spec, settings):
    """
    Pull ACS household and person tables for one county and merge them.

    county   -- county FIPS code passed through to the Census queries.
    spec     -- expression spec consumed by get_column_names() to decide which
                ACS columns are needed at block-group vs tract level.
    settings -- dict with 'state', 'census_year', 'tract' (the literal string
                'None' means all tracts), 'census_key' (env var name holding the
                API key), and the four *_bg_size_attr / *_tract_size_attr keys.

    Returns a DataFrame of household ACS columns left-joined with person ACS
    columns on state/county/tract/block group.

    Raises RuntimeError if the spec requests the same ACS column at both the
    block-group and tract level.
    """
    state = settings['state']
    census_year = settings['census_year']
    # The settings file stores the literal string 'None' to mean "all tracts".
    tract = None if settings['tract'] == 'None' else settings['tract']
    c = Census(os.environ[settings['census_key']])

    def _query(table, settings_prefix):
        # Shared household/person query path: fetch the block-group- and
        # tract-level columns for one table kind and run the combined query.
        bg_columns = get_column_names('block_group', table, spec)
        tract_columns = get_column_names('tract', table, spec)
        # A column may be requested at one geography only; set intersection
        # replaces the original O(n*m) list scan.
        if set(bg_columns) & set(tract_columns):
            raise RuntimeError("The same acs column is being used as block group and tract. Please check expression file.")
        return c.block_group_and_tract_query(
            bg_columns, tract_columns, state, county,
            merge_columns=['tract', 'county', 'state'],
            block_group_size_attr=settings['%s_bg_size_attr' % settings_prefix],
            tract_size_attr=settings['%s_tract_size_attr' % settings_prefix],
            tract=tract, year=census_year)

    h_acs = _query('household', 'hh')
    p_acs = _query('person', 'pers')

    # Left join: keep every household geography row even if no person data.
    all_acs = h_acs.merge(p_acs, how='left',
                          on=['state', 'county', 'tract', 'block group'])
    return all_acs
def c():
    """Return a Census client wired to the shared test API key."""
    api_key = 'bfa6b4e541243011fab6307a31aed9e91015ba90'
    return Census(api_key)
state='25' state_code='ma' elif city=='Detroit': state='26' state_code='mi' ALL_ZONES_PATH='./scripts/cities/'+city+'/clean/model_area.geojson' SIM_ZONES_PATH='./scripts/cities/'+city+'/clean/sim_zones.json' OD_PATH='./scripts/cities/'+city+'/raw/LODES/'+state_code+'_od_main_JT00_2015.csv' ALL_SYNTH_HH_PATH='./scripts/cities/'+city+'/clean/all_synth_hh.csv' ALL_SYNTH_PERSONS_PATH='./scripts/cities/'+city+'/clean/all_synth_persons.csv' SIM_POP_PATH='./scripts/cities/'+city+'/clean/sim_pop.json' VACANT_PATH='./scripts/cities/'+city+'/clean/vacant.json' FLOATING_PATH='./scripts/cities/'+city+'/clean/floating.json' c = Census('7a25a7624075d46f112113d33106b6648f42686a') # load the block group geojson for the whole area # get set of tracts covered # identify the data we want at tract and block group level #Households income_columns = ['B19001_0%02dE'%i for i in range(1, 18)] vehicle_columns = ['B08201_0%02dE'%i for i in range(1, 7)] workers_columns = ['B08202_0%02dE'%i for i in range(1, 6)] families_columns = ['B11001_001E', 'B11001_002E'] # year_built_columns= ['B25034_001E', 'B25034_002E', 'B25034_003E'] # includes vacant structures? tenure_columns=['B25063_001E', 'B25075_001E'] block_group_columns = income_columns + families_columns + tenure_columns