def setup_orca(dfa, dfb, dfa_col, dfb_col, dfa_factor, dfb_factor):
    """Register a small two-table fixture with orca.

    The following registrations are made, in this order:

    - injectables 'a_factor' (added directly) and 'b_factor' (decorated
      function returning ``dfb_factor``)
    - tables 'dfa' (added directly) and 'dfb' (decorated function
      returning ``dfb``)
    - columns 'acol' on 'dfa' and 'bcol' on 'dfb'
    - computed columns 'extra_acol' (``dfa_col * a_factor``) and
      'extra_bcol' (``dfb_col * b_factor``)
    - a broadcast of 'dfb' onto 'dfa' using 'a_id' on the cast side and
      the index on the onto side
    - a step 'test_step' that does nothing

    Parameters
    ----------
    dfa, dfb
        Table data registered under the names 'dfa' and 'dfb'.
    dfa_col, dfb_col
        Column data registered as 'acol' / 'bcol' and reused by the
        computed columns.
    dfa_factor, dfb_factor
        Values exposed through the 'a_factor' / 'b_factor' injectables.
    """
    # -- injectables ------------------------------------------------------
    orca.add_injectable('a_factor', dfa_factor)

    @orca.injectable()
    def b_factor():
        return dfb_factor

    # -- tables -----------------------------------------------------------
    orca.add_table('dfa', dfa)

    @orca.table('dfb')
    def dfb_table():
        return dfb

    # -- plain columns ----------------------------------------------------
    orca.add_column('dfa', 'acol', dfa_col)
    orca.add_column('dfb', 'bcol', dfb_col)

    # -- computed columns (argument names are resolved by orca) -----------
    @orca.column('dfa')
    def extra_acol(a_factor):
        scaled = dfa_col * a_factor
        return scaled

    @orca.column('dfb')
    def extra_bcol(b_factor):
        scaled = dfb_col * b_factor
        return scaled

    # -- relationship and step ---------------------------------------------
    orca.broadcast(cast='dfb', onto='dfa', cast_on='a_id', onto_index=True)

    @orca.step()
    def test_step(dfa, dfb):
        """Do nothing; the step only names the two tables as arguments."""
def load_rental_listings():
    """
    Register the Craigslist rental listings used for hedonic estimation.

    Not needed for simulation.

    Data expectations
    -----------------
    - injectable 'net' whose 'walk' and 'drive' entries offer
      ``get_node_ids(longitude, latitude)``
    - a 'rentals' entry in the injected ``store`` that has at least the
      columns 'longitude', 'latitude', 'bedrooms', 'neighborhood', 'sqft'
      and 'rent_sqft'
    - a 'parcels' table exposing 'zone_id'

    Results
    -------
    - registers a cached 'craigslist' table; besides whatever the stored
      frame already holds it carries:
        - 'node_id' (looked up on net['walk'])
        - 'tmnode_id' (looked up on net['drive'])
        - 'bedrooms' (missing values replaced by 1)
        - 'neighborhood' (missing values replaced by '')
        - 'sqft_per_unit' (copy of 'sqft')
        - 'price_per_sqft' (copy of 'rent_sqft')
    - registers a cached 'zone_id' column on 'craigslist'
    - adds broadcasts of 'nodes', 'tmnodes', 'zones' and 'logsums' onto
      'craigslist'
    """

    @orca.table('craigslist', cache=True)
    def craigslist(store):
        listings = store['rentals']
        net = orca.get_injectable('net')

        # nearest network node on the walk and on the drive network
        lon, lat = listings['longitude'], listings['latitude']
        listings['node_id'] = net['walk'].get_node_ids(lon, lat)
        listings['tmnode_id'] = net['drive'].get_node_ids(lon, lat)

        # fill nans -- missing bedrooms are mostly studio apts
        for column, filler in (('bedrooms', 1), ('neighborhood', '')):
            listings[column] = listings[column].replace(np.nan, filler)

        # gotta do this to use the same yaml for estimation and simulation
        listings['sqft_per_unit'] = listings['sqft']
        listings['price_per_sqft'] = listings['rent_sqft']
        return listings

    # Is it simpler to just do this in the table definition since it
    # is never updated?
    @orca.column('craigslist', 'zone_id', cache=True)
    def zone_id(craigslist, parcels):
        # NOTE(review): parcels.zone_id is looked up by the listings'
        # node_id values -- confirm that is the intended key.
        return misc.reindex(parcels.zone_id, craigslist.node_id)

    # (cast table, foreign-key column on 'craigslist')
    links = (
        ('nodes', 'node_id'),
        ('tmnodes', 'tmnode_id'),
        ('zones', 'zone_id'),
        ('logsums', 'zone_id'),
    )
    for cast_table, foreign_key in links:
        orca.broadcast(cast_table, 'craigslist', cast_index=True,
                       onto_on=foreign_key)
@orca.table('travel_data', cache=True)
def travel_data(store):
    """Return the store's 'travel_data' entry (cached table)."""
    return store['travel_data']


@orca.table('zones', cache=True)
def zones(store):
    """Return the store's 'zones' entry (cached table)."""
    return store['zones']


@orca.table('zoning_heights', cache=True)
def zoning_heights(store):
    """Return the store's 'zoning_heights' entry (cached table)."""
    return store['zoning_heights']


# Table relationships as (cast table, onto table, key column on the onto
# table); the index of the cast table is always the other side of the join.
_BROADCASTS = [
    ('buildings', 'households', 'building_id'),
    ('buildings', 'jobs', 'building_id'),
    ('fazes', 'zones', 'faz_id'),
    ('gridcells', 'parcels', 'grid_id'),
    ('households', 'persons', 'household_id'),
    # ('jobs', 'households', 'job_id'),
    ('jobs', 'persons', 'job_id'),
    ('parcels', 'buildings', 'parcel_id'),
    ('parcels', 'schools', 'parcel_id'),
    ('tractcity', 'parcels', 'tractcity_id'),
    ('zones', 'parcels', 'zone_id'),
    ('zones', 'persons_for_estimation', 'household_zone_id'),
    ('zones', 'persons', 'household_zone_id'),
    ('buildings', 'households_for_estimation', 'building_id'),
    ('buildings_lag1', 'households_for_estimation', 'building_id'),
    ('households_for_estimation', 'persons_for_estimation', 'household_id'),
]
for _cast, _onto, _key in _BROADCASTS:
    orca.broadcast(_cast, _onto, cast_index=True, onto_on=_key)
import orca
import pandas as pd


@orca.table(cache=True)
def accessibility(store):
    """Return "skims/accessibility" from the store with upper-cased column names."""
    skims = store["skims/accessibility"]
    skims.columns = [name.upper() for name in skims.columns]
    return skims


@orca.column("accessibility")
def mode_choice_logsums(accessibility):
    """Placeholder column: a zero for every row of ``accessibility``."""
    # TODO a big todo here is to compute actual mode choice logsums from our
    # TODO upcoming mode choice model
    zeros = pd.Series(0, index=accessibility.index)
    return zeros


# this would be accessibility around the household location - be careful with
# this one as accessibility at some other location can also matter
orca.broadcast(cast='accessibility', onto='households',
               cast_index=True, onto_on='TAZ')
2: "Urban", 3: "Suburban", 4: "Rural" })
return tg


# these are shapes - "zones" in the bay area
@orca.table('zones', cache=True)
def zones(store):
    """Return the store's 'zones' frame sorted by its index (cached table)."""
    df = store['zones']
    df = df.sort_index()
    return df


# this specifies the relationships between tables
# (each call joins the index of the first table to the `onto_on` column of
# the second table)
orca.broadcast('parcels_geography', 'buildings', cast_index=True,
               onto_on='parcel_id')
orca.broadcast('tmnodes', 'buildings', cast_index=True, onto_on='tmnode_id')
orca.broadcast('parcels', 'homesales', cast_index=True, onto_on='parcel_id')
orca.broadcast('nodes', 'homesales', cast_index=True, onto_on='node_id')
orca.broadcast('tmnodes', 'homesales', cast_index=True, onto_on='tmnode_id')
orca.broadcast('nodes', 'costar', cast_index=True, onto_on='node_id')
orca.broadcast('tmnodes', 'costar', cast_index=True, onto_on='tmnode_id')
orca.broadcast('logsums', 'homesales', cast_index=True, onto_on='zone_id')
orca.broadcast('logsums', 'costar', cast_index=True, onto_on='zone_id')
orca.broadcast('taz_geography', 'parcels', cast_index=True,
               onto_on='zone_id')
# keep one row per 'parcel' value, then index the frame by 'parcel'
df = df.reset_index().drop_duplicates(subset='parcel').set_index('parcel')
return df


# this is the actual zoning
@orca.table('zoning', cache=True)
def zoning(store):
    """Return the store's 'zoning' entry (cached table)."""
    df = store['zoning']
    return df


# zoning for use in the "baseline" scenario
# comes in the hdf5
@orca.table('zoning_baseline', cache=True)
def zoning_baseline(zoning, zoning_for_parcels):
    """Join ``zoning_for_parcels`` to ``zoning``.

    The 'zoning' column of ``zoning_for_parcels`` is matched against the
    index of ``zoning``; the merged frame is returned.
    """
    df = pd.merge(zoning_for_parcels.to_frame(),
                  zoning.to_frame(),
                  left_on='zoning',
                  right_index=True)
    return df


# table relationships: index of the first table onto the `onto_on` column
# of the second
orca.broadcast('zones', 'homesales', cast_index=True, onto_on='zone_id')
orca.broadcast('zones', 'costar', cast_index=True, onto_on='zone_id')
orca.broadcast('zones', 'apartments', cast_index=True, onto_on='zone_id')
orca.broadcast('zones', 'buildings', cast_index=True, onto_on='zone_id')
orca.broadcast('zones_prices', 'buildings', cast_index=True, onto_on='zone_id')
orca.broadcast('parcels', 'buildings', cast_index=True, onto_on='parcel_id')
orca.broadcast('buildings', 'households', cast_index=True, onto_on='building_id')
orca.broadcast('buildings', 'jobs', cast_index=True, onto_on='building_id')
t_data=travel_data.to_frame(columns=['am_single_vehicle_to_work_travel_time']).reset_index(level=1) return t_data[['to_zone_id']][t_data.am_single_vehicle_to_work_travel_time<20] @orca.table('t_data_dist30', cache=True) def dist30( travel_data): t_data=travel_data.to_frame(columns=['am_single_vehicle_to_work_travel_time']).reset_index(level=1) return t_data[['to_zone_id']][t_data.am_single_vehicle_to_work_travel_time<30] @orca.table('t_data_dist15', cache=True) def dist30( travel_data): t_data=travel_data.to_frame(columns=['am_single_vehicle_to_work_travel_time']).reset_index(level=1) return t_data[['to_zone_id']][t_data.am_single_vehicle_to_work_travel_time<15] @orca.table('t_data_dist45', cache=True) def dist30( travel_data): t_data=travel_data.to_frame(columns=['am_single_vehicle_to_work_travel_time']).reset_index(level=1) return t_data[['to_zone_id']][t_data.am_single_vehicle_to_work_travel_time<45] #broadcass orca.broadcast('zones', 'parcels', cast_index=True, onto_on='zone_id') orca.broadcast('parcels','buildings', cast_index=True, onto_on='parcel_id', onto_index=False) orca.broadcast('buildings','households', cast_index=True, onto_on='building_id') orca.broadcast('buildings', 'establishments', cast_index=True, onto_on ='building_id') orca.broadcast('zoning', 'parcels', cast_index=True, onto_on='zoning_id') orca.broadcast('fars', 'parcels', cast_index=True, onto_on='far_id') orca.broadcast('counties','zones', cast_index=True, onto_index=True) orca.broadcast('buildings','households_for_estimation', cast_index=True, onto_on='building_id') orca.broadcast('counties', 'establishments', cast_index=True, onto_on='zone_id') orca.broadcast('counties', 'households', cast_index=True, onto_on='zone_id')
# these are shapes - "zones" in the bay area
@orca.table('zones', cache=True)
def zones(store):
    """Return the store's 'zones' entry (cached table)."""
    return store['zones']


# these are dummy returns that last until accessibility runs
@orca.table("nodes", cache=True)
def nodes():
    """Return an empty DataFrame as the initial 'nodes' table."""
    placeholder = pd.DataFrame()
    return placeholder


@orca.table("logsums", cache=True)
def logsums(settings):
    """Read 'logsums.csv' from the data directory.

    The index column is taken from the "logsums_index_col" setting and
    falls back to "taz" when that setting is absent.
    """
    index_column = settings.get("logsums_index_col", "taz")
    csv_path = os.path.join(misc.data_dir(), 'logsums.csv')
    return pd.read_csv(csv_path, index_col=index_column)


# this specifies the relationships between tables
orca.broadcast(cast='nodes', onto='buildings',
               cast_index=True, onto_on='node_id')
orca.broadcast(cast='nodes', onto='parcels',
               cast_index=True, onto_on='node_id')
orca.broadcast(cast='logsums', onto='buildings',
               cast_index=True, onto_on='zone_id')
orca.broadcast(cast='logsums', onto='parcels',
               cast_index=True, onto_on='zone_id')
orca.broadcast(cast='parcels', onto='buildings',
               cast_index=True, onto_on='parcel_id')
orca.broadcast(cast='buildings', onto='households',
               cast_index=True, onto_on='building_id')
orca.broadcast(cast='buildings', onto='jobs',
               cast_index=True, onto_on='building_id')
orca.get_table('/2015/jobs').to_frame().head() # what is code number for other employment types? to be continued... #################################################################### ## population by year and geographic units # read population, household, parcel file, df1 = orca.get_table('/2015/persons').to_frame() orca.add_table('my_person', df1) df2 = orca.get_table('/2015/households').to_frame() df2['household_id'] = df2.index orca.add_table('my_household', df2) df3 = orca.get_table('/2015/parcels').to_frame() orca.add_table('my_parcel', df3) # define the merging relationship, between person and household orca.broadcast(cast='my_person', onto='my_household', cast_index=True, onto_on='household_id') #orca.broadcast(cast='my_household', onto='my_parcel', cast_index=True, onto_on='census_2010_block_group_id') my_col = ['population_2015'] #my_col = ['population_2015', 'faz_id'] @orca.step() def get_person_geo(): # join person data with hourshold data df4 = orca.merge_tables(target='my_household', tables=['my_person', 'my_household']) # geographic info to dictionary faz_dict = dict(zip(df3['census_2010_block_group_id'], df3['faz_id'])) zone_dict = dict(zip(df3['census_2010_block_group_id'], df3['zone_id'])) city_dict = dict(zip(df3['census_2010_block_group_id'], df3['city_id'])) # map geo info to person table df4['faz_id'] = df4['census_2010_block_group_id'].map(faz_dict)
df = store['assessor_transactions']
# De-duplicate on the index: copy it into a helper column, keep the last
# row for each value, then remove the helper column again.
df["index"] = df.index
# NOTE(review): `cols=` and `take_last=` are legacy pandas keyword names
# (later spelled `subset=` / `keep='last'`) -- confirm against the pandas
# version this project pins.
df.drop_duplicates(cols='index', take_last=True, inplace=True)
del df["index"]
return df


@orca.table('luz_base_indicators', cache=True)
def luz_base_indicators(store):
    """Count households and jobs per 'luz_id'.

    'luz_id' is carried from parcels to buildings (via 'parcel_id') and
    from buildings to households and jobs (via 'building_id').

    Returns
    -------
    DataFrame with the columns 'hh_base' (households per luz_id) and
    'emp_base' (jobs per luz_id).
    """
    households = store['households'][['building_id']]
    jobs = store['jobs'][['building_id']]
    buildings = store['buildings'][['parcel_id']]
    parcels = store['parcels'][['luz_id']]
    buildings['luz_id'] = misc.reindex(parcels.luz_id, buildings.parcel_id)
    households['luz_id'] = misc.reindex(buildings.luz_id, households.building_id)
    jobs['luz_id'] = misc.reindex(buildings.luz_id, jobs.building_id)
    hh_luz_base = households.groupby('luz_id').size()
    emp_luz_base = jobs.groupby('luz_id').size()
    return pd.DataFrame({'hh_base':hh_luz_base, 'emp_base':emp_luz_base})


# this specifies the relationships between tables
orca.broadcast('parcels', 'buildings', cast_index=True, onto_on='parcel_id')
orca.broadcast(
    'buildings', 'households', cast_index=True, onto_on='building_id')
orca.broadcast('buildings', 'jobs', cast_index=True, onto_on='building_id')
orca.broadcast('nodes', 'buildings', cast_index=True, onto_on='node_id')
orca.broadcast('nodes', 'parcels', cast_index=True, onto_on='node_id')
orca.broadcast('nodes', 'costar', cast_index=True, onto_on='node_id')
orca.broadcast('parcels', 'costar', cast_index=True, onto_on='parcel_id')
orca.broadcast('nodes', 'assessor_transactions', cast_index=True, onto_on='node_id')
orca.broadcast('parcels', 'assessor_transactions', cast_index=True, onto_on='parcel_id')
# census tracts for parcels, to assign earthquake probabilities
@orca.table(cache=True)
def parcels_tract():
    """Read "parcel_tract_xwalk.csv" from the data directory, indexed by 'parcel_id'."""
    xwalk_path = os.path.join(misc.data_dir(), "parcel_tract_xwalk.csv")
    return pd.read_csv(xwalk_path, index_col='parcel_id')


# earthquake and fire damage probabilities for census tracts
@orca.table(cache=True)
def tracts_earthquake():
    """Read "tract_damage_earthquake.csv" from the data directory."""
    damage_path = os.path.join(misc.data_dir(), "tract_damage_earthquake.csv")
    return pd.read_csv(damage_path)


# this specifies the relationships between tables
orca.broadcast(cast='buildings', onto='residential_units',
               cast_index=True, onto_on='building_id')
orca.broadcast(cast='residential_units', onto='households',
               cast_index=True, onto_on='unit_id')
orca.broadcast(cast='parcels_geography', onto='buildings',
               cast_index=True, onto_on='parcel_id')
orca.broadcast(cast='parcels', onto='buildings',
               cast_index=True, onto_on='parcel_id')
# not defined in urbansim_Defaults
orca.broadcast(cast='tmnodes', onto='buildings',
               cast_index=True, onto_on='tmnode_id')
orca.broadcast(cast='taz_geography', onto='parcels',
               cast_index=True, onto_on='zone_id')
import orca


@orca.table(cache=True)
def land_use(store):
    """Return the store's "land_use/taz_data" entry (cached table)."""
    taz_data = store["land_use/taz_data"]
    return taz_data


# households reference land_use rows through their 'TAZ' column
orca.broadcast(cast='land_use', onto='households',
               cast_index=True, onto_on='TAZ')


@orca.column("land_use")
def total_households(land_use):
    """Expose the local 'TOTHH' column as 'total_households'."""
    return land_use.local["TOTHH"]


@orca.column("land_use")
def total_employment(land_use):
    """Expose the local 'TOTEMP' column as 'total_employment'."""
    return land_use.local["TOTEMP"]


@orca.column("land_use")
def total_acres(land_use):
    """Expose the local 'TOTACRE' column as 'total_acres'."""
    return land_use.local["TOTACRE"]


@orca.column("land_use")
def county_id(land_use):
    """Expose the local 'COUNTY' column as 'county_id'."""
    return land_use.local["COUNTY"]
@orca.table('scheduled_development_events', cache=True)
def scheduled_development_events(store):
    """Return the store's 'scheduled_development_events' entry (cached)."""
    events = store['scheduled_development_events']
    return events


@orca.table('zoning', cache=True)
def zoning(store):
    """Return the store's 'zoning' entry (cached table)."""
    zoning_df = store['zoning']
    return zoning_df


@orca.table('zoning_allowed_uses', cache=True)
def zoning_allowed_uses(store, parcels):
    """Build a parcel-by-development-type table of allowed uses.

    The result is indexed like ``parcels`` and has one boolean column per
    distinct index value of the stored 'zoning_allowed_uses' frame. A cell
    is True when the parcel's 'zoning_id' appears in the stored rows for
    that development type.
    """
    uses = store['zoning_allowed_uses']
    parcel_zoning = parcels.to_frame(columns=['zoning_id'])
    result = pd.DataFrame(index=parcel_zoning.index)

    for devtype in np.unique(uses.index.values):
        # rows of this development type, re-keyed by zoning_id
        by_zoning = (uses.loc[uses.index == devtype]
                     .reset_index()
                     .set_index('zoning_id'))
        matched = misc.reindex(by_zoning.development_type_id,
                               parcel_zoning.zoning_id)
        # a parcel is allowed exactly when the lookup found a match
        result[devtype] = ~matched.isnull()

    return result


orca.broadcast(cast='parcels', onto='assessor_transactions',
               cast_index=True, onto_on='parcel_id')
orca.broadcast(cast='nodes', onto='assessor_transactions',
               cast_index=True, onto_on='node_id')
orca.broadcast(cast='zoning', onto='parcels',
               cast_index=True, onto_on='zoning_id')
columns=trace_columns, warn_if_empty=True)


""" This reprocesses the choice of index of the mandatory tour frequency alternatives into an actual dataframe of tours. Ending format is the same as got non_mandatory_tours except trip types are "work" and "school" """


@orca.table(cache=True)
def mandatory_tours(persons):
    """Build the mandatory tours table from the persons table.

    Only persons with a non-null 'mandatory_tour_frequency' are passed to
    ``process_mandatory_tours``, whose result is returned unchanged.
    """
    persons = persons.to_frame(
        columns=["mandatory_tour_frequency", "is_worker"])
    # drop persons that have no mandatory tour frequency
    persons = persons[~persons.mandatory_tour_frequency.isnull()]
    df = process_mandatory_tours(persons)
    return df


# broadcast persons (and persons_merged) onto mandatory_tours using the
# person_id foreign key
orca.broadcast('persons', 'mandatory_tours', cast_index=True, onto_on='person_id')
orca.broadcast('persons_merged', 'mandatory_tours', cast_index=True, onto_on='person_id')
# convert indexes to alternative names choices = pd.Series( mandatory_tour_frequency_spec.columns[choices.values], index=choices.index).reindex(persons_merged.local.index) print "Choices:\n", choices.value_counts() orca.add_column("persons", "mandatory_tour_frequency", choices) """ This reprocesses the choice of index of the mandatory tour frequency alternatives into an actual dataframe of tours. Ending format is the same as got non_mandatory_tours except trip types are "work" and "school" """ @orca.table(cache=True) def mandatory_tours(persons): persons = persons.to_frame(columns=["mandatory_tour_frequency", "is_worker"]) persons = persons[~persons.mandatory_tour_frequency.isnull()] return process_mandatory_tours(persons) # broadcast mandatory_tours on to persons using the person_id foreign key orca.broadcast('persons', 'mandatory_tours', cast_index=True, onto_on='person_id') orca.broadcast('persons_merged', 'mandatory_tours', cast_index=True, onto_on='person_id')
# five example pets, keyed 'p0' .. 'p4'
df_pet = pd.DataFrame(
    data={
        'pet_name': ['wilkie', 'lassie', 'leo', 'felix', 'rex'],
        'age': [14, 104, 3, 82, 7],
        'iq': [100, 140, 87, 120, 94],
        'species_id': ['D', 'D', 'C', 'C', 'D'],
    },
    index=['p0', 'p1', 'p2', 'p3', 'p4'],
)
# remember the starting age in its own column
df_pet['init_age'] = df_pet['age']

df_pet

# register the tables
orca.add_table('species', df_species)
orca.add_table('pets', df_pet)

# broadcast so we can merge pets and species dataframes on pets.species_id
orca.broadcast('species', 'pets', cast_index=True, onto_on='species_id')


# this is a common merge so might as well define it once here and use it
@orca.table()
def pets_merged(pets, species):
    """Merge the 'pets' and 'species' tables, targeting the pets table."""
    return orca.merge_tables(target=pets.name, tables=[pets, species])


# this is the orca registered version of the merged table
orca.get_table('pets_merged').to_frame()

##########################################################################
# simple_simulate
##########################################################################
# when a sample size is configured, return that many random households
if "households_sample_size" in settings:
    return asim.random_rows(store["households"],
                            settings["households_sample_size"])

return store["households"]


# this is a common merge so might as well define it once here and use it
@orca.table()
def households_merged(households, land_use, accessibility):
    """Merge households with land_use and accessibility, targeting households."""
    return orca.merge_tables(households.name, tables=[
        households, land_use, accessibility])


# persons reference households through their 'household_id' column
orca.broadcast('households', 'persons', cast_index=True, onto_on='household_id')


@orca.column("households")
def income_in_thousands(households):
    """Return household income divided by 1000."""
    return households.income / 1000


@orca.column("households")
def income_segment(households):
    """Bin 'income_in_thousands' into four segments labelled 1 to 4.

    The bin edges are 30, 60 and 100, with open-ended first and last bins.
    """
    return pd.cut(households.income_in_thousands,
                  bins=[-np.inf, 30, 60, 100, np.inf],
                  labels=[1, 2, 3, 4])


@orca.column("households")
# register the store entry called `name` as an orca table of the same name
store = orca.get_injectable("store")
orca.add_table(name, store[name])

# tables read straight from CSV files under data/
orca.add_table("remi_pop_total", pd.read_csv("data/remi_hhpop_bylarge.csv", index_col='large_area_id'))
orca.add_table('target_vacancies', pd.read_csv("data/target_vacancies.csv"))

# these are dummy returns that last until accessibility runs
for node_tbl in ['nodes', 'nodes_walk', 'nodes_drv']:
    empty_df = pd.DataFrame()
    orca.add_table(node_tbl, empty_df)

# GQ placeholders
# for gq_tbl in ['tazcounts2040gq', 'tazcounts2015gq', 'tazcounts2020gq', 'tazcounts2035gq', 'tazcounts2025gq',
#                'tazcounts2030gq']:
#     empty_df = pd.DataFrame()
#     orca.add_table(gq_tbl, empty_df)

# this specifies the relationships between tables
orca.broadcast('nodes_walk', 'buildings', cast_index=True, onto_on='nodeid_walk')
orca.broadcast('nodes_walk', 'parcels', cast_index=True, onto_on='nodeid_walk')
orca.broadcast('nodes_drv', 'buildings', cast_index=True, onto_on='nodeid_drv')
orca.broadcast('nodes_drv', 'parcels', cast_index=True, onto_on='nodeid_drv')
orca.broadcast('parcels', 'buildings', cast_index=True, onto_on='parcel_id')
orca.broadcast('buildings', 'households', cast_index=True, onto_on='building_id')
orca.broadcast('buildings', 'jobs', cast_index=True, onto_on='building_id')
orca.broadcast('households', 'persons', cast_index=True, onto_on='household_id')
orca.broadcast('building_types', 'buildings', cast_index=True, onto_on='building_type_id')
orca.broadcast('zones', 'parcels', cast_index=True, onto_on='zone_id')
# unlike the calls above, this one joins schools.parcel_id to the parcels index
orca.broadcast('schools', 'parcels', cast_on='parcel_id', onto_index=True)