def parcel_summary(parcels, buildings, households, jobs,
                   run_number, year,
                   parcels_zoning_calculations,
                   initial_year, final_year):

    if year not in [initial_year, final_year]:
        return

    df = parcels.to_frame([
        "x", "y",
        "total_residential_units",
        "total_job_spaces",
        "first_building_type_id"
    ])

    df2 = parcels_zoning_calculations.to_frame(
        ["zoned_du", "zoned_du_underbuild", "zoned_du_underbuild_nodev"])

    df = df.join(df2)

    households_df = orca.merge_tables(
        'households',
        [buildings, households],
        columns=['parcel_id', 'base_income_quartile'])

    # add households by quartile on each parcel
    for i in range(1, 5):
        df['hhq%d' % i] = households_df[
            households_df.base_income_quartile == i].parcel_id.value_counts()

    jobs_df = orca.merge_tables(
        'jobs',
        [buildings, jobs],
        columns=['parcel_id', 'empsix'])

    # add jobs by empsix category on each parcel
    for cat in jobs_df.empsix.unique():
        df[cat] = jobs_df[jobs_df.empsix == cat].parcel_id.value_counts()

    df.to_csv(
        os.path.join("runs", "run%d_parcel_data_%d.csv" %
                     (run_number, year)))

    if year == final_year:
        # do diff with initial year
        df2 = pd.read_csv(
            os.path.join("runs", "run%d_parcel_data_%d.csv" %
                         (run_number, initial_year)),
            index_col="parcel_id")

        for col in df.columns:
            if col in ["x", "y", "first_building_type_id"]:
                continue
            df[col] = df[col] - df2[col]

        df.to_csv(
            os.path.join("runs", "run%d_parcel_data_diff.csv" % run_number))

def ln_pop_within_20min(zones, t_data_dist20):
    b = orca.merge_tables('buildings',
                          tables=['buildings', 'parcels'],
                          columns=['zone_id'])
    zonal_pop = zones.zonal_pop
    t_data = t_data_dist20.to_frame()
    t_data.loc[:, 'attr'] = zonal_pop[t_data_dist20.to_zone_id].values
    zone_time_range = t_data.groupby(level=0).attr.apply(np.sum)
    return reindex(zone_time_range, b.zone_id).apply(np.log1p)

def hh_merged():
    df = orca.merge_tables(
        target='households',
        tables=['households', 'units', 'buildings', 'parcels',
                'nodessmall', 'nodeswalk'])
    return df

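# A minimal, self-contained sketch of the orca pattern all these functions
# rely on: merge_tables() can only join tables whose relationships have been
# registered with orca.broadcast(). The table names and columns below are
# toy data for illustration, not the project's real schema.
import orca
import pandas as pd

orca.add_table('buildings', pd.DataFrame(
    {'zone_id': [10, 20]}, index=pd.Index([1, 2], name='building_id')))
orca.add_table('households', pd.DataFrame(
    {'building_id': [1, 1, 2], 'income': [50000, 120000, 80000]}))

# households.building_id points at the buildings index
orca.broadcast('buildings', 'households',
               cast_index=True, onto_on='building_id')

merged = orca.merge_tables(target='households',
                           tables=['households', 'buildings'],
                           columns=['income', 'zone_id'])
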
def ln_non_residential_sqft_zone():
    # NOTE: despite the name, this returns the percentage of each zone's
    # residential units that sit in buildings with btype_hlcm == 3
    b = orca.merge_tables(
        'buildings',
        tables=['buildings', 'parcels'],
        columns=['btype_hlcm', 'residential_units', 'zone_id'])
    return (b[b.btype_hlcm == 3].groupby('zone_id').residential_units.sum() *
            100.0 / b.groupby('zone_id').residential_units.sum())

def jobs_within_15min(zones, t_data_dist15):
    b = orca.merge_tables('buildings',
                          tables=['buildings', 'parcels'],
                          columns=['zone_id'])
    t_data = t_data_dist15.to_frame()
    t_data.loc[:, 'attr'] = zones.zonal_emp[t_data_dist15.to_zone_id].values
    zone_time_range = t_data.groupby(level=0).attr.apply(np.sum)
    return reindex(zone_time_range, b.zone_id)

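# Toy illustration of the travel-band aggregation pattern above: t_data is
# indexed by origin zone, with one row per destination zone reachable within
# the band, so a groupby on the index sums the destination attribute per
# origin zone.
import pandas as pd

t_data = pd.DataFrame({'to_zone_id': [2, 3, 1, 3]},
                      index=pd.Index([1, 1, 2, 2], name='from_zone_id'))
zonal_emp = pd.Series([10, 20, 30], index=[1, 2, 3])
t_data['attr'] = zonal_emp[t_data.to_zone_id].values
print(t_data.groupby(level=0).attr.sum())  # zone 1 -> 50, zone 2 -> 40
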
def btype(households):
    df = orca.merge_tables('households',
                           tables=['households', 'buildings'],
                           columns=['building_type_id'])
    # recode building types: 2 -> 1, 3 -> 2, 20 -> 3, everything else -> 4
    return (1 * (df.building_type_id == 2) +
            2 * (df.building_type_id == 3) +
            3 * (df.building_type_id == 20) +
            4 * np.invert(np.in1d(df.building_type_id, [2, 3, 20])))

def _get_df(self, tables='unset', fallback_tables=None, filters='unset',
            model_expression=None):
    """
    Generate a data table for estimation or prediction, relying on
    functionality from Orca and `urbansim.models.util`. This should be
    performed immediately before estimation or prediction so that it
    reflects the current data state.

    The output includes only the necessary columns: those mentioned in the
    model expression or filters, plus (it appears) the index of each merged
    table. Relevant row filters are also applied.

    TO DO - this method is a generalization of _get_data(), and should
    replace it, but does not currently support column filtering or PyLogit
    model expressions.

    Parameters
    ----------
    tables : str or list of str, optional
        Name of table or tables. If not provided, `self.tables` will be
        used.
    fallback_tables : str or list of str, optional
        Table(s) to use if first argument evaluates to `None`.
    filters : str or list of str, optional
        Filter(s) to apply. If not provided, `self.filters` will be used.
    model_expression : NOT YET IMPLEMENTED
        Model expression, for determining which columns are needed. If not
        provided, `self.model_expression` will be used.

        TO DO - this needs to handle the large MNL case where there are two
        sets of data tables, so we can't use
        urbansim.models.util.columns_in_formula()

    Returns
    -------
    DataFrame

    """
    if tables == 'unset':
        tables = self.tables

    if tables is None:
        tables = fallback_tables

    if filters == 'unset':
        filters = self.filters

    if isinstance(tables, list):
        df = orca.merge_tables(target=tables[0], tables=tables)
    else:
        df = orca.get_table(tables).to_frame()

    df = util.apply_filter_query(df, filters)
    return df

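# Hedged usage sketch for _get_df(), assuming `step` is an instance of the
# (unseen) template class with `tables` and `filters` attributes set; the
# table names and the filter string are hypothetical:
#
#     df = step._get_df(tables=['households', 'buildings'],
#                       filters=['income > 60000'])
#
# With the broadcast registered as in the earlier sketch, this reduces to
# orca.merge_tables(target='households', tables=['households', 'buildings'])
# followed by urbansim.models.util.apply_filter_query(df, ['income > 60000']).
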
def avg_unit_price_zone():
    b = orca.merge_tables(
        'buildings',
        tables=['buildings', 'parcels'],
        columns=['non_residential_sqft', 'improvement_value',
                 'unit_price_non_residential', 'zone_id'])
    mask = (b.non_residential_sqft > 0) & (b.improvement_value > 0)
    return b[mask].groupby('zone_id').unit_price_non_residential.mean()

def emp_transition(employment_control_totals, year):
    location_fname = 'zone_id'
    tran = TabularTotalsTransition(employment_control_totals.to_frame(),
                                   'total_number_of_jobs',
                                   accounting_column='employees')
    df = orca.merge_tables('establishments',
                           tables=['establishments', 'counties'])
    print("%d establishments with %d employees before transition" %
          (len(df.index), df.employees.sum()))
    df, added, copied, removed = tran.transition(df, year)
    print("%d establishments with %d employees after transition" %
          (len(df.index), df.employees.sum()))
    df.loc[added, location_fname] = -1
    df.loc[added, "building_id"] = -1
    orca.add_table('establishments',
                   df.loc[:, orca.get_table('establishments').local_columns])

def juris_ave_income(households, buildings, parcels_geography, parcels):
    # get frame of income and jurisdiction
    h = orca.merge_tables("households",
                          [households, buildings, parcels_geography],
                          columns=["jurisdiction_id", "income"])

    # get median income by jurisdiction
    s = h.groupby(h.jurisdiction_id).income.quantile(.5)

    # map it to parcels - fill na with median for all areas
    # should probably remove the log transform and do that in the models
    return misc.reindex(s, parcels_geography.jurisdiction_id).\
        reindex(parcels.index).fillna(s.median()).apply(np.log1p)

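# Minimal sketch of what the misc.reindex(series1, series2) pattern above
# does (a toy reimplementation for illustration, not the urbansim original):
# look up series1 for each value of series2 and keep series2's index.
import pandas as pd

def reindex_sketch(series1, series2):
    return pd.Series(series1.loc[series2.values].values,
                     index=series2.index)

juris_income = pd.Series({1: 11.0, 2: 11.5})           # by jurisdiction_id
parcel_juris = pd.Series([1, 1, 2], index=[101, 102, 103],
                         name='jurisdiction_id')        # by parcel_id
print(reindex_sketch(juris_income, parcel_juris))       # income per parcel
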
def to_frame(tables, cfg, additional_columns=[]):
    cfg = yaml_to_class(cfg).from_yaml(str_or_buffer=cfg)
    tables = [t for t in tables if t is not None]
    columns = misc.column_list(tables, cfg.columns_used()) + \
        additional_columns
    if len(tables) > 1:
        df = orca.merge_tables(target=tables[0].name,
                               tables=tables,
                               columns=columns)
    else:
        df = tables[0].to_frame(columns)
    df = deal_with_nas(df)
    return df

def jobs_subset(jobs, parcels, buildings):
    df = orca.merge_tables('jobs',
                           [parcels, buildings, jobs],
                           columns=['zone_id', 'empsix'])

    # totally baffled by this - after joining the three tables we have
    # three zone_ids, one from the parcel table, one from buildings, and
    # one from jobs, and the one called zone_id has null values while the
    # others do not - going to change this while I think about it
    df["zone_id"] = df.zone_id_x

    return df

def get_person_geo():
    # join person data with household data
    df4 = orca.merge_tables(target='my_household',
                            tables=['my_person', 'my_household'])

    # geographic info to dictionary; df3 (a block-group-level geography
    # lookup) is assumed to be built earlier in the module
    faz_dict = dict(zip(df3['census_2010_block_group_id'], df3['faz_id']))
    zone_dict = dict(zip(df3['census_2010_block_group_id'], df3['zone_id']))
    city_dict = dict(zip(df3['census_2010_block_group_id'], df3['city_id']))

    # map geo info to person table
    df4['faz_id'] = df4['census_2010_block_group_id'].map(faz_dict)
    df4['zone_id'] = df4['census_2010_block_group_id'].map(zone_dict)
    df4['city_id'] = df4['census_2010_block_group_id'].map(city_dict)

    orca.add_table('my_person_geo', df4)

def ln_emp_sector5_within_20min(t_data_dist20):
    b = orca.merge_tables('buildings',
                          tables=['buildings', 'parcels'],
                          columns=['zone_id'])
    e = orca.get_table('establishments').to_frame(
        columns=['sector_id_six', 'zone_id', 'employees'])
    e = e.loc[e.sector_id_six == 5]
    zonal_emp = e.groupby('zone_id').employees.sum()
    t_data = t_data_dist20.to_frame()
    t_data.loc[:, 'attr'] = zonal_emp[t_data_dist20.to_zone_id].values
    zone_time_range = t_data.groupby(level=0).attr.apply(np.sum)
    return reindex(zone_time_range, b.zone_id).apply(np.log1p)

def _get_data(self, task='fit'):
    """
    DEPRECATED - this should be replaced by the more general
    utils.get_data()

    Generate a data table for estimation or prediction, relying on
    functionality from Orca and urbansim.models.util. This should be
    performed immediately before estimation or prediction so that it
    reflects the current data state.

    The output includes only the necessary columns: those mentioned in the
    model expression or filters, plus (it appears) the index of each merged
    table. Relevant filter queries are applied.

    Parameters
    ----------
    task : 'fit' or 'predict'

    Returns
    -------
    DataFrame

    """
    # TO DO - verify input data

    if isinstance(self.model_expression, str):
        expr_cols = util.columns_in_formula(self.model_expression)

    if (task == 'fit'):
        tables = self.tables
        columns = expr_cols + util.columns_in_filters(self.filters)
        filters = self.filters

    elif (task == 'predict'):
        if self.out_tables is not None:
            tables = self.out_tables
        else:
            tables = self.tables

        columns = expr_cols + util.columns_in_filters(self.out_filters)
        if self.out_column is not None:
            columns += [self.out_column]

        filters = self.out_filters

    if isinstance(tables, list):
        df = orca.merge_tables(target=tables[0], tables=tables,
                               columns=columns)
    else:
        df = orca.get_table(tables).to_frame(columns)

    df = util.apply_filter_query(df, filters)
    return df

def parcel_avg_price(use):
    # if use is residential, translate unit price to price per sqft
    buildings = orca.merge_tables(
        'buildings',
        tables=['buildings', 'parcels'],
        columns=['unit_price_residential', 'building_type_id',
                 'residential_sqft', 'zone_id',
                 'unit_price_non_residential'])
    use_btype = orca.get_injectable('use_btype')
    if use == 'residential':
        in_btype = np.in1d(buildings.building_type_id, use_btype[use])
        price = (buildings.unit_price_residential.loc[in_btype] /
                 buildings.residential_sqft.loc[in_btype]
                 ).groupby(buildings.zone_id).mean()
    else:
        price = buildings.unit_price_non_residential.loc[
            np.in1d(buildings.building_type_id, use_btype[use])
        ].groupby(buildings.zone_id).mean()
    return misc.reindex(price, orca.get_table('parcels').zone_id)

def avg_unit_price_zone(zones):
    b = orca.merge_tables(
        'buildings',
        tables=['buildings', 'parcels'],
        columns=['residential_units', 'improvement_value',
                 'unit_price_residential', 'zone_id'])
    mask = (b.residential_units > 0) & (b.improvement_value > 0)
    b = b[mask].groupby('zone_id').unit_price_residential.mean()
    out = pd.Series(index=zones.index, name='avg_unit_price_zone')
    out.loc[b.index] = b
    out.loc[out.isnull()] = b.mean()
    return out

def hh_transition(households, household_control_totals, year):
    location_fname = 'zone_id'
    tran = TabularTotalsTransition(household_control_totals.to_frame(),
                                   'households')
    # tran = TabularGrowthRateTransition(
    #     household_control_totals.to_frame(), 'annual_rate')
    df = orca.merge_tables('households', tables=['households', 'counties'])
    print("{0} households with a total population of {1}".format(
        len(df.index), df.persons.sum()))
    df, added, copied, removed = tran.transition(df, year)
    print("{0} households with a total population of {1}".format(
        len(df.index), df.persons.sum()))
    df.loc[added, location_fname] = -1
    df.loc[added, 'building_id'] = -1
    orca.add_table('households',
                   df.loc[:, orca.get_table('households').local_columns])
    print(df.groupby('county_id').persons.sum())

def TOD_choice_simulate():
    """
    Generate time of day period choices for the synthetic population
    home-work and work-home trips.
    """
    TOD_obs = orca.merge_tables('persons', ['persons', 'households', 'jobs'])
    TOD_obs.dropna(inplace=True)

    skims = pd.read_csv('./data/skims_110118.csv')

    TOD_obs = pd.merge(TOD_obs, skims, how='left',
                       left_on=['zone_id_home', 'zone_id_work'],
                       right_on=['orig', 'dest'])

    TOD_obs = pd.merge(TOD_obs, skims, how='left',
                       left_on=['zone_id_work', 'zone_id_home'],
                       right_on=['orig', 'dest'],
                       suffixes=('_HW', '_WH'))

    TOD_list = ['EA', 'AM', 'MD', 'PM', 'EV']

    # build round-trip travel times for all 25 home-work/work-home
    # period pairs
    for tod1 in TOD_list:
        for tod2 in TOD_list:
            col_name = f'da_Time_{tod1}_{tod2}'
            TOD_obs[col_name] = TOD_obs[f'da_Time_{tod1}_HW'] + \
                TOD_obs[f'da_Time_{tod2}_WH']

    # TOD_obs['TOD'] = None

    m = mm.get_step('TOD_choice')

    @orca.table(cache=True)
    def tripsA():
        return TOD_obs

    m.run()

    results = orca.get_table('tripsA').to_frame()
    persons = orca.get_table('persons').to_frame()
    persons = pd.merge(persons, results[['TOD']], how='left',
                       left_index=True, right_index=True)
    orca.add_table('persons', persons)

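# Toy sketch of the double skim merge above: merging the same lookup twice,
# once keyed home->work and once keyed work->home, yields paired _HW/_WH
# columns for each skim measure. The frames here are made-up data.
import pandas as pd

trips = pd.DataFrame({'home': [1], 'work': [2]})
skims = pd.DataFrame({'orig': [1, 2], 'dest': [2, 1],
                      'da_Time_AM': [30.0, 45.0]})
t = trips.merge(skims, how='left', left_on=['home', 'work'],
                right_on=['orig', 'dest'])
t = t.merge(skims, how='left', left_on=['work', 'home'],
            right_on=['orig', 'dest'], suffixes=('_HW', '_WH'))
print(t[['da_Time_AM_HW', 'da_Time_AM_WH']])  # 30.0 (H->W), 45.0 (W->H)
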
def establishments(store, zone_redevelopment):
    e = store['establishments']
    b = store['buildings']
    b_removed_idx = b.loc[np.in1d(b.parcel_id,
                                  zone_redevelopment.parcel_id)].index
    zone_ids = orca.merge_tables('buildings',
                                 tables=['buildings', 'parcels'],
                                 columns=['zone_id']).zone_id
    e = e.loc[e.employees > 0]
    e.loc[e.building_id.isin(b_removed_idx), 'building_id'] = -1
    e.loc[:, 'zone_id'] = zone_ids[e.building_id].fillna(-1).values.astype(
        'int32')
    # the original also called e.zone_id.fillna(-1) here without assigning
    # the result (a no-op); any remaining NAs are already filled above
    return e

def household_to_buildings(households, buildings):
    unplaced_hh = households.to_frame(columns=["zone_id", "building_id"])
    unplaced_hh = unplaced_hh.loc[unplaced_hh.building_id == -1]
    b = orca.merge_tables(
        "buildings",
        tables=["parcels", "buildings"],
        columns=["zone_id", "residential_units", "vacant_residential_units"],
    )
    vacant_units = b.loc[b.vacant_residential_units > 0,
                         "vacant_residential_units"]
    units = b.loc[np.repeat(vacant_units.index.values,
                            vacant_units.values.astype("int"))].reset_index()
    print(len(units))
    print(len(unplaced_hh))

def emp_transition(tbl, location_fname, year):
    # tran = TabularFilteredTotalsTransition(
    #     tbl.to_frame(), 'total_number_of_jobs',
    #     ['sector_id_six', 'home_based_status'],
    #     accounting_column='employees')
    tran = TabularTotalsTransition(tbl.to_frame(), 'total_number_of_jobs',
                                   accounting_column='employees')
    cols = orca.get_table('establishments').local_columns
    add_cols = ['zone_id', 'county_id', 'sector_id_six']
    cols = cols + add_cols
    df = orca.merge_tables('establishments',
                           tables=['establishments', 'counties'])
    print("%d establishments with %d employees before transition" %
          (len(df.index), df.employees.sum()))
    df, added, copied, removed = tran.transition(df, year)
    print("%d establishments with %d employees after transition" %
          (len(df.index), df.employees.sum()))
    df.loc[added, location_fname] = -1
    df.loc[added, "building_id"] = -1
    orca.add_table('establishments',
                   df.loc[:, orca.get_table('establishments').local_columns])
    orca.add_table('updated_emp', df, cache=True, cache_scope='iteration')

def building_summary(parcels, run_number, year, buildings,
                     initial_year, final_year):

    if year not in [initial_year, final_year]:
        return

    df = orca.merge_tables(
        'buildings',
        [parcels, buildings],
        columns=['performance_zone', 'year_built', 'residential_units',
                 'unit_price', 'zone_id', 'non_residential_sqft',
                 'deed_restricted_units', 'job_spaces', 'x', 'y'])

    df.to_csv(
        os.path.join("runs", "run%d_building_data_%d.csv" %
                     (run_number, year)))

def hh_transition(households, tbl, location_fname, year):
    migration = orca.get_table('migration_data').to_frame()
    pdf = migration['net_migration'] / migration.net_migration.sum()
    tran = DRCOGHouseholdTransitionModel(tbl.to_frame(),
                                         'total_number_of_households',
                                         prob_dist=[pdf],
                                         migration_data=migration)
    cols = orca.get_table('households').local_columns
    add_cols = ['zone_id', 'county_id']
    cols = cols + add_cols
    df = orca.merge_tables('households', tables=['households', 'counties'])
    print("%d households before transition" % len(df.index))
    df, added, copied, removed = tran.transition(df, year, [pdf])
    print("%d households after transition" % len(df.index))
    df.loc[added, location_fname] = -1
    df.loc[added, 'building_id'] = -1
    orca.add_table('households',
                   df.loc[:, orca.get_table('households').local_columns])
    orca.add_table('updated_hh', df, cache=True, cache_scope='iteration')

def supply_demand(cfg, hh_demand, alternatives, price_col,
                  reg_col=None, units_col=None, iterations=1):
    lcm = yaml_to_class(cfg).from_yaml(str_or_buffer=cfg)
    demand_frame = hh_demand.to_frame()
    alts_frame = alternatives.to_frame(columns=[units_col, price_col])
    alts_seg = alts_frame.index.values
    new_price, zone_ratios = supplydemand.supply_and_demand(
        lcm, demand_frame, alts_frame, alts_seg, price_col,
        iterations=iterations, reg_col=reg_col,
        clip_change_low=1, clip_change_high=100)
    alternatives.update_col_from_series(price_col, new_price)

    # update building prices from zones
    buildings = orca.merge_tables(
        'buildings',
        tables=['buildings', 'parcels'],
        columns=['unit_price_residential', 'unit_price_non_residential',
                 'zone_id'])
    new_price_name = {
        'avg_unit_price_zone': 'unit_price_residential',
        'avg_nonres_unit_price_zone': 'unit_price_non_residential'}
    new_price = new_price.fillna(0)
    new_price_df = pd.DataFrame(new_price)
    merged = pd.merge(buildings, new_price_df,
                      left_on='zone_id', right_index=True)
    orca.get_table('buildings').update_col_from_series(
        new_price_name[price_col], merged[price_col])

def to_frame(tbl, join_tbls, cfg, additional_columns=[]):
    """
    Leverage all the built in functionality of the sim framework to join
    the specified tables, only accessing the columns used in cfg (the
    model yaml configuration file) and any additionally passed columns
    (the sim framework is smart enough to figure out which table to grab
    the column off of)

    Parameters
    ----------
    tbl : DataFrameWrapper
        The table to join other tables to
    join_tbls : list of DataFrameWrappers or strs
        A list of tables to join to "tbl"
    cfg : str
        The filename of a yaml configuration file from which to parse the
        strings which are actually used by the model
    additional_columns : list of strs
        A list of additional columns to include

    Returns
    -------
    A single DataFrame with the index from tbl and the columns used by cfg
    and any additional columns specified

    """
    join_tbls = join_tbls if isinstance(join_tbls, list) else [join_tbls]
    tables = [tbl] + join_tbls
    cfg = yaml_to_class(cfg).from_yaml(str_or_buffer=cfg)
    tables = [t for t in tables if t is not None]
    columns = misc.column_list(tables, cfg.columns_used()) + \
        additional_columns
    if len(tables) > 1:
        df = orca.merge_tables(target=tables[0].name,
                               tables=tables,
                               columns=columns)
    else:
        df = tables[0].to_frame(columns)
    check_nas(df)
    return df

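# Hedged usage sketch for to_frame(): the table wrappers would come from
# orca.get_table() and the yaml path here is hypothetical; only the columns
# the model config actually names, plus the extras, survive into the merge.
#
#     df = to_frame(orca.get_table('households'),
#                   [orca.get_table('buildings')],
#                   "configs/hlcm.yaml",
#                   additional_columns=['zone_id'])
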
def calculate_model_variables(self):
    """
    Calculate variables needed to simulate the model, and return
    DataFrames of simulation-ready tables with the needed variables.

    Returns
    -------
    choosers : pandas.DataFrame
        DataFrame of choosers.
    alternatives : pandas.DataFrame
        DataFrame of alternatives.
    """
    columns_used = self.columns_used() + [self.choice_column]
    columns_used = columns_used + [self.agent_units] \
        if self.agent_units else columns_used
    choosers = orca.get_table(self.choosers).to_frame(columns_used)

    supply_column_names = [
        col for col in [self.supply_variable, self.vacant_variable]
        if col is not None]
    columns_used.extend(supply_column_names)

    if self.merge_tables:
        mt = copy.deepcopy(self.merge_tables)
        mt.append(self.alternatives)
        all_cols = []
        for table in mt:
            all_cols.extend(orca.get_table(table).columns)
        all_cols = [col for col in all_cols if col in columns_used]
        alternatives = orca.merge_tables(target=self.alternatives,
                                         tables=mt, columns=all_cols)
    else:
        alternatives = orca.get_table(self.alternatives).to_frame(
            columns_used + supply_column_names)

    return choosers, alternatives

def residential_sqft_zone():
    df = orca.merge_tables('buildings',
                           tables=['buildings', 'parcels'],
                           columns=['residential_sqft', 'zone_id'])
    return df.groupby('zone_id').residential_sqft.sum()

def residential_units_zone():
    df = orca.merge_tables('buildings',
                           tables=['buildings', 'parcels'],
                           columns=['residential_units', 'zone_id'])
    # NOTE: returns the log1p of the zone total despite the name
    return df.groupby('zone_id').residential_units.sum().apply(np.log1p)

def residential_unit_density_zone(zones):
    df = orca.merge_tables('buildings',
                           tables=['buildings', 'parcels'],
                           columns=['residential_units', 'zone_id'])
    return df.groupby('zone_id').residential_units.sum() / zones.acreage

def proportional_elcm(jobs, households, buildings, parcels,
                      year, run_number):

    juris_assumptions_df = pd.read_csv(
        os.path.join("data", "juris_assumptions.csv"),
        index_col="juris")

    # not a big fan of this - jobs with building_ids of -1 get dropped
    # by the merge so you have to grab the columns first and fill in
    # juris iff the building_id is != -1
    jobs_df = jobs.to_frame(["building_id", "empsix"])
    df = orca.merge_tables(
        target='jobs',
        tables=[jobs, buildings, parcels],
        columns=['juris', 'zone_id'])
    jobs_df["juris"] = df["juris"]
    jobs_df["zone_id"] = df["zone_id"]

    hh_df = orca.merge_tables(
        target='households',
        tables=[households, buildings, parcels],
        columns=['juris', 'zone_id', 'county'])

    # the idea here is to make sure we don't lose local retail and gov't
    # jobs - there has to be some amount of basic services to support an
    # increase in population
    buildings_df = orca.merge_tables(
        target='buildings',
        tables=[buildings, parcels],
        columns=['juris', 'zone_id', 'general_type', 'vacant_job_spaces'])

    buildings_df = buildings_df.rename(columns={
        'zone_id_x': 'zone_id', 'general_type_x': 'general_type'})

    # location options are vacant job spaces in retail buildings - this
    # will overfill certain locations because we don't have enough space
    building_subset = buildings_df[buildings_df.general_type == "Retail"]
    location_options = building_subset.juris.repeat(
        building_subset.vacant_job_spaces.clip(0))

    print("Running proportional jobs model for retail")

    s = _proportional_jobs_model(
        # we now take the ratio of retail jobs to households as an input
        # that is manipulable by the modeler - this is stored in a csv
        # per jurisdiction
        juris_assumptions_df.minimum_forecast_retail_jobs_per_household,
        "RETEMPN",
        "juris",
        hh_df,
        jobs_df,
        location_options
    )

    jobs.update_col_from_series("building_id", s)

    # first read the file from disk - it's small so no table source
    taz_assumptions_df = pd.read_csv(
        os.path.join("data", "taz_growth_rates_gov_ed.csv"),
        index_col="Taz")

    # we're going to multiply various aggregations of populations by
    # factors, e.g. high school jobs are multiplied by county pop and so
    # forth - this is the dict of the aggregations of household counts
    mapping_d = {
        "TAZ Pop": hh_df["zone_id"].dropna().astype('int').value_counts(),
        "County Pop": taz_assumptions_df.County.map(
            hh_df["county"].value_counts()),
        "Reg Pop": len(hh_df)
    }

    # the factors are set up in relation to pop, not hh count
    pop_to_hh = .43

    # don't need county anymore
    del taz_assumptions_df["County"]

    # multipliers are in first row (not counting the headers)
    multipliers = taz_assumptions_df.iloc[0]
    # done with the row
    taz_assumptions_df = taz_assumptions_df.iloc[1:]

    # this is weird but Pandas was giving me a strange error when I tried
    # to change the type of the index directly
    taz_assumptions_df = taz_assumptions_df.reset_index()
    taz_assumptions_df["Taz"] = taz_assumptions_df.Taz.astype("int")
    taz_assumptions_df = taz_assumptions_df.set_index("Taz")

    # now go through and multiply each factor by the aggregation it
    # applies to
    target_jobs = pd.Series(0, taz_assumptions_df.index)
    for col, mult in zip(taz_assumptions_df.columns, multipliers):
        target_jobs += (taz_assumptions_df[col].astype('float') *
                        mapping_d[mult] * pop_to_hh).fillna(0)

    target_jobs = target_jobs.astype('int')

    print("Running proportional jobs model for gov/edu")

    # location options are vacant job spaces in office/school buildings -
    # this will overfill certain locations because we don't have enough
    # space
    building_subset = buildings_df[
        buildings.general_type.isin(["Office", "School"])]
    location_options = building_subset.zone_id.repeat(
        building_subset.vacant_job_spaces.clip(0))

    # now do the same thing for gov't jobs
    s = _proportional_jobs_model(
        None,  # computing jobs directly
        "OTHEMPN",
        "zone_id",
        hh_df,
        jobs_df,
        location_options,
        target_jobs=target_jobs
    )

    jobs.update_col_from_series("building_id", s)

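# Toy sketch of the "location options" construction above: repeating each
# building's identifier once per vacant job space turns capacities into an
# assignable list of slots.
import pandas as pd

b = pd.DataFrame({'juris': ['a', 'b', 'c'],
                  'vacant_job_spaces': [2, 0, 1]})
options = b.juris.repeat(b.vacant_job_spaces.clip(0))
print(options.tolist())  # ['a', 'a', 'c']
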
def inclusionary_housing_revenue_reduction(feasibility, units): print "Computing adjustments due to inclusionary housing" # AMI by jurisdiction # # in practice deed restrictions are done by household size but we aren't # going to deed restrict them by household size so it makes sense not to # do that here - if we did this by household size like we do in the real # world we'd need to have a better representation of what household size # is in which unit type households = orca.get_table("households") buildings = orca.get_table("buildings") parcels_geography = orca.get_table("parcels_geography") h = orca.merge_tables("households", [households, buildings, parcels_geography], columns=["juris_name", "income"]) AMI = h.groupby(h.juris_name).income.quantile(.5) # per Aksel Olsen (@akselx) # take 90% of AMI and multiple by 33% to get the max amount a # household can pay per year, divide by 12 to get monthly amt, # subtract condo fee monthly_condo_fee = 250 monthly_affordable_payment = AMI * .9 * .33 / 12 - monthly_condo_fee def value_can_afford(monthly_payment): # this is a 10 year average freddie mac interest rate ten_year_average_interest = .055 return np.npv(ten_year_average_interest/12, [monthly_payment]*30*12) value_can_afford = {k: value_can_afford(v) for k, v in monthly_affordable_payment.to_dict().items()} value_can_afford = pd.Series(value_can_afford) # account for interest and property taxes interest_and_prop_taxes = .013 value_can_afford /= 1+interest_and_prop_taxes # there's a lot more nuance to inclusionary percentages than this - # e.g. specific neighborhoods get specific amounts - # http://sf-moh.org/modules/showdocument.aspx?documentid=7253 pct_inclusionary = orca.get_injectable("inclusionary_housing_settings") juris_name = parcels_geography.juris_name.loc[feasibility.index] pct_affordable = juris_name.map(pct_inclusionary).fillna(0) value_can_afford = juris_name.map(value_can_afford) num_affordable_units = (units * pct_affordable).fillna(0).astype("int") ave_price_per_unit = \ feasibility[('residential', 'building_revenue')] / units revenue_diff_per_unit = (ave_price_per_unit - value_can_afford).fillna(0) print "Revenue difference per unit (not zero values)" print revenue_diff_per_unit[revenue_diff_per_unit > 0].describe() revenue_reduction = revenue_diff_per_unit * num_affordable_units s = num_affordable_units.groupby(parcels_geography.juris_name).sum() print "Feasibile affordable units by jurisdiction" print s[s > 0].order() return revenue_reduction, num_affordable_units
def res_units_per_bldg():
    df = orca.merge_tables('buildings',
                           tables=['buildings', 'parcels'],
                           columns=['zone_id', 'residential_units'])
    df = df.groupby('zone_id').residential_units.mean()
    return df

def mandatory_tours_merged(mandatory_tours, persons_merged):
    return orca.merge_tables(mandatory_tours.name,
                             [mandatory_tours, persons_merged])

def average_resunit_size():
    b = orca.merge_tables('buildings',
                          tables=['buildings', 'parcels'],
                          columns=['sqft_per_unit', 'zone_id'])
    return b.groupby('zone_id').sqft_per_unit.mean()

def median_yearbuilt_pre_1950():
    b = orca.merge_tables('buildings',
                          tables=['buildings', 'parcels'],
                          columns=['year_built', 'zone_id'])
    return (b.groupby('zone_id').year_built.median() < 1950).astype('int32')

def geographic_summary(parcels, households, jobs, buildings, taz_geography,
                       run_number, year, summary, final_year):
    # using the following conditional b/c `year` is used to pull a column
    # from a csv based on a string of the year in add_population()
    # and in add_employment() and 2009 is the 'base'/pre-simulation year,
    # as is the 2010 value in the csv.
    if year == 2009:
        year = 2010
        base = True
    else:
        base = False

    households_df = orca.merge_tables(
        'households',
        [parcels, buildings, households],
        columns=['pda', 'zone_id', 'juris', 'superdistrict',
                 'persons', 'income', 'base_income_quartile'])

    jobs_df = orca.merge_tables(
        'jobs',
        [parcels, buildings, jobs],
        columns=['pda', 'superdistrict', 'juris', 'zone_id', 'empsix'])

    buildings_df = orca.merge_tables(
        'buildings',
        [parcels, buildings],
        columns=['pda', 'superdistrict', 'juris', 'building_type_id',
                 'zone_id', 'residential_units', 'building_sqft',
                 'non_residential_sqft'])

    parcel_output = summary.parcel_output

    # because merge_tables returns multiple zone_id_'s, but not the one
    # we need
    buildings_df = buildings_df.rename(columns={'zone_id_x': 'zone_id'})

    geographies = ['superdistrict', 'pda', 'juris']

    if year in [2010, 2015, 2020, 2025, 2030, 2035, 2040]:

        for geography in geographies:

            # create table with household/population summaries
            summary_table = pd.pivot_table(households_df,
                                           values=['persons'],
                                           index=[geography],
                                           aggfunc=[np.size])

            summary_table.columns = ['tothh']

            # income quartile counts
            summary_table['hhincq1'] = \
                households_df.query("base_income_quartile == 1").\
                groupby(geography).size()
            summary_table['hhincq2'] = \
                households_df.query("base_income_quartile == 2").\
                groupby(geography).size()
            summary_table['hhincq3'] = \
                households_df.query("base_income_quartile == 3").\
                groupby(geography).size()
            summary_table['hhincq4'] = \
                households_df.query("base_income_quartile == 4").\
                groupby(geography).size()

            # residential buildings by type
            summary_table['sfdu'] = buildings_df.\
                query("building_type_id == 1 or building_type_id == 2").\
                groupby(geography).residential_units.sum()
            summary_table['mfdu'] = buildings_df.\
                query("building_type_id == 3 or building_type_id == 12").\
                groupby(geography).residential_units.sum()

            # employees by sector
            summary_table['totemp'] = jobs_df.\
                groupby(geography).size()
            summary_table['agrempn'] = jobs_df.query("empsix == 'AGREMPN'").\
                groupby(geography).size()
            summary_table['mwtempn'] = jobs_df.query("empsix == 'MWTEMPN'").\
                groupby(geography).size()
            summary_table['retempn'] = jobs_df.query("empsix == 'RETEMPN'").\
                groupby(geography).size()
            summary_table['fpsempn'] = jobs_df.query("empsix == 'FPSEMPN'").\
                groupby(geography).size()
            summary_table['herempn'] = jobs_df.query("empsix == 'HEREMPN'").\
                groupby(geography).size()
            summary_table['othempn'] = jobs_df.query("empsix == 'OTHEMPN'").\
                groupby(geography).size()

            # summary columns
            summary_table['occupancy_rate'] = summary_table['tothh'] / \
                (summary_table['sfdu'] + summary_table['mfdu'])
            summary_table['non_residential_sqft'] = buildings_df.\
                groupby(geography)['non_residential_sqft'].sum()
            summary_table['sq_ft_per_employee'] = \
                summary_table['non_residential_sqft'] / \
                summary_table['totemp']

            if parcel_output is not None:
                parcel_output['subsidized_units'] = \
                    parcel_output.deed_restricted_units - \
                    parcel_output.inclusionary_units

                # columns re: affordable housing
                summary_table['deed_restricted_units'] = \
                    parcel_output.groupby(geography).\
                    deed_restricted_units.sum()
                summary_table['inclusionary_units'] = \
                    parcel_output.groupby(geography).\
                    inclusionary_units.sum()
                summary_table['subsidized_units'] = \
                    parcel_output.groupby(geography).subsidized_units.sum()
                summary_table['inclusionary_revenue_reduction'] = \
                    parcel_output.groupby(geography).\
                    policy_based_revenue_reduction.sum()
                summary_table['inclusionary_revenue_reduction_per_unit'] = \
                    summary_table.inclusionary_revenue_reduction / \
                    summary_table.inclusionary_units
                summary_table['total_subsidy'] = \
                    parcel_output[parcel_output.subsidized_units > 0].\
                    groupby(geography).max_profit.sum() * -1
                summary_table['subsidy_per_unit'] = \
                    summary_table.total_subsidy / \
                    summary_table.subsidized_units

            # fill in 0 values where there are NA's so that summary table
            # outputs are the same over the years, otherwise a PDA or
            # summary geography would be dropped if it had no employment
            # or housing
            if geography == 'superdistrict':
                all_summary_geographies = buildings_df[geography].unique()
            else:
                all_summary_geographies = parcels[geography].unique()
            summary_table = \
                summary_table.reindex(all_summary_geographies).fillna(0)

            if base is False:
                summary_csv = "runs/run{}_{}_summaries_{}.csv".\
                    format(run_number, geography, year)
            elif base is True:
                summary_csv = "runs/run{}_{}_summaries_{}.csv".\
                    format(run_number, geography, 2009)

            summary_table.to_csv(summary_csv)

    # ##############################
    # Write Summary of Accounts
    # ##############################
    if year == final_year:
        for acct_name, acct in orca.get_injectable("coffer").items():
            fname = "runs/run{}_acctlog_{}_{}.csv".\
                format(run_number, acct_name, year)
            acct.to_frame().to_csv(fname)

    # ##############################
    # Write Urban Footprint Summary
    # ##############################
    buildings_uf_df = orca.merge_tables(
        'buildings',
        [parcels, buildings],
        columns=['urban_footprint', 'year_built', 'acres',
                 'residential_units', 'non_residential_sqft'])

    buildings_uf_df['count'] = 1

    s1 = buildings_uf_df['residential_units'] / buildings_uf_df['acres']
    s2 = s1 > 1

    s3 = (buildings_uf_df['urban_footprint'] == 0) * 1
    buildings_uf_df['denser_greenfield'] = s3 * s2

    df = buildings_uf_df.\
        loc[buildings_uf_df['year_built'] > 2010].\
        groupby('urban_footprint').sum()
    df = df[['count', 'residential_units', 'non_residential_sqft', 'acres']]

    df2 = buildings_uf_df.\
        loc[buildings_uf_df['year_built'] > 2010].\
        groupby('denser_greenfield').sum()
    df2 = df2[['count', 'residential_units', 'non_residential_sqft',
               'acres']]

    formatters = {'count': '{:.0f}',
                  'residential_units': '{:.0f}',
                  'non_residential_sqft': '{:.0f}',
                  'acres': '{:.0f}'}

    df = format_df(df, formatters)
    df2 = format_df(df2, formatters)

    df = df.transpose()
    df2 = df2.transpose()

    df[2] = df2[1]

    df.columns = ['urban_footprint_0', 'urban_footprint_1',
                  'denser_greenfield']
    uf_summary_csv = "runs/run{}_urban_footprint_summary_{}.csv".\
        format(run_number, year)
    df.to_csv(uf_summary_csv)

def tours_merged(tours, persons_merged):
    return orca.merge_tables(tours.name, tables=[tours, persons_merged])

def ln_non_residential_sqft_zone():
    df = orca.merge_tables('buildings',
                           tables=['buildings', 'parcels'],
                           columns=['non_residential_sqft', 'zone_id'])
    return df.groupby('zone_id').non_residential_sqft.sum().apply(np.log1p)

def inclusionary_housing_revenue_reduction(feasibility, units): print "Computing adjustments due to inclusionary housing" # AMI by jurisdiction # # in practice deed restrictions are done by household size but we aren't # going to deed restrict them by household size so it makes sense not to # do that here - if we did this by household size like we do in the real # world we'd need to have a better representation of what household size # is in which unit type households = orca.get_table("households") buildings = orca.get_table("buildings") parcels_geography = orca.get_table("parcels_geography") h = orca.merge_tables("households", [households, buildings, parcels_geography], columns=["juris_name", "income"]) AMI = h.groupby(h.juris_name).income.quantile(.5) # per Aksel Olsen (@akselx) # take 90% of AMI and multiple by 33% to get the max amount a # household can pay per year, divide by 12 to get monthly amt, # subtract condo fee monthly_condo_fee = 250 monthly_affordable_payment = AMI * .9 * .33 / 12 - monthly_condo_fee def value_can_afford(monthly_payment): # this is a 10 year average freddie mac interest rate ten_year_average_interest = .055 return np.npv(ten_year_average_interest / 12, [monthly_payment] * 30 * 12) value_can_afford = { k: value_can_afford(v) for k, v in monthly_affordable_payment.to_dict().items() } value_can_afford = pd.Series(value_can_afford) # account for interest and property taxes interest_and_prop_taxes = .013 value_can_afford /= 1 + interest_and_prop_taxes # there's a lot more nuance to inclusionary percentages than this - # e.g. specific neighborhoods get specific amounts - # http://sf-moh.org/modules/showdocument.aspx?documentid=7253 pct_inclusionary = orca.get_injectable("inclusionary_housing_settings") juris_name = parcels_geography.juris_name.loc[feasibility.index] pct_affordable = juris_name.map(pct_inclusionary).fillna(0) value_can_afford = juris_name.map(value_can_afford) num_affordable_units = (units * pct_affordable).fillna(0).astype("int") ave_price_per_unit = \ feasibility[('residential', 'building_revenue')] / units revenue_diff_per_unit = (ave_price_per_unit - value_can_afford).fillna(0) print "Revenue difference per unit (not zero values)" print revenue_diff_per_unit[revenue_diff_per_unit > 0].describe() revenue_reduction = revenue_diff_per_unit * num_affordable_units s = num_affordable_units.groupby(parcels_geography.juris_name).sum() print "Feasibile affordable units by jurisdiction" print s[s > 0].sort_values() return revenue_reduction, num_affordable_units
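# A hedged, self-contained stand-in for the np.npv call above (np.npv was
# removed in NumPy 1.20; numpy_financial.npv is the drop-in library
# replacement). np.npv treated the first cash flow as occurring at time
# zero, so a constant payment over n months has this closed form (an
# annuity-due):
def npv_constant_payment(monthly_payment, annual_rate=.055, years=30):
    r = annual_rate / 12.0
    n = years * 12
    # sum of monthly_payment / (1 + r)**t for t = 0 .. n-1
    return monthly_payment * (1 - (1 + r) ** -n) / r * (1 + r)
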
def topsheet(households, jobs, buildings, parcels, zones, year,
             run_number, taz_geography, parcels_zoning_calculations,
             summary, settings, parcels_geography):

    hh_by_subregion = misc.reindex(taz_geography.subregion,
                                   households.zone_id).value_counts()

    households_df = orca.merge_tables(
        'households',
        [parcels_geography, buildings, households],
        columns=['pda_id', 'tpp_id', 'income'])

    hh_by_inpda = households_df.pda_id.notnull().value_counts()

    hhincome_by_intpp = households_df.income.groupby(
        households_df.tpp_id.notnull()).mean()
    # round to nearest 100s
    hhincome_by_intpp = (hhincome_by_intpp / 100).round() * 100

    jobs_by_subregion = misc.reindex(taz_geography.subregion,
                                     jobs.zone_id).value_counts()

    jobs_df = orca.merge_tables(
        'jobs',
        [parcels, buildings, jobs],
        columns=['pda'])

    jobs_by_inpda = jobs_df.pda.notnull().value_counts()

    capacity = parcels_zoning_calculations.\
        zoned_du_underbuild_nodev.groupby(parcels.subregion).sum()

    if year == 2010:
        # save some info for computing growth measures
        orca.add_injectable("base_year_measures", {
            "hh_by_subregion": hh_by_subregion,
            "jobs_by_subregion": jobs_by_subregion,
            "hh_by_inpda": hh_by_inpda,
            "jobs_by_inpda": jobs_by_inpda,
            "hhincome_by_intpp": hhincome_by_intpp,
            "capacity": capacity
        })

    # if year != 2040:
    #     return

    base_year_measures = orca.get_injectable("base_year_measures")

    f = open(os.path.join("runs", "run%d_topsheet_%d.log" %
                          (run_number, year)), "w")

    def write(s):
        # print(s)
        f.write(s + "\n\n")

    def norm_and_round(s):
        # normalize and round a series
        return str((s / s.sum()).round(2))

    nhh = len(households)
    write("Number of households = %d" % nhh)
    nj = len(jobs)
    write("Number of jobs = %d" % nj)

    n = len(households.building_id[households.building_id == -1])
    write("Number of unplaced households = %d" % n)

    n = len(jobs.building_id[jobs.building_id == -1])
    write("Number of unplaced jobs = %d" % n)

    du = buildings.residential_units.sum()
    write("Number of residential units = %d" % du)
    write("Residential vacancy rate = %.2f" % (1 - float(nhh) / du))

    du = buildings.deed_restricted_units.sum()
    write("Number of deed restricted units = %d" % du)

    write("Base year mean income by whether household is in tpp:\n%s" %
          base_year_measures["hhincome_by_intpp"])
    write("Horizon year mean income by whether household is in tpp:\n%s" %
          hhincome_by_intpp)

    jsp = buildings.job_spaces.sum()
    write("Number of job spaces = %d" % jsp)
    write("Non-residential vacancy rate = %.2f" % (1 - float(nj) / jsp))

    tmp = base_year_measures["hh_by_subregion"]
    write("Households base year share by subregion:\n%s" %
          norm_and_round(tmp))

    write("Households share by subregion:\n%s" %
          norm_and_round(hh_by_subregion))

    diff = hh_by_subregion - base_year_measures["hh_by_subregion"]
    write("Households pct of regional growth by subregion:\n%s" %
          norm_and_round(diff))

    tmp = base_year_measures["jobs_by_subregion"]
    write("Jobs base year share by subregion:\n%s" %
          norm_and_round(tmp))

    write("Jobs share by subregion:\n%s" %
          norm_and_round(jobs_by_subregion))

    diff = jobs_by_subregion - base_year_measures["jobs_by_subregion"]
    write("Jobs pct of regional growth by subregion:\n%s" %
          norm_and_round(diff))

    tmp = base_year_measures["hh_by_inpda"]
    write("Households base year share in pdas:\n%s" %
          norm_and_round(tmp))

    write("Households share in pdas:\n%s" %
          norm_and_round(hh_by_inpda))

    diff = hh_by_inpda - base_year_measures["hh_by_inpda"]
    write("Households pct of regional growth in pdas:\n%s" %
          norm_and_round(diff))

    tmp = base_year_measures["jobs_by_inpda"]
    write("Jobs base year share in pdas:\n%s" %
          norm_and_round(tmp))

    write("Jobs share in pdas:\n%s" %
          norm_and_round(jobs_by_inpda))

    diff = jobs_by_inpda - base_year_measures["jobs_by_inpda"]
    write("Jobs pct of regional growth in pdas:\n%s" %
          norm_and_round(diff))

    write("Base year dwelling unit raw capacity:\n%s" %
          base_year_measures["capacity"])

    write("Dwelling unit raw capacity:\n%s" % capacity)

    if summary.parcel_output is not None:
        df = summary.parcel_output
        # we mark greenfield as a parcel with less than 500 current sqft
        greenfield = df.total_sqft < 500

        write("Current share of projects which are greenfield development:"
              "\n%s" % norm_and_round(greenfield.value_counts()))

        write("Current share of units which are greenfield development:\n%s"
              % norm_and_round(df.residential_units.groupby(greenfield).
                               sum()))

    cmap = settings["county_id_tm_map"]
    jobs_by_county = jobs.zone_id.map(taz_geography.county)\
        .map(cmap).value_counts()
    households_by_county = households.zone_id.map(taz_geography.county)\
        .map(cmap).value_counts()
    jobs_by_housing = jobs_by_county / households_by_county.replace(0, 1)
    write("Jobs/housing balance:\n" + str(jobs_by_housing))

    f.close()

def non_mandatory_tours_merged(non_mandatory_tours, persons_merged):
    tours = non_mandatory_tours
    return orca.merge_tables(tours.name, tables=[tours, persons_merged])

def property_taxes(buildings, parcels_geography, acct_settings, coffer,
                   year):
    buildings = orca.merge_tables('buildings',
                                  [buildings, parcels_geography])
    tax_buildings(buildings, acct_settings, coffer["prop_tax_acct"], year)

def persons_merged(persons, households, land_use, accessibility):
    return orca.merge_tables(persons.name, tables=[
        persons, households, land_use, accessibility])

def households_merged(households, land_use, accessibility):
    return orca.merge_tables(households.name, tables=[
        households, land_use, accessibility])
