def large_area_id(jobs, buildings):
    """Return the large-area id of every job, looked up through its building.

    The lookup is produced by ``misc.reindex`` and stored in the
    ``jobs_large_area_lookup`` injectable.  It is (re)built whenever that
    injectable is missing or empty, then sliced by ``jobs.index``.
    """
    lookup_name = "jobs_large_area_lookup"

    # The stored lookup is only inspected when it is actually registered.
    rebuild = True
    if orca.is_injectable(lookup_name):
        rebuild = len(orca.get_injectable(lookup_name)) == 0

    if rebuild:
        lookup = misc.reindex(buildings.large_area_id, jobs.building_id)
        orca.add_injectable(lookup_name, lookup, autocall=False, cache=True)

    return orca.get_injectable(lookup_name).loc[jobs.index]
def add_extra_columns(df):
    """Add the default columns expected on new buildings and return ``df``.

    ``df`` is modified in place.  Price columns are always reset to 0; the
    remaining columns are only filled in when they are not already present.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``parcel_id`` when ``parcel_size`` is missing.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in.
    """
    for col in ["residential_price", "non_residential_price"]:
        df[col] = 0

    if "deed_restricted_units" not in df.columns:
        df["deed_restricted_units"] = 0
    else:
        # A single-argument print() call behaves the same on Python 2 and 3.
        print("Number of deed restricted units built = %d" %
              df.deed_restricted_units.sum())

    df["redfin_sale_year"] = 2012

    if "residential_units" not in df:
        df["residential_units"] = 0

    if "parcel_size" not in df:
        # NOTE(review): the right-hand Series is labelled by parcel_id, so the
        # assignment aligns on df's index - confirm df is indexed compatibly.
        df["parcel_size"] = \
            orca.get_table("parcels").parcel_size.loc[df.parcel_id]

    # Use the public registry check instead of the private _INJECTABLES dict.
    if orca.is_injectable("year") and "year_built" not in df:
        df["year_built"] = orca.get_injectable("year")

    if orca.is_injectable("form_to_btype_func") and \
            "building_type_id" not in df:
        form_to_btype_func = orca.get_injectable("form_to_btype_func")
        df["building_type_id"] = df.apply(form_to_btype_func, axis=1)

    return df
def get_simulation_models(SCENARIO):
    """Return the ordered list of model step names for ``SCENARIO``.

    Scenarios "1", "3" and "4" switch on a VMT fee flag in the ``settings``
    injectable and get three extra steps spliced into the base list.
    """
    models = [
        "neighborhood_vars",            # local accessibility vars
        "regional_vars",                # regional accessibility vars

        "rsh_simulate",                 # residential sales hedonic
        "nrh_simulate",                 # non-residential rent hedonic

        "households_relocation",
        "households_transition",

        "jobs_relocation",
        "jobs_transition",

        "price_vars",

        "scheduled_development_events",  # scheduled buildings additions

        "lump_sum_accounts",             # run the subsidized acct system
        "subsidized_residential_developer_lump_sum_accts",

        "alt_feasibility",

        "residential_developer",
        "developer_reprocess",
        "retail_developer",
        "office_developer",

        "hlcm_simulate",                 # put these last so they don't get
        "elcm_simulate",                 # displaced by new dev

        "topsheet",
        "parcel_summary",
        "building_summary",
        "diagnostic_output",
        "geographic_summary",
        "travel_model_output"
    ]

    # calculate VMT taxes
    if SCENARIO not in ["1", "3", "4"]:
        return models

    # note that you might also have to change the fees that get
    # imposed - look for fees_per_unit column in variables.py
    fee_flags = (
        ("3", "vmt_fee_res"),
        ("1", "vmt_fee_com"),
        ("4", "vmt_fee_com"),
    )
    for scenario_id, flag in fee_flags:
        if SCENARIO == scenario_id:
            orca.get_injectable("settings")[flag] = True

    # calculate the vmt fees at the end of the year; each step is inserted
    # immediately before its anchor, in this order
    extra_steps = (
        ("diagnostic_output", "calculate_vmt_fees"),
        ("alt_feasibility", "subsidized_residential_feasibility"),
        ("alt_feasibility", "subsidized_residential_developer_vmt"),
    )
    for anchor, step in extra_steps:
        models.insert(models.index(anchor), step)

    return models
def subsidized_residential_feasibility(parcels, settings,
                                       add_extra_columns_func,
                                       parcel_sales_price_sqft_func,
                                       parcel_is_allowed_func,
                                       parcels_geography):
    """Run residential-only feasibility, apply policy modifications of
    profit, store the result as the ``feasibility`` table and dump it to CSV.

    ``add_extra_columns_func`` is accepted but not used in this function.
    """
    feasibility_args = settings['feasibility'].copy()
    feasibility_args["only_built"] = False
    feasibility_args["forms_to_test"] = ["residential"]

    # step 1
    utils.run_feasibility(parcels,
                          parcel_sales_price_sqft_func,
                          parcel_is_allowed_func,
                          **feasibility_args)

    flat = orca.get_table("feasibility").to_frame()
    # get rid of the multiindex that comes back from feasibility
    flat = flat.stack(level=0).reset_index(level=1, drop=True)
    # join to parcels_geography for filtering
    flat = flat.join(parcels_geography.to_frame())

    # add the multiindex back
    residential_columns = [("residential", name) for name in flat.columns]
    flat.columns = pd.MultiIndex.from_tuples(residential_columns)

    orca.add_table("feasibility",
                   policy_modifications_of_profit(flat, parcels))

    # re-read the registered table and flatten it again for the CSV dump
    report = orca.get_table("feasibility").to_frame()
    report = report.stack(level=0).reset_index(level=1, drop=True)
    out_path = "runs/run{}_feasibility_{}.csv".format(
        orca.get_injectable("run_number"),
        orca.get_injectable("year"))
    report.to_csv(out_path)
def subsidized_residential_feasibility(
        parcels, settings, add_extra_columns_func,
        parcel_sales_price_sqft_func, parcel_is_allowed_func,
        parcels_geography):
    """Compute feasibility for the residential form only, adjust profit for
    policy, register the result as ``feasibility`` and write it to a CSV
    under ``runs/``.

    ``add_extra_columns_func`` is part of the signature but is not used here.
    """
    run_args = settings['feasibility'].copy()
    run_args["only_built"] = False
    run_args["forms_to_test"] = ["residential"]

    # step 1
    utils.run_feasibility(
        parcels, parcel_sales_price_sqft_func, parcel_is_allowed_func,
        **run_args)

    # get rid of the multiindex that comes back from feasibility
    table = orca.get_table("feasibility").to_frame()
    table = table.stack(level=0).reset_index(level=1, drop=True)

    # join to parcels_geography for filtering
    table = table.join(parcels_geography.to_frame())

    # add the multiindex back
    table.columns = pd.MultiIndex.from_tuples(
        [("residential", column) for column in table.columns])

    table = policy_modifications_of_profit(table, parcels)
    orca.add_table("feasibility", table)

    # flatten the stored table once more before writing it out
    stored = orca.get_table("feasibility").to_frame()
    stored = stored.stack(level=0).reset_index(level=1, drop=True)
    csv_name = "runs/run{}_feasibility_{}.csv".format(
        orca.get_injectable("run_number"), orca.get_injectable("year"))
    stored.to_csv(csv_name)
def parcel_is_allowed(form):
    """Return a boolean Series, indexed like ``parcels``, telling whether
    ``form`` may be built on each parcel.

    Baseline zoning is OR-ed over the building types of the form, scenario
    zoning then overrides it, and retail can be switched off for the
    jurisdictions listed in ``eliminate_retail_zoning_from_juris``.
    """
    settings = orca.get_injectable("settings")
    form_to_btype = orca.get_injectable("mapping")["form_to_btype"]

    # we have zoning by building type but want
    # to know if specific forms are allowed
    baseline = orca.get_table("zoning_baseline")
    scenario = orca.get_table("zoning_scenario")
    parcels = orca.get_table("parcels")

    allowed = pd.Series(0, index=parcels.index)

    # first, it's allowed if any building type that matches
    # the form is allowed
    for btype in form_to_btype[form]:
        allowed |= baseline[btype]

    # then we override it with any values that are specified in the scenarios
    # i.e. they come from the add_bldg and drop_bldg columns
    # notice there is some dependence on ordering here.  basically values take
    # precedent that occur LAST in the form_to_btype mapping
    for btype in form_to_btype[form]:
        allowed = scenario[btype].combine_first(allowed)

    # this is a fun modification - when we get too much retail in jurisdictions
    # we can just eliminate all retail
    if form == "retail" and "eliminate_retail_zoning_from_juris" in settings:
        banned_juris = settings["eliminate_retail_zoning_from_juris"]
        allowed *= ~parcels.juris.isin(banned_juris)

    return allowed.astype("bool")
def get_dynamic_filepath(self):
    """
    Fill the ``%RUN%``, ``%ITER%`` and ``%TS%`` placeholders of ``self.path``.

    The run id and model iteration come from the Orca injectables ``run_id``
    and ``iter_var``; each falls back to ``0`` when the injectable is not
    registered. The timestamp is UTC, formatted as ``YYYYMMDD-HHMMSS``.

    Returns
    -------
    str

    Raises
    ------
    ValueError
        If ``self.path`` is None.

    """
    if self.path is None:
        raise ValueError("Please provide a file path")

    run_id = orca.get_injectable('run_id') \
        if orca.is_injectable('run_id') else 0
    iteration = orca.get_injectable('iter_var') \
        if orca.is_injectable('iter_var') else 0
    timestamp = datetime.datetime.utcnow().strftime('%Y%m%d-%H%M%S')

    # substitutions are applied in this fixed order
    return (self.path
            .replace('%RUN%', str(run_id))
            .replace('%ITER%', str(iteration))
            .replace('%TS%', timestamp))
def parcel_is_allowed(form):
    """Return a boolean Series, indexed like ``parcels``, telling whether
    ``form`` may be built on each parcel.

    A parcel is allowed when any building type of the form has a positive
    ``type<N>`` value in ``zoning_baseline`` or a positive ``add-type<N>``
    value in ``zoning_scenario``, unless a positive ``drop-type<N>`` value
    drops it.  Retail is additionally removed for the jurisdictions listed
    in the ``eliminate_retail_zoning_from_juris`` setting.
    """
    settings = orca.get_injectable('settings')
    building_types = settings["form_to_btype"][form]

    # fetch each table once instead of once per building type
    zoning_baseline = orca.get_table('zoning_baseline')
    zoning_scenario = orca.get_table('zoning_scenario')
    parcels = orca.get_table('parcels')

    def _any_positive(flags):
        # True where any of the per-type flags is set, aligned to parcels;
        # parcels missing from the zoning tables count as False
        return pd.concat(flags, axis=1).max(axis=1).\
            reindex(parcels.index).fillna(False)

    # we have zoning by building type but want
    # to know if specific forms are allowed
    baseline_flags = [zoning_baseline['type%d' % typ] > 0
                      for typ in building_types]
    # also check if the scenario based zoning adds the building type
    added_flags = [zoning_scenario['add-type%d' % typ] > 0
                   for typ in building_types]
    allowed = _any_positive(baseline_flags + added_flags)

    # also check if the scenario based zoning drops the building type
    # NOTE THAT DROPPING OVERRIDES ADDING!
    disallowed = _any_positive([zoning_scenario['drop-type%d' % typ] > 0
                                for typ in building_types])

    allowed = allowed.astype('bool') & ~disallowed

    if "eliminate_retail_zoning_from_juris" in settings and form == "retail":
        allowed *= ~parcels.juris.isin(
            settings["eliminate_retail_zoning_from_juris"])

    return allowed.astype("bool")
def add_extra_columns_func(df):
    """Add the default columns expected on new buildings and return ``df``.

    ``df`` is modified in place.  The price and redfin columns are always
    overwritten; the other columns are only filled in when missing.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``parcel_id`` when ``parcel_size`` is missing.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in.
    """
    for col in ["residential_price", "non_residential_price"]:
        df[col] = 0

    if "deed_restricted_units" not in df.columns:
        df["deed_restricted_units"] = 0
    else:
        # A single-argument print() call behaves the same on Python 2 and 3.
        print("Number of deed restricted units built = %d" %
              df.deed_restricted_units.sum())

    df["redfin_sale_year"] = 2012
    df["redfin_sale_price"] = np.nan

    if "residential_units" not in df:
        df["residential_units"] = 0

    if "parcel_size" not in df:
        # NOTE(review): the right-hand Series is labelled by parcel_id, so the
        # assignment aligns on df's index - confirm df is indexed compatibly.
        df["parcel_size"] = \
            orca.get_table("parcels").parcel_size.loc[df.parcel_id]

    # Use the public registry check instead of the private _INJECTABLES dict.
    if orca.is_injectable("year") and "year_built" not in df:
        df["year_built"] = orca.get_injectable("year")

    if orca.is_injectable("form_to_btype_func") and \
            "building_type" not in df:
        form_to_btype_func = orca.get_injectable("form_to_btype_func")
        df["building_type"] = df.apply(form_to_btype_func, axis=1)

    return df
def get_simulation_models(SCENARIO):
    """Return the ordered list of model step names for ``SCENARIO``.

    Scenarios "1", "3" and "4" set VMT fee flags in the ``settings``
    injectable and get additional steps spliced into the base list.
    """
    models = [
        "neighborhood_vars",            # local accessibility vars
        "regional_vars",                # regional accessibility vars

        "rsh_simulate",                 # residential sales hedonic
        "nrh_simulate",                 # non-residential rent hedonic

        "households_relocation",
        "households_transition",

        "jobs_relocation",
        "jobs_transition",

        "price_vars",

        "scheduled_development_events",  # scheduled buildings additions

        "lump_sum_accounts",             # run the subsidized acct system
        "subsidized_residential_developer_lump_sum_accts",

        "alt_feasibility",

        "residential_developer",
        "developer_reprocess",
        "office_developer",
        "retail_developer",
        "additional_units",

        "hlcm_simulate",                 # put these last so they don't get
        "proportional_elcm",             # start with a proportional jobs model
        "elcm_simulate",                 # displaced by new dev

        "topsheet",
        "parcel_summary",
        "building_summary",
        "diagnostic_output",
        "geographic_summary",
        "travel_model_output"
    ]

    # calculate VMT taxes
    if SCENARIO not in ["1", "3", "4"]:
        return models

    # note that you might also have to change the fees that get
    # imposed - look for fees_per_unit column in variables.py
    scenario_flags = (
        ("3", "vmt_res_for_res", True),
        ("1", "vmt_com_for_res", True),
        ("4", "vmt_com_for_res", True),
        ("4", "vmt_com_for_com", False),
    )
    for scenario_id, flag, value in scenario_flags:
        if SCENARIO == scenario_id:
            orca.get_injectable("settings")[flag] = value

    if SCENARIO == "4":
        models.insert(models.index("office_developer"),
                      "subsidized_office_developer")

    # calculate the vmt fees at the end of the year; each step is inserted
    # immediately before its anchor, in this order
    extra_steps = (
        ("diagnostic_output", "calculate_vmt_fees"),
        ("alt_feasibility", "subsidized_residential_feasibility"),
        ("alt_feasibility", "subsidized_residential_developer_vmt"),
    )
    for anchor, step in extra_steps:
        models.insert(models.index(anchor), step)

    return models
def year():
    """Return the current simulation year, or 2010 outside a simulation.

    The year is the value of the ``iter_var`` injectable.  When that
    injectable is not registered, or is None, the base year is returned.
    """
    # Ask the registry directly instead of hiding every error behind a bare
    # ``except``; unrelated failures are no longer silently swallowed.
    if orca.is_injectable("iter_var"):
        current = orca.get_injectable("iter_var")
        if current is not None:
            return current

    # if we're not running simulation, return base year
    return 2010
def preload_injectables():
    """Evaluate the ``preload_injectables`` injectable, if one is registered,
    and report the elapsed time through ``print_elapsed_time``.
    """
    started = print_elapsed_time()

    if not orca.is_injectable('preload_injectables'):
        return

    # load skim_stack
    orca.get_injectable('preload_injectables')
    print_elapsed_time("preload_injectables", started)
def interaction_trace_rows(interaction_df, choosers):
    """
    Trace model design for interaction_simulate

    Parameters
    ----------
    interaction_df : pandas.DataFrame
        traced interaction dataframe; only its row count is used here, to
        work out how many rows belong to each chooser
    choosers : pandas.DataFrame
        interaction_simulate choosers
        (needed to filter the interaction dataframe by traced person id)

    Returns
    -------
    trace_rows : numpy.ndarray
        array of booleans to select values in eval_interaction_utilities df to trace
    trace_ids : tuple (str, numpy.ndarray)
        slicer column name and array of trace ids, one per selected row

    Raises
    ------
    RuntimeError
        if the choosers index is not one this function knows how to slice
    """

    # slicer column name and id targets to use for chooser id added to model_design dataframe
    # currently we only ever slice by person_id, but that could change, so we check here...
    index_name = choosers.index.name

    if index_name == 'PERID' \
            or index_name == orca.get_injectable('persons_index_name'):
        slicer_column_name = index_name
        targets = orca.get_injectable('trace_person_ids')
    elif index_name == 'tour_id' and 'person_id' in choosers.columns:
        slicer_column_name = 'person_id'
        targets = orca.get_injectable('trace_person_ids')
    else:
        raise RuntimeError("trace_interaction_model_design don't know how to slice index '%s'"
                           % index_name)

    # we can deduce the sample_size from the relative size of model_design and choosers
    # (model design rows are repeated once for each alternative)
    # integer division: np.repeat needs an integer count, and "/" yields a
    # float on Python 3
    sample_size = len(interaction_df.index) // len(choosers.index)

    if slicer_column_name == index_name:
        trace_rows = np.in1d(choosers.index, targets)
        trace_ids = np.asanyarray(choosers[trace_rows].index)
    else:
        trace_rows = np.in1d(choosers['person_id'], targets)
        trace_ids = np.asanyarray(choosers[trace_rows].person_id)

    # expand from one entry per chooser to one entry per interaction row
    trace_rows = np.repeat(trace_rows, sample_size)
    trace_ids = np.repeat(trace_ids, sample_size)

    assert isinstance(trace_rows, np.ndarray)
    assert isinstance(trace_ids, np.ndarray)

    trace_ids = (slicer_column_name, trace_ids)

    return trace_rows, trace_ids
def large_area_id(households, buildings):
    """Return the large-area id of every household, looked up through its
    building.

    The lookup lives in the ``households_large_area_lookup`` injectable and
    is built with ``misc.reindex`` whenever that injectable is missing or
    empty; the result is sliced by ``households.index``.
    """
    cache_key = "households_large_area_lookup"

    have_lookup = orca.is_injectable(cache_key) and \
        len(orca.get_injectable(cache_key)) != 0

    if not have_lookup:
        by_household = misc.reindex(buildings.large_area_id,
                                    households.building_id)
        orca.add_injectable(cache_key, by_household,
                            autocall=False, cache=True)

    return orca.get_injectable(cache_key).loc[households.index]
def generate_indicators(year, forecast_year, parcels, zones):
    """Write zone and parcel indicator CSVs and, in the forecast year, charts.

    Parameters
    ----------
    year : object
        Overwritten immediately from the ``iter_var`` / ``base_year``
        injectables; the passed-in value is not used.
    forecast_year : object
        Charts are only generated when the resolved year equals this value.
    parcels, zones : table wrappers
        Must provide ``to_frame(columns)``.

    Returns
    -------
    None
    """
    # If iter_var is not defined is a presimulation generation
    if orca.get_injectable('iter_var'):
        year = orca.get_injectable('iter_var')
    else:
        year = orca.get_injectable('base_year')

    # General output indicators
    cfg = orca.get_injectable('output_parameters')['output_variables']
    zone_ind = zones.to_frame(cfg['zones'])
    zone_ind = zone_ind.reindex(sorted(zone_ind.columns), axis=1)
    parcel_ind = parcels.to_frame(cfg['parcels'])
    parcel_ind = parcel_ind.reindex(sorted(parcel_ind.columns), axis=1)
    zone_ind.to_csv('./runs/zone_indicators_%s.csv' % year)
    parcel_ind.to_csv('./runs/parcel_indicators_%s.csv' % year)

    # Output indicators by building type
    btype_columns = [
        'building_type_id', 'is_residential', 'is_non_residential'
    ]
    btypes = orca.get_table('building_types').to_frame(btype_columns)
    btypes = btypes.reset_index()
    btypes.loc[btypes['is_residential'] == True, 'ind_res'] = \
        "sum_residential_units_" + btypes.building_type_id.astype(str)
    btypes.loc[btypes['is_non_residential'] == True, 'ind_non_res'] = \
        "sum_non_residential_sqft_" + btypes.building_type_id.astype(str)
    btype_ind_cols = list(btypes.ind_res) + list(btypes.ind_non_res)
    # rows that matched neither mask were left as NaN; drop those entries
    btype_ind_cols = [ind for ind in btype_ind_cols if str(ind) != 'nan']
    zone_type = zones.to_frame(btype_ind_cols)
    parcel_type = parcels.to_frame(btype_ind_cols)
    zone_type = zone_type.reindex(sorted(zone_type.columns), axis=1)
    parcel_type = parcel_type.reindex(sorted(parcel_type.columns), axis=1)
    zone_type.to_csv('./runs/zone_indicators_building_type_%s.csv' % year)
    parcel_type.to_csv('./runs/parcel_indicators_building_type_%s.csv' % year)

    # Generate chart indicators
    if year == forecast_year:
        vdict, cdict = export_indicator_definitions()
        data, variables, geo_small, geo_large, custom_v = prepare_chart_data()

        for table in custom_v:
            for var in custom_v[table]:
                gen_custom_barchart(table, var)

        used_variables = []
        for aggtype in ['sum', 'mean']:
            for var in variables[aggtype]:
                # message kept on a single line (the literal used to contain
                # a stray line break)
                print('Generating charts for ' + var)
                gen_var_barcharts_by_geo(data, var, aggtype, geo_large)
                gen_var_histograms(data, var, aggtype, geo_small,
                                   vdict, cdict)
                gen_barcharts_n_largest(data, var, aggtype, geo_small, 10)
                used_variables.append(var)

                # scatter this variable against every variable that has not
                # had its own charts generated yet
                for aggtype2 in ['sum', 'mean']:
                    for var2 in variables[aggtype2]:
                        if var != var2 and var2 not in used_variables:
                            gen_var_scatters(data, var, var2,
                                             aggtype, aggtype2,
                                             'zone_id', geo_large)
def close():
    """
    Close any known open files, then close the pipeline store and replace
    the ``pipeline_store`` injectable with None.
    """
    close_open_files()

    store = orca.get_injectable('pipeline_store')
    store.close()

    orca.add_injectable('pipeline_store', None)

    logger.info("close_pipeline")
def skims():
    """Build a ``skim.Skims`` object in which every (type, period) key, and
    the ``DISTANCE`` key, is set from the ``distance_skim`` injectable.
    """
    matrix_types = (
        "SOV_TIME", "SOVTOLL_TIME", "HOV2_TIME",
        "SOV_DIST", "SOVTOLL_DIST", "HOV2_DIST",
        "SOV_BTOLL", "SOVTOLL_BTOLL", "HOV2_BTOLL",
        "SOVTOLL_VTOLL",
    )
    periods = ("AM", "MD", "PM")

    skim_set = skim.Skims()

    # FIXME - this is reusing the same skim as all the different kinds of skims
    for matrix_type in matrix_types:
        for period in periods:
            key = (matrix_type, period)
            skim_set[key] = orca.get_injectable("distance_skim")

    skim_set['DISTANCE'] = orca.get_injectable("distance_skim")
    return skim_set
def get_config_file(type):
    """Return the config file name registered for model ``type``.

    Configs are read from ``inputs['model_configs']`` under the part of
    ``type`` before the first underscore.  A scenario-specific entry
    (``s<scenario>_<type>_config``) wins over the generic
    ``<type>_config`` entry; ``<type>.yaml`` is the fallback.
    """
    model_configs = orca.get_injectable('inputs')['model_configs']
    configs = model_configs[type.split('_')[0]]
    scenario = orca.get_injectable('scenario')

    candidates = (
        's{}_{}_config'.format(scenario, type),
        '{}_config'.format(type),
    )
    for key in candidates:
        if key in configs:
            return configs[key]

    return '{}.yaml'.format(type)
def get_control_file(type):
    """Return the control-totals file name registered for ``type``.

    Looks in ``inputs['control_tables'][type]``.  A scenario-specific entry
    (``s<scenario>_<type>_controls_input_file``) wins over the generic
    ``<type>_controls_input_file`` entry; ``<type>_controls.csv`` is the
    fallback.
    """
    controls = orca.get_injectable('inputs')['control_tables'][type]
    scenario = orca.get_injectable('scenario')

    scenario_key = 's{}_{}_controls_input_file'.format(scenario, type)
    if scenario_key in controls:
        return controls[scenario_key]

    generic_key = '{}_controls_input_file'.format(type)
    if generic_key in controls:
        return controls[generic_key]

    return '{}_controls.csv'.format(type)
def config_logger(custom_config_file=None, basic=False):
    """
    Configure logger

    if log_config_file is not supplied then look for conf file in configs_dir

    if not found use basicConfig

    After configuring logging, the csv files in the ``output_dir``
    injectable are deleted via ``delete_csv_files``.

    Parameters
    ----------
    custom_config_file: str
        custom config filename
    basic: boolean
        basic setup

    Returns
    -------
    Nothing
    """
    log_config_file = None

    if custom_config_file and os.path.isfile(custom_config_file):
        log_config_file = custom_config_file
    elif not basic:
        # look for conf file in configs_dir
        configs_dir = orca.get_injectable('configs_dir')
        default_config_file = os.path.join(configs_dir, LOGGING_CONF_FILE_NAME)
        if os.path.isfile(default_config_file):
            log_config_file = default_config_file

    if log_config_file:
        with open(log_config_file) as f:
            # NOTE(review): yaml.load without an explicit Loader can build
            # arbitrary Python objects and is rejected by recent PyYAML
            # releases.  Choose a Loader deliberately here (SafeLoader if the
            # config needs no python-specific tags).
            config_dict = yaml.load(f)
            config_dict = config_dict['logging']
            config_dict.setdefault('version', 1)
            logging.config.dictConfig(config_dict)
    else:
        logging.basicConfig(level=logging.INFO, stream=sys.stdout)

    logger = logging.getLogger(ASIM_LOGGER)

    # a custom file was requested but does not exist: report it once logging
    # is usable
    if custom_config_file and not os.path.isfile(custom_config_file):
        logger.error("#\n#\n#\nconfig_logger could not find conf file '%s'" % custom_config_file)

    if log_config_file:
        logger.info("Read logging configuration from: %s" % log_config_file)
    else:
        # A single-argument print() call behaves the same on Python 2 and 3.
        print("Configured logging using basicConfig")
        logger.info("Configured logging using basicConfig")

    output_dir = orca.get_injectable('output_dir')
    logger.info("Deleting files in output_dir %s" % output_dir)
    delete_csv_files(output_dir)
def get_config_file(type):
    """Return the config file name registered for model ``type``.

    Configs are read from ``settings['model_configs']`` under the part of
    ``type`` before the first underscore.  A scenario-specific entry
    (``<scenario>_<type>_config``) wins over the generic ``<type>_config``
    entry; ``<type>.yaml`` is the fallback.
    """
    model_configs = orca.get_injectable('settings')['model_configs']
    configs = model_configs[type.split('_')[0]]
    scenario = orca.get_injectable('scenario')

    scenario_key = '{}_{}_config'.format(scenario, type)
    if scenario_key in configs:
        return configs[scenario_key]

    generic_key = '{}_config'.format(type)
    if generic_key in configs:
        return configs[generic_key]

    return '{}.yaml'.format(type)
def get_control_file(type):
    """Return the control-totals file name registered for ``type``.

    Looks in ``settings['control_tables'][type]``.  A scenario-specific
    entry (``s<scenario>_<type>_controls_input_file``) wins over the generic
    ``<type>_controls_input_file`` entry; ``<type>_controls.csv`` is the
    fallback.
    """
    controls = orca.get_injectable('settings')['control_tables'][type]
    scenario = orca.get_injectable('scenario')

    candidates = (
        's{}_{}_controls_input_file'.format(scenario, type),
        '{}_controls_input_file'.format(type),
    )
    for key in candidates:
        if key in controls:
            return controls[key]

    return '{}_controls.csv'.format(type)
def assert_injectable_can_be_generated(injectable_name):
    """
    Can an _InjectableFuncWrapper be evaluated without errors?

    (The Orca documentation appears inconsistent, but orca.get_injectable() *does* attempt
    to evaluate wrapped functions, and returns the result.)

    Parameters
    ----------
    injectable_name : str

    Returns
    -------
    None

    Raises
    ------
    OrcaAssertionError
        If the injectable is a function and evaluating it raises an
        exception.

    """
    assert_injectable_is_registered(injectable_name)

    if orca.injectable_type(injectable_name) == 'function':
        try:
            orca.get_injectable(injectable_name)
        except Exception:
            # ``Exception`` rather than a bare except, so KeyboardInterrupt
            # and SystemExit are not reported as evaluation failures.
            # TODO: issues #3 log backtrace
            msg = "Injectable '%s' is registered but cannot be evaluated" % injectable_name
            raise OrcaAssertionError(msg)
def year(base_year):
    """Return the ``iter_var`` injectable when it is registered and not
    None; otherwise return ``base_year``.
    """
    # outside of a run, return the base/default
    if 'iter_var' not in orca.list_injectables():
        return base_year

    current = orca.get_injectable('iter_var')
    return base_year if current is None else current
def cost_shifter_callback(self, form, df, costs):
    """
    Scale development costs by the calibration cost shifters.

    The shifters come from the ``cost_shifters`` injectable (``calibration``
    section) and are looked up by the value of the calibration geography
    column.  ``costs`` is scaled in place, one group of columns per
    geography value.

    Parameters
    ----------
    form : str
        The name of the form.
    df : DataFrame
        Dataframe of allowed site proposals.
    costs : Array
        Array of total development costs; columns are selected by the
        positional row numbers of ``df``.

    Returns
    -------
    Array
        The same ``costs`` array after scaling.
    """
    calibration = orca.get_injectable('cost_shifters')['calibration']
    geography = calibration['calibration_geography_id']

    if form == 'residential':
        shift_type = 'residential'
    else:
        shift_type = 'non_residential'
    shifters = calibration['proforma_cost_shifters'][shift_type]

    # reset_index gives positional row labels that are used as column
    # positions in costs
    by_geography = df.reset_index().groupby(geography)
    for geo_value, rows in by_geography:
        costs[:, rows.index] *= shifters[geo_value]

    return costs
def get_development_projects():
    """Load committed development projects as scheduled development events.

    Connects to the database named by the ``conn_string`` injectable, reads
    the committed development projects and their parcels, and keeps one
    parcel (the largest parcel_id) per project.

    Returns
    -------
    pandas.DataFrame or None
        One row per project with integer columns, or None when
        ``conn_string`` is empty.
    """
    conn_string = orca.get_injectable('conn_string')
    if not conn_string:
        print('A "conn_string" injectable must be registered and populated. Skipping export-to-Urban-Canvas.')
        return None

    devproj_query = "select id, placetype_id as building_type_id, duration, buildings_number, average_floors as stories, sqft as non_residential_sqft, sqft_unit as sqft_per_unit, units as residential_units, Name as name, start_date from developmentprojects where committed = 'TRUE';"
    dp_pcl_query = "select developmentprojects_parcels.development_project, developmentprojects_parcels.parcel_id, parcel.parcel_acres from developmentprojects_parcels, parcel where developmentprojects_parcels.parcel_id = parcel.parcel_id;"

    conn = psycopg2.connect(conn_string)
    try:
        print("Loading committed development projects table")
        devproj = sql.read_frame(devproj_query, conn)

        print("Loading development project parcels")
        dp_pcl = sql.read_frame(dp_pcl_query, conn)
    finally:
        # release the connection even when a query fails
        conn.close()

    # year_built is the first four characters of the start date
    devproj['year_built'] = devproj.start_date.astype('object').astype('str')
    devproj.year_built = devproj.year_built.str.slice(start=0, stop=4)
    devproj.year_built = devproj.year_built.astype('int')

    # In future, use the parcel_acres field on this tbl too
    devproject_parcel_ids = dp_pcl.groupby('development_project').parcel_id.max().reset_index()

    scheduled_development_events = pd.merge(devproject_parcel_ids, devproj,
                                            left_on='development_project',
                                            right_on='id')
    scheduled_development_events = scheduled_development_events.rename(
        columns={'development_project': 'scheduled_development_event_id',
                 'building_type_id': 'development_type_id'})
    scheduled_development_events = scheduled_development_events[[
        'scheduled_development_event_id', 'year_built',
        'development_type_id', 'stories', u'non_residential_sqft',
        'sqft_per_unit', 'residential_units', 'parcel_id']]

    for col in scheduled_development_events:
        scheduled_development_events[col] = scheduled_development_events[col].astype('int')

    return scheduled_development_events
def parcel_is_allowed(form):
    """Return a boolean Series, indexed like ``parcels``, telling whether
    ``form`` may be built on each parcel.

    Baseline zoning is OR-ed over the building types of the form, scenario
    zoning then overrides it, and retail can be switched off for the
    jurisdictions listed in ``eliminate_retail_zoning_from_juris``.
    """
    settings = orca.get_injectable("settings")
    form_to_btype = settings["form_to_btype"]

    # we have zoning by building type but want
    # to know if specific forms are allowed
    baseline = orca.get_table("zoning_baseline")
    scenario = orca.get_table("zoning_scenario")
    parcels = orca.get_table("parcels")

    allowed = pd.Series(0, index=parcels.index)

    # first, it's allowed if any building type that matches
    # the form is allowed
    for btype in form_to_btype[form]:
        allowed |= baseline[btype]

    # then we override it with any values that are specified in the scenarios
    # i.e. they come from the add_bldg and drop_bldg columns
    # notice there is some dependence on ordering here.  basically values take
    # precedent that occur LAST in the form_to_btype mapping
    for btype in form_to_btype[form]:
        allowed = scenario[btype].combine_first(allowed)

    # this is a fun modification - when we get too much retail in jurisdictions
    # we can just eliminate all retail
    if form == "retail" and "eliminate_retail_zoning_from_juris" in settings:
        banned_juris = settings["eliminate_retail_zoning_from_juris"]
        allowed *= ~parcels.juris.isin(banned_juris)

    return allowed.astype("bool")
def year():
    """Return the current simulation year, or 2014 outside a simulation.

    The year is the value of the ``iter_var`` injectable.  When that
    injectable is not registered, or is None, the base year is returned.
    """
    # Ask the registry directly instead of swallowing every exception.
    if orca.is_injectable("iter_var"):
        current = orca.get_injectable("iter_var")
        if current is not None:
            return current

    # if we're not running simulation, return base year
    return 2014
def open_pipeline_store(overwrite=False):
    """
    Open the pipeline checkpoint store and add an orca injectable to access it

    Parameters
    ----------
    overwrite : bool
        delete file before opening (unless resuming)

    Raises
    ------
    RuntimeError
        if a pipeline store is already open
    """
    # A ``pipeline_store`` injectable holding None does not count as an open
    # store, so only a non-None value blocks opening.
    if orca.is_injectable('pipeline_store') \
            and orca.get_injectable('pipeline_store') is not None:
        raise RuntimeError("Pipeline store is already open!")

    pipeline_file_path = orca.get_injectable('pipeline_path')

    if overwrite:
        try:
            if os.path.isfile(pipeline_file_path):
                logger.debug("removing pipeline store: %s" % pipeline_file_path)
                os.unlink(pipeline_file_path)
        except Exception as e:
            # best effort: report the failure and carry on opening the store
            print(e)
            # the message needs both the path and the error
            logger.warning("Error removing %s: %s" % (pipeline_file_path, e))

    store = pd.HDFStore(pipeline_file_path, mode='a')

    orca.add_injectable('pipeline_store', store)

    logger.debug("opened pipeline_store")
def year():
    """Return the current simulation year, or 2014 outside a simulation.

    The year is the value of the ``iter_var`` injectable.  When that
    injectable is not registered, or is None, the base year is returned.
    """
    # Ask the registry directly instead of hiding every error behind a bare
    # ``except``.
    if orca.is_injectable("iter_var"):
        current = orca.get_injectable("iter_var")
        if current is not None:
            return current

    # if we're not running simulation, return base year
    return 2014
def get_checkpoints():
    """
    Get pandas dataframe of info about all checkpoints stored in pipeline

    The table is read from the pipeline store returned by
    ``get_pipeline_store`` when that is truthy, otherwise straight from the
    hdf5 file named by the ``pipeline_path`` injectable.

    Returns
    -------
    checkpoints_df : pandas.DataFrame
    """
    store = get_pipeline_store()

    if not store:
        pipeline_file_path = orca.get_injectable('pipeline_path')
        checkpoints = pd.read_hdf(pipeline_file_path, _CHECKPOINT_TABLE_NAME)
    else:
        checkpoints = store[_CHECKPOINT_TABLE_NAME]

    # non-table columns first (column order in df is random because created from a dict)
    table_columns = []
    for column in checkpoints.columns.values:
        if column not in _NON_TABLE_COLUMNS:
            table_columns.append(column)

    ordered_columns = [_CHECKPOINT_NAME, _TIMESTAMP] + table_columns
    checkpoints = checkpoints[ordered_columns]
    checkpoints.index.name = 'step_num'

    return checkpoints
def full_run(preload_3d_skims, chunk_size=0,
             households_sample_size=HOUSEHOLDS_SAMPLE_SIZE,
             trace_hh_id=None, trace_od=None, check_for_variability=None):
    """Register directories and settings, run every model step in order and
    return the number of rows in ``tours_merged``.
    """
    here = os.path.dirname(__file__)

    configs_dir = os.path.join(here, '..', '..', '..', 'example', 'configs')
    orca.add_injectable("configs_dir", configs_dir)

    data_dir = os.path.join(here, 'data')
    orca.add_injectable("data_dir", data_dir)

    output_dir = os.path.join(here, 'output')
    orca.add_injectable("output_dir", output_dir)

    inject_settings(configs_dir,
                    households_sample_size=households_sample_size,
                    preload_3d_skims=preload_3d_skims,
                    chunk_size=chunk_size,
                    trace_hh_id=trace_hh_id,
                    trace_od=trace_od,
                    check_for_variability=check_for_variability)

    orca.add_injectable("set_random_seed", set_random_seed)

    orca.clear_cache()

    tracing.config_logger()

    # grab some of the tables
    for table_name in ("land_use", "households", "persons"):
        orca.get_table(table_name).to_frame().info()

    # NOTE(review): this compares against the module constant rather than the
    # households_sample_size argument - confirm that is intended.
    assert len(orca.get_table("households").index) == HOUSEHOLDS_SAMPLE_SIZE
    assert orca.get_injectable("chunk_size") == chunk_size

    # run the models in the expected order
    for step in ("compute_accessibility",
                 "school_location_simulate",
                 "workplace_location_simulate",
                 "auto_ownership_simulate",
                 "cdap_simulate"):
        orca.run([step])

    orca.run(['mandatory_tour_frequency'])
    orca.get_table("mandatory_tours").tour_type.value_counts()

    orca.run(['non_mandatory_tour_frequency'])
    orca.get_table("non_mandatory_tours").tour_type.value_counts()

    for step in ("destination_choice",
                 "mandatory_scheduling",
                 "non_mandatory_scheduling",
                 "patch_mandatory_tour_destination",
                 "tour_mode_choice_simulate",
                 "trip_mode_choice_simulate"):
        orca.run([step])

    tours_merged = orca.get_table("tours_merged").to_frame()
    tour_count = len(tours_merged.index)

    orca.clear_cache()

    return tour_count
def parcel_is_allowed(form):
    """Return, for every parcel, whether any building type of ``form`` is
    flagged ``'t'`` in ``zoning_baseline``.

    The result is reindexed to the ``parcels`` index; parcels without a
    zoning row come out as False.
    """
    form_to_btype = orca.get_injectable("form_to_btype")

    # we have zoning by building type but want
    # to know if specific forms are allowed
    per_type = []
    for btype in form_to_btype[form]:
        zoning = orca.get_table('zoning_baseline')
        per_type.append(zoning['type%d' % btype] == 't')

    any_allowed = pd.concat(per_type, axis=1).max(axis=1)
    parcel_index = orca.get_table('parcels').index
    return any_allowed.reindex(parcel_index).fillna(False)
def injectable_repr(inj_name):
    """
    Look up an injectable and describe it as JSON.

    The response carries two keys: "type" (the string form of the value's
    type) and "repr" (the value's repr).

    """
    value = orca.get_injectable(inj_name)
    type_name = str(type(value))
    return jsonify(type=type_name, repr=repr(value))
def ciacre(parcels, taz):
    """Sum, per ``zone_id``, the parcel acres weighted by the
    ``select_non_residential`` result of ``parcel_first_building_type_is``.

    ``taz`` is accepted but not used in this function.
    """
    first_type_is = orca.get_injectable('parcel_first_building_type_is')
    selected = first_type_is('select_non_residential')

    zone_ids = parcels.get_column('zone_id')
    acres = parcels.parcel_acres * selected

    frame = pd.DataFrame(data={'zone_id': zone_ids, 'ciacre': acres})
    return frame.groupby('zone_id').ciacre.sum()
def resacre(parcels):
    """Sum, per ``zone_id``, the parcel acres weighted by the combined
    ``residential`` / ``mixedresidential`` results of
    ``parcel_first_building_type_is``.
    """
    first_type_is = orca.get_injectable('parcel_first_building_type_is')
    selected = first_type_is('residential') | first_type_is('mixedresidential')

    zone_ids = parcels.get_column('zone_id')
    acres = parcels.parcel_acres * selected

    frame = pd.DataFrame(data={'zone_id': zone_ids,
                               'residential_acres': acres})
    return frame.groupby('zone_id').residential_acres.sum()
def get_pipeline_store():
    """
    Return the value of the ``pipeline_store`` injectable, or None when no
    such injectable is registered.
    """
    if not orca.is_injectable('pipeline_store'):
        return None
    return orca.get_injectable('pipeline_store')
def craigslist():
    """Load ``sfbay_craigslist.csv`` from the data directory, attach walk
    and drive network node ids from the ``net`` injectable, and fill missing
    ``bedrooms`` / ``neighborhood`` values.
    """
    csv_path = os.path.join(misc.data_dir(), "sfbay_craigslist.csv")
    listings = pd.read_csv(csv_path)

    net = orca.get_injectable('net')
    lon, lat = listings['lon'], listings['lat']
    listings['node_id'] = net['walk'].get_node_ids(lon, lat)
    listings['tmnode_id'] = net['drive'].get_node_ids(lon, lat)

    # fill nans -- missing bedrooms are mostly studio apts
    listings['bedrooms'] = listings.bedrooms.replace(np.nan, 1)
    listings['neighborhood'] = listings.neighborhood.replace(np.nan, '')

    return listings
def get_step_arg(arg_name, default=_NO_DEFAULT):
    """Return the value of ``arg_name`` from the ``step_args`` injectable.

    Parameters
    ----------
    arg_name : str
        key to look up in the step args dict
    default : object
        value returned when ``arg_name`` is missing; when omitted, a missing
        argument is an error

    Returns
    -------
    object

    Raises
    ------
    RuntimeError
        if ``arg_name`` is missing and no default was supplied
    """
    args = orca.get_injectable('step_args')

    assert isinstance(args, dict)
    if arg_name not in args and default == _NO_DEFAULT:
        # raise a real exception: raising a plain string is itself a TypeError
        raise RuntimeError("step arg '%s' not found and no default" % arg_name)

    return args.get(arg_name, default)
def add_extra_columns_func(df):
    """Add the default columns expected on developer-model buildings and
    return ``df`` (which is modified in place).

    Source, price, preserved-unit and redfin columns are always overwritten;
    the unit-count, parcel size, year built and building type columns are
    only filled in when missing.
    """
    def _default_or_report(column, message):
        # report the total of an existing unit column, or create it as 0
        if column in df.columns:
            print(message % df[column].sum())
        else:
            df[column] = 0

    df['source'] = 'developer_model'

    for price_col in ("residential_price", "non_residential_rent"):
        df[price_col] = 0

    _default_or_report("deed_restricted_units",
                       "Number of deed restricted units built = %d")

    df["preserved_units"] = 0.0

    _default_or_report("inclusionary_units",
                       "Number of inclusionary units built = %d")
    _default_or_report("subsidized_units",
                       "Number of subsidized units built = %d")

    df["redfin_sale_year"] = 2012
    df["redfin_sale_price"] = np.nan

    if "residential_units" not in df:
        df["residential_units"] = 0

    if "parcel_size" not in df:
        parcel_sizes = orca.get_table("parcels").parcel_size
        df["parcel_size"] = parcel_sizes.loc[df.parcel_id]

    if orca.is_injectable("year") and "year_built" not in df:
        df["year_built"] = orca.get_injectable("year")

    if orca.is_injectable("form_to_btype_func") and "building_type" not in df:
        to_btype = orca.get_injectable("form_to_btype_func")
        df["building_type"] = df.apply(to_btype, axis=1)

    return df
def parcel_is_allowed(form):
    """Return, for every parcel, whether any building type of ``form`` is
    flagged ``'t'`` in ``zoning_baseline``.

    The result is reindexed to the ``parcels`` index; parcels without a
    zoning row come out as False.
    """
    building_types = orca.get_injectable("form_to_btype")[form]

    # we have zoning by building type but want
    # to know if specific forms are allowed
    flags = []
    for building_type in building_types:
        column = 'type%d' % building_type
        flags.append(orca.get_table('zoning_baseline')[column] == 't')

    combined = pd.concat(flags, axis=1).max(axis=1)
    combined = combined.reindex(orca.get_table('parcels').index)
    return combined.fillna(False)
def year():
    """Return the current simulation year, or 2015 outside a simulation.

    The year is the value of the ``iter_var`` injectable.  When that
    injectable is not registered, or is None, the default year is returned.
    """
    default_year = 2015

    # Ask the registry directly instead of hiding every error behind a bare
    # ``except``.
    if not orca.is_injectable('iter_var'):
        return default_year

    iter_var = orca.get_injectable('iter_var')
    if iter_var is not None:
        return iter_var
    return default_year
def get_logsum_file(type='mandatory'):
    """Return the logsum file configured for ``type`` in the current year
    and scenario, remembering the choice between calls.

    The first call picks the most specific key present in
    ``settings['logsums'][type]`` (``logsum``, then ``logsum_<year>``, then
    ``logsum_s<scenario>``, then ``logsum_<year>_s<scenario>``; later
    matches win) and records both the kind of key and the file in the
    ``previous_<type>_logsum_type`` / ``previous_<type>_logsum_file``
    injectables.  Later calls look up only that kind of key and fall back
    to the remembered file when the key is absent.
    """
    logsums = orca.get_injectable('settings')['logsums'][type]
    sc = orca.get_injectable('scenario')
    yr = orca.get_injectable('year')

    type_key = 'previous_{}_logsum_type'.format(type)
    file_key = 'previous_{}_logsum_file'.format(type)

    try:
        # raises on the first call, when nothing has been remembered yet
        prev_type = orca.get_injectable(type_key)

        if prev_type == 'generic':
            return orca.get_injectable(file_key)

        if prev_type == 'year':
            wanted = 'logsum_{}'.format(yr)
        elif prev_type == 'scenario':
            wanted = 'logsum_s{}'.format(sc)
        else:
            wanted = 'logsum_{}_s{}'.format(yr, sc)

        if wanted not in logsums:
            return orca.get_injectable(file_key)

        ls = logsums[wanted]
        orca.add_injectable(file_key, ls)
        return ls

    except Exception:
        # ``Exception`` rather than a bare except, so KeyboardInterrupt and
        # SystemExit still propagate.
        # Checked from least to most specific; the last key present wins.
        candidates = (
            ('logsum', 'generic'),
            ('logsum_{}'.format(yr), 'year'),
            ('logsum_s{}'.format(sc), 'scenario'),
            ('logsum_{}_s{}'.format(yr, sc), 'year_scenario'),
        )
        for key, kind in candidates:
            if key in logsums:
                ls = logsums[key]
                ls_type = kind

        # NOTE(review): when none of the keys is present, ls and ls_type are
        # unbound here and this raises UnboundLocalError.
        orca.add_injectable(type_key, ls_type)
        orca.add_injectable(file_key, ls)
        return ls
def add_extra_columns_func(df):
    """Add or fill the columns expected on a frame of new buildings.

    ``df`` is modified in place and also returned.  Missing price, unit and
    deed-restriction columns are created as 0 (existing price / unit
    columns have their NaNs filled with 0), the redfin columns are always
    overwritten, and ``sqft_per_unit``, ``parcel_size``, ``year_built`` and
    ``building_type`` are derived when absent.

    Parameters
    ----------
    df : pandas.DataFrame

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in.
    """
    for col in ["residential_price", "non_residential_rent"]:
        if col not in df.columns:
            df[col] = 0
        else:
            df[col] = df[col].fillna(0)

    if "deed_restricted_units" not in df.columns:
        df["deed_restricted_units"] = 0
    else:
        # single-argument print(): identical output on Python 2 and 3
        print("Number of deed restricted units built = %d" %
              df.deed_restricted_units.sum())

    df["redfin_sale_year"] = 2012
    df["redfin_sale_price"] = np.nan

    if "residential_units" not in df.columns:
        df["residential_units"] = 0
    else:
        df["residential_units"] = df["residential_units"].fillna(0)

    # we're keeping sqft per unit in the buildings table but we need
    # to make sure we get a comparable column out of the feasibility
    # table which is what generates new buildings.  ave_unit_size is
    # the closest thing, even though it's calculated at the parcel level
    # rather than the building level
    if 'sqft_per_unit' not in df.columns:
        df['sqft_per_unit'] = df['ave_unit_size']

    if "parcel_size" not in df:
        # NOTE(review): the right-hand side is indexed by parcel_id, so the
        # assignment aligns on df's index -- confirm df is indexed by
        # parcel_id wherever this branch is reached.
        df["parcel_size"] = \
            orca.get_table("parcels").parcel_size.loc[df.parcel_id]

    if orca.is_injectable("year") and "year_built" not in df:
        df["year_built"] = orca.get_injectable("year")

    if orca.is_injectable("form_to_btype_func") and \
            "building_type" not in df:
        form_to_btype_func = orca.get_injectable("form_to_btype_func")
        df["building_type"] = df.apply(form_to_btype_func, axis=1)

    return df
def conditional_upzone(scenario, attr_name, upzone_name):
    """Return a zoning attribute, raised by the scenario's upzoning if any.

    For the ``"baseline"`` scenario the baseline column ``attr_name`` is
    returned untouched.  For any other scenario the result is the row-wise
    maximum of the baseline column and the non-null values of the
    scenario's ``upzone_name`` column.

    Table names are read from the ``scenario_inputs`` injectable
    (``scenario_inputs[<scenario>]["zoning_table_name"]``).
    """
    scenario_inputs = orca.get_injectable("scenario_inputs")

    baseline_table = orca.get_table(
        scenario_inputs["baseline"]["zoning_table_name"])
    baseline_attr = baseline_table[attr_name]
    if scenario == "baseline":
        return baseline_attr

    scenario_table = orca.get_table(
        scenario_inputs[scenario]["zoning_table_name"])
    upzone = scenario_table[upzone_name].dropna()

    side_by_side = pd.concat([baseline_attr, upzone], axis=1)
    return side_by_side.max(skipna=True, axis=1)
def form_to_btype_func(building):
    """Map one building record to a building type code.

    Residential buildings (``form`` is None or ``"residential"``) are
    classified by dwelling units per acre: below 16 -> ``"HS"``, below 32
    -> ``"HT"``, otherwise ``"HM"``.  Every other form takes the first
    entry configured for it in ``settings["form_to_btype"]``.

    ``building`` must expose ``form``, ``residential_units`` and
    ``parcel_size``.
    """
    settings = orca.get_injectable('settings')
    form = building.form

    # presumably parcel_size is in square feet (43560 sq ft per acre)
    acres = building.parcel_size / 43560.0
    dua = building.residential_units / acres

    if form is not None and form != "residential":
        return settings["form_to_btype"][form][0]

    # precise mapping of form to building type for residential
    if dua < 16:
        return "HS"
    if dua < 32:
        return "HT"
    return "HM"
def parcel_avg_price(use):
    """Return the zone-average price for ``use``, broadcast to parcels.

    Buildings whose ``building_type_id`` is listed for ``use`` in the
    ``use_btype`` injectable are averaged per ``zone_id``.  For
    ``'residential'`` the unit price is first divided by
    ``residential_sqft`` (price per square foot); for every other use the
    non-residential unit price is averaged directly.  The zone means are
    then reindexed onto the parcels through their ``zone_id``.
    """
    buildings = orca.merge_tables(
        'buildings',
        tables=['buildings', 'parcels'],
        columns=['unit_price_residential', 'building_type_id',
                 'residential_sqft', 'zone_id',
                 'unit_price_non_residential'])
    use_btype = orca.get_injectable('use_btype')

    # rows whose building type belongs to the requested use
    of_this_use = np.in1d(buildings.building_type_id, use_btype[use])

    if use == 'residential':
        # if use is residential translate unit price to price per sqft
        unit_price = buildings.unit_price_residential.loc[of_this_use]
        sqft = buildings.residential_sqft.loc[of_this_use]
        values = unit_price / sqft
    else:
        values = buildings.unit_price_non_residential.loc[of_this_use]

    price = values.groupby(buildings.zone_id).mean()
    return misc.reindex(price, orca.get_table('parcels').zone_id)
def supply_and_demand_multiplier_func(demand, supply):
    """Turn a demand/supply ratio into a price multiplier per submarket.

    Ratios above 1.0 are rescaled linearly so that the largest ratio maps
    to ``clip_change_high`` (read from
    ``settings["price_equilibration"]["kwargs"]``); ratios at or below 1.0
    are returned unchanged.

    Parameters
    ----------
    demand, supply : pandas.Series
        Aligned series; the ratio is ``demand / supply``.

    Returns
    -------
    (pandas.Series, bool)
        The multipliers, and whether every multiplier is <= 1.0.
    """
    s = demand / supply
    settings = orca.get_injectable('settings')

    over_demanded = s > 1.0
    # single-argument print(): identical output on Python 2 and 3
    print("Number of submarkets where demand exceeds supply: %d" %
          int(over_demanded.sum()))
    # print "Raw relationship of supply and demand\n", s.describe()

    supply_correction = settings["price_equilibration"]
    clip_change_high = supply_correction["kwargs"]["clip_change_high"]

    # Build the rescaled series from a *new* object.  The previous
    # ``t = s; t -= 1.0`` aliased ``s``; on pandas versions where ``-=``
    # operates in place that shifted ``s`` itself by -1 before the
    # ``s > 1.0`` test below and in the returned values.
    excess = s - 1.0
    t = excess / excess.max() * (clip_change_high - 1) + 1.0

    s.loc[over_demanded] = t.loc[over_demanded]
    return s, (s <= 1.0).all()
def parcel_is_allowed(form):
    """Return, per parcel, whether any building type of ``form`` is allowed.

    Zoning is stored per building type (``type<N>`` columns of the
    ``zoning_baseline`` table, allowed when > 0) while callers ask about
    forms; ``settings["form_to_btype"]`` lists the building types of each
    form.  The result is indexed like the ``parcels`` table, with parcels
    lacking a zoning row filled with False.
    """
    settings = orca.get_injectable('settings')
    form_to_btype = settings["form_to_btype"]

    per_type_flags = []
    for btype in form_to_btype[form]:
        zoning_column = orca.get_table('zoning_baseline')['type%d' % btype]
        per_type_flags.append(zoning_column > 0)

    # row-wise max over the per-type columns acts as a logical "any"
    any_type_allowed = pd.concat(per_type_flags, axis=1).max(axis=1)
    parcel_index = orca.get_table('parcels').index
    s = any_type_allowed.reindex(parcel_index).fillna(False)

    # disabled override, kept for reference:
    # if form == "residential":
    #     # allow multifam in pdas
    #     s[orca.get_table('parcels').pda.notnull()] = 1

    return s
def craigslist(store):
    """Load the ``rentals`` table from ``store`` and prepare it for use.

    Adds walk / drive network node ids looked up from each listing's
    longitude and latitude, fills missing bedrooms and neighborhoods, and
    duplicates the size and rent columns under the names shared by the
    estimation and simulation configuration.
    """
    rentals = store['rentals']
    net = orca.get_injectable('net')

    # nearest node on each network for every listing
    for column, network in (('node_id', 'walk'), ('tmnode_id', 'drive')):
        rentals[column] = net[network].get_node_ids(
            rentals['longitude'], rentals['latitude'])

    # fill nans -- missing bedrooms are mostly studio apts
    rentals['bedrooms'] = rentals.bedrooms.replace(np.nan, 1)
    rentals['neighborhood'] = rentals.neighborhood.replace(np.nan, '')

    # gotta do this to use the same yaml for estimation and simulation
    rentals['sqft_per_unit'] = rentals['sqft']
    rentals['price_per_sqft'] = rentals['rent_sqft']

    return rentals
def job_spaces():
    """Return the number of job spaces per building.

    Steps, in order:

    1. Join buildings to ``store['building_sqft_per_job']`` on
       ``luz_id`` / ``development_type_id``; floor ``sqft_per_emp`` at 40
       and take ``ceil(non_residential_sqft / sqft_per_emp)``.
    2. For buildings with ``luz_id < 17`` built before 2013, divide by 10
       (rounded up).
    3. Cap buildings built before 2013 at 2000.
    4. Buildings still without a value get
       ``ceil(non_residential_sqft / 200)``.
    5. Buildings built before 2013 are divided by 3.25 (rounded up).

    Returns
    -------
    pandas.Series
        Named ``job_spaces`` and indexed like the ``buildings`` table.
    """
    store = orca.get_injectable('store')
    b = orca.get_table('buildings').to_frame(
        ['luz_id', 'development_type_id', 'non_residential_sqft',
         'year_built'])
    bsqft_job = store['building_sqft_per_job']

    join_cols = ['luz_id', 'development_type_id']
    merged = pd.merge(b.reset_index(), bsqft_job,
                      left_on=join_cols, right_on=join_cols)
    merged = merged.set_index('building_id')

    # All updates below act on one local Series through ``.loc``.  The
    # previous ``frame.column[mask] = value`` chained assignments are not
    # guaranteed to write through to the frame.
    sqft_per_emp = merged['sqft_per_emp'].clip(lower=40)
    spaces = np.ceil(merged['non_residential_sqft'] / sqft_per_emp)
    # buildings without a matching sqft-per-job row become NaN here
    spaces = spaces.reindex(b.index)
    spaces.name = 'job_spaces'

    pre_2013 = b.year_built < 2013

    low_luz_pre_2013 = (b.luz_id < 17) & pre_2013
    spaces.loc[low_luz_pre_2013] = np.ceil(
        spaces.loc[low_luz_pre_2013] / 10.0)

    spaces.loc[(spaces > 2000) & pre_2013] = 2000

    missing = spaces.isnull()
    spaces.loc[missing] = np.ceil(
        b.non_residential_sqft.loc[missing] / 200.0)

    spaces.loc[pre_2013] = np.ceil(spaces.loc[pre_2013] / 3.25)

    return spaces
def parcel_is_allowed_func(form):
    """Return a boolean Series over parcels for the given form.

    Parcels are counted once per building type used by ``form`` (entries
    of ``config.forms[form]`` that are > 0) whenever they have a
    ``parcel_zoning`` row whose ``constraint_type`` equals the form's
    density type and whose ``generic_land_use_type_id`` equals the form's
    generic land use type.  A parcel is flagged True when its count equals
    the number of building types used.
    """
    config = orca.get_injectable("pf_config")
    form_distribution = config.forms[form]
    land_use_type = config.form_glut[form]
    zoning = orca.get_table('parcel_zoning')
    used_btypes = config.residential_uses.index[form_distribution > 0]
    used_residential_flags = config.residential_uses[used_btypes]
    density_unit = config.form_density_type[form]
    parcels = orca.get_table('parcels')

    counts = pd.Series(0, index=parcels.index)

    # NOTE(review): the loop variable is never used, so every pass selects
    # the same zoning rows -- confirm whether a per-building-type filter
    # was intended.
    for _ in used_residential_flags.index:
        zoning_rows = zoning.local
        constraint_types = zoning.index.get_level_values("constraint_type")
        land_use_types = zoning.index.get_level_values(
            "generic_land_use_type_id")
        matches = np.logical_and(constraint_types == density_unit,
                                 land_use_types == land_use_type)
        this_zoning = zoning_rows.loc[matches]
        parcel_ids = this_zoning.index.get_level_values("parcel_id")
        counts[parcel_ids] = counts[parcel_ids] + 1

    return (counts == used_residential_flags.index.size)
def full_run(store, omx_file, preload_3d_skims, chunk_size=0):
    """Run the model steps end to end and return the merged tour count.

    Registers the configuration directory, settings, ``omx_file``,
    ``store`` and ``set_random_seed`` as injectables, checks the household
    sample size and chunk size, runs each model step in order, and clears
    the orca cache before returning.

    Returns
    -------
    int
        Number of rows in the ``tours_merged`` table.
    """
    this_dir = os.path.dirname(__file__)
    configs_dir = os.path.join(this_dir, '..', '..', '..', 'example')

    orca.add_injectable("configs_dir", configs_dir)

    inject_settings(configs_dir,
                    households_sample_size=HOUSEHOLDS_SAMPLE_SIZE,
                    preload_3d_skims=preload_3d_skims,
                    chunk_size=chunk_size)

    for name, value in (("omx_file", omx_file),
                        ("store", store),
                        ("set_random_seed", set_random_seed)):
        orca.add_injectable(name, value)

    orca.clear_cache()

    # grab some of the tables
    for table_name in ("land_use", "households", "persons"):
        orca.get_table(table_name).to_frame().info()

    assert len(orca.get_table("households").index) == HOUSEHOLDS_SAMPLE_SIZE
    assert orca.get_injectable("chunk_size") == chunk_size

    # run the models in the expected order, one orca.run call per step
    for step in ("school_location_simulate",
                 "workplace_location_simulate",
                 "auto_ownership_simulate",
                 "cdap_simulate",
                 'mandatory_tour_frequency'):
        orca.run([step])
    orca.get_table("mandatory_tours").tour_type.value_counts()

    orca.run(['non_mandatory_tour_frequency'])
    orca.get_table("non_mandatory_tours").tour_type.value_counts()

    for step in ("destination_choice",
                 "mandatory_scheduling",
                 "non_mandatory_scheduling",
                 "patch_mandatory_tour_destination",
                 "tour_mode_choice_simulate",
                 "trip_mode_choice_simulate"):
        orca.run([step])

    tours_merged = orca.get_table("tours_merged").to_frame()
    tour_count = len(tours_merged.index)

    orca.clear_cache()

    return tour_count
def profit_to_prob_func(df):
    """Convert profit and return on cost into selection probabilities.

    Two distributions are built -- one from return on cost, one from
    profit -- and combined as ``1.0 * roc + factor * profit``, where
    ``factor`` is the ``profit_vs_return_on_cost_combination_factor``
    setting.  The combination is normalised to sum to 1.

    Side effect: a ``return_on_cost`` column is written to ``df``.
    """
    # the clip is because we still might build negative profit buildings
    # (when we're subsidizing them) and choice doesn't allow negative
    # probability options
    clipped_profit = df.max_profit.clip(1)

    settings = orca.get_injectable("settings")
    factor = float(settings["profit_vs_return_on_cost_combination_factor"])

    df['return_on_cost'] = clipped_profit / df.total_cost

    # now we're going to make two pdfs and weight them
    return_on_cost = df.return_on_cost
    roc_pdf = return_on_cost.values / return_on_cost.sum()
    profit_pdf = clipped_profit / clipped_profit.sum()

    combined = 1.0 * roc_pdf + factor * profit_pdf
    return combined / combined.sum()
def parcel_is_allowed(form):
    """Return, per parcel, whether any building type of ``form`` is allowed.

    A building type counts as allowed when its ``type<N>`` column is > 0
    in either the ``zoning_baseline`` table or the ``zoning_scenario``
    table.  ``settings["form_to_btype"]`` lists the building types of each
    form.  The result is a boolean Series indexed like the ``parcels``
    table; parcels without any zoning row are False.
    """
    settings = orca.get_injectable('settings')
    form_to_btype = settings["form_to_btype"]

    # we have zoning by building type but want to know if specific forms
    # are allowed; the scenario table can add building types on top of
    # the baseline
    flags = [
        orca.get_table(table_name)['type%d' % btype] > 0
        for table_name in ('zoning_baseline', 'zoning_scenario')
        for btype in form_to_btype[form]
    ]

    any_allowed = pd.concat(flags, axis=1).max(axis=1)
    parcel_index = orca.get_table('parcels').index
    s = any_allowed.reindex(parcel_index).fillna(False)

    return s.astype("bool")
def fill_nas_from_config(dfname, df):
    """Fill missing values in ``df`` as directed by ``fillna_config``.

    ``fillna_config[dfname]`` maps each column name to a
    ``(fill type, dtype)`` pair.  Supported fill types are ``"zero"``,
    ``"mode"`` (most frequent non-null value) and ``"median"``.  After
    filling, the column is cast to the configured dtype.  ``df`` is
    modified in place and also returned.

    Raises
    ------
    AssertionError
        If a column is configured with an unknown fill type.
    """
    df_cnt = len(df)
    column_rules = orca.get_injectable("fillna_config")[dfname]

    for fname in column_rules:
        filltyp, dtyp = column_rules[fname]
        fill_cnt = df_cnt - df[fname].count()

        if filltyp == "zero":
            val = 0
        elif filltyp == "mode":
            val = df[fname].dropna().value_counts().idxmax()
        elif filltyp == "median":
            val = df[fname].dropna().quantile()
        else:
            # Raised explicitly (same exception type as before): an
            # ``assert`` is stripped under ``python -O``, which would let
            # the previous column's value be reused silently.
            raise AssertionError("Fill type not found!")

        # single-argument print(): identical output on Python 2 and 3
        print("Filling column {} with value {} ({} values)".format(
            fname, val, fill_cnt))
        df[fname] = df[fname].fillna(val).astype(dtyp)

    return df