def test_full_run(store):
    """End-to-end smoke test of the example model stream on a sample store."""
    example_dir = os.path.join(
        os.path.dirname(__file__), '..', '..', '..', 'example')
    orca.add_injectable("configs_dir", example_dir)
    orca.add_injectable("store", store)
    orca.add_injectable("nonmotskm_matrix", np.ones((1454, 1454)))
    orca.add_injectable("set_random_seed", set_random_seed)

    # touch (and log) some of the core tables
    for table_name in ("land_use", "households", "persons"):
        orca.get_table(table_name).to_frame().info()

    assert len(orca.get_table("households").index) == HOUSEHOLDS_SAMPLE_SIZE

    # run the models in their expected order
    orca.run(["workplace_location_simulate"])
    orca.run(["auto_ownership_simulate"])
    orca.run(["cdap_simulate"])
    orca.run(['mandatory_tour_frequency'])
    orca.get_table("mandatory_tours").tour_type.value_counts()
    orca.run(['non_mandatory_tour_frequency'])
    orca.get_table("non_mandatory_tours").tour_type.value_counts()
    orca.run(["destination_choice"])
    orca.run(["mandatory_scheduling"])
    orca.run(["non_mandatory_scheduling"])
    orca.run(["mode_choice_simulate"])

    orca.clear_cache()
def test_rng_access():
    """Seeding the random-number generator is only legal before the first step."""
    here = os.path.dirname(__file__)
    configs_dir = os.path.join(here, 'configs')
    orca.add_injectable("configs_dir", configs_dir)
    orca.add_injectable("output_dir", os.path.join(here, 'output'))
    orca.add_injectable("data_dir", os.path.join(here, 'data'))

    inject_settings(configs_dir,
                    households_sample_size=HOUSEHOLDS_SAMPLE_SIZE)
    orca.clear_cache()

    # seeding is allowed while the pipeline is still closed ...
    pipeline.set_rn_generator_base_seed(0)

    pipeline.open_pipeline()

    # ... but must raise once the pipeline has been opened
    with pytest.raises(RuntimeError) as excinfo:
        pipeline.set_rn_generator_base_seed(0)
    assert "call set_rn_generator_base_seed before the first step" in str(
        excinfo.value)

    rng = pipeline.get_rn_generator()

    pipeline.close_pipeline()
    orca.clear_cache()
def test_full_run2():
    """Full synthesis run using the 'configs2'/'data2' setup."""
    here = os.path.dirname(__file__)
    orca.add_injectable("configs_dir", os.path.join(here, 'configs2'))
    orca.add_injectable("data_dir", os.path.join(here, 'data2'))
    output_dir = os.path.join(here, 'output')
    orca.add_injectable("output_dir", output_dir)

    orca.clear_cache()
    tracing.config_logger()

    steps = [
        'input_pre_processor',
        'setup_data_structures',
        'initial_seed_balancing',
        'meta_control_factoring',
        'final_seed_balancing',
        'integerize_final_seed_weights',
        'sub_balancing.geography = DISTRICT',
        'sub_balancing.geography = TRACT',
        'sub_balancing.geography=TAZ',
        'expand_households',
        'summarize',
        'write_results'
    ]
    pipeline.run(models=steps, resume_after=None)

    assert isinstance(pipeline.get_table('expanded_household_ids'),
                      pd.DataFrame)
    assert os.path.exists(os.path.join(output_dir, 'summary_DISTRICT.csv'))

    # tables will no longer be available after pipeline is closed
    pipeline.close_pipeline()
    orca.clear_cache()
def full_run(preload_3d_skims, chunk_size=0,
             households_sample_size=HOUSEHOLDS_SAMPLE_SIZE,
             trace_hh_id=None, trace_od=None, check_for_variability=None):
    """Run the full example model stream; return the merged-tour count."""
    here = os.path.dirname(__file__)
    configs_dir = os.path.join(here, '..', '..', '..', 'example', 'configs')
    orca.add_injectable("configs_dir", configs_dir)
    orca.add_injectable("data_dir", os.path.join(here, 'data'))
    orca.add_injectable("output_dir", os.path.join(here, 'output'))

    inject_settings(configs_dir,
                    households_sample_size=households_sample_size,
                    preload_3d_skims=preload_3d_skims,
                    chunk_size=chunk_size,
                    trace_hh_id=trace_hh_id,
                    trace_od=trace_od,
                    check_for_variability=check_for_variability)

    orca.add_injectable("set_random_seed", set_random_seed)
    orca.clear_cache()
    tracing.config_logger()

    # touch (and log) some of the core tables
    for table_name in ("land_use", "households", "persons"):
        orca.get_table(table_name).to_frame().info()

    assert len(orca.get_table("households").index) == HOUSEHOLDS_SAMPLE_SIZE
    assert orca.get_injectable("chunk_size") == chunk_size

    # run the models in their expected order
    orca.run(["compute_accessibility"])
    orca.run(["school_location_simulate"])
    orca.run(["workplace_location_simulate"])
    orca.run(["auto_ownership_simulate"])
    orca.run(["cdap_simulate"])
    orca.run(['mandatory_tour_frequency'])
    orca.get_table("mandatory_tours").tour_type.value_counts()
    orca.run(['non_mandatory_tour_frequency'])
    orca.get_table("non_mandatory_tours").tour_type.value_counts()
    orca.run(["destination_choice"])
    orca.run(["mandatory_scheduling"])
    orca.run(["non_mandatory_scheduling"])
    orca.run(["patch_mandatory_tour_destination"])
    orca.run(["tour_mode_choice_simulate"])
    orca.run(["trip_mode_choice_simulate"])

    tour_count = len(orca.get_table("tours_merged").to_frame().index)

    orca.clear_cache()
    return tour_count
def wplcm_simulate(persons, households, jobs):
    """Workplace location choice: assign persons to located jobs.

    Jobs without a valid building_id cannot be chosen, so the model runs
    against a filtered 'located_jobs' table registered with orca.

    Returns the result of utils.lcm_simulate (previously silently
    discarded), matching the convention of the other *_simulate steps
    in this file (e.g. hlcm_simulate).
    """
    # can only send in jobs that have a valid building_id,
    # so remove unlocated jobs for now
    jobs_df = jobs.to_frame()
    jobs_df = jobs_df[jobs_df.building_id > 0]
    jobs_df.index.name = 'job_id'
    orca.add_table('located_jobs', jobs_df)
    located_jobs = orca.get_table('located_jobs')

    res = utils.lcm_simulate("wplcmcoef.yaml", persons, located_jobs,
                             None, "job_id", "number_of_jobs",
                             "vacant_jobs", cast=True)
    orca.clear_cache()
    return res
def compute_indicators(settings, iter_var):
    """Register each configured indicator/dataset combination as an orca table."""
    # loop over indicators and datasets from settings and store each one
    for indicator, spec in settings['indicators'].iteritems():
        for dataset in spec['dataset']:
            table_name = '%s_%s_%s' % (dataset, indicator, str(iter_var))
            series = orca.get_table(dataset)[indicator]
            orca.add_table(table_name, series)
            # NOTE(review): ind_table_list is assumed to be a module-level
            # accumulator defined elsewhere in this file — confirm
            ind_table_list.append(table_name)
    orca.clear_cache()
def households_transition(households, household_controls, year, settings, persons):
    """Add/remove households (and linked persons) to match controls for `year`,
    then flag in-migrant households and reset job linkage for new and
    non-working persons.

    Returns the result of utils.full_transition.
    """
    # sizes/indexes before the transition, used to detect adds and removals
    orig_size_hh = households.local.shape[0]
    orig_size_pers = persons.local.shape[0]
    orig_pers_index = persons.index
    orig_hh_index = households.index
    res = utils.full_transition(households,
                                household_controls,
                                year,
                                settings['households_transition'],
                                "building_id",
                                linked_tables={"persons": (persons.local,
                                                           'household_id')})
    print "Net change: %s households" % (orca.get_table("households").
                                         local.shape[0] - orig_size_hh)
    print "Net change: %s persons" % (orca.get_table("persons").
                                      local.shape[0] - orig_size_pers)
    # changes to households/persons table are not reflected in local scope
    # need to reset vars to get changes.
    households = orca.get_table('households')
    persons = orca.get_table("persons")
    # need to make some updates to the persons & households table
    # households not present before the transition are in-migrants
    households.update_col_from_series(
        "is_inmigrant",
        pd.Series(np.where(~households.index.isin(orig_hh_index), 1, 0),
                  index=households.index),
        cast=True)
    # new workers dont have jobs yet, set job_id to -1
    persons.update_col_from_series(
        "job_id",
        pd.Series(np.where(~persons.index.isin(orig_pers_index), -1,
                           persons.job_id),
                  index=persons.index),
        cast=True)
    # dont know their work at home status yet, set to 0:
    persons.update_col_from_series(
        "work_at_home",
        pd.Series(np.where(~persons.index.isin(orig_pers_index), 0,
                           persons.work_at_home),
                  index=persons.index),
        cast=True)
    # set non-worker job_id to -2
    persons.update_col_from_series(
        "job_id",
        pd.Series(np.where(persons.employment_status > 0, persons.job_id, -2),
                  index=persons.index),
        cast=True)
    orca.clear_cache()
    return res
def full_run(resume_after=None, chunk_size=0,
             households_sample_size=HOUSEHOLDS_SAMPLE_SIZE,
             trace_hh_id=None, trace_od=None, check_for_variability=None):
    """Run the fixed model list through the pipeline; return the tour count."""
    here = os.path.dirname(__file__)
    configs_dir = os.path.join(here, '..', '..', '..', 'example', 'configs')
    orca.add_injectable("configs_dir", configs_dir)
    orca.add_injectable("data_dir", os.path.join(here, 'data'))
    orca.add_injectable("output_dir", os.path.join(here, 'output'))

    inject_settings(configs_dir,
                    households_sample_size=households_sample_size,
                    chunk_size=chunk_size,
                    trace_hh_id=trace_hh_id,
                    trace_od=trace_od,
                    check_for_variability=check_for_variability)

    orca.clear_cache()
    tracing.config_logger()

    # assert orca.get_injectable("chunk_size") == chunk_size

    models = [
        'compute_accessibility',
        'school_location_sample',
        'school_location_logsums',
        'school_location_simulate',
        'workplace_location_sample',
        'workplace_location_logsums',
        'workplace_location_simulate',
        'auto_ownership_simulate',
        'cdap_simulate',
        'mandatory_tour_frequency',
        'mandatory_scheduling',
        'non_mandatory_tour_frequency',
        'destination_choice',
        'non_mandatory_scheduling',
        'tour_mode_choice_simulate',
        'create_simple_trips',
        'trip_mode_choice_simulate'
    ]
    pipeline.run(models=models, resume_after=resume_after)

    tour_count = len(pipeline.get_table('tours').index)

    pipeline.close()
    orca.clear_cache()
    return tour_count
def orca_year_dataset(hdf, year): if str(year) == '2015': year = 'base' orca.add_injectable("jobs_large_area_lookup", []) orca.add_injectable("households_large_area_lookup", []) orca.add_injectable("year", int(year if str(year) != 'base' else 2015)) for tbl in ['households', 'persons', 'jobs', 'buildings', 'parcels', 'dropped_buildings']: name = str(year) + '/' + tbl if name in hdf: df = hdf[name] else: stub_name = str(2016) + '/' + tbl print "No table named " + name + ". Using the structuer from " + stub_name + "." df = hdf[stub_name].iloc[0:0] orca.add_table(tbl, df) orca.clear_cache()
def test_misc():
    """Exercise the miscellaneous injectables and their error handling."""
    orca.clear_cache()

    # with no directories configured, each dir injectable must fail
    for name in ("configs_dir", "data_dir", "output_dir"):
        with pytest.raises(RuntimeError) as excinfo:
            orca.get_injectable(name)
        assert "directory does not exist" in str(excinfo.value)

    configs_dir = os.path.join(os.path.dirname(__file__), 'configs_test_misc')
    orca.add_injectable("configs_dir", configs_dir)

    settings = orca.get_injectable("settings")
    assert isinstance(settings, dict)

    assert orca.get_injectable("trace_person_ids") == []
    assert orca.get_injectable("trace_tour_ids") == []

    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    orca.add_injectable("data_dir", data_dir)

    # no store file name configured yet
    with pytest.raises(RuntimeError) as excinfo:
        orca.get_injectable("store")
    assert "store file name not specified in settings" in str(excinfo.value)

    # a configured store file that does not exist on disk
    settings = {'store': 'bogus.h5'}
    orca.add_injectable("settings", settings)
    with pytest.raises(RuntimeError) as excinfo:
        orca.get_injectable("store")
    assert "store file not found" in str(excinfo.value)

    # these should be None until overridden
    assert orca.get_injectable("hh_index_name") is None
    assert orca.get_injectable("persons_index_name") is None

    # default values if not specified in settings
    assert orca.get_injectable("hh_chunk_size") == 0
    assert orca.get_injectable("chunk_size") == 0
    assert orca.get_injectable("preload_3d_skims") is False
def full_run(store, omx_file, preload_3d_skims, chunk_size=0):
    """Run the example model stream end to end; return the merged-tour count."""
    configs_dir = os.path.join(
        os.path.dirname(__file__), '..', '..', '..', 'example')
    orca.add_injectable("configs_dir", configs_dir)

    inject_settings(configs_dir,
                    households_sample_size=HOUSEHOLDS_SAMPLE_SIZE,
                    preload_3d_skims=preload_3d_skims,
                    chunk_size=chunk_size)

    orca.add_injectable("omx_file", omx_file)
    orca.add_injectable("store", store)
    orca.add_injectable("set_random_seed", set_random_seed)
    orca.clear_cache()

    # touch (and log) some of the core tables
    for table_name in ("land_use", "households", "persons"):
        orca.get_table(table_name).to_frame().info()

    assert len(orca.get_table("households").index) == HOUSEHOLDS_SAMPLE_SIZE
    assert orca.get_injectable("chunk_size") == chunk_size

    # run the models in their expected order
    orca.run(["school_location_simulate"])
    orca.run(["workplace_location_simulate"])
    orca.run(["auto_ownership_simulate"])
    orca.run(["cdap_simulate"])
    orca.run(['mandatory_tour_frequency'])
    orca.get_table("mandatory_tours").tour_type.value_counts()
    orca.run(['non_mandatory_tour_frequency'])
    orca.get_table("non_mandatory_tours").tour_type.value_counts()
    orca.run(["destination_choice"])
    orca.run(["mandatory_scheduling"])
    orca.run(["non_mandatory_scheduling"])
    orca.run(["patch_mandatory_tour_destination"])
    orca.run(["tour_mode_choice_simulate"])
    orca.run(["trip_mode_choice_simulate"])

    tour_count = len(orca.get_table("tours_merged").to_frame().index)

    orca.clear_cache()
    return tour_count
def test_full_run1():
    """Full synthesis run; checks expanded household counts and output files."""
    here = os.path.dirname(__file__)
    orca.add_injectable("configs_dir", os.path.join(here, 'configs'))
    orca.add_injectable("data_dir", os.path.join(here, 'data'))
    output_dir = os.path.join(here, 'output')
    orca.add_injectable("output_dir", output_dir)

    orca.clear_cache()
    tracing.config_logger()

    steps = [
        'input_pre_processor',
        'setup_data_structures',
        'initial_seed_balancing',
        'meta_control_factoring',
        'final_seed_balancing',
        'integerize_final_seed_weights',
        'sub_balancing.geography = TRACT',
        'sub_balancing.geography=TAZ',
        'expand_households',
        'summarize',
        'write_tables',
        'write_synthetic_population',
    ]
    pipeline.run(models=steps, resume_after=None)

    expanded_household_ids = pipeline.get_table('expanded_household_ids')
    assert isinstance(expanded_household_ids, pd.DataFrame)

    taz_hh_counts = expanded_household_ids.groupby('TAZ').size()
    assert len(taz_hh_counts) == TAZ_COUNT
    assert taz_hh_counts.loc[100] == TAZ_100_HH_COUNT

    # output_tables action: skip
    assert not os.path.exists(os.path.join(output_dir, 'households.csv'))
    assert os.path.exists(os.path.join(output_dir, 'summary_DISTRICT_1.csv'))

    # tables will no longer be available after pipeline is closed
    pipeline.close_pipeline()
    orca.clear_cache()
def test_mini_run(random_seed): configs_dir = os.path.join(os.path.dirname(__file__), 'configs') orca.add_injectable("configs_dir", configs_dir) output_dir = os.path.join(os.path.dirname(__file__), 'output') orca.add_injectable("output_dir", output_dir) data_dir = os.path.join(os.path.dirname(__file__), 'data') orca.add_injectable("data_dir", data_dir) inject_settings(configs_dir, households_sample_size=HOUSEHOLDS_SAMPLE_SIZE) orca.add_injectable("set_random_seed", set_random_seed) orca.clear_cache() assert len(orca.get_table("households").index) == HOUSEHOLDS_SAMPLE_SIZE # run the models in the expected order orca.run(["compute_accessibility"]) orca.run(["workplace_location_simulate"]) orca.run(["auto_ownership_simulate"]) # this is a regression test so that we know if these numbers change auto_choice = orca.get_table('households').get_column('auto_ownership') hh_ids = [2124015, 961042, 1583271] choices = [1, 1, 1] print "auto_choice\n", auto_choice.head(3) pdt.assert_series_equal( auto_choice[hh_ids], pd.Series(choices, index=pd.Index(hh_ids, name="HHID"))) orca.run(["cdap_simulate"]) orca.run(['mandatory_tour_frequency']) mtf_choice = orca.get_table('persons').get_column( 'mandatory_tour_frequency') per_ids = [326914, 172781, 298898] choices = ['school1', 'work_and_school', 'work2'] print "mtf_choice\n", mtf_choice.head(20) pdt.assert_series_equal( mtf_choice[per_ids], pd.Series(choices, index=pd.Index(per_ids, name='PERID'))) orca.clear_cache()
def setup():
    """Reset skim injectables and point orca at the test directories."""
    # drop any skims left over by a previous test module
    orca.orca._INJECTABLES.pop('skim_dict', None)
    orca.orca._INJECTABLES.pop('skim_stack', None)

    here = os.path.dirname(__file__)
    orca.add_injectable("configs_dir", os.path.join(here, 'configs'))
    orca.add_injectable("output_dir", os.path.join(here, 'output'))
    orca.add_injectable("data_dir", os.path.join(here, 'data'))

    orca.clear_cache()
    tracing.config_logger()
def test_pipeline_run():
    """Run a one-step pipeline, then exercise run_model and error paths."""
    # drop any skims left over by a previous test module
    orca.orca._INJECTABLES.pop('skim_dict', None)
    orca.orca._INJECTABLES.pop('skim_stack', None)

    here = os.path.dirname(__file__)
    orca.add_injectable("configs_dir", os.path.join(here, 'configs'))
    orca.add_injectable("output_dir", os.path.join(here, 'output'))
    orca.add_injectable("data_dir", os.path.join(here, 'data'))

    orca.clear_cache()
    tracing.config_logger()

    pipeline.run(models=['step1'], resume_after=None)

    table1 = pipeline.get_table("table1").column1

    # test that model arg is passed to step
    pipeline.run_model('step2.table_name=table2')
    table2 = pipeline.get_table("table2").column1

    # try to get a non-existant table
    with pytest.raises(RuntimeError) as excinfo:
        pipeline.get_table("bogus")
    assert "not in checkpointed tables" in str(excinfo.value)

    # try to get an existing table from a non-existant checkpoint
    with pytest.raises(RuntimeError) as excinfo:
        pipeline.get_table("table1", checkpoint_name="bogus")
    assert "not in checkpoints" in str(excinfo.value)

    pipeline.close_pipeline()
    orca.clear_cache()
    close_handlers()
def test_mini_run(store, omx_file, random_seed): configs_dir = os.path.join(os.path.dirname(__file__)) orca.add_injectable("configs_dir", configs_dir) inject_settings(configs_dir, households_sample_size=HOUSEHOLDS_SAMPLE_SIZE) orca.add_injectable("omx_file", omx_file) orca.add_injectable("store", store) orca.add_injectable("set_random_seed", set_random_seed) orca.clear_cache() assert len(orca.get_table("households").index) == HOUSEHOLDS_SAMPLE_SIZE # run the models in the expected order orca.run(["workplace_location_simulate"]) orca.run(["auto_ownership_simulate"]) # this is a regression test so that we know if these numbers change auto_choice = orca.get_table('households').get_column('auto_ownership') hh_ids = [2124015, 961042, 1583271] choices = [1, 2, 2] print "auto_choice\n", auto_choice.head(3) pdt.assert_series_equal( auto_choice[hh_ids], pd.Series(choices, index=pd.Index(hh_ids, name="HHID"))) orca.run(["cdap_simulate"]) orca.run(['mandatory_tour_frequency']) mtf_choice = orca.get_table('persons').get_column('mandatory_tour_frequency') per_ids = [172616, 172781, 172782] choices = ['work1', 'school1', 'work_and_school'] print "mtf_choice\n", mtf_choice.head(20) pdt.assert_series_equal( mtf_choice[per_ids], pd.Series(choices, index=pd.Index(per_ids, name='PERID'))) orca.clear_cache()
def test_full_run1():
    """Run the steps listed in the settings-file run_list end to end."""
    here = os.path.dirname(__file__)
    orca.add_injectable("configs_dir", os.path.join(here, 'configs'))

    # use the shared example data/scenarios rather than local test copies
    orca.add_injectable(
        "data_dir", os.path.join(here, '..', '..', 'example', 'data'))
    orca.add_injectable(
        "scenarios_dir",
        os.path.join(here, '..', '..', 'example', 'scenarios'))
    orca.add_injectable("output_dir", os.path.join(here, 'output'))

    orca.clear_cache()
    tracing.config_logger()

    # run list from settings file is dict with list of 'steps'
    # and optional 'resume_after'
    run_list = setting('run_list')
    assert 'steps' in run_list, "Did not find steps in run_list"

    # list of steps and possible resume_after in run_list
    steps = run_list.get('steps')

    pipeline.run(models=steps, resume_after=None)

    # tables will no longer be available after pipeline is closed
    pipeline.close_pipeline()
    orca.clear_cache()
def parcels_zoning_by_scenario(parcels, parcels_zoning_calculations, zoning_baseline):
    """Tabulate baseline and per-scenario zoning capacity for every parcel."""
    df = pd.DataFrame(index=parcels.index)

    # baseline zoning attributes
    df["baseline_dua"] = zoning_baseline.max_dua
    df["baseline_far"] = zoning_baseline.max_far
    df["baseline_height"] = zoning_baseline.max_height
    df["zoning_name"] = zoning_baseline["name"]
    df["zoning_source"] = zoning_baseline["tablename"]

    # recompute the zoning calculations table under each scenario 0-3
    for scenario in (str(i) for i in range(4)):
        orca.clear_cache()
        orca.add_injectable("scenario", scenario)
        calcs = orca.get_table("parcels_zoning_calculations")
        df["max_dua_%s" % scenario] = calcs.effective_max_dua
        df["max_far_%s" % scenario] = calcs.effective_max_far
        df["du_underbuild_%s" % scenario] = calcs.zoned_du_underbuild
        df["non_res_cat_%s" % scenario] = calcs.non_res_categories

    return df
def full_run(resume_after=None, chunk_size=0,
             households_sample_size=HOUSEHOLDS_SAMPLE_SIZE,
             trace_hh_id=None, trace_od=None, check_for_variability=None):
    """Run the model list from settings through the pipeline; return tour count."""
    here = os.path.dirname(__file__)
    configs_dir = os.path.join(here, '..', '..', '..', 'example', 'configs')
    orca.add_injectable("configs_dir", configs_dir)
    orca.add_injectable("data_dir", os.path.join(here, 'data'))
    orca.add_injectable("output_dir", os.path.join(here, 'output'))

    settings = inject_settings(configs_dir,
                               households_sample_size=households_sample_size,
                               chunk_size=chunk_size,
                               trace_hh_id=trace_hh_id,
                               trace_od=trace_od,
                               check_for_variability=check_for_variability)

    orca.clear_cache()
    tracing.config_logger()

    # the model list comes from the injected settings
    MODELS = settings['models']
    pipeline.run(models=MODELS, resume_after=resume_after)

    tour_count = len(pipeline.get_table('tours').index)

    pipeline.close_pipeline()
    orca.clear_cache()
    return tour_count
def hlcm_simulate(households, buildings, persons, settings):
    """Household location choice: place unplaced households into buildings
    with vacant residential units.

    Optionally (settings['remove_jobs_from_workers']) disconnects relocated
    workers from their jobs when the move lengthened their network commute.
    Returns the result of utils.lcm_simulate.
    """
    # movers: households without a building; relocated: movers that are
    # not new in-migrants
    movers = households.to_frame()
    movers = movers[movers.building_id == -1]
    relocated = movers[movers.is_inmigrant < 1]
    res = utils.lcm_simulate("hlcmcoef.yaml", households, buildings,
                             None,
                             "building_id", "residential_units",
                             "vacant_residential_units",
                             cast=True)
    orca.clear_cache()
    # Determine which relocated persons get disconnected from their job
    if settings.get('remove_jobs_from_workers', False):
        persons_df = persons.to_frame()
        relocated_workers = persons_df.loc[(persons_df.employment_status > 0) &
                                           (persons_df.household_id.isin
                                            (relocated.index))]
        relocated_workers['new_dist_to_work'] = \
            network_distance_from_home_to_work(
                relocated_workers.workplace_zone_id,
                relocated_workers.household_zone_id)
        relocated_workers['prev_dist_to_work'] = \
            network_distance_from_home_to_work(
                relocated_workers.workplace_zone_id,
                relocated_workers.prev_household_zone_id)
        # if new distance to work is greater than old, disconnect person
        # from job
        relocated_workers.job_id = np.where(relocated_workers.new_dist_to_work >
                                            relocated_workers.prev_dist_to_work,
                                            -1,
                                            relocated_workers.job_id)
        persons.update_col_from_series("job_id",
                                       relocated_workers.job_id, cast=True)
    # Update is_inmigrant- I think this it is ok to do this now,
    # but perhaps this should be part of a clean up step
    # at the end of the sim year.
    households.update_col_from_series("is_inmigrant",
                                      pd.Series(0, index=households.index),
                                      cast=True)
    orca.clear_cache()
    return res
def test_full_run2_repop_replace():
    """Repop run in 'replace' mode, resumed after the base run's summarize."""
    here = os.path.dirname(__file__)
    orca.add_injectable("configs_dir", os.path.join(here, 'configs'))
    orca.add_injectable("data_dir", os.path.join(here, 'data'))
    orca.add_injectable("output_dir", os.path.join(here, 'output'))

    orca.clear_cache()
    tracing.config_logger()

    steps = [
        'input_pre_processor.table_list=repop_input_table_list',
        'repop_setup_data_structures',
        'initial_seed_balancing.final=true',
        'integerize_final_seed_weights.repop',
        'repop_balancing',
        'expand_households.repop;replace',
        'write_synthetic_population.repop',
        'write_tables.repop',
    ]
    pipeline.run(models=steps, resume_after='summarize')

    expanded_household_ids = pipeline.get_table('expanded_household_ids')
    assert isinstance(expanded_household_ids, pd.DataFrame)

    taz_hh_counts = expanded_household_ids.groupby('TAZ').size()
    assert len(taz_hh_counts) == TAZ_COUNT
    assert taz_hh_counts.loc[100] == TAZ_100_HH_REPOP_COUNT

    # tables will no longer be available after pipeline is closed
    pipeline.close_pipeline()
    orca.clear_cache()
def wahcm_simulate(persons, jobs, households, zones):
    """Work-at-home choice: fill vacant home-based jobs with sampled workers.

    Samples as many persons as there are vacant home-based jobs (weighted by
    the work-at-home model probabilities), assigns each sampled person a job,
    and updates both the persons and jobs tables in place.
    """
    # per-person work-at-home probabilities (second element of the result)
    work_at_home_prob = work_at_home_simulate("wahcmcoeff.yaml",
                                              persons,
                                              [households, zones])[1]
    jobs_df = jobs.to_frame()
    home_based_jobs = jobs_df[(jobs_df.home_based_status == 1) &
                              (jobs_df.vacant_jobs > 0)]
    # sample home workers using the exact number of vacant home based jobs,
    # weighted by the probablities from the wachm:
    home_workers = work_at_home_prob.sample(len(home_based_jobs),
                                            weights=work_at_home_prob.values)
    # update job_id on the persons table
    # should not matter which person gets which home-based job
    combine_indexes = pd.DataFrame([home_workers.index,
                                    home_based_jobs.index]).transpose()
    combine_indexes.columns = ['person_id', 'job_id']
    combine_indexes.set_index('person_id', inplace=True)
    combine_indexes['work_at_home'] = 1
    # updates job_id, work_at_home on the persons table where index
    # (person_id) matches in combine_indexes
    persons.update_col_from_series("job_id",
                                   combine_indexes.job_id, cast=True)
    persons.update_col_from_series('work_at_home',
                                   combine_indexes.work_at_home, cast=True)
    print "%s additional people assigned to work at home." % len(combine_indexes)
    # building_id on jobs table for home based workers should be the
    # household building_id of the person assigned the job
    # get building_id:
    combine_indexes['building_id'] = 0
    combine_indexes.building_id.update(persons.household_building_id)
    # update building_id & vacant_jobs on jobs table:
    # re-key combine_indexes by job_id so it aligns with the jobs table
    combine_indexes.reset_index(level=None, inplace=True)
    combine_indexes.set_index('job_id', inplace=True)
    combine_indexes['vacant_jobs'] = 0
    # update jobs table- building_id of at home workers and 0 for vacant_jobs
    jobs.update_col_from_series('building_id',
                                combine_indexes.building_id, cast=True)
    jobs.update_col_from_series('vacant_jobs',
                                combine_indexes.vacant_jobs, cast=True)
    print "Number of unplaced home-based jobs: %s" % len(
        jobs.local[(jobs.local.home_based_status == 1) &
                   (jobs.local.vacant_jobs > 0) &
                   (jobs.building_id > 0)])
    orca.clear_cache()
def test_mini_run(store, random_seed): orca.add_injectable("configs_dir", os.path.join(os.path.dirname(__file__))) orca.add_injectable("store", store) orca.add_injectable("nonmotskm_matrix", np.ones((1454, 1454))) orca.add_injectable("set_random_seed", set_random_seed) assert len(orca.get_table("households").index) == HOUSEHOLDS_SAMPLE_SIZE # run the models in the expected order orca.run(["workplace_location_simulate"]) orca.run(["auto_ownership_simulate"]) # this is a regression test so that we know if these numbers change auto_choice = orca.get_table('households').get_column('auto_ownership') print auto_choice[[2306822, 652072, 651907]] pdt.assert_series_equal( auto_choice[[2306822, 652072, 651907]], pd.Series( [2, 1, 1], index=pd.Index([2306822, 652072, 651907], name='HHID'))) orca.run(["cdap_simulate"]) orca.run(['mandatory_tour_frequency']) mtf_choice = orca.get_table('persons').get_column( 'mandatory_tour_frequency') pdt.assert_series_equal( mtf_choice[[146642, 642922, 642921]], pd.Series( ['school1', 'work1', 'school2'], index=pd.Index([146642, 642922, 642921], name='PERID'))) orca.clear_cache()
def orca_year_dataset(hdf, year): orca.clear_cache() if str(year) == '2015': year = 'base' orca.add_injectable("year", int(year if str(year) != 'base' else 2015)) for tbl in [ 'parcels', 'buildings', 'jobs', 'households', 'persons', 'group_quarters', 'base_job_space', 'dropped_buildings' ]: name = str(year) + '/' + tbl if name in hdf: df = hdf[name] else: stub_name = str(2020) + '/' + tbl print "No table named " + name + ". Using the structuer from " + stub_name + "." df = hdf[stub_name].iloc[0:0] if tbl in {'households', 'jobs'} and 'large_area_id' not in df.columns: print 'impute large_area_id' df['large_area_id'] = misc.reindex( orca.get_table('buildings').large_area_id, df.building_id) orca.add_table(tbl, df.fillna(0))
def clear_cache():
    # Thin wrapper around orca's cache clearing.  Pass the result through
    # for consistency with the other clear_cache wrapper in this codebase
    # (backward compatible: existing callers ignore the return value).
    return orca.clear_cache()
def elcm_simulate(jobs, buildings, parcels, zones, gridcells):
    """Employment location choice: place jobs into buildings with vacant
    job spaces.

    Returns the result of utils.lcm_simulate (previously silently
    discarded), matching the convention of the other *_simulate steps
    in this file (e.g. hlcm_simulate).
    """
    res = utils.lcm_simulate("elcmcoef.yaml", jobs, buildings,
                             [parcels, zones, gridcells],
                             "building_id", "job_spaces",
                             "vacant_job_spaces", cast=True)
    orca.clear_cache()
    return res
def teardown_function(func):
    # pytest per-test teardown: reset orca state so tests stay independent
    orca.clear_cache()
    # restore any decorated tables a test may have overridden
    inject.reinject_decorated_tables()
def test_mini_pipeline_run2():
    """Restart the pipeline from a checkpoint and verify identical results."""
    # the important thing here is that we should get
    # exactly the same results as for test_mini_pipeline_run
    # when we restart pipeline
    configs_dir = os.path.join(os.path.dirname(__file__), 'configs')
    orca.add_injectable("configs_dir", configs_dir)
    output_dir = os.path.join(os.path.dirname(__file__), 'output')
    orca.add_injectable("output_dir", output_dir)
    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    orca.add_injectable("data_dir", data_dir)
    inject_settings(configs_dir,
                    households_sample_size=HOUSEHOLDS_SAMPLE_SIZE)
    orca.clear_cache()
    # should be able to get this BEFORE pipeline is opened
    checkpoints_df = pipeline.get_checkpoints()
    prev_checkpoint_count = len(checkpoints_df.index)
    # checkpoint count left behind by the earlier full run
    assert prev_checkpoint_count == 7
    # resume from the auto_ownership_simulate checkpoint
    pipeline.start_pipeline('auto_ownership_simulate')
    auto_choice = pipeline.get_table("households").auto_ownership
    # regression test: these are the 2nd-4th households in households table
    hh_ids = [26960, 857296, 93428]
    choices = [0, 1, 0]
    expected_auto_choice = pd.Series(choices,
                                     index=pd.Index(hh_ids, name="HHID"),
                                     name='auto_ownership')
    print "auto_choice\n", auto_choice.head(4)
    pdt.assert_series_equal(auto_choice[hh_ids], expected_auto_choice)
    # try to run a model already in pipeline
    with pytest.raises(RuntimeError) as excinfo:
        pipeline.run_model('auto_ownership_simulate')
    assert "run model 'auto_ownership_simulate' more than once" in str(
        excinfo.value)
    # and these new ones
    pipeline.run_model('cdap_simulate')
    pipeline.run_model('mandatory_tour_frequency')
    mtf_choice = pipeline.get_table("persons").mandatory_tour_frequency
    per_ids = [92363, 92681, 93428]
    choices = ['work1', 'school1', 'school2']
    expected_choice = pd.Series(choices,
                                index=pd.Index(per_ids, name='PERID'),
                                name='mandatory_tour_frequency')
    print "mtf_choice\n", mtf_choice.head(20)
    pdt.assert_series_equal(mtf_choice[per_ids], expected_choice)
    # should be able to get this before pipeline is closed
    # (from existing open store)
    assert orca.get_injectable('pipeline_store') is not None
    checkpoints_df = pipeline.get_checkpoints()
    assert len(checkpoints_df.index) == prev_checkpoint_count
    pipeline.close()
    # should also be able to get this after pipeline is closed
    # (open and close)
    assert orca.get_injectable('pipeline_store') is None
    checkpoints_df = pipeline.get_checkpoints()
    assert len(checkpoints_df.index) == prev_checkpoint_count
    orca.clear_cache()
def test_mini_pipeline_run(): configs_dir = os.path.join(os.path.dirname(__file__), 'configs') orca.add_injectable("configs_dir", configs_dir) output_dir = os.path.join(os.path.dirname(__file__), 'output') orca.add_injectable("output_dir", output_dir) data_dir = os.path.join(os.path.dirname(__file__), 'data') orca.add_injectable("data_dir", data_dir) inject_settings(configs_dir, households_sample_size=HOUSEHOLDS_SAMPLE_SIZE) orca.clear_cache() # assert len(orca.get_table("households").index) == HOUSEHOLDS_SAMPLE_SIZE _MODELS = [ 'compute_accessibility', 'school_location_simulate', 'workplace_location_simulate', 'auto_ownership_simulate' ] pipeline.run(models=_MODELS, resume_after=None) auto_choice = pipeline.get_table("households").auto_ownership # regression test: these are the first 3 households in households table hh_ids = [26960, 857296, 93428] choices = [0, 1, 0] expected_choice = pd.Series(choices, index=pd.Index(hh_ids, name="HHID"), name='auto_ownership') print "auto_choice\n", auto_choice.head(10) pdt.assert_series_equal(auto_choice[hh_ids], expected_choice) pipeline.run_model('cdap_simulate') pipeline.run_model('mandatory_tour_frequency') mtf_choice = pipeline.get_table("persons").mandatory_tour_frequency per_ids = [92363, 92681, 93428] choices = ['work1', 'school1', 'school2'] expected_choice = pd.Series(choices, index=pd.Index(per_ids, name='PERID'), name='mandatory_tour_frequency') print "mtf_choice\n", mtf_choice.head(20) pdt.assert_series_equal(mtf_choice[per_ids], expected_choice) # try to get a non-existant table with pytest.raises(RuntimeError) as excinfo: pipeline.get_table("bogus") assert "not in checkpointed tables" in str(excinfo.value) # try to get an existing table from a non-existant checkpoint with pytest.raises(RuntimeError) as excinfo: pipeline.get_table("households", checkpoint_name="bogus") assert "not in checkpoints" in str(excinfo.value) pipeline.close() orca.clear_cache()
def clear_cache():
    """Thin convenience wrapper: clear orca's cache and pass through its result."""
    result = orca.clear_cache()
    return result
def test_mini_pipeline_run2():
    """Reopen the pipeline checkpoint store written by test_mini_pipeline_run
    and verify that resuming after 'auto_ownership_simulate' reproduces
    exactly the same choices.

    Fix: the original used Python-2-only ``print`` statements, which are
    syntax errors under Python 3; converted to ``print(...)`` calls with
    %-formatting so the emitted text is unchanged on both interpreters.
    """
    # the important thing here is that we should get
    # exactly the same results as for test_mini_pipeline_run
    # when we restart pipeline

    configs_dir = os.path.join(os.path.dirname(__file__), 'configs')
    orca.add_injectable("configs_dir", configs_dir)

    output_dir = os.path.join(os.path.dirname(__file__), 'output')
    orca.add_injectable("output_dir", output_dir)

    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    orca.add_injectable("data_dir", data_dir)

    inject_settings(configs_dir, households_sample_size=HOUSEHOLDS_SAMPLE_SIZE)

    orca.clear_cache()

    # should be able to get this BEFORE pipeline is opened
    checkpoints_df = pipeline.get_checkpoints()
    prev_checkpoint_count = len(checkpoints_df.index)

    # print "checkpoints_df\n", checkpoints_df[['checkpoint_name']]
    assert prev_checkpoint_count == 11

    pipeline.open_pipeline('auto_ownership_simulate')

    auto_choice = pipeline.get_table("households").auto_ownership

    # regression test: these are the same as in test_mini_pipeline_run1
    hh_ids = [464138, 1918238, 2201602]
    choices = [0, 1, 2]
    expected_choice = pd.Series(choices, index=pd.Index(hh_ids, name="HHID"),
                                name='auto_ownership')

    print("auto_choice\n%s" % auto_choice.head(4))
    pdt.assert_series_equal(auto_choice[hh_ids], expected_choice)

    # try to run a model already in pipeline
    with pytest.raises(RuntimeError) as excinfo:
        pipeline.run_model('auto_ownership_simulate')
    assert "run model 'auto_ownership_simulate' more than once" in str(
        excinfo.value)

    # and these new ones
    pipeline.run_model('cdap_simulate')
    pipeline.run_model('mandatory_tour_frequency')

    mtf_choice = pipeline.get_table("persons").mandatory_tour_frequency

    per_ids = [24375, 92744, 172491]
    choices = ['school2', 'work_and_school', 'work1']
    expected_choice = pd.Series(choices, index=pd.Index(per_ids, name='PERID'),
                                name='mandatory_tour_frequency')

    print("mtf_choice\n%s" % mtf_choice.head(20))
    pdt.assert_series_equal(mtf_choice[per_ids], expected_choice)

    # should be able to get this before pipeline is closed
    # (from existing open store)
    checkpoints_df = pipeline.get_checkpoints()
    assert len(checkpoints_df.index) == prev_checkpoint_count

    pipeline.close_pipeline()

    orca.clear_cache()
# NOTE(review): this redefines test_mini_pipeline_run — the earlier definition
# of the same name in this file is shadowed, so pytest will collect and run
# only this version. If both variants are meant to run, one must be renamed.
def test_mini_pipeline_run():
    """Run the extended model sequence (initialize, accessibility, location
    sample/logsums/simulate, auto ownership) and regression-test auto
    ownership and mandatory tour frequency choices, plus pipeline error
    handling for bogus tables/checkpoints.

    Fix: the original used Python-2-only ``print`` statements, which are
    syntax errors under Python 3; converted to ``print(...)`` calls with
    %-formatting so the emitted text is unchanged on both interpreters.
    """
    configs_dir = os.path.join(os.path.dirname(__file__), 'configs')
    orca.add_injectable("configs_dir", configs_dir)

    output_dir = os.path.join(os.path.dirname(__file__), 'output')
    orca.add_injectable("output_dir", output_dir)

    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    orca.add_injectable("data_dir", data_dir)

    inject_settings(configs_dir, households_sample_size=HOUSEHOLDS_SAMPLE_SIZE)

    orca.clear_cache()

    tracing.config_logger()

    # assert len(orca.get_table("households").index) == HOUSEHOLDS_SAMPLE_SIZE

    _MODELS = [
        'initialize',
        'compute_accessibility',
        'school_location_sample',
        'school_location_logsums',
        'school_location_simulate',
        'workplace_location_sample',
        'workplace_location_logsums',
        'workplace_location_simulate',
        'auto_ownership_simulate'
    ]

    pipeline.run(models=_MODELS, resume_after=None)

    auto_choice = pipeline.get_table("households").auto_ownership

    # regression test: these are among the first 10 households in households table
    hh_ids = [464138, 1918238, 2201602]
    choices = [0, 1, 2]
    expected_choice = pd.Series(choices, index=pd.Index(hh_ids, name="HHID"),
                                name='auto_ownership')

    print("auto_choice\n%s" % auto_choice.head(10))
    pdt.assert_series_equal(auto_choice[hh_ids], expected_choice)

    pipeline.run_model('cdap_simulate')
    pipeline.run_model('mandatory_tour_frequency')

    mtf_choice = pipeline.get_table("persons").mandatory_tour_frequency

    # these choices are nonsensical as the test mandatory_tour_frequency spec
    # is very truncated
    per_ids = [24375, 92744, 172491]
    choices = ['school2', 'work_and_school', 'work1']
    expected_choice = pd.Series(choices, index=pd.Index(per_ids, name='PERID'),
                                name='mandatory_tour_frequency')

    print("mtf_choice\n%s" % mtf_choice.head(20))

    # expected output of the head(20) above, for reference:
    # mtf_choice
    # PERID
    # 23647                 NaN
    # 24203                 NaN
    # 24375             school2
    # 24687                 NaN
    # 24824                 NaN
    # 24975                 NaN
    # 25027                 NaN
    # 25117                 NaN
    # 25772                 NaN
    # 25871                 NaN
    # 26284                 NaN
    # 26863                 NaN
    # 27059                 NaN
    # 92233                 NaN
    # 92382             school1
    # 92744     work_and_school
    # 92823                 NaN
    # 93172             school2
    # 93774                 NaN
    # 172491              work1
    # Name: mandatory_tour_frequency, dtype: object

    pdt.assert_series_equal(mtf_choice[per_ids], expected_choice)

    # try to get a non-existant table
    with pytest.raises(RuntimeError) as excinfo:
        pipeline.get_table("bogus")
    assert "not in checkpointed tables" in str(excinfo.value)

    # try to get an existing table from a non-existant checkpoint
    with pytest.raises(RuntimeError) as excinfo:
        pipeline.get_table("households", checkpoint_name="bogus")
    assert "not in checkpoints" in str(excinfo.value)

    pipeline.close_pipeline()

    orca.clear_cache()

    close_handlers()