def test_pipeline_run():

    orca.orca._INJECTABLES.pop('skim_dict', None)
    orca.orca._INJECTABLES.pop('skim_stack', None)

    configs_dir = os.path.join(os.path.dirname(__file__), 'configs')
    orca.add_injectable("configs_dir", configs_dir)

    output_dir = os.path.join(os.path.dirname(__file__), 'output')
    orca.add_injectable("output_dir", output_dir)

    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    orca.add_injectable("data_dir", data_dir)

    orca.clear_cache()

    tracing.config_logger()

    _MODELS = [
        'step1',
    ]

    pipeline.run(models=_MODELS, resume_after=None)

    table1 = pipeline.get_table("table1").column1

    # test that model arg is passed to step
    pipeline.run_model('step2.table_name=table2')

    table2 = pipeline.get_table("table2").column1

    # try to get a non-existent table
    with pytest.raises(RuntimeError) as excinfo:
        pipeline.get_table("bogus")
    assert "not in checkpointed tables" in str(excinfo.value)

    # try to get an existing table from a non-existent checkpoint
    with pytest.raises(RuntimeError) as excinfo:
        pipeline.get_table("table1", checkpoint_name="bogus")
    assert "not in checkpoints" in str(excinfo.value)

    pipeline.close_pipeline()

    orca.clear_cache()

    close_handlers()
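
# close_handlers() is called throughout these tests but not defined in this
# section. A minimal sketch of what such a helper might do, assuming its job is
# to release the log-file handlers that tracing.config_logger() attached so the
# output dir can be reused between tests (hypothetical body; the real helper
# may differ):
def close_handlers():
    import logging
    for name in list(logging.Logger.manager.loggerDict):
        logger = logging.getLogger(name)
        for handler in logger.handlers[:]:
            handler.close()  # release any open log files in output_dir
            logger.removeHandler(handler)
        logger.propagate = True
        logger.setLevel(logging.NOTSET)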
def test_mini_pipeline_run():

    setup_dirs()

    inject_settings(households_sample_size=HOUSEHOLDS_SAMPLE_SIZE,
                    write_skim_cache=True)

    _MODELS = [
        'initialize_landuse',
        'compute_accessibility',
        'initialize_households',
        'school_location',
        'workplace_location',
        'auto_ownership_simulate'
    ]

    pipeline.run(models=_MODELS, resume_after=None)

    regress_mini_auto()

    pipeline.run_model('cdap_simulate')
    pipeline.run_model('mandatory_tour_frequency')

    regress_mini_mtf()
    regress_mini_location_choice_logsums()

    # try to get a non-existent table
    with pytest.raises(RuntimeError) as excinfo:
        pipeline.get_table("bogus")
    assert "never checkpointed" in str(excinfo.value)

    # try to get an existing table from a non-existent checkpoint
    with pytest.raises(RuntimeError) as excinfo:
        pipeline.get_table("households", checkpoint_name="bogus")
    assert "not in checkpoints" in str(excinfo.value)

    # should create optional workplace_location_sample table
    workplace_location_sample_df = pipeline.get_table("workplace_location_sample")
    assert 'mode_choice_logsum' in workplace_location_sample_df

    pipeline.close_pipeline()
    inject.clear_cache()
    close_handlers()
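
# setup_dirs() and inject_settings() are shared helpers not shown in this
# section. Sketches of plausible implementations, assuming the inject/tracing
# APIs used elsewhere here; note the variant below passes configs_dir
# explicitly, so the signatures evidently evolved (the real helpers may differ):

def setup_dirs(configs_dir=None):
    # register the test configs/output/data dirs as injectables
    if configs_dir is None:
        configs_dir = os.path.join(os.path.dirname(__file__), 'configs')
    inject.add_injectable("configs_dir", configs_dir)
    output_dir = os.path.join(os.path.dirname(__file__), 'output')
    inject.add_injectable("output_dir", output_dir)
    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    inject.add_injectable("data_dir", data_dir)
    inject.clear_cache()
    tracing.config_logger()


def inject_settings(configs_dir=None, **kwargs):
    # load settings.yaml and override selected values
    # (e.g. households_sample_size) before the pipeline runs
    import yaml
    if configs_dir is None:
        configs_dir = inject.get_injectable('configs_dir')
    with open(os.path.join(configs_dir, 'settings.yaml')) as f:
        settings = yaml.safe_load(f)
    settings.update(kwargs)
    inject.add_injectable("settings", settings)
    return settings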
def test_mini_pipeline_run():

    configs_dir = os.path.join(os.path.dirname(__file__), 'configs')
    setup_dirs(configs_dir)

    inject_settings(configs_dir,
                    households_sample_size=HOUSEHOLDS_SAMPLE_SIZE,
                    # use_shadow_pricing=True
                    )

    _MODELS = [
        'initialize_landuse',
        'compute_accessibility',
        'initialize_households',
        'school_location',
        'workplace_location',
        'auto_ownership_simulate'
    ]

    pipeline.run(models=_MODELS, resume_after=None)

    regress_mini_auto()

    pipeline.run_model('cdap_simulate')
    pipeline.run_model('mandatory_tour_frequency')

    regress_mini_mtf()

    # try to get a non-existent table
    with pytest.raises(RuntimeError) as excinfo:
        pipeline.get_table("bogus")
    assert "never checkpointed" in str(excinfo.value)

    # try to get an existing table from a non-existent checkpoint
    with pytest.raises(RuntimeError) as excinfo:
        pipeline.get_table("households", checkpoint_name="bogus")
    assert "not in checkpoints" in str(excinfo.value)

    pipeline.close_pipeline()
    inject.clear_cache()
    close_handlers()
def test_mini_pipeline_run2():

    # the important thing here is that we should get
    # exactly the same results as for test_mini_pipeline_run
    # when we restart pipeline

    configs_dir = os.path.join(os.path.dirname(__file__), 'configs')
    setup_dirs(configs_dir)
    inject_settings(configs_dir, households_sample_size=HOUSEHOLDS_SAMPLE_SIZE)

    # should be able to get this BEFORE pipeline is opened
    checkpoints_df = pipeline.get_checkpoints()
    prev_checkpoint_count = len(checkpoints_df.index)

    # print "checkpoints_df\n", checkpoints_df[['checkpoint_name']]
    assert prev_checkpoint_count == 8

    pipeline.open_pipeline('auto_ownership_simulate')

    regress_mini_auto()

    # try to run a model already in pipeline
    with pytest.raises(RuntimeError) as excinfo:
        pipeline.run_model('auto_ownership_simulate')
    assert "run model 'auto_ownership_simulate' more than once" in str(excinfo.value)

    # and these new ones
    pipeline.run_model('cdap_simulate')
    pipeline.run_model('mandatory_tour_frequency')

    regress_mini_mtf()

    # should be able to get this before pipeline is closed (from existing open store)
    checkpoints_df = pipeline.get_checkpoints()
    assert len(checkpoints_df.index) == prev_checkpoint_count

    # - write list of override_hh_ids to override_hh_ids.csv in data for use in next test
    num_hh_ids = 10
    hh_ids = pipeline.get_table("households").head(num_hh_ids).index.values
    hh_ids = pd.DataFrame({'household_id': hh_ids})

    data_dir = inject.get_injectable('data_dir')
    hh_ids.to_csv(os.path.join(data_dir, 'override_hh_ids.csv'), index=False, header=True)

    pipeline.close_pipeline()
    inject.clear_cache()
    close_handlers()
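
# regress_mini_auto() and regress_mini_mtf() factor out the regression checks
# that the older test versions further below still inline. Sketches built from
# those inline checks; the ids and expected choices below are copied from the
# older code in this section, so the current expected values may well differ
# (regress_mini_location_choice_logsums, used once above, is omitted for lack
# of a reference implementation here):

def regress_mini_auto():
    # regression test: check auto ownership choices for a few known households
    auto_choice = pipeline.get_table("households").auto_ownership
    hh_ids = [464138, 1918238, 2201602]
    choices = [0, 1, 2]
    expected_choice = pd.Series(choices, index=pd.Index(hh_ids, name="HHID"),
                                name='auto_ownership')
    pdt.assert_series_equal(auto_choice[hh_ids], expected_choice)


def regress_mini_mtf():
    # these choices are nonsensical as the test mandatory_tour_frequency spec
    # is very truncated
    mtf_choice = pipeline.get_table("persons").mandatory_tour_frequency
    per_ids = [24375, 92744, 172491]
    choices = ['school2', 'work_and_school', 'work1']
    expected_choice = pd.Series(choices, index=pd.Index(per_ids, name='PERID'),
                                name='mandatory_tour_frequency')
    pdt.assert_series_equal(mtf_choice[per_ids], expected_choice)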
def test_mini_pipeline_run2():

    # the important thing here is that we should get
    # exactly the same results as for test_mini_pipeline_run
    # when we restart pipeline

    configs_dir = os.path.join(os.path.dirname(__file__), 'configs')
    orca.add_injectable("configs_dir", configs_dir)

    output_dir = os.path.join(os.path.dirname(__file__), 'output')
    orca.add_injectable("output_dir", output_dir)

    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    orca.add_injectable("data_dir", data_dir)

    inject_settings(configs_dir, households_sample_size=HOUSEHOLDS_SAMPLE_SIZE)

    orca.clear_cache()

    # should be able to get this BEFORE pipeline is opened
    checkpoints_df = pipeline.get_checkpoints()
    prev_checkpoint_count = len(checkpoints_df.index)

    # print "checkpoints_df\n", checkpoints_df[['checkpoint_name']]
    assert prev_checkpoint_count == 11

    pipeline.open_pipeline('auto_ownership_simulate')

    auto_choice = pipeline.get_table("households").auto_ownership

    # regression test: these are the same as in test_mini_pipeline_run1
    hh_ids = [464138, 1918238, 2201602]
    choices = [0, 1, 2]
    expected_choice = pd.Series(choices, index=pd.Index(hh_ids, name="HHID"),
                                name='auto_ownership')

    print("auto_choice\n", auto_choice.head(4))
    pdt.assert_series_equal(auto_choice[hh_ids], expected_choice)

    # try to run a model already in pipeline
    with pytest.raises(RuntimeError) as excinfo:
        pipeline.run_model('auto_ownership_simulate')
    assert "run model 'auto_ownership_simulate' more than once" in str(excinfo.value)

    # and these new ones
    pipeline.run_model('cdap_simulate')
    pipeline.run_model('mandatory_tour_frequency')

    mtf_choice = pipeline.get_table("persons").mandatory_tour_frequency

    per_ids = [24375, 92744, 172491]
    choices = ['school2', 'work_and_school', 'work1']
    expected_choice = pd.Series(choices, index=pd.Index(per_ids, name='PERID'),
                                name='mandatory_tour_frequency')

    print("mtf_choice\n", mtf_choice.head(20))
    pdt.assert_series_equal(mtf_choice[per_ids], expected_choice)

    # should be able to get this before pipeline is closed (from existing open store)
    checkpoints_df = pipeline.get_checkpoints()
    assert len(checkpoints_df.index) == prev_checkpoint_count

    pipeline.close_pipeline()
    orca.clear_cache()
def test_mini_pipeline_run():

    configs_dir = os.path.join(os.path.dirname(__file__), 'configs')
    orca.add_injectable("configs_dir", configs_dir)

    output_dir = os.path.join(os.path.dirname(__file__), 'output')
    orca.add_injectable("output_dir", output_dir)

    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    orca.add_injectable("data_dir", data_dir)

    inject_settings(configs_dir, households_sample_size=HOUSEHOLDS_SAMPLE_SIZE)

    orca.clear_cache()

    tracing.config_logger()

    # assert len(orca.get_table("households").index) == HOUSEHOLDS_SAMPLE_SIZE

    _MODELS = [
        'initialize',
        'compute_accessibility',
        'school_location_sample',
        'school_location_logsums',
        'school_location_simulate',
        'workplace_location_sample',
        'workplace_location_logsums',
        'workplace_location_simulate',
        'auto_ownership_simulate'
    ]

    pipeline.run(models=_MODELS, resume_after=None)

    auto_choice = pipeline.get_table("households").auto_ownership

    # regression test: these are among the first 10 households in households table
    hh_ids = [464138, 1918238, 2201602]
    choices = [0, 1, 2]
    expected_choice = pd.Series(choices, index=pd.Index(hh_ids, name="HHID"),
                                name='auto_ownership')

    print("auto_choice\n", auto_choice.head(10))
    pdt.assert_series_equal(auto_choice[hh_ids], expected_choice)

    pipeline.run_model('cdap_simulate')
    pipeline.run_model('mandatory_tour_frequency')

    mtf_choice = pipeline.get_table("persons").mandatory_tour_frequency

    # these choices are nonsensical as the test mandatory_tour_frequency spec is very truncated
    per_ids = [24375, 92744, 172491]
    choices = ['school2', 'work_and_school', 'work1']
    expected_choice = pd.Series(choices, index=pd.Index(per_ids, name='PERID'),
                                name='mandatory_tour_frequency')

    print("mtf_choice\n", mtf_choice.head(20))

    # mtf_choice
    # PERID
    # 23647                 NaN
    # 24203                 NaN
    # 24375             school2
    # 24687                 NaN
    # 24824                 NaN
    # 24975                 NaN
    # 25027                 NaN
    # 25117                 NaN
    # 25772                 NaN
    # 25871                 NaN
    # 26284                 NaN
    # 26863                 NaN
    # 27059                 NaN
    # 92233                 NaN
    # 92382             school1
    # 92744     work_and_school
    # 92823                 NaN
    # 93172             school2
    # 93774                 NaN
    # 172491              work1
    # Name: mandatory_tour_frequency, dtype: object

    pdt.assert_series_equal(mtf_choice[per_ids], expected_choice)

    # try to get a non-existent table
    with pytest.raises(RuntimeError) as excinfo:
        pipeline.get_table("bogus")
    assert "not in checkpointed tables" in str(excinfo.value)

    # try to get an existing table from a non-existent checkpoint
    with pytest.raises(RuntimeError) as excinfo:
        pipeline.get_table("households", checkpoint_name="bogus")
    assert "not in checkpoints" in str(excinfo.value)

    pipeline.close_pipeline()
    orca.clear_cache()
    close_handlers()
def test_mini_pipeline_run():

    configs_dir = os.path.join(os.path.dirname(__file__), 'configs')
    orca.add_injectable("configs_dir", configs_dir)

    output_dir = os.path.join(os.path.dirname(__file__), 'output')
    orca.add_injectable("output_dir", output_dir)

    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    orca.add_injectable("data_dir", data_dir)

    inject_settings(configs_dir, households_sample_size=HOUSEHOLDS_SAMPLE_SIZE)

    orca.clear_cache()

    # assert len(orca.get_table("households").index) == HOUSEHOLDS_SAMPLE_SIZE

    _MODELS = [
        'compute_accessibility',
        'school_location_simulate',
        'workplace_location_simulate',
        'auto_ownership_simulate'
    ]

    pipeline.run(models=_MODELS, resume_after=None)

    auto_choice = pipeline.get_table("households").auto_ownership

    # regression test: these are the first 3 households in households table
    hh_ids = [26960, 857296, 93428]
    choices = [0, 1, 0]
    expected_choice = pd.Series(choices, index=pd.Index(hh_ids, name="HHID"),
                                name='auto_ownership')

    print("auto_choice\n", auto_choice.head(10))
    pdt.assert_series_equal(auto_choice[hh_ids], expected_choice)

    pipeline.run_model('cdap_simulate')
    pipeline.run_model('mandatory_tour_frequency')

    mtf_choice = pipeline.get_table("persons").mandatory_tour_frequency

    per_ids = [92363, 92681, 93428]
    choices = ['work1', 'school1', 'school2']
    expected_choice = pd.Series(choices, index=pd.Index(per_ids, name='PERID'),
                                name='mandatory_tour_frequency')

    print("mtf_choice\n", mtf_choice.head(20))
    pdt.assert_series_equal(mtf_choice[per_ids], expected_choice)

    # try to get a non-existent table
    with pytest.raises(RuntimeError) as excinfo:
        pipeline.get_table("bogus")
    assert "not in checkpointed tables" in str(excinfo.value)

    # try to get an existing table from a non-existent checkpoint
    with pytest.raises(RuntimeError) as excinfo:
        pipeline.get_table("households", checkpoint_name="bogus")
    assert "not in checkpoints" in str(excinfo.value)

    pipeline.close()
    orca.clear_cache()
def test_mini_pipeline_run2():

    # the important thing here is that we should get
    # exactly the same results as for test_mini_pipeline_run
    # when we restart pipeline

    configs_dir = os.path.join(os.path.dirname(__file__), 'configs')
    orca.add_injectable("configs_dir", configs_dir)

    output_dir = os.path.join(os.path.dirname(__file__), 'output')
    orca.add_injectable("output_dir", output_dir)

    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    orca.add_injectable("data_dir", data_dir)

    inject_settings(configs_dir, households_sample_size=HOUSEHOLDS_SAMPLE_SIZE)

    orca.clear_cache()

    # should be able to get this BEFORE pipeline is opened
    checkpoints_df = pipeline.get_checkpoints()
    prev_checkpoint_count = len(checkpoints_df.index)
    assert prev_checkpoint_count == 7

    pipeline.start_pipeline('auto_ownership_simulate')

    auto_choice = pipeline.get_table("households").auto_ownership

    # regression test: these are the 2nd-4th households in households table
    hh_ids = [26960, 857296, 93428]
    choices = [0, 1, 0]
    expected_auto_choice = pd.Series(choices, index=pd.Index(hh_ids, name="HHID"),
                                     name='auto_ownership')

    print("auto_choice\n", auto_choice.head(4))
    pdt.assert_series_equal(auto_choice[hh_ids], expected_auto_choice)

    # try to run a model already in pipeline
    with pytest.raises(RuntimeError) as excinfo:
        pipeline.run_model('auto_ownership_simulate')
    assert "run model 'auto_ownership_simulate' more than once" in str(excinfo.value)

    # and these new ones
    pipeline.run_model('cdap_simulate')
    pipeline.run_model('mandatory_tour_frequency')

    mtf_choice = pipeline.get_table("persons").mandatory_tour_frequency

    per_ids = [92363, 92681, 93428]
    choices = ['work1', 'school1', 'school2']
    expected_choice = pd.Series(choices, index=pd.Index(per_ids, name='PERID'),
                                name='mandatory_tour_frequency')

    print("mtf_choice\n", mtf_choice.head(20))
    pdt.assert_series_equal(mtf_choice[per_ids], expected_choice)

    # should be able to get this before pipeline is closed (from existing open store)
    assert orca.get_injectable('pipeline_store') is not None
    checkpoints_df = pipeline.get_checkpoints()
    assert len(checkpoints_df.index) == prev_checkpoint_count

    pipeline.close()

    # should also be able to get this after pipeline is closed (open and close)
    assert orca.get_injectable('pipeline_store') is None
    checkpoints_df = pipeline.get_checkpoints()
    assert len(checkpoints_df.index) == prev_checkpoint_count

    orca.clear_cache()
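
# Note on test ordering: the *_run2 variants resume from the pipeline store
# written by the corresponding *_run test, so they only pass when run after it
# (as pytest does when collecting this file top to bottom). The helper below is
# a hypothetical debugging aid, not part of the test suite: it dumps the
# checkpoints recorded in the HDF5 store, assuming the default store name
# 'pipeline.h5' in the output_dir used above and a 'checkpoints' table keyed
# the way these tests' get_checkpoints() results suggest.
def inspect_pipeline_store():
    store_path = os.path.join(os.path.dirname(__file__), 'output', 'pipeline.h5')
    with pd.HDFStore(store_path, mode='r') as store:
        print(store.keys())  # one key per checkpointed table, plus the checkpoint log
        print(store['checkpoints'][['checkpoint_name']])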