def full_run(configs_dir,
             data_dir,
             resume_after=None,
             chunk_size=0,
             households_sample_size=HOUSEHOLDS_SAMPLE_SIZE,
             trace_hh_id=None,
             trace_od=None,
             check_for_variability=None,
             two_zone=True):

    setup_dirs(configs_dir, data_dir)

    settings = inject_settings(
        two_zone=two_zone,
        households_sample_size=households_sample_size,
        chunk_size=chunk_size,
        trace_hh_id=trace_hh_id,
        trace_od=trace_od,
        check_for_variability=check_for_variability,
        use_shadow_pricing=False
    )  # shadow pricing breaks replicability when sample_size varies

    MODELS = settings['models']

    pipeline.run(models=MODELS, resume_after=resume_after)

    tours = pipeline.get_table('tours')
    tour_count = len(tours.index)

    return tour_count
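A minimal caller sketch for the helper above. This is hypothetical: the configs_dir/data_dir paths, the sample size, and the smoke-test assertion are illustrative, not taken from the source project.

def test_full_run_smoke():
    # hypothetical paths; point these at a real configs/data pair
    configs_dir = os.path.join(os.path.dirname(__file__), 'configs')
    data_dir = os.path.join(os.path.dirname(__file__), 'data')

    tour_count = full_run(configs_dir, data_dir,
                          households_sample_size=10,
                          two_zone=True)
    assert tour_count > 0  # smoke check only

    pipeline.close_pipeline()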
Example #2
def test_pipeline_checkpoint_drop():

    setup()

    _MODELS = [
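        # conventions used in this list: a leading underscore (e.g. '_step2') runs a
        # step without checkpointing it, and '.key=value;key=value' suffixes pass
        # arguments to the step (see the "never checkpointed" assertion below)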
        'step1',
        '_step2',
        '_step_add_col.table_name=table2;column_name=c2',
        '_step_forget_tab.table_name=table2',
        'step3',
        'step_forget_tab.table_name=table3',
    ]
    pipeline.run(models=_MODELS, resume_after=None)

    checkpoints = pipeline.get_checkpoints()
    print "checkpoints\n", checkpoints

    pipeline.get_table("table1")

    with pytest.raises(RuntimeError) as excinfo:
        pipeline.get_table("table2")
    assert "never checkpointed" in str(excinfo.value)

    # can't get a dropped table from the current checkpoint
    with pytest.raises(RuntimeError) as excinfo:
        pipeline.get_table("table3")
    assert "was dropped" in str(excinfo.value)

    # ensure that we can still get table3 from a checkpoint at which it existed
    pipeline.get_table("table3", checkpoint_name="step3")

    pipeline.close_pipeline()
    close_handlers()
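For context, here is a sketch of how a parameterized step like step_add_col could be written. This illustrates the pattern only; it is not the project's actual test fixture, and it assumes the arguments parsed from the model string are injected into the step by parameter name.

from activitysim.core import inject, pipeline

@inject.step()
def step_add_col(table_name, column_name):
    # fetch the named table from the pipeline, add a column, and write the
    # table back so the change is captured at the next checkpoint
    df = pipeline.get_table(table_name)
    df[column_name] = 0
    pipeline.replace_table(table_name, df)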
Example #3
def full_run(resume_after=None, chunk_size=0,
             households_sample_size=HOUSEHOLDS_SAMPLE_SIZE,
             trace_hh_id=None, trace_od=None, check_for_variability=None):

    configs_dir = os.path.join(os.path.dirname(__file__), '..', '..', '..', 'example', 'configs')

    setup_dirs(configs_dir)

    settings = inject_settings(
        configs_dir,
        households_sample_size=households_sample_size,
        chunk_size=chunk_size,
        trace_hh_id=trace_hh_id,
        trace_od=trace_od,
        check_for_variability=check_for_variability,
        use_shadow_pricing=False)  # shadow pricing breaks replicability when sample_size varies

    MODELS = settings['models']

    pipeline.run(models=MODELS, resume_after=resume_after)

    tours = pipeline.get_table('tours')
    tour_count = len(tours.index)

    return tour_count
Example #4
def test_full_run2_repop_replace():
    # Note: tests are run in alphabetical order.
    # This test expects to find the pipeline h5 file from
    # test_full_run1 in the output folder

    _MODELS = [
        'input_pre_processor.table_list=repop_input_table_list;repop',
        'repop_setup_data_structures',
        'initial_seed_balancing.final=true;repop',
        'integerize_final_seed_weights.repop',
        'repop_balancing',
        'expand_households.repop;replace',
        'write_synthetic_population.repop',
        'write_tables.repop',
    ]

    pipeline.run(models=_MODELS, resume_after='summarize')

    expanded_household_ids = pipeline.get_table('expanded_household_ids')
    assert isinstance(expanded_household_ids, pd.DataFrame)
    taz_hh_counts = expanded_household_ids.groupby('TAZ').size()
    assert len(taz_hh_counts) == TAZ_COUNT
    assert taz_hh_counts.loc[100] == TAZ_100_HH_REPOP_COUNT

    # tables will no longer be available after pipeline is closed
    pipeline.close_pipeline()

    inject.clear_cache()
Example #5
def test_full_run1():

    _MODELS = [
        'input_pre_processor',
        'setup_data_structures',
        'initial_seed_balancing',
        'meta_control_factoring',
        'final_seed_balancing',
        'integerize_final_seed_weights',
        'sub_balancing.geography=TRACT',
        'sub_balancing.geography=TAZ',
        'expand_households',
        'summarize',
        'write_tables',
        'write_synthetic_population',
    ]

    pipeline.run(models=_MODELS, resume_after=None)

    expanded_household_ids = pipeline.get_table('expanded_household_ids')
    assert isinstance(expanded_household_ids, pd.DataFrame)
    taz_hh_counts = expanded_household_ids.groupby('TAZ').size()
    assert len(taz_hh_counts) == TAZ_COUNT
    assert taz_hh_counts.loc[100] == TAZ_100_HH_COUNT

    # output_tables action: skip
    output_dir = inject.get_injectable('output_dir')
    assert not os.path.exists(os.path.join(output_dir, 'households.csv'))
    assert os.path.exists(os.path.join(output_dir, 'summary_DISTRICT_1.csv'))

    # tables will no longer be available after pipeline is closed
    pipeline.close_pipeline()

    inject.clear_cache()
Example #6
def full_run(resume_after=None,
             chunk_size=0,
             households_sample_size=HOUSEHOLDS_SAMPLE_SIZE,
             trace_hh_id=None,
             trace_od=None,
             check_for_variability=None):

    setup_dirs()

    settings = inject_settings(
        households_sample_size=households_sample_size,
        chunk_size=chunk_size,
        trace_hh_id=trace_hh_id,
        trace_od=trace_od,
        testing_fail_trip_destination=False,
        check_for_variability=check_for_variability,
        want_dest_choice_sample_tables=False,
        use_shadow_pricing=False
    )  # shadow pricing breaks replicability when sample_size varies

    # FIXME should enable testing_fail_trip_destination?

    MODELS = settings['models']

    pipeline.run(models=MODELS, resume_after=resume_after)

    tours = pipeline.get_table('tours')
    tour_count = len(tours.index)

    return tour_count
Example #7
def test_balancer_step():

    setup_working_dir('example_balance', inherit=True)

    pipeline.run(['balance_trips', 'write_tables'])

    pipeline.close_pipeline()
Example #8
def test_full_run2():

    configs_dir = os.path.join(os.path.dirname(__file__), 'configs2')
    orca.add_injectable("configs_dir", configs_dir)

    data_dir = os.path.join(os.path.dirname(__file__), 'data2')
    orca.add_injectable("data_dir", data_dir)

    output_dir = os.path.join(os.path.dirname(__file__), 'output')
    orca.add_injectable("output_dir", output_dir)

    orca.clear_cache()

    tracing.config_logger()

    _MODELS = [
        'input_pre_processor', 'setup_data_structures',
        'initial_seed_balancing', 'meta_control_factoring',
        'final_seed_balancing', 'integerize_final_seed_weights',
        'sub_balancing.geography = DISTRICT',
        'sub_balancing.geography = TRACT', 'sub_balancing.geography=TAZ',
        'expand_households', 'summarize', 'write_results'
    ]

    pipeline.run(models=_MODELS, resume_after=None)

    assert isinstance(pipeline.get_table('expanded_household_ids'),
                      pd.DataFrame)

    assert os.path.exists(os.path.join(output_dir, 'summary_DISTRICT.csv'))

    # tables will no longer be available after pipeline is closed
    pipeline.close_pipeline()

    orca.clear_cache()
Example #9
def run(args):
    """
    Run bca4abm. Specify a project folder using the '--working_dir' option,
    or point to the config, data, and output folders directly with
    '--config', '--data', and '--output'.

    """

    if args.working_dir and os.path.exists(args.working_dir):
        os.chdir(args.working_dir)

    if args.config:
        inject.add_injectable('configs_dir', args.config)

    if args.data:
        inject.add_injectable('data_dir', args.data)

    if args.output:
        inject.add_injectable('output_dir', args.output)

    for injectable in ['configs_dir', 'data_dir', 'output_dir']:
        try:
            dir_path = inject.get_injectable(injectable)
        except RuntimeError:
            sys.exit('Error: please specify either a --working_dir '
                     "containing 'configs', 'data', and 'output' folders "
                     'or all three of --config, --data, and --output')
        if not os.path.exists(dir_path):
            sys.exit("Could not find %s '%s'" % (injectable, os.path.abspath(dir_path)))

    if args.pipeline:
        inject.add_injectable('pipeline_file_name', args.pipeline)

    if args.resume:
        override_setting('resume_after', args.resume)

    tracing.config_logger()
    tracing.delete_csv_files()  # only modifies output_dir
    warnings.simplefilter('always')
    logging.captureWarnings(capture=True)

    t0 = tracing.print_elapsed_time()

    # If you provide a resume_after argument to pipeline.run
    # the pipeline manager will attempt to load checkpointed tables from the checkpoint store
    # and resume pipeline processing on the next submodel step after the specified checkpoint
    resume_after = setting('resume_after', None)

    if resume_after:
        print('resume_after: %s' % resume_after)

    pipeline.run(models=setting('models'), resume_after=resume_after)

    # tables will no longer be available after pipeline is closed
    pipeline.close_pipeline()

    t0 = tracing.print_elapsed_time('all models', t0)
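The args namespace that run(args) reads could be produced by a parser along these lines. This is a sketch: the attribute names match the code above, but the real bca4abm CLI may define its options differently.

import argparse

parser = argparse.ArgumentParser(description='run bca4abm')
parser.add_argument('--working_dir', help="project folder containing 'configs', 'data', and 'output'")
parser.add_argument('--config', help='path to the configs folder')
parser.add_argument('--data', help='path to the data folder')
parser.add_argument('--output', help='path to the output folder')
parser.add_argument('--pipeline', help='pipeline file name')
parser.add_argument('--resume', help='checkpoint to resume after')

run(parser.parse_args())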
Example #10
def run(run_list, injectables=None):

    if run_list['multiprocess']:
        logger.info("run multiprocess simulation")
        mp_tasks.run_multiprocess(run_list, injectables)
    else:
        logger.info("run single process simulation")
        pipeline.run(models=run_list['models'], resume_after=run_list['resume_after'])
        pipeline.close_pipeline()
        mem.log_global_hwm()
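A hypothetical caller for the dispatcher above, showing the run_list keys it reads; the model names are placeholders.

run_list = {
    'multiprocess': False,
    'models': ['initialize_landuse', 'compute_accessibility'],
    'resume_after': None,
}
run(run_list, injectables=None)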
Example #11
def run(run_list, injectables=None):

    if run_list['multiprocess']:
        logger.info("run multiprocess simulation")
        mp_tasks.run_multiprocess(run_list, injectables)
    else:
        logger.info("run single process simulation")
        pipeline.run(models=run_list['models'], resume_after=run_list['resume_after'])
        pipeline.close_pipeline()
        chunk.log_write_hwm()
Example #12
def test_zero_chunk_size():

    settings = inject_settings(chunk_size=0)

    inject.clear_cache()

    tracing.config_logger()

    MODELS = settings['models']

    pipeline.run(models=MODELS, resume_after='aggregate_od_processor')

    pipeline.close_pipeline()
Example #13
def full_run(resume_after=None,
             chunk_size=0,
             households_sample_size=HOUSEHOLDS_SAMPLE_SIZE,
             trace_hh_id=None,
             trace_od=None,
             check_for_variability=None):

    configs_dir = os.path.join(os.path.dirname(__file__), '..', '..', '..',
                               'example', 'configs')
    orca.add_injectable("configs_dir", configs_dir)

    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    orca.add_injectable("data_dir", data_dir)

    output_dir = os.path.join(os.path.dirname(__file__), 'output')
    orca.add_injectable("output_dir", output_dir)

    inject_settings(configs_dir,
                    households_sample_size=households_sample_size,
                    chunk_size=chunk_size,
                    trace_hh_id=trace_hh_id,
                    trace_od=trace_od,
                    check_for_variability=check_for_variability)

    orca.clear_cache()

    tracing.config_logger()

    # assert orca.get_injectable("chunk_size") == chunk_size

    _MODELS = [
        'compute_accessibility', 'school_location_sample',
        'school_location_logsums', 'school_location_simulate',
        'workplace_location_sample', 'workplace_location_logsums',
        'workplace_location_simulate', 'auto_ownership_simulate',
        'cdap_simulate', 'mandatory_tour_frequency', 'mandatory_scheduling',
        'non_mandatory_tour_frequency', 'destination_choice',
        'non_mandatory_scheduling', 'tour_mode_choice_simulate',
        'create_simple_trips', 'trip_mode_choice_simulate'
    ]

    pipeline.run(models=_MODELS, resume_after=resume_after)

    tours = pipeline.get_table('tours')
    tour_count = len(tours.index)

    pipeline.close()

    orca.clear_cache()

    return tour_count
Example #14
def run(args):
    """
    Run the models. Specify a project folder using the '--working_dir' option,
    or point to the config, data, and output folders directly with
    '--config', '--data', and '--output'. Both '--config' and '--data' can be
    specified multiple times. Directories listed first take precedence.

    """

    from activitysim import abm  # register injectables

    tracing.config_logger(basic=True)
    handle_standard_args(args)  # possibly update injectables
    tracing.config_logger(basic=False)  # update using possibly new logging configs
    config.filter_warnings()
    logging.captureWarnings(capture=True)

    log_settings()

    t0 = tracing.print_elapsed_time()

    # If you provide a resume_after argument to pipeline.run
    # the pipeline manager will attempt to load checkpointed tables from the checkpoint store
    # and resume pipeline processing on the next submodel step after the specified checkpoint
    resume_after = config.setting('resume_after', None)

    # cleanup if not resuming
    if not resume_after:
        cleanup_output_files()
    elif config.setting('cleanup_trace_files_on_resume', False):
        tracing.delete_trace_files()

    if config.setting('multiprocess', False):
        logger.info('run multiprocess simulation')

        from activitysim.core import mp_tasks
        run_list = mp_tasks.get_run_list()
        injectables = {k: inject.get_injectable(k) for k in INJECTABLES}
        mp_tasks.run_multiprocess(run_list, injectables)
    else:
        logger.info('run single process simulation')

        pipeline.run(models=config.setting('models'),
                     resume_after=resume_after)
        pipeline.close_pipeline()
        chunk.log_write_hwm()

    tracing.print_elapsed_time('all models', t0)
Example #15
def run_abm(models,
            resume_after=None,
            chunk_size=None,
            trace_hh_id=None,
            trace_od=None):

    settings = inject_settings(chunk_size=chunk_size,
                               trace_hh_id=trace_hh_id,
                               trace_od=trace_od)

    inject.clear_cache()

    tracing.config_logger()

    pipeline.run(models=models, resume_after=resume_after)
Example #16
def test_run_4step():

    settings = inject_settings(chunk_size=None,
                               trace_hh_id=None,
                               trace_od=None)

    inject.clear_cache()

    tracing.config_logger()

    MODELS = settings['models']

    pipeline.run(models=MODELS, resume_after=None)

    pipeline.close_pipeline()
Example #17
def test_pipeline_run():

    orca.orca._INJECTABLES.pop('skim_dict', None)
    orca.orca._INJECTABLES.pop('skim_stack', None)

    configs_dir = os.path.join(os.path.dirname(__file__), 'configs')
    orca.add_injectable("configs_dir", configs_dir)

    output_dir = os.path.join(os.path.dirname(__file__), 'output')
    orca.add_injectable("output_dir", output_dir)

    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    orca.add_injectable("data_dir", data_dir)

    orca.clear_cache()

    tracing.config_logger()

    _MODELS = [
        'step1',
    ]

    pipeline.run(models=_MODELS, resume_after=None)

    table1 = pipeline.get_table("table1").column1

    # test that model arg is passed to step
    pipeline.run_model('step2.table_name=table2')

    table2 = pipeline.get_table("table2").column1

    # try to get a non-existent table
    with pytest.raises(RuntimeError) as excinfo:
        pipeline.get_table("bogus")
    assert "not in checkpointed tables" in str(excinfo.value)

    # try to get an existing table from a non-existent checkpoint
    with pytest.raises(RuntimeError) as excinfo:
        pipeline.get_table("table1", checkpoint_name="bogus")
    assert "not in checkpoints" in str(excinfo.value)

    pipeline.close_pipeline()
    orca.clear_cache()

    close_handlers()
Example #18
def test_mini_pipeline_run():

    setup_dirs()

    inject_settings(households_sample_size=HOUSEHOLDS_SAMPLE_SIZE,
                    write_skim_cache=True
                    )

    _MODELS = [
        'initialize_landuse',
        'compute_accessibility',
        'initialize_households',
        'school_location',
        'workplace_location',
        'auto_ownership_simulate'
    ]

    pipeline.run(models=_MODELS, resume_after=None)

    regress_mini_auto()

    pipeline.run_model('cdap_simulate')
    pipeline.run_model('mandatory_tour_frequency')

    regress_mini_mtf()
    regress_mini_location_choice_logsums()

    # try to get a non-existent table
    with pytest.raises(RuntimeError) as excinfo:
        pipeline.get_table("bogus")
    assert "never checkpointed" in str(excinfo.value)

    # try to get an existing table from a non-existent checkpoint
    with pytest.raises(RuntimeError) as excinfo:
        pipeline.get_table("households", checkpoint_name="bogus")
    assert "not in checkpoints" in str(excinfo.value)

    # should create optional workplace_location_sample table
    workplace_location_sample_df = pipeline.get_table("workplace_location_sample")
    assert 'mode_choice_logsum' in workplace_location_sample_df

    pipeline.close_pipeline()
    inject.clear_cache()
    close_handlers()
Example #19
def test_full_run1():

    configs_dir = os.path.join(os.path.dirname(__file__), 'configs')
    orca.add_injectable("configs_dir", configs_dir)

    # data_dir = os.path.join(os.path.dirname(__file__), 'data')
    data_dir = os.path.join(os.path.dirname(__file__),
                            '..', '..', 'example', 'data')
    orca.add_injectable("data_dir", data_dir)

    # scenarios_dir = os.path.join(os.path.dirname(__file__), 'scenarios')
    scenarios_dir = os.path.join(os.path.dirname(__file__),
                                 '..', '..', 'example', 'scenarios')
    orca.add_injectable("scenarios_dir", scenarios_dir)

    output_dir = os.path.join(os.path.dirname(__file__), 'output')
    orca.add_injectable("output_dir", output_dir)

    orca.clear_cache()

    tracing.config_logger()

    # run list from settings file is dict with list of 'steps' and optional 'resume_after'
    run_list = setting('run_list')
    assert 'steps' in run_list, "Did not find steps in run_list"

    # list of steps and possible resume_after in run_list
    steps = run_list.get('steps')

    pipeline.run(models=steps, resume_after=None)

    # geo_crosswalk = pipeline.get_table('geo_crosswalk')
    # assert geo_crosswalk.index.name == 'TAZ'
    # assert 'FAF4' in geo_crosswalk.columns
    # assert 'FIPS' in geo_crosswalk.columns
    #
    assert os.path.exists(os.path.join(output_dir, 'naics_set.csv'))

    # tables will no longer be available after pipeline is closed
    pipeline.close_pipeline()

    orca.clear_cache()
Example #20
def test_mini_pipeline_run():

    configs_dir = os.path.join(os.path.dirname(__file__), 'configs')

    setup_dirs(configs_dir)

    inject_settings(configs_dir,
                    households_sample_size=HOUSEHOLDS_SAMPLE_SIZE,
                    # use_shadow_pricing=True
                    )

    _MODELS = [
        'initialize_landuse',
        'compute_accessibility',
        'initialize_households',
        'school_location',
        'workplace_location',
        'auto_ownership_simulate'
    ]

    pipeline.run(models=_MODELS, resume_after=None)

    regress_mini_auto()

    pipeline.run_model('cdap_simulate')
    pipeline.run_model('mandatory_tour_frequency')

    regress_mini_mtf()

    # try to get a non-existent table
    with pytest.raises(RuntimeError) as excinfo:
        pipeline.get_table("bogus")
    assert "never checkpointed" in str(excinfo.value)

    # try to get an existing table from a non-existent checkpoint
    with pytest.raises(RuntimeError) as excinfo:
        pipeline.get_table("households", checkpoint_name="bogus")
    assert "not in checkpoints" in str(excinfo.value)

    pipeline.close_pipeline()
    inject.clear_cache()
    close_handlers()
Example #21
def test_load_cached_accessibility():

    inject.clear_cache()
    inject.reinject_decorated_tables()

    data_dir = [
        os.path.join(os.path.dirname(__file__), 'data'),
        example_path('data')
    ]
    setup_dirs(data_dir=data_dir)

    #
    # add OPTIONAL cached table 'accessibility' to input_table_list
    # activitysim.abm.tables.land_use.accessibility() will load this table if listed here,
    # presumably calculated independently outside activitysim or cached from a previous run
    #
    settings = config.read_settings_file('settings.yaml', mandatory=True)
    input_table_list = settings.get('input_table_list')
    input_table_list.append({
        'tablename': 'accessibility',
        'filename': 'cached_accessibility.csv',
        'index_col': 'zone_id'
    })
    inject_settings(households_sample_size=HOUSEHOLDS_SAMPLE_SIZE,
                    input_table_list=input_table_list)

    _MODELS = [
        'initialize_landuse',
        # 'compute_accessibility',  # we load the accessibility table ordinarily created by compute_accessibility
        'initialize_households',
    ]

    pipeline.run(models=_MODELS, resume_after=None)

    accessibility_df = pipeline.get_table("accessibility")

    assert 'auPkRetail' in accessibility_df

    pipeline.close_pipeline()
    inject.clear_cache()
    close_handlers()
Example #22
def test_pipeline_run():

    inject.add_step('step1', steps.step1)
    inject.add_step('step2', steps.step2)
    inject.add_step('step3', steps.step3)
    inject.add_step('step_add_col', steps.step_add_col)
    inject.dump_state()

    _MODELS = [
        'step1', 'step2', 'step3',
        'step_add_col.table_name=table2;column_name=c2'
    ]

    pipeline.run(models=_MODELS, resume_after=None)

    checkpoints = pipeline.get_checkpoints()
    print("checkpoints\n", checkpoints)

    c2 = pipeline.get_table("table2").c2

    # get a table from an earlier checkpoint
    pipeline.get_table("table1", checkpoint_name="step3")

    # try to get a table from a step before it was checkpointed
    with pytest.raises(RuntimeError) as excinfo:
        pipeline.get_table("table2", checkpoint_name="step1")
    assert "not in checkpoint 'step1'" in str(excinfo.value)

    # try to get a non-existent table
    with pytest.raises(RuntimeError) as excinfo:
        pipeline.get_table("bogus")
    assert "never checkpointed" in str(excinfo.value)

    # try to get an existing table from a non-existent checkpoint
    with pytest.raises(RuntimeError) as excinfo:
        pipeline.get_table("table1", checkpoint_name="bogus")
    assert "not in checkpoints" in str(excinfo.value)

    pipeline.close_pipeline()

    close_handlers()
Example #23
def full_run(resume_after=None,
             chunk_size=0,
             households_sample_size=HOUSEHOLDS_SAMPLE_SIZE,
             trace_hh_id=None,
             trace_od=None,
             check_for_variability=None):

    configs_dir = os.path.join(os.path.dirname(__file__), '..', '..', '..',
                               'example', 'configs')
    orca.add_injectable("configs_dir", configs_dir)

    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    orca.add_injectable("data_dir", data_dir)

    output_dir = os.path.join(os.path.dirname(__file__), 'output')
    orca.add_injectable("output_dir", output_dir)

    settings = inject_settings(configs_dir,
                               households_sample_size=households_sample_size,
                               chunk_size=chunk_size,
                               trace_hh_id=trace_hh_id,
                               trace_od=trace_od,
                               check_for_variability=check_for_variability)

    orca.clear_cache()

    tracing.config_logger()

    MODELS = settings['models']

    pipeline.run(models=MODELS, resume_after=resume_after)

    tours = pipeline.get_table('tours')
    tour_count = len(tours.index)

    pipeline.close_pipeline()

    orca.clear_cache()

    return tour_count
Example #24
def test_weighting():

    configs_dir = os.path.join(os.path.dirname(__file__), '..', '..',
                               'example_survey_weighting', 'configs')
    inject.add_injectable("configs_dir", configs_dir)

    data_dir = os.path.join(os.path.dirname(__file__), '..', '..',
                            'example_survey_weighting', 'data')
    inject.add_injectable("data_dir", data_dir)

    output_dir = os.path.join(os.path.dirname(__file__), 'output')
    inject.add_injectable("output_dir", output_dir)

    inject.clear_cache()

    tracing.config_logger()

    _MODELS = [
        'input_pre_processor', 'setup_data_structures',
        'initial_seed_balancing', 'meta_control_factoring',
        'final_seed_balancing', 'summarize', 'write_tables'
    ]

    pipeline.run(models=_MODELS, resume_after=None)

    summary_hh_weights = pipeline.get_table('summary_hh_weights')
    total_summary_hh_weights = summary_hh_weights[
        'SUBREGCluster_balanced_weight'].sum()

    seed_households = pd.read_csv(os.path.join(data_dir,
                                               'seed_households.csv'))
    total_seed_households_weights = seed_households['HHweight'].sum()

    assert abs(total_summary_hh_weights - total_seed_households_weights) < 1

    # tables will no longer be available after pipeline is closed
    pipeline.close_pipeline()

    inject.clear_cache()
Example #25
def test_full_run2_repop_replace():

    configs_dir = os.path.join(os.path.dirname(__file__), 'configs')
    orca.add_injectable("configs_dir", configs_dir)

    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    orca.add_injectable("data_dir", data_dir)

    output_dir = os.path.join(os.path.dirname(__file__), 'output')
    orca.add_injectable("output_dir", output_dir)

    orca.clear_cache()

    tracing.config_logger()

    _MODELS = [
        'input_pre_processor.table_list=repop_input_table_list',
        'repop_setup_data_structures',
        'initial_seed_balancing.final=true',
        'integerize_final_seed_weights.repop',
        'repop_balancing',
        'expand_households.repop;replace',
        'write_synthetic_population.repop',
        'write_tables.repop',
    ]

    pipeline.run(models=_MODELS, resume_after='summarize')

    expanded_household_ids = pipeline.get_table('expanded_household_ids')
    assert isinstance(expanded_household_ids, pd.DataFrame)
    taz_hh_counts = expanded_household_ids.groupby('TAZ').size()
    assert len(taz_hh_counts) == TAZ_COUNT
    assert taz_hh_counts.loc[100] == TAZ_100_HH_REPOP_COUNT

    # tables will no longer be available after pipeline is closed
    pipeline.close_pipeline()

    orca.clear_cache()
Example #26
def test_full_run1():

    configs_dir = os.path.join(os.path.dirname(__file__), 'configs')
    orca.add_injectable("configs_dir", configs_dir)

    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    orca.add_injectable("data_dir", data_dir)

    output_dir = os.path.join(os.path.dirname(__file__), 'output')
    orca.add_injectable("output_dir", output_dir)

    orca.clear_cache()

    tracing.config_logger()

    _MODELS = [
        'input_pre_processor', 'setup_data_structures',
        'initial_seed_balancing', 'meta_control_factoring',
        'final_seed_balancing', 'integerize_final_seed_weights',
        'sub_balancing.geography = TRACT', 'sub_balancing.geography=TAZ',
        'expand_households', 'synthesize_population', 'write_results',
        'summarize'
    ]

    pipeline.run(models=_MODELS, resume_after=None)

    expanded_household_ids = pipeline.get_table('expanded_household_ids')
    assert isinstance(expanded_household_ids, pd.DataFrame)
    taz_hh_counts = expanded_household_ids.groupby('TAZ').size()
    assert len(taz_hh_counts) == TAZ_COUNT
    assert taz_hh_counts.loc[100] == TAZ_100_HH_COUNT

    assert os.path.exists(os.path.join(output_dir, 'summary_DISTRICT_1.csv'))

    # tables will no longer be available after pipeline is closed
    pipeline.close_pipeline()

    orca.clear_cache()
Example #27
def run():
    config.handle_standard_args()

    # specify None for a pseudo-random base seed
    # inject.add_injectable('rng_base_seed', 0)

    tracing.config_logger()
    config.filter_warnings()

    tracing.delete_csv_files()

    # If you provide a resume_after argument to pipeline.run
    # the pipeline manager will attempt to load checkpointed tables from the checkpoint store
    # and resume pipeline processing on the next submodel step after the specified checkpoint
    resume_after = setting('resume_after', None)

    if resume_after:
        print("resume_after", resume_after)

    pipeline.run(models=setting('models'), resume_after=resume_after)

    # tables will no longer be available after pipeline is closed
    pipeline.close_pipeline()
Example #28
def test_mini_pipeline_run():

    configs_dir = os.path.join(os.path.dirname(__file__), 'configs')
    orca.add_injectable("configs_dir", configs_dir)

    output_dir = os.path.join(os.path.dirname(__file__), 'output')
    orca.add_injectable("output_dir", output_dir)

    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    orca.add_injectable("data_dir", data_dir)

    inject_settings(configs_dir, households_sample_size=HOUSEHOLDS_SAMPLE_SIZE)

    orca.clear_cache()

    tracing.config_logger()

    # assert len(orca.get_table("households").index) == HOUSEHOLDS_SAMPLE_SIZE

    _MODELS = [
        'initialize', 'compute_accessibility', 'school_location_sample',
        'school_location_logsums', 'school_location_simulate',
        'workplace_location_sample', 'workplace_location_logsums',
        'workplace_location_simulate', 'auto_ownership_simulate'
    ]

    pipeline.run(models=_MODELS, resume_after=None)

    auto_choice = pipeline.get_table("households").auto_ownership

    # regression test: these are among the first 10 households in households table
    hh_ids = [464138, 1918238, 2201602]
    choices = [0, 1, 2]
    expected_choice = pd.Series(choices,
                                index=pd.Index(hh_ids, name="HHID"),
                                name='auto_ownership')

    print "auto_choice\n", auto_choice.head(10)
    pdt.assert_series_equal(auto_choice[hh_ids], expected_choice)

    pipeline.run_model('cdap_simulate')
    pipeline.run_model('mandatory_tour_frequency')

    mtf_choice = pipeline.get_table("persons").mandatory_tour_frequency

    # these choices are nonsensical as the test mandatory_tour_frequency spec is very truncated
    per_ids = [24375, 92744, 172491]
    choices = ['school2', 'work_and_school', 'work1']
    expected_choice = pd.Series(choices,
                                index=pd.Index(per_ids, name='PERID'),
                                name='mandatory_tour_frequency')

    print "mtf_choice\n", mtf_choice.head(20)
    # mtf_choice
    # PERID
    # 23647                 NaN
    # 24203                 NaN
    # 24375             school2
    # 24687                 NaN
    # 24824                 NaN
    # 24975                 NaN
    # 25027                 NaN
    # 25117                 NaN
    # 25772                 NaN
    # 25871                 NaN
    # 26284                 NaN
    # 26863                 NaN
    # 27059                 NaN
    # 92233                 NaN
    # 92382             school1
    # 92744     work_and_school
    # 92823                 NaN
    # 93172             school2
    # 93774                 NaN
    # 172491              work1
    # Name: mandatory_tour_frequency, dtype: object
    pdt.assert_series_equal(mtf_choice[per_ids], expected_choice)

    # try to get a non-existent table
    with pytest.raises(RuntimeError) as excinfo:
        pipeline.get_table("bogus")
    assert "not in checkpointed tables" in str(excinfo.value)

    # try to get an existing table from a non-existent checkpoint
    with pytest.raises(RuntimeError) as excinfo:
        pipeline.get_table("households", checkpoint_name="bogus")
    assert "not in checkpoints" in str(excinfo.value)

    pipeline.close_pipeline()
    orca.clear_cache()

    close_handlers()
Example #29
from activitysim.core import pipeline, tracing
from activitysim.core.config import handle_standard_args, setting
from activitysim.core.tracing import print_elapsed_time

import extensions

handle_standard_args()

# the default base seed of 0 makes run results reproducible;
# uncomment the line below to use a pseudo-random base seed instead
# pipeline.set_rn_generator_base_seed(seed=None)

tracing.config_logger()

t0 = print_elapsed_time()

MODELS = setting('models')

# If you provide a resume_after argument to pipeline.run
# the pipeline manager will attempt to load checkpointed tables from the checkpoint store
# and resume pipeline processing on the next submodel step after the specified checkpoint
resume_after = setting('resume_after', None)

if resume_after:
    print "resume_after", resume_after

pipeline.run(models=MODELS, resume_after=resume_after)

# tables will no longer be available after pipeline is closed
pipeline.close_pipeline()

t0 = print_elapsed_time("all models", t0)
Example #30
import logging

from census_getter.util import setting

handle_standard_args()

tracing.config_logger()

t0 = print_elapsed_time()

logger = logging.getLogger('census_getter')

# get the run list (name was possibly specified on the command line with the -m option)
run_list_name = inject.get_injectable('run_list_name', 'run_list')

# run list from settings file is dict with list of 'steps' and optional 'resume_after'
run_list = setting(run_list_name)
assert 'steps' in run_list, "Did not find steps in run_list"

# list of steps and possible resume_after in run_list
steps = run_list.get('steps')
resume_after = run_list.get('resume_after', None)

if resume_after:
    print "resume_after", resume_after

pipeline.run(models=steps, resume_after=resume_after)

# tables will no longer be available after pipeline is closed
pipeline.close_pipeline()

t0 = print_elapsed_time("all models", t0)
Example #31
def run(args):
    """
    Run the models. Specify a project folder using the '--working_dir' option,
    or point to the config, data, and output folders directly with
    '--config', '--data', and '--output'. Both '--config' and '--data' can be
    specified multiple times. Directories listed first take precedence.

    Returns:
        int: exit code suitable for sys.exit
    """

    # register abm steps and other abm-specific injectables
    # by default, assume we are running activitysim.abm
    # other callers (e.g. populationsim) will have to arrange to register their own steps and
    # injectables, presumably in a custom run_simulation.py instead of using the 'activitysim run' command
    if not inject.is_injectable('preload_injectables'):
        from activitysim import abm  # register abm steps and other abm-specific injectables

    tracing.config_logger(basic=True)
    handle_standard_args(args)  # possibly update injectables

    # legacy support for run_list setting nested 'models' and 'resume_after' settings
    if config.setting('run_list'):
        warnings.warn(
            "Support for 'run_list' settings group will be removed.\n"
            "The run_list.steps setting is renamed 'models'.\n"
            "The run_list.resume_after setting is renamed 'resume_after'.\n"
            "Specify both 'models' and 'resume_after' directly in settings config file.",
            FutureWarning)
        run_list = config.setting('run_list')
        if 'steps' in run_list:
            assert not config.setting('models'), \
                "Don't expect both 'steps' in run_list and 'models' as a stand-alone setting!"
            config.override_setting('models', run_list['steps'])

        if 'resume_after' in run_list:
            assert not config.setting('resume_after'), \
                "Don't expect 'resume_after' both in run_list and as a stand-alone setting!"
            config.override_setting('resume_after', run_list['resume_after'])

    # If you provide a resume_after argument to pipeline.run
    # the pipeline manager will attempt to load checkpointed tables from the checkpoint store
    # and resume pipeline processing on the next submodel step after the specified checkpoint
    resume_after = config.setting('resume_after', None)

    # cleanup if not resuming
    if not resume_after:
        cleanup_output_files()
    elif config.setting('cleanup_trace_files_on_resume', False):
        tracing.delete_trace_files()

    tracing.config_logger(basic=False)  # update using possibly new logging configs
    config.filter_warnings()
    logging.captureWarnings(capture=True)

    # directories
    for k in ['configs_dir', 'settings_file_name', 'data_dir', 'output_dir']:
        logger.info('SETTING %s: %s' % (k, inject.get_injectable(k, None)))

    log_settings = inject.get_injectable('log_settings', {})
    for k in log_settings:
        logger.info('SETTING %s: %s' % (k, config.setting(k)))

    t0 = tracing.print_elapsed_time()

    if config.setting('multiprocess', False):
        logger.info('run multiprocess simulation')

        from activitysim.core import mp_tasks
        run_list = mp_tasks.get_run_list()
        injectables = {k: inject.get_injectable(k) for k in INJECTABLES}
        mp_tasks.run_multiprocess(run_list, injectables)

        assert not pipeline.is_open()

        if config.setting('cleanup_pipeline_after_run', False):
            pipeline.cleanup_pipeline()

    else:
        logger.info('run single process simulation')

        pipeline.run(models=config.setting('models'),
                     resume_after=resume_after)

        if config.setting('cleanup_pipeline_after_run', False):
            pipeline.cleanup_pipeline()  # has side effect of closing open pipeline
        else:
            pipeline.close_pipeline()

        chunk.log_write_hwm()

    tracing.print_elapsed_time('all models', t0)

    return 0
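Since this variant returns an exit code instead of calling sys.exit itself, a caller would typically wire it up as below. This is a sketch; parse_args() stands in for whatever CLI parser the real entry point uses.

import sys

if __name__ == '__main__':
    sys.exit(run(parse_args()))  # parse_args() is a hypothetical stand-in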