Ejemplo n.º 1
0
def test_mp_run():

    mp_configs_dir = os.path.join(os.path.dirname(__file__), 'configs_mp')
    configs_dir = os.path.join(os.path.dirname(__file__), 'configs')
    inject.add_injectable('configs_dir', [mp_configs_dir, configs_dir])

    output_dir = os.path.join(os.path.dirname(__file__), 'output')
    inject.add_injectable("output_dir", output_dir)

    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    inject.add_injectable("data_dir", data_dir)

    tracing.config_logger()

    run_list = mp_tasks.get_run_list()
    mp_tasks.print_run_list(run_list)

    # do this after config.handle_standard_args, as command line args may override injectables
    injectables = ['data_dir', 'configs_dir', 'output_dir']
    injectables = {k: inject.get_injectable(k) for k in injectables}

    # pipeline.run(models=run_list['models'], resume_after=run_list['resume_after'])

    mp_tasks.run_multiprocess(run_list, injectables)
    pipeline.open_pipeline('_')
    regress_mini_auto()
    pipeline.close_pipeline()
Ejemplo n.º 2
0
def test_mp_run():

    mp_configs_dir = os.path.join(os.path.dirname(__file__), 'configs_mp')
    configs_dir = os.path.join(os.path.dirname(__file__), 'configs')
    inject.add_injectable('configs_dir', [mp_configs_dir, configs_dir])

    output_dir = os.path.join(os.path.dirname(__file__), 'output')
    inject.add_injectable("output_dir", output_dir)

    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    inject.add_injectable("data_dir", data_dir)

    tracing.config_logger()

    run_list = mp_tasks.get_run_list()
    mp_tasks.print_run_list(run_list)

    # do this after config.handle_standard_args, as command line args may override injectables
    injectables = ['data_dir', 'configs_dir', 'output_dir']
    injectables = {k: inject.get_injectable(k) for k in injectables}

    # pipeline.run(models=run_list['models'], resume_after=run_list['resume_after'])

    mp_tasks.run_multiprocess(run_list, injectables)
    pipeline.open_pipeline('_')
    regress_mini_auto()
    pipeline.close_pipeline()
Ejemplo n.º 3
0
def test_config_logger(capsys):

    add_canonical_dirs()

    tracing.config_logger()

    logger = logging.getLogger('popsim')

    file_handlers = [
        h for h in logger.handlers if type(h) is logging.FileHandler
    ]
    assert len(file_handlers) == 1
    asim_logger_baseFilename = file_handlers[0].baseFilename

    print("handlers:", logger.handlers)

    logger.info('test_config_logger')
    logger.info('log_info')
    logger.warning('log_warn1')

    out, err = capsys.readouterr()

    # don't consume output
    print(out)

    assert "could not find conf file" not in out
    assert 'log_warn1' in out
    assert 'log_info' not in out

    with open(asim_logger_baseFilename, 'r') as content_file:
        content = content_file.read()
        print(content)
    assert 'log_warn1' in content
    assert 'log_info' in content
Ejemplo n.º 4
0
def setup_dirs(ancillary_configs_dir=None, data_dir=None):

    # ancillary_configs_dir is used by run_mp to test multiprocess

    test_pipeline_configs_dir = os.path.join(os.path.dirname(__file__),
                                             'configs')
    example_configs_dir = example_path('configs')
    configs_dir = [test_pipeline_configs_dir, example_configs_dir]

    if ancillary_configs_dir is not None:
        configs_dir = [ancillary_configs_dir] + configs_dir

    inject.add_injectable('configs_dir', configs_dir)

    output_dir = os.path.join(os.path.dirname(__file__), 'output')
    inject.add_injectable('output_dir', output_dir)

    if not data_dir:
        data_dir = example_path('data')

    inject.add_injectable('data_dir', data_dir)

    inject.clear_cache()

    tracing.config_logger()

    tracing.delete_output_files('csv')
    tracing.delete_output_files('txt')
    tracing.delete_output_files('yaml')
    tracing.delete_output_files('omx')
Ejemplo n.º 5
0
def test_full_run2():

    configs_dir = os.path.join(os.path.dirname(__file__), 'configs2')
    orca.add_injectable("configs_dir", configs_dir)

    data_dir = os.path.join(os.path.dirname(__file__), 'data2')
    orca.add_injectable("data_dir", data_dir)

    output_dir = os.path.join(os.path.dirname(__file__), 'output')
    orca.add_injectable("output_dir", output_dir)

    orca.clear_cache()

    tracing.config_logger()

    _MODELS = [
        'input_pre_processor', 'setup_data_structures',
        'initial_seed_balancing', 'meta_control_factoring',
        'final_seed_balancing', 'integerize_final_seed_weights',
        'sub_balancing.geography = DISTRICT',
        'sub_balancing.geography = TRACT', 'sub_balancing.geography=TAZ',
        'expand_households', 'summarize', 'write_results'
    ]

    pipeline.run(models=_MODELS, resume_after=None)

    assert isinstance(pipeline.get_table('expanded_household_ids'),
                      pd.DataFrame)

    assert os.path.exists(os.path.join(output_dir, 'summary_DISTRICT.csv'))

    # tables will no longer be available after pipeline is closed
    pipeline.close_pipeline()

    orca.clear_cache()
Ejemplo n.º 6
0
def config_logger():
    """ActivitySim logger
    """
    _tracing.config_logger()
    _logging.captureWarnings(capture=True)
    _warnings.simplefilter("always")

    logger = _logging.getLogger('asimtbm')
    logger.info("Setup logger")
Ejemplo n.º 7
0
def run(args):
    """
    Run bca4abm. Specify a project folder using the '--working_dir' option,
    or point to the config, data, and output folders directly with
    '--config', '--data', and '--output'.

    """

    if args.working_dir and os.path.exists(args.working_dir):
        os.chdir(args.working_dir)

    if args.config:
        inject.add_injectable('configs_dir', args.config)

    if args.data:
        inject.add_injectable('data_dir', args.data)

    if args.output:
        inject.add_injectable('output_dir', args.output)

    for injectable in ['configs_dir', 'data_dir', 'output_dir']:
        try:
            dir_path = inject.get_injectable(injectable)
        except RuntimeError:
            sys.exit('Error: please specify either a --working_dir '
                     "containing 'configs', 'data', and 'output' folders "
                     'or all three of --config, --data, and --output')
        if not os.path.exists(dir_path):
            sys.exit("Could not find %s '%s'" % (injectable, os.path.abspath(dir_path)))

    if args.pipeline:
        inject.add_injectable('pipeline_file_name', args.pipeline)

    if args.resume:
        override_setting('resume_after', args.resume)

    tracing.config_logger()
    tracing.delete_csv_files()  # only modifies output_dir
    warnings.simplefilter('always')
    logging.captureWarnings(capture=True)

    t0 = tracing.print_elapsed_time()

    # If you provide a resume_after argument to pipeline.run
    # the pipeline manager will attempt to load checkpointed tables from the checkpoint store
    # and resume pipeline processing on the next submodel step after the specified checkpoint
    resume_after = setting('resume_after', None)

    if resume_after:
        print('resume_after: %s' % resume_after)

    pipeline.run(models=setting('models'), resume_after=resume_after)

    # tables will no longer be available after pipeline is closed
    pipeline.close_pipeline()

    t0 = tracing.print_elapsed_time('all models', t0)
Ejemplo n.º 8
0
def test_zero_chunk_size():

    settings = inject_settings(chunk_size=0)

    inject.clear_cache()

    tracing.config_logger()

    MODELS = settings['models']

    pipeline.run(models=MODELS, resume_after='aggregate_od_processor')

    pipeline.close_pipeline()
Ejemplo n.º 9
0
def full_run(resume_after=None,
             chunk_size=0,
             households_sample_size=HOUSEHOLDS_SAMPLE_SIZE,
             trace_hh_id=None,
             trace_od=None,
             check_for_variability=None):

    configs_dir = os.path.join(os.path.dirname(__file__), '..', '..', '..',
                               'example', 'configs')
    orca.add_injectable("configs_dir", configs_dir)

    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    orca.add_injectable("data_dir", data_dir)

    output_dir = os.path.join(os.path.dirname(__file__), 'output')
    orca.add_injectable("output_dir", output_dir)

    inject_settings(configs_dir,
                    households_sample_size=households_sample_size,
                    chunk_size=chunk_size,
                    trace_hh_id=trace_hh_id,
                    trace_od=trace_od,
                    check_for_variability=check_for_variability)

    orca.clear_cache()

    tracing.config_logger()

    # assert orca.get_injectable("chunk_size") == chunk_size

    _MODELS = [
        'compute_accessibility', 'school_location_sample',
        'school_location_logsums', 'school_location_simulate',
        'workplace_location_sample', 'workplace_location_logsums',
        'workplace_location_simulate', 'auto_ownership_simulate',
        'cdap_simulate', 'mandatory_tour_frequency', 'mandatory_scheduling',
        'non_mandatory_tour_frequency', 'destination_choice',
        'non_mandatory_scheduling', 'tour_mode_choice_simulate',
        'create_simple_trips', 'trip_mode_choice_simulate'
    ]

    pipeline.run(models=_MODELS, resume_after=resume_after)

    tours = pipeline.get_table('tours')
    tour_count = len(tours.index)

    pipeline.close()

    orca.clear_cache()

    return tour_count
Ejemplo n.º 10
0
def run(args):
    """
    Run the models. Specify a project folder using the '--working_dir' option,
    or point to the config, data, and output folders directly with
    '--config', '--data', and '--output'. Both '--config' and '--data' can be
    specified multiple times. Directories listed first take precedence.

    """

    from activitysim import abm  # register injectables

    tracing.config_logger(basic=True)
    handle_standard_args(args)  # possibly update injectables
    tracing.config_logger(
        basic=False)  # update using possibly new logging configs
    config.filter_warnings()
    logging.captureWarnings(capture=True)

    log_settings()

    t0 = tracing.print_elapsed_time()

    # If you provide a resume_after argument to pipeline.run
    # the pipeline manager will attempt to load checkpointed tables from the checkpoint store
    # and resume pipeline processing on the next submodel step after the specified checkpoint
    resume_after = config.setting('resume_after', None)

    # cleanup if not resuming
    if not resume_after:
        cleanup_output_files()
    elif config.setting('cleanup_trace_files_on_resume', False):
        tracing.delete_trace_files()

    if config.setting('multiprocess', False):
        logger.info('run multiprocess simulation')

        from activitysim.core import mp_tasks
        run_list = mp_tasks.get_run_list()
        injectables = {k: inject.get_injectable(k) for k in INJECTABLES}
        mp_tasks.run_multiprocess(run_list, injectables)
    else:
        logger.info('run single process simulation')

        pipeline.run(models=config.setting('models'),
                     resume_after=resume_after)
        pipeline.close_pipeline()
        chunk.log_write_hwm()

    tracing.print_elapsed_time('all models', t0)
Ejemplo n.º 11
0
def run_abm(models,
            resume_after=None,
            chunk_size=None,
            trace_hh_id=None,
            trace_od=None):

    settings = inject_settings(chunk_size=chunk_size,
                               trace_hh_id=trace_hh_id,
                               trace_od=trace_od)

    inject.clear_cache()

    tracing.config_logger()

    pipeline.run(models=models, resume_after=resume_after)
Ejemplo n.º 12
0
def test_run_4step():

    settings = inject_settings(chunk_size=None,
                               trace_hh_id=None,
                               trace_od=None)

    inject.clear_cache()

    tracing.config_logger()

    MODELS = settings['models']

    pipeline.run(models=MODELS, resume_after=None)

    pipeline.close_pipeline()
Ejemplo n.º 13
0
def test_full_run1():

    configs_dir = os.path.join(os.path.dirname(__file__), 'configs')
    orca.add_injectable("configs_dir", configs_dir)

    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    orca.add_injectable("data_dir", data_dir)

    output_dir = os.path.join(os.path.dirname(__file__), 'output')
    orca.add_injectable("output_dir", output_dir)

    orca.clear_cache()

    tracing.config_logger()

    _MODELS = [
        'input_pre_processor',
        'setup_data_structures',
        'initial_seed_balancing',
        'meta_control_factoring',
        'final_seed_balancing',
        'integerize_final_seed_weights',
        'sub_balancing.geography = TRACT',
        'sub_balancing.geography=TAZ',
        'expand_households',
        'summarize',
        'write_tables',
        'write_synthetic_population',
    ]

    pipeline.run(models=_MODELS, resume_after=None)

    expanded_household_ids = pipeline.get_table('expanded_household_ids')
    assert isinstance(expanded_household_ids, pd.DataFrame)
    taz_hh_counts = expanded_household_ids.groupby('TAZ').size()
    assert len(taz_hh_counts) == TAZ_COUNT
    assert taz_hh_counts.loc[100] == TAZ_100_HH_COUNT

    # output_tables action: skip
    assert not os.path.exists(os.path.join(output_dir, 'households.csv'))
    assert os.path.exists(os.path.join(output_dir, 'summary_DISTRICT_1.csv'))

    # tables will no longer be available after pipeline is closed
    pipeline.close_pipeline()

    orca.clear_cache()
Ejemplo n.º 14
0
def setup():

    orca.orca._INJECTABLES.pop('skim_dict', None)
    orca.orca._INJECTABLES.pop('skim_stack', None)

    configs_dir = os.path.join(os.path.dirname(__file__), 'configs')
    orca.add_injectable("configs_dir", configs_dir)

    output_dir = os.path.join(os.path.dirname(__file__), 'output')
    orca.add_injectable("output_dir", output_dir)

    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    orca.add_injectable("data_dir", data_dir)

    orca.clear_cache()

    tracing.config_logger()
Ejemplo n.º 15
0
def setup_dirs():

    configs_dir = os.path.join(os.path.dirname(__file__), "configs")
    mp_configs_dir = os.path.join(os.path.dirname(__file__), "configs_mp")
    inject.add_injectable("configs_dir", [mp_configs_dir, configs_dir])

    output_dir = os.path.join(os.path.dirname(__file__), "output")
    inject.add_injectable("output_dir", output_dir)

    data_dir = os.path.join(os.path.dirname(__file__), "data")
    inject.add_injectable("data_dir", data_dir)

    tracing.config_logger()

    tracing.delete_output_files("csv")
    tracing.delete_output_files("txt")
    tracing.delete_output_files("yaml")
Ejemplo n.º 16
0
def setup_dirs(configs_dir):

    inject.add_injectable("configs_dir", configs_dir)

    output_dir = os.path.join(os.path.dirname(__file__), 'output')
    inject.add_injectable("output_dir", output_dir)

    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    inject.add_injectable("data_dir", data_dir)

    inject.clear_cache()

    tracing.config_logger()

    tracing.delete_output_files('csv')
    tracing.delete_output_files('txt')
    tracing.delete_output_files('yaml')
Ejemplo n.º 17
0
def test_pipeline_run():

    orca.orca._INJECTABLES.pop('skim_dict', None)
    orca.orca._INJECTABLES.pop('skim_stack', None)

    configs_dir = os.path.join(os.path.dirname(__file__), 'configs')
    orca.add_injectable("configs_dir", configs_dir)

    output_dir = os.path.join(os.path.dirname(__file__), 'output')
    orca.add_injectable("output_dir", output_dir)

    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    orca.add_injectable("data_dir", data_dir)

    orca.clear_cache()

    tracing.config_logger()

    _MODELS = [
        'step1',
    ]

    pipeline.run(models=_MODELS, resume_after=None)

    table1 = pipeline.get_table("table1").column1

    # test that model arg is passed to step
    pipeline.run_model('step2.table_name=table2')

    table2 = pipeline.get_table("table2").column1

    # try to get a non-existant table
    with pytest.raises(RuntimeError) as excinfo:
        pipeline.get_table("bogus")
    assert "not in checkpointed tables" in str(excinfo.value)

    # try to get an existing table from a non-existant checkpoint
    with pytest.raises(RuntimeError) as excinfo:
        pipeline.get_table("table1", checkpoint_name="bogus")
    assert "not in checkpoints" in str(excinfo.value)

    pipeline.close_pipeline()
    orca.clear_cache()

    close_handlers()
Ejemplo n.º 18
0
def setup_dirs(configs_dir, data_dir):

    print(f"configs_dir: {configs_dir}")
    inject.add_injectable('configs_dir', configs_dir)

    output_dir = os.path.join(os.path.dirname(__file__), 'output')
    inject.add_injectable('output_dir', output_dir)

    print(f"data_dir: {data_dir}")
    inject.add_injectable('data_dir', data_dir)

    inject.clear_cache()

    tracing.config_logger()

    tracing.delete_output_files('csv')
    tracing.delete_output_files('txt')
    tracing.delete_output_files('yaml')
    tracing.delete_output_files('omx')
Ejemplo n.º 19
0
def setup_function():

    inject.reinject_decorated_tables()

    inject.remove_injectable('skim_dict')
    inject.remove_injectable('skim_stack')

    configs_dir = os.path.join(os.path.dirname(__file__), 'configs')
    inject.add_injectable("configs_dir", configs_dir)

    output_dir = os.path.join(os.path.dirname(__file__), 'output')
    inject.add_injectable("output_dir", output_dir)

    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    inject.add_injectable("data_dir", data_dir)

    inject.clear_cache()

    tracing.config_logger()
Ejemplo n.º 20
0
def test_full_run1():

    configs_dir = os.path.join(os.path.dirname(__file__), 'configs')
    orca.add_injectable("configs_dir", configs_dir)

    # data_dir = os.path.join(os.path.dirname(__file__), 'data')
    data_dir = os.path.join(os.path.dirname(__file__),
                            '..', '..', 'example', 'data')
    orca.add_injectable("data_dir", data_dir)

    # scenarios_dir = os.path.join(os.path.dirname(__file__), 'scenarios')
    scenarios_dir = os.path.join(os.path.dirname(__file__),
                                 '..', '..', 'example', 'scenarios')
    orca.add_injectable("scenarios_dir", scenarios_dir)

    output_dir = os.path.join(os.path.dirname(__file__), 'output')
    orca.add_injectable("output_dir", output_dir)

    orca.clear_cache()

    tracing.config_logger()

    # run list from settings file is dict with list of 'steps' and optional 'resume_after'
    run_list = setting('run_list')
    assert 'steps' in run_list, "Did not find steps in run_list"

    # list of steps and possible resume_after in run_list
    steps = run_list.get('steps')

    pipeline.run(models=steps, resume_after=None)

    # geo_crosswalk = pipeline.get_table('geo_crosswalk')
    # assert geo_crosswalk.index.name == 'TAZ'
    # assert 'FAF4' in geo_crosswalk.columns
    # assert 'FIPS' in geo_crosswalk.columns
    #
    assert os.path.exists(os.path.join(output_dir, 'naics_set.csv'))

    # tables will no longer be available after pipeline is closed
    pipeline.close_pipeline()

    orca.clear_cache()
Ejemplo n.º 21
0
def full_run(resume_after=None,
             chunk_size=0,
             households_sample_size=HOUSEHOLDS_SAMPLE_SIZE,
             trace_hh_id=None,
             trace_od=None,
             check_for_variability=None):

    configs_dir = os.path.join(os.path.dirname(__file__), '..', '..', '..',
                               'example', 'configs')
    orca.add_injectable("configs_dir", configs_dir)

    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    orca.add_injectable("data_dir", data_dir)

    output_dir = os.path.join(os.path.dirname(__file__), 'output')
    orca.add_injectable("output_dir", output_dir)

    settings = inject_settings(configs_dir,
                               households_sample_size=households_sample_size,
                               chunk_size=chunk_size,
                               trace_hh_id=trace_hh_id,
                               trace_od=trace_od,
                               check_for_variability=check_for_variability)

    orca.clear_cache()

    tracing.config_logger()

    MODELS = settings['models']

    pipeline.run(models=MODELS, resume_after=resume_after)

    tours = pipeline.get_table('tours')
    tour_count = len(tours.index)

    pipeline.close_pipeline()

    orca.clear_cache()

    return tour_count
Ejemplo n.º 22
0
def test_weighting():

    configs_dir = os.path.join(os.path.dirname(__file__), '..', '..',
                               'example_survey_weighting', 'configs')
    inject.add_injectable("configs_dir", configs_dir)

    data_dir = os.path.join(os.path.dirname(__file__), '..', '..',
                            'example_survey_weighting', 'data')
    inject.add_injectable("data_dir", data_dir)

    output_dir = os.path.join(os.path.dirname(__file__), 'output')
    inject.add_injectable("output_dir", output_dir)

    inject.clear_cache()

    tracing.config_logger()

    _MODELS = [
        'input_pre_processor', 'setup_data_structures',
        'initial_seed_balancing', 'meta_control_factoring',
        'final_seed_balancing', 'summarize', 'write_tables'
    ]

    pipeline.run(models=_MODELS, resume_after=None)

    summary_hh_weights = pipeline.get_table('summary_hh_weights')
    total_summary_hh_weights = summary_hh_weights[
        'SUBREGCluster_balanced_weight'].sum()

    seed_households = pd.read_csv(os.path.join(data_dir,
                                               'seed_households.csv'))
    total_seed_households_weights = seed_households['HHweight'].sum()

    assert abs(total_summary_hh_weights - total_seed_households_weights) < 1

    # tables will no longer be available after pipeline is closed
    pipeline.close_pipeline()

    inject.clear_cache()
Ejemplo n.º 23
0
def test_full_run2_repop_replace():

    configs_dir = os.path.join(os.path.dirname(__file__), 'configs')
    orca.add_injectable("configs_dir", configs_dir)

    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    orca.add_injectable("data_dir", data_dir)

    output_dir = os.path.join(os.path.dirname(__file__), 'output')
    orca.add_injectable("output_dir", output_dir)

    orca.clear_cache()

    tracing.config_logger()

    _MODELS = [
        'input_pre_processor.table_list=repop_input_table_list',
        'repop_setup_data_structures',
        'initial_seed_balancing.final=true',
        'integerize_final_seed_weights.repop',
        'repop_balancing',
        'expand_households.repop;replace',
        'write_synthetic_population.repop',
        'write_tables.repop',
    ]

    pipeline.run(models=_MODELS, resume_after='summarize')

    expanded_household_ids = pipeline.get_table('expanded_household_ids')
    assert isinstance(expanded_household_ids, pd.DataFrame)
    taz_hh_counts = expanded_household_ids.groupby('TAZ').size()
    assert len(taz_hh_counts) == TAZ_COUNT
    assert taz_hh_counts.loc[100] == TAZ_100_HH_REPOP_COUNT

    # tables will no longer be available after pipeline is closed
    pipeline.close_pipeline()

    orca.clear_cache()
Ejemplo n.º 24
0
def run():
    config.handle_standard_args()

    # specify None for a pseudo random base seed
    # inject.add_injectable('rng_base_seed', 0)

    tracing.config_logger()
    config.filter_warnings()

    tracing.delete_csv_files()

    # If you provide a resume_after argument to pipeline.run
    # the pipeline manager will attempt to load checkpointed tables from the checkpoint store
    # and resume pipeline processing on the next submodel step after the specified checkpoint
    resume_after = setting('resume_after', None)

    if resume_after:
        print("resume_after", resume_after)

    pipeline.run(models=setting('models'), resume_after=resume_after)

    # tables will no longer be available after pipeline is closed
    pipeline.close_pipeline()
Ejemplo n.º 25
0
def run(args):
    """
    Run the models. Specify a project folder using the '--working_dir' option,
    or point to the config, data, and output folders directly with
    '--config', '--data', and '--output'. Both '--config' and '--data' can be
    specified multiple times. Directories listed first take precedence.

    returns:
        int: sys.exit exit code
    """

    # register abm steps and other abm-specific injectables
    # by default, assume we are running activitysim.abm
    # other callers (e.g. populationsim) will have to arrange to register their own steps and injectables
    # (presumably) in a custom run_simulation.py instead of using the 'activitysim run' command
    if not inject.is_injectable('preload_injectables'):
        from activitysim import abm  # register abm steps and other abm-specific injectables

    tracing.config_logger(basic=True)
    handle_standard_args(args)  # possibly update injectables

    # legacy support for run_list setting nested 'models' and 'resume_after' settings
    if config.setting('run_list'):
        warnings.warn(
            "Support for 'run_list' settings group will be removed.\n"
            "The run_list.steps setting is renamed 'models'.\n"
            "The run_list.resume_after setting is renamed 'resume_after'.\n"
            "Specify both 'models' and 'resume_after' directly in settings config file.",
            FutureWarning)
        run_list = config.setting('run_list')
        if 'steps' in run_list:
            assert not config.setting('models'), \
                f"Don't expect 'steps' in run_list and 'models' as stand-alone setting!"
            config.override_setting('models', run_list['steps'])

        if 'resume_after' in run_list:
            assert not config.setting('resume_after'), \
                f"Don't expect 'resume_after' both in run_list and as stand-alone setting!"
            config.override_setting('resume_after', run_list['resume_after'])

    # If you provide a resume_after argument to pipeline.run
    # the pipeline manager will attempt to load checkpointed tables from the checkpoint store
    # and resume pipeline processing on the next submodel step after the specified checkpoint
    resume_after = config.setting('resume_after', None)

    # cleanup if not resuming
    if not resume_after:
        cleanup_output_files()
    elif config.setting('cleanup_trace_files_on_resume', False):
        tracing.delete_trace_files()

    tracing.config_logger(
        basic=False)  # update using possibly new logging configs
    config.filter_warnings()
    logging.captureWarnings(capture=True)

    # directories
    for k in ['configs_dir', 'settings_file_name', 'data_dir', 'output_dir']:
        logger.info('SETTING %s: %s' % (k, inject.get_injectable(k, None)))

    log_settings = inject.get_injectable('log_settings', {})
    for k in log_settings:
        logger.info('SETTING %s: %s' % (k, config.setting(k)))

    t0 = tracing.print_elapsed_time()

    if config.setting('multiprocess', False):
        logger.info('run multiprocess simulation')

        from activitysim.core import mp_tasks
        run_list = mp_tasks.get_run_list()
        injectables = {k: inject.get_injectable(k) for k in INJECTABLES}
        mp_tasks.run_multiprocess(run_list, injectables)

        assert not pipeline.is_open()

        if config.setting('cleanup_pipeline_after_run', False):
            pipeline.cleanup_pipeline()

    else:
        logger.info('run single process simulation')

        pipeline.run(models=config.setting('models'),
                     resume_after=resume_after)

        if config.setting('cleanup_pipeline_after_run', False):
            pipeline.cleanup_pipeline(
            )  # has side effect of closing open pipeline
        else:
            pipeline.close_pipeline()

        chunk.log_write_hwm()

    tracing.print_elapsed_time('all models', t0)

    return 0
Ejemplo n.º 26
0
import os

os.getcwd()

os.chdir('rFirm')

from activitysim.core import inject_defaults

from activitysim.core import tracing
from activitysim.core import pipeline
from activitysim.core import inject
from activitysim.core.config import setting
import rFirm

tracing.config_logger()

pipeline.open_pipeline('_')

# pipeline.preload_injectables()


df = pipeline.get_table("table1").to_frame()


import pandas as pd

file_path = "/Users/jeff.doyle/work/rFirm/example/regression_data/results/firm_sim_types/outputs/Firms.csv"
rfirms = pd.read_csv(file_path, comment='#'). \
    rename(columns={'BusID': 'bus_id', 'TAZ1': 'TAZ', 'Model_EmpCat': 'model_emp_cat'}). \
    set_index('bus_id')
Ejemplo n.º 27
0

import pandas as pd
import numpy as np
import os
import time
import extensions

# you will want to configure this with the locations of the canonical datasets
DATA_REPO = "C:/projects/sandag-asim/toRSG/output/"
DATA_REPO = "E:/activitysim/project/output/"
DATA_REPO = "/Users/jeff.doyle/work/activitysim-data/sandag_zone/output/"

COMPARE_RESULTS = False

tracing.config_logger()
logger = logging.getLogger('activitysim')


@inject.injectable(override=True)
def output_dir():
    if not os.path.exists('output'):
        os.makedirs('output')  # make directory if needed
    return 'output'


@inject.injectable(override=True)
def data_dir():
    return os.path.join(DATA_REPO)

Ejemplo n.º 28
0
def test_mini_pipeline_run():

    configs_dir = os.path.join(os.path.dirname(__file__), 'configs')
    orca.add_injectable("configs_dir", configs_dir)

    output_dir = os.path.join(os.path.dirname(__file__), 'output')
    orca.add_injectable("output_dir", output_dir)

    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    orca.add_injectable("data_dir", data_dir)

    inject_settings(configs_dir, households_sample_size=HOUSEHOLDS_SAMPLE_SIZE)

    orca.clear_cache()

    tracing.config_logger()

    # assert len(orca.get_table("households").index) == HOUSEHOLDS_SAMPLE_SIZE

    _MODELS = [
        'initialize', 'compute_accessibility', 'school_location_sample',
        'school_location_logsums', 'school_location_simulate',
        'workplace_location_sample', 'workplace_location_logsums',
        'workplace_location_simulate', 'auto_ownership_simulate'
    ]

    pipeline.run(models=_MODELS, resume_after=None)

    auto_choice = pipeline.get_table("households").auto_ownership

    # regression test: these are among the first 10 households in households table
    hh_ids = [464138, 1918238, 2201602]
    choices = [0, 1, 2]
    expected_choice = pd.Series(choices,
                                index=pd.Index(hh_ids, name="HHID"),
                                name='auto_ownership')

    print "auto_choice\n", auto_choice.head(10)
    pdt.assert_series_equal(auto_choice[hh_ids], expected_choice)

    pipeline.run_model('cdap_simulate')
    pipeline.run_model('mandatory_tour_frequency')

    mtf_choice = pipeline.get_table("persons").mandatory_tour_frequency

    # these choices are nonsensical as the test mandatory_tour_frequency spec is very truncated
    per_ids = [24375, 92744, 172491]
    choices = ['school2', 'work_and_school', 'work1']
    expected_choice = pd.Series(choices,
                                index=pd.Index(per_ids, name='PERID'),
                                name='mandatory_tour_frequency')

    print "mtf_choice\n", mtf_choice.head(20)
    # mtf_choice
    # PERID
    # 23647                 NaN
    # 24203                 NaN
    # 24375             school2
    # 24687                 NaN
    # 24824                 NaN
    # 24975                 NaN
    # 25027                 NaN
    # 25117                 NaN
    # 25772                 NaN
    # 25871                 NaN
    # 26284                 NaN
    # 26863                 NaN
    # 27059                 NaN
    # 92233                 NaN
    # 92382             school1
    # 92744     work_and_school
    # 92823                 NaN
    # 93172             school2
    # 93774                 NaN
    # 172491              work1
    # Name: mandatory_tour_frequency, dtype: object
    pdt.assert_series_equal(mtf_choice[per_ids], expected_choice)

    # try to get a non-existant table
    with pytest.raises(RuntimeError) as excinfo:
        pipeline.get_table("bogus")
    assert "not in checkpointed tables" in str(excinfo.value)

    # try to get an existing table from a non-existant checkpoint
    with pytest.raises(RuntimeError) as excinfo:
        pipeline.get_table("households", checkpoint_name="bogus")
    assert "not in checkpoints" in str(excinfo.value)

    pipeline.close_pipeline()
    orca.clear_cache()

    close_handlers()
Ejemplo n.º 29
0
def run(args):
    """
    Run the models. Specify a project folder using the '--working_dir' option,
    or point to the config, data, and output folders directly with
    '--config', '--data', and '--output'. Both '--config' and '--data' can be
    specified multiple times. Directories listed first take precedence.

    returns:
        int: sys.exit exit code
    """

    # register abm steps and other abm-specific injectables
    # by default, assume we are running activitysim.abm
    # other callers (e.g. populationsim) will have to arrange to register their own steps and injectables
    # (presumably) in a custom run_simulation.py instead of using the 'activitysim run' command
    if not inject.is_injectable('preload_injectables'):
        from activitysim import abm  # register abm steps and other abm-specific injectables

    tracing.config_logger(basic=True)
    handle_standard_args(args)  # possibly update injectables

    # legacy support for run_list setting nested 'models' and 'resume_after' settings
    if config.setting('run_list'):
        warnings.warn(
            "Support for 'run_list' settings group will be removed.\n"
            "The run_list.steps setting is renamed 'models'.\n"
            "The run_list.resume_after setting is renamed 'resume_after'.\n"
            "Specify both 'models' and 'resume_after' directly in settings config file.",
            FutureWarning)
        run_list = config.setting('run_list')
        if 'steps' in run_list:
            assert not config.setting('models'), \
                f"Don't expect 'steps' in run_list and 'models' as stand-alone setting!"
            config.override_setting('models', run_list['steps'])

        if 'resume_after' in run_list:
            assert not config.setting('resume_after'), \
                f"Don't expect 'resume_after' both in run_list and as stand-alone setting!"
            config.override_setting('resume_after', run_list['resume_after'])

    # If you provide a resume_after argument to pipeline.run
    # the pipeline manager will attempt to load checkpointed tables from the checkpoint store
    # and resume pipeline processing on the next submodel step after the specified checkpoint
    resume_after = config.setting('resume_after', None)

    # cleanup if not resuming
    if not resume_after:
        cleanup_output_files()
    elif config.setting('cleanup_trace_files_on_resume', False):
        tracing.delete_trace_files()

    tracing.config_logger(
        basic=False)  # update using possibly new logging configs
    config.filter_warnings()
    logging.captureWarnings(capture=True)

    # directories
    for k in ['configs_dir', 'settings_file_name', 'data_dir', 'output_dir']:
        logger.info('SETTING %s: %s' % (k, inject.get_injectable(k, None)))

    log_settings = inject.get_injectable('log_settings', {})
    for k in log_settings:
        logger.info('SETTING %s: %s' % (k, config.setting(k)))

    # OMP_NUM_THREADS: openmp
    # OPENBLAS_NUM_THREADS: openblas
    # MKL_NUM_THREADS: mkl
    for env in ['MKL_NUM_THREADS', 'OMP_NUM_THREADS', 'OPENBLAS_NUM_THREADS']:
        logger.info(f"ENV {env}: {os.getenv(env)}")

    np_info_keys = [
        'atlas_blas_info', 'atlas_blas_threads_info', 'atlas_info',
        'atlas_threads_info', 'blas_info', 'blas_mkl_info', 'blas_opt_info',
        'lapack_info', 'lapack_mkl_info', 'lapack_opt_info', 'mkl_info'
    ]

    for cfg_key in np_info_keys:
        info = np.__config__.get_info(cfg_key)
        if info:
            for info_key in ['libraries']:
                if info_key in info:
                    logger.info(
                        f"NUMPY {cfg_key} {info_key}: {info[info_key]}")

    t0 = tracing.print_elapsed_time()

    try:
        if config.setting('multiprocess', False):
            logger.info('run multiprocess simulation')

            from activitysim.core import mp_tasks
            injectables = {k: inject.get_injectable(k) for k in INJECTABLES}
            mp_tasks.run_multiprocess(injectables)

            assert not pipeline.is_open()

            if config.setting('cleanup_pipeline_after_run', False):
                pipeline.cleanup_pipeline()

        else:
            logger.info('run single process simulation')

            pipeline.run(models=config.setting('models'),
                         resume_after=resume_after)

            if config.setting('cleanup_pipeline_after_run', False):
                pipeline.cleanup_pipeline(
                )  # has side effect of closing open pipeline
            else:
                pipeline.close_pipeline()

            mem.log_global_hwm()  # main process
    except Exception:
        # log time until error and the error traceback
        tracing.print_elapsed_time('all models until this error', t0)
        logger.exception('activitysim run encountered an unrecoverable error')
        raise

    chunk.consolidate_logs()
    mem.consolidate_logs()

    tracing.print_elapsed_time('all models', t0)

    return 0