Example 1
def run(args):
    """
    Run bca4abm. Specify a project folder using the '--working_dir' option,
    or point to the config, data, and output folders directly with
    '--config', '--data', and '--output'.

    """

    if args.working_dir and os.path.exists(args.working_dir):
        os.chdir(args.working_dir)

    if args.config:
        inject.add_injectable('configs_dir', args.config)

    if args.data:
        inject.add_injectable('data_dir', args.data)

    if args.output:
        inject.add_injectable('output_dir', args.output)

    for injectable in ['configs_dir', 'data_dir', 'output_dir']:
        try:
            dir_path = inject.get_injectable(injectable)
        except RuntimeError:
            sys.exit('Error: please specify either a --working_dir '
                     "containing 'configs', 'data', and 'output' folders "
                     'or all three of --config, --data, and --output')
        if not os.path.exists(dir_path):
            sys.exit("Could not find %s '%s'" % (injectable, os.path.abspath(dir_path)))

    if args.pipeline:
        inject.add_injectable('pipeline_file_name', args.pipeline)

    if args.resume:
        override_setting('resume_after', args.resume)

    tracing.config_logger()
    tracing.delete_csv_files()  # only modifies output_dir
    warnings.simplefilter('always')
    logging.captureWarnings(capture=True)

    t0 = tracing.print_elapsed_time()

    # If you provide a resume_after argument to pipeline.run
    # the pipeline manager will attempt to load checkpointed tables from the checkpoint store
    # and resume pipeline processing on the next submodel step after the specified checkpoint
    resume_after = setting('resume_after', None)

    if resume_after:
        print('resume_after: %s' % resume_after)

    pipeline.run(models=setting('models'), resume_after=resume_after)

    # tables will no longer be available after pipeline is closed
    pipeline.close_pipeline()

    t0 = tracing.print_elapsed_time('all models', t0)
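
For reference, run() above only touches a handful of attributes on args (working_dir, config, data, output, pipeline, resume). A minimal, hypothetical argparse setup that would produce a compatible args object looks like this; the flag names come from the code and docstring, while the parser itself is only an illustration, not the project's actual CLI wiring:

import argparse

def make_arg_parser():
    # illustrative only: builds an args namespace with the attributes run(args) reads
    parser = argparse.ArgumentParser(description='run bca4abm')
    parser.add_argument('--working_dir', help="project folder containing 'configs', 'data' and 'output'")
    parser.add_argument('--config', help='configs directory')
    parser.add_argument('--data', help='data directory')
    parser.add_argument('--output', help='output directory')
    parser.add_argument('--pipeline', help='pipeline file name')
    parser.add_argument('--resume', help='resume after this checkpoint')
    return parser

# e.g. run(make_arg_parser().parse_args(['--working_dir', 'my_project']))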
Example 2
def preload_injectables():
    """
    preload bulky injectables up front - stuff that isn't inserted into the pipeline
    """

    logger.info("preload_injectables")

    t0 = tracing.print_elapsed_time()

    if inject.get_injectable('skim_dict', None) is not None:
        t0 = tracing.print_elapsed_time("preload skim_dict", t0, debug=True)

    if inject.get_injectable('skim_stack', None) is not None:
        t0 = tracing.print_elapsed_time("preload skim_stack", t0, debug=True)
Example 3
def run(args):
    """
    Run the models. Specify a project folder using the '--working_dir' option,
    or point to the config, data, and output folders directly with
    '--config', '--data', and '--output'. Both '--config' and '--data' can be
    specified multiple times. Directories listed first take precedence.

    """

    from activitysim import abm  # register injectables

    tracing.config_logger(basic=True)
    handle_standard_args(args)  # possibly update injectables
    tracing.config_logger(
        basic=False)  # update using possibly new logging configs
    config.filter_warnings()
    logging.captureWarnings(capture=True)

    log_settings()

    t0 = tracing.print_elapsed_time()

    # If you provide a resume_after argument to pipeline.run
    # the pipeline manager will attempt to load checkpointed tables from the checkpoint store
    # and resume pipeline processing on the next submodel step after the specified checkpoint
    resume_after = config.setting('resume_after', None)

    # cleanup if not resuming
    if not resume_after:
        cleanup_output_files()
    elif config.setting('cleanup_trace_files_on_resume', False):
        tracing.delete_trace_files()

    if config.setting('multiprocess', False):
        logger.info('run multiprocess simulation')

        from activitysim.core import mp_tasks
        run_list = mp_tasks.get_run_list()
        injectables = {k: inject.get_injectable(k) for k in INJECTABLES}
        mp_tasks.run_multiprocess(run_list, injectables)
    else:
        logger.info('run single process simulation')

        pipeline.run(models=config.setting('models'),
                     resume_after=resume_after)
        pipeline.close_pipeline()
        chunk.log_write_hwm()

    tracing.print_elapsed_time('all models', t0)
Example 4
def initialize_households():

    trace_label = 'initialize_households'

    model_settings = config.read_model_settings('initialize_households.yaml', mandatory=True)
    annotate_tables(model_settings, trace_label)

    # - initialize shadow_pricing size tables after annotating household and person tables
    # since these are scaled to model size, they have to be created while single-process
    shadow_pricing.add_size_tables()

    # - preload person_windows
    t0 = tracing.print_elapsed_time()
    inject.get_table('person_windows').to_frame()
    t0 = tracing.print_elapsed_time("preload person_windows", t0, debug=True)
Example 5
def annotate_tables(model_settings, trace_label):

    annotate_tables = model_settings.get('annotate_tables', [])

    if not annotate_tables:
        logger.warning("annotate_tables setting is empty - nothing to do!")

    t0 = tracing.print_elapsed_time()

    for table_info in annotate_tables:

        tablename = table_info['tablename']
        df = inject.get_table(tablename).to_frame()

        # - rename columns
        column_map = table_info.get('column_map', None)
        if column_map:
            logger.info("renaming %s columns %s" % (tablename, column_map,))
            df.rename(columns=column_map, inplace=True)

        # - annotate
        annotate = table_info.get('annotate', None)
        if annotate:
            logger.info("annotated %s SPEC %s" % (tablename, annotate['SPEC'],))
            expressions.assign_columns(
                df=df,
                model_settings=annotate,
                trace_label=trace_label)

        # fixme - narrow?

        # - write table to pipeline
        pipeline.replace_table(tablename, df)
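
The model_settings passed to annotate_tables() is typically read from a model settings file; only the keys actually referenced above ('annotate_tables', 'tablename', 'column_map', 'annotate', and 'SPEC' inside 'annotate') matter here. A hypothetical example of the expected shape, written as a plain Python dict with placeholder table and column names:

# hypothetical structure only - keys are the ones annotate_tables() reads,
# the table/column/spec names are placeholders
model_settings = {
    'annotate_tables': [
        {
            'tablename': 'persons',            # pipeline table to load and rewrite
            'column_map': {'AGE': 'age'},      # optional column renames (placeholder)
            'annotate': {
                'SPEC': 'annotate_persons',    # expression spec passed to assign_columns
            },
        },
    ],
}

# annotate_tables(model_settings, trace_label='initialize_households')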
Example 6
def preload_injectables():
    """
    called after pipeline is
    """

    # could simply list injectables as arguments, but this way we can report timing...

    logger.info("preload_injectables")

    t0 = tracing.print_elapsed_time()

    if inject.get_injectable('skim_dict', None) is not None:
        t0 = tracing.print_elapsed_time("preload skim_dict")

    if inject.get_injectable('skim_stack', None) is not None:
        t0 = tracing.print_elapsed_time("preload skim_stack")
Example 7
def initialize_households():

    trace_label = 'initialize_households'

    model_settings = config.read_model_settings('initialize_households.yaml',
                                                mandatory=True)
    annotate_tables(model_settings, trace_label)

    # - initialize shadow_pricing size tables after annotating household and person tables
    # since these are scaled to model size, they have to be created while single-process
    shadow_pricing.add_size_tables()

    # - preload person_windows
    t0 = tracing.print_elapsed_time()
    inject.get_table('person_windows').to_frame()
    t0 = tracing.print_elapsed_time("preload person_windows", t0, debug=True)
Example 8
def preload_injectables():
    """
    preload bulky injectables up front - stuff that isn't inserted into the pipeline
    """

    logger.info("preload_injectables")

    inject.add_step('track_skim_usage', track_skim_usage)
    inject.add_step('write_data_dictionary', write_data_dictionary)
    inject.add_step('write_tables', write_tables)

    table_list = config.setting('input_table_list')

    # default ActivitySim table names and indices
    if table_list is None:
        logger.warn("No 'input_table_list' found in settings. This will be a "
                    "required setting in upcoming versions of ActivitySim.")

        new_settings = inject.get_injectable('settings')
        new_settings['input_table_list'] = DEFAULT_TABLE_LIST
        inject.add_injectable('settings', new_settings)

    t0 = tracing.print_elapsed_time()

    # FIXME - still want to do this?
    # if inject.get_injectable('skim_dict', None) is not None:
    #     t0 = tracing.print_elapsed_time("preload skim_dict", t0, debug=True)
    #
    # if inject.get_injectable('skim_stack', None) is not None:
    #     t0 = tracing.print_elapsed_time("preload skim_stack", t0, debug=True)

    return True
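
For orientation, input_table_list is a list of per-table entries; the only key these snippets actually read is 'tablename' (see Example 14, which iterates over the entries). A hypothetical stand-in for DEFAULT_TABLE_LIST, with the other keys shown purely as assumptions about what such an entry might carry:

# hypothetical illustration only - 'tablename' is the key used in these snippets,
# 'filename' and 'index_col' are assumed placeholders
DEFAULT_TABLE_LIST = [
    {'tablename': 'households', 'filename': 'households.csv', 'index_col': 'household_id'},
    {'tablename': 'persons', 'filename': 'persons.csv', 'index_col': 'person_id'},
    {'tablename': 'land_use', 'filename': 'land_use.csv', 'index_col': 'zone_id'},
]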
Example 9
def annotate_tables(model_settings, trace_label):

    trace_label = tracing.extend_trace_label(trace_label, 'annotate_tables')

    chunk.log_rss(trace_label)

    annotate_tables = model_settings.get('annotate_tables', [])

    if not annotate_tables:
        logger.warning(
            f"{trace_label} - annotate_tables setting is empty - nothing to do!"
        )

    assert isinstance(annotate_tables, list), \
        f"annotate_tables settings should be a list but is {type(annotate_tables)}"

    t0 = tracing.print_elapsed_time()

    for table_info in annotate_tables:

        tablename = table_info['tablename']

        chunk.log_rss(f"{trace_label}.pre-get_table.{tablename}")

        df = inject.get_table(tablename).to_frame()
        chunk.log_df(trace_label, tablename, df)

        # - rename columns
        column_map = table_info.get('column_map', None)
        if column_map:

            warnings.warn(
                f"Setting 'column_map' has been changed to 'rename_columns'. "
                f"Support for 'column_map' in annotate_tables  will be removed in future versions.",
                FutureWarning)

            logger.info(
                f"{trace_label} - renaming {tablename} columns {column_map}")
            df.rename(columns=column_map, inplace=True)

        # - annotate
        annotate = table_info.get('annotate', None)
        if annotate:
            logger.info(
                f"{trace_label} - annotating {tablename} SPEC {annotate['SPEC']}"
            )
            expressions.assign_columns(df=df,
                                       model_settings=annotate,
                                       trace_label=trace_label)

        chunk.log_df(trace_label, tablename, df)

        # - write table to pipeline
        pipeline.replace_table(tablename, df)

        del df
        chunk.log_df(trace_label, tablename, None)
Example 10
def initialize():
    """
    Because random seed is set differently for each step, the sampling of households depends
    on which step they are initially loaded in, so we force them to load here and they get
    stored to the pipeline.
    """

    t0 = tracing.print_elapsed_time()
    inject.get_table('land_use').to_frame()
    t0 = tracing.print_elapsed_time("preload land_use", t0, debug=True)

    inject.get_table('households').to_frame()
    t0 = tracing.print_elapsed_time("preload households", t0, debug=True)

    inject.get_table('persons').to_frame()
    t0 = tracing.print_elapsed_time("preload persons", t0, debug=True)

    inject.get_table('person_windows').to_frame()
    t0 = tracing.print_elapsed_time("preload person_windows", t0, debug=True)
Example 11
def load_skims(omx_file_path, skim_info, skim_buffers):

    read_cache = config.setting('read_skim_cache')
    write_cache = config.setting('write_skim_cache')
    assert not (read_cache and write_cache), \
        "read_skim_cache and write_skim_cache are both True in settings file. I am assuming this is a mistake"

    skim_data = skim_data_from_buffers(skim_buffers, skim_info)

    t0 = tracing.print_elapsed_time()

    if read_cache:
        read_skim_cache(skim_info, skim_data)
        t0 = tracing.print_elapsed_time("read_skim_cache", t0)
    else:
        read_skims_from_omx(skim_info, skim_data, omx_file_path)
        t0 = tracing.print_elapsed_time("read_skims_from_omx", t0)

    if write_cache:
        write_skim_cache(skim_info, skim_data)
        t0 = tracing.print_elapsed_time("write_skim_cache", t0)
Example 12
def choose_parking_location(segment_name, trips, alternatives, model_settings,
                            want_sample_table, skims, chunk_size, trace_hh_id,
                            trace_label):

    logger.info("choose_parking_location %s with %d trips", trace_label,
                trips.shape[0])

    t0 = print_elapsed_time()

    alt_dest_col_name = model_settings['ALT_DEST_COL_NAME']
    destination_sample = interaction_dataset(trips,
                                             alternatives,
                                             alt_index_id=alt_dest_col_name)
    destination_sample.index = np.repeat(trips.index.values, len(alternatives))
    destination_sample.index.name = trips.index.name
    destination_sample = destination_sample[[alt_dest_col_name]].copy()

    # - parking_destination_simulate
    destinations = parking_destination_simulate(
        segment_name=segment_name,
        trips=trips,
        destination_sample=destination_sample,
        model_settings=model_settings,
        skims=skims,
        chunk_size=chunk_size,
        trace_hh_id=trace_hh_id,
        trace_label=trace_label)

    if want_sample_table:
        # FIXME - sample_table
        destination_sample.set_index(model_settings['ALT_DEST_COL_NAME'],
                                     append=True,
                                     inplace=True)
    else:
        destination_sample = None

    t0 = print_elapsed_time("%s.parking_location_simulate" % trace_label, t0)

    return destinations, destination_sample
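
The destination_sample construction above is a cartesian-product idiom: interaction_dataset pairs every trip with every alternative, and np.repeat keeps the trips index aligned with the expanded frame. A self-contained toy illustration of the same indexing trick (toy data, independent of the model tables):

import numpy as np
import pandas as pd

trips = pd.DataFrame({'depart': [8, 17]},
                     index=pd.Index([101, 102], name='trip_id'))
alternatives = pd.DataFrame({'parking_zone': [1, 2, 3]})

# repeat each trip row once per alternative, then tile the alternative values
cross = trips.loc[np.repeat(trips.index.values, len(alternatives))].reset_index()
cross['parking_zone'] = np.tile(alternatives['parking_zone'].values, len(trips))
cross = cross.set_index('trip_id')

# each trip_id now appears len(alternatives) times, one row per candidate zone
print(cross)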
Example 13
def initialize():
    """

    Because random seed is set differently for each step, the sampling of households depends
    on which step they are initially loaded in.

    We load them explicitly up front, so that household sampling does not depend on which step
    first loads them.
    """

    t0 = tracing.print_elapsed_time()
    inject.get_table('land_use').to_frame()
    t0 = tracing.print_elapsed_time("preload land_use")

    inject.get_table('households').to_frame()
    t0 = tracing.print_elapsed_time("preload households")

    inject.get_table('persons').to_frame()
    t0 = tracing.print_elapsed_time("preload persons")

    inject.get_table('person_windows').to_frame()
    t0 = tracing.print_elapsed_time("preload person_windows")

Example 14
def preload_injectables():
    """
    preload bulky injectables up front - stuff that isn't inserted into the pipeline
    """

    logger.info("preload_injectables")

    inject.add_step('track_skim_usage', track_skim_usage)
    inject.add_step('write_data_dictionary', write_data_dictionary)
    inject.add_step('write_tables', write_tables)

    table_list = config.setting('input_table_list')

    # default ActivitySim table names and indices
    if table_list is None:
        logger.warning(
            "No 'input_table_list' found in settings. This will be a "
            "required setting in upcoming versions of ActivitySim.")

        new_settings = inject.get_injectable('settings')
        new_settings['input_table_list'] = DEFAULT_TABLE_LIST
        inject.add_injectable('settings', new_settings)

    # FIXME undocumented feature
    if config.setting('write_raw_tables'):

        # write raw input tables as csv (before annotation)
        csv_dir = config.output_file_path('raw_tables')
        if not os.path.exists(csv_dir):
            os.makedirs(csv_dir)  # make directory if needed

        table_names = [t['tablename'] for t in table_list]
        for t in table_names:
            df = inject.get_table(t).to_frame()
            if t == 'households':
                df.drop(columns='chunk_id', inplace=True)
            df.to_csv(os.path.join(csv_dir, '%s.csv' % t), index=True)

    t0 = tracing.print_elapsed_time()

    # FIXME - still want to do this?
    # if inject.get_injectable('skim_dict', None) is not None:
    #     t0 = tracing.print_elapsed_time("preload skim_dict", t0, debug=True)
    #
    # if inject.get_injectable('skim_stack', None) is not None:
    #     t0 = tracing.print_elapsed_time("preload skim_stack", t0, debug=True)

    return True
Example 15
def annotate_tables(model_settings, trace_label):

    annotate_tables = model_settings.get('annotate_tables', [])

    if not annotate_tables:
        logger.warning("annotate_tables setting is empty - nothing to do!")

    t0 = tracing.print_elapsed_time()

    for table_info in annotate_tables:

        tablename = table_info['tablename']
        df = inject.get_table(tablename).to_frame()

        # - rename columns
        column_map = table_info.get('column_map', None)
        if column_map:

            warnings.warn(
                "annotate_tables option 'column_map' renamed 'rename_columns' and moved"
                "to settings.yaml. Support for 'column_map' in annotate_tables will be "
                "removed in future versions.", FutureWarning)

            logger.info("renaming %s columns %s" % (
                tablename,
                column_map,
            ))
            df.rename(columns=column_map, inplace=True)

        # - annotate
        annotate = table_info.get('annotate', None)
        if annotate:
            logger.info("annotated %s SPEC %s" % (
                tablename,
                annotate['SPEC'],
            ))
            expressions.assign_columns(df=df,
                                       model_settings=annotate,
                                       trace_label=trace_label)

        # fixme - narrow?

        # - write table to pipeline
        pipeline.replace_table(tablename, df)
Example 16
def annotate_tables(model_settings, trace_label):

    annotate_tables = model_settings.get('annotate_tables', [])

    if not annotate_tables:
        logger.warning(f"{trace_label} - annotate_tables setting is empty - nothing to do!")

    assert isinstance(annotate_tables, list), \
        f"annotate_tables settings should be a list but is {type(annotate_tables)}"

    t0 = tracing.print_elapsed_time()

    for table_info in annotate_tables:

        tablename = table_info['tablename']

        df = inject.get_table(tablename).to_frame()

        # - rename columns
        column_map = table_info.get('column_map', None)
        if column_map:

            warnings.warn(f"{trace_label} - annotate_tables option 'column_map' renamed 'rename_columns' "
                          f"and moved to global settings file. Support for 'column_map' in annotate_tables "
                          f"will be removed in future versions.",
                          FutureWarning)

            logger.info(f"{trace_label} - renaming {tablename} columns {column_map}")
            df.rename(columns=column_map, inplace=True)

        # - annotate
        annotate = table_info.get('annotate', None)
        if annotate:
            logger.info(f"{trace_label} - annotating {tablename} SPEC {annotate['SPEC']}")
            expressions.assign_columns(
                df=df,
                model_settings=annotate,
                trace_label=trace_label)

        # fixme - narrow?

        # - write table to pipeline
        pipeline.replace_table(tablename, df)
Example 17
def preload_injectables():
    """
    preload bulky injectables up front - stuff that isn't inserted into the pipeline
    """

    logger.info("preload_injectables")

    inject.add_step('track_skim_usage', track_skim_usage)
    inject.add_step('write_data_dictionary', write_data_dictionary)
    inject.add_step('write_tables', write_tables)

    t0 = tracing.print_elapsed_time()

    # FIXME - still want to do this?
    # if inject.get_injectable('skim_dict', None) is not None:
    #     t0 = tracing.print_elapsed_time("preload skim_dict", t0, debug=True)
    #
    # if inject.get_injectable('skim_stack', None) is not None:
    #     t0 = tracing.print_elapsed_time("preload skim_stack", t0, debug=True)

    return True
Example 19
def run(args):
    """
    Run the models. Specify a project folder using the '--working_dir' option,
    or point to the config, data, and output folders directly with
    '--config', '--data', and '--output'. Both '--config' and '--data' can be
    specified multiple times. Directories listed first take precedence.

    returns:
        int: sys.exit exit code
    """

    # register abm steps and other abm-specific injectables
    # by default, assume we are running activitysim.abm
    # other callers (e.g. populationsim) will have to arrange to register their own steps and injectables
    # (presumably) in a custom run_simulation.py instead of using the 'activitysim run' command
    if not inject.is_injectable('preload_injectables'):
        from activitysim import abm  # register abm steps and other abm-specific injectables

    tracing.config_logger(basic=True)
    handle_standard_args(args)  # possibly update injectables

    # legacy support for run_list setting nested 'models' and 'resume_after' settings
    if config.setting('run_list'):
        warnings.warn(
            "Support for 'run_list' settings group will be removed.\n"
            "The run_list.steps setting is renamed 'models'.\n"
            "The run_list.resume_after setting is renamed 'resume_after'.\n"
            "Specify both 'models' and 'resume_after' directly in settings config file.",
            FutureWarning)
        run_list = config.setting('run_list')
        if 'steps' in run_list:
            assert not config.setting('models'), \
                f"Don't expect 'steps' in run_list and 'models' as stand-alone setting!"
            config.override_setting('models', run_list['steps'])

        if 'resume_after' in run_list:
            assert not config.setting('resume_after'), \
                f"Don't expect 'resume_after' both in run_list and as stand-alone setting!"
            config.override_setting('resume_after', run_list['resume_after'])

    # If you provide a resume_after argument to pipeline.run
    # the pipeline manager will attempt to load checkpointed tables from the checkpoint store
    # and resume pipeline processing on the next submodel step after the specified checkpoint
    resume_after = config.setting('resume_after', None)

    # cleanup if not resuming
    if not resume_after:
        cleanup_output_files()
    elif config.setting('cleanup_trace_files_on_resume', False):
        tracing.delete_trace_files()

    tracing.config_logger(
        basic=False)  # update using possibly new logging configs
    config.filter_warnings()
    logging.captureWarnings(capture=True)

    # directories
    for k in ['configs_dir', 'settings_file_name', 'data_dir', 'output_dir']:
        logger.info('SETTING %s: %s' % (k, inject.get_injectable(k, None)))

    log_settings = inject.get_injectable('log_settings', {})
    for k in log_settings:
        logger.info('SETTING %s: %s' % (k, config.setting(k)))

    # OMP_NUM_THREADS: openmp
    # OPENBLAS_NUM_THREADS: openblas
    # MKL_NUM_THREADS: mkl
    for env in ['MKL_NUM_THREADS', 'OMP_NUM_THREADS', 'OPENBLAS_NUM_THREADS']:
        logger.info(f"ENV {env}: {os.getenv(env)}")

    np_info_keys = [
        'atlas_blas_info', 'atlas_blas_threads_info', 'atlas_info',
        'atlas_threads_info', 'blas_info', 'blas_mkl_info', 'blas_opt_info',
        'lapack_info', 'lapack_mkl_info', 'lapack_opt_info', 'mkl_info'
    ]

    for cfg_key in np_info_keys:
        info = np.__config__.get_info(cfg_key)
        if info:
            for info_key in ['libraries']:
                if info_key in info:
                    logger.info(
                        f"NUMPY {cfg_key} {info_key}: {info[info_key]}")

    t0 = tracing.print_elapsed_time()

    try:
        if config.setting('multiprocess', False):
            logger.info('run multiprocess simulation')

            from activitysim.core import mp_tasks
            injectables = {k: inject.get_injectable(k) for k in INJECTABLES}
            mp_tasks.run_multiprocess(injectables)

            assert not pipeline.is_open()

            if config.setting('cleanup_pipeline_after_run', False):
                pipeline.cleanup_pipeline()

        else:
            logger.info('run single process simulation')

            pipeline.run(models=config.setting('models'),
                         resume_after=resume_after)

            if config.setting('cleanup_pipeline_after_run', False):
                pipeline.cleanup_pipeline(
                )  # has side effect of closing open pipeline
            else:
                pipeline.close_pipeline()

            mem.log_global_hwm()  # main process
    except Exception:
        # log time until error and the error traceback
        tracing.print_elapsed_time('all models until this error', t0)
        logger.exception('activitysim run encountered an unrecoverable error')
        raise

    chunk.consolidate_logs()
    mem.consolidate_logs()

    tracing.print_elapsed_time('all models', t0)

    return 0
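
The legacy run_list handling above only remaps the old nested settings onto the new flat ones. As a sketch (the model names are placeholders), the two equivalent settings shapes look like this when expressed as plain Python dicts:

# legacy shape (deprecated): nested under 'run_list'
legacy_settings = {
    'run_list': {
        'steps': ['initialize_households', 'school_location'],   # placeholder model names
        'resume_after': 'initialize_households',
    },
}

# current shape: 'models' and 'resume_after' as top-level settings
current_settings = {
    'models': ['initialize_households', 'school_location'],
    'resume_after': 'initialize_households',
}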
Example 20
    data_dir = '/Users/jeff.doyle/work/activitysim-data/mtc_tm1/data'
    data_dir = '../example/data'

    # inject.add_injectable('data_dir', '/Users/jeff.doyle/work/activitysim-data/mtc_tm1/data')
    inject.add_injectable('data_dir', ['ancillary_data', data_dir])
    # inject.add_injectable('data_dir', ['ancillary_data', '../activitysim/abm/test/data'])
    inject.add_injectable('configs_dir', ['configs', '../example/configs'])

    injectables = config.handle_standard_args()

    tracing.config_logger()
    config.filter_warnings()

    log_settings(injectables)

    t0 = tracing.print_elapsed_time()

    # cleanup if not resuming
    if not config.setting('resume_after', False):
        cleanup_output_files()

    run_list = mp_tasks.get_run_list()

    if run_list['multiprocess']:
        # do this after config.handle_standard_args, as command line args may override injectables
        injectables = list(set(injectables) | set(['data_dir', 'configs_dir', 'output_dir']))
        injectables = {k: inject.get_injectable(k) for k in injectables}
    else:
        injectables = None

    run(run_list, injectables)
Example 22
def non_mandatory_tour_frequency(persons_merged,
                                 non_mandatory_tour_frequency_alts,
                                 non_mandatory_tour_frequency_spec,
                                 non_mandatory_tour_frequency_settings,
                                 chunk_size,
                                 trace_hh_id):

    """
    This model predicts the frequency of making non-mandatory trips
    (alternatives for this model come from a separate csv file which is
    configured by the user) - these trips include escort, shopping, othmaint,
    othdiscr, eatout, and social trips in various combinations.
    """

    t0 = print_elapsed_time()

    choosers = persons_merged.to_frame()
    alts = non_mandatory_tour_frequency_alts.to_frame()

    # filter based on results of CDAP
    choosers = choosers[choosers.cdap_activity.isin(['M', 'N'])]

    logger.info("Running non_mandatory_tour_frequency with %d persons" % len(choosers))

    constants = config.get_model_constants(non_mandatory_tour_frequency_settings)

    choices_list = []
    # segment by person type and pick the right spec for each person type
    for name, segment in choosers.groupby('ptype_cat'):

        logger.info("Running segment '%s' of size %d" % (name, len(segment)))

        choices = asim.interaction_simulate(
            segment,
            alts,
            # notice that we pick the column for the segment for each segment we run
            spec=non_mandatory_tour_frequency_spec[[name]],
            locals_d=constants,
            chunk_size=chunk_size,
            trace_label=trace_hh_id and 'non_mandatory_tour_frequency.%s' % name,
            trace_choice_name='non_mandatory_tour_frequency')

        choices_list.append(choices)

        t0 = print_elapsed_time("non_mandatory_tour_frequency.%s" % name, t0)

        # FIXME - force garbage collection
        # mem = asim.memory_info()
        # logger.info('memory_info ptype %s, %s' % (name, mem))

    choices = pd.concat(choices_list)

    # FIXME - no need to reindex?
    orca.add_column("persons", "non_mandatory_tour_frequency", choices)

    create_non_mandatory_tours_table()

    pipeline.add_dependent_columns("persons", "persons_nmtf")

    if trace_hh_id:
        trace_columns = ['non_mandatory_tour_frequency']
        tracing.trace_df(orca.get_table('persons_merged').to_frame(),
                         label="non_mandatory_tour_frequency",
                         columns=trace_columns,
                         warn_if_empty=True)
Example 23
def build_cdap_spec(interaction_coefficients, hhsize,
                    trace_spec=False, trace_label=None, cache=True):
    """
    Build a spec file for computing utilities of alternative household member interaction patterns
    for households of specified size.

    We generate this spec automatically from a table of rules and coefficients because the
    interaction rules are fairly simple and can be expressed compactly whereas
    there is a lot of redundancy between the spec files for different household sizes, as well as
    in the vectorized expression of the interaction alternatives within the spec file itself

    interaction_coefficients has five columns:
        activity
            A single character activity type name (M, N, or H)
        interaction_ptypes
            List of ptypes in the interaction (in order of increasing ptype) or empty for wildcards
            (meaning that the interaction applies to all ptypes in that size hh)
        cardinality
            the number of persons in the interaction (e.g. 3 for a 3-way interaction)
        slug
            a human friendly efficient name so we can dump a readable spec trace file for debugging
            this slug is replaced with the numerical coefficient value after we dump the trace file
        coefficient
            The coefficient to apply for all hh interactions for this activity and set of ptypes

    The generated spec will have the eval expression in the index, and a utility column for each
    alternative (e.g. ['HH', 'HM', 'HN', 'MH', 'MM', 'MN', 'NH', 'NM', 'NN'] for hhsize 2)

    In order to be able to dump the spec in a human-friendly fashion to facilitate debugging the
    cdap_interaction_coefficients table, we first populate utility columns in the spec file
    with the coefficient slugs, dump the spec file, and then replace the slugs with coefficients.

    Parameters
    ----------
    interaction_coefficients : pandas.DataFrame
        Rules and coefficients for generating interaction specs for different household sizes
    hhsize : int
        household size for which the spec should be built.

    Returns
    -------
    spec: pandas.DataFrame

    """

    t0 = tracing.print_elapsed_time()

    # if DUMP:
    #     # dump the interaction_coefficients table because it has been preprocessed
    #     tracing.trace_df(interaction_coefficients,
    #                      '%s.hhsize%d_interaction_coefficients' % (trace_label, hhsize),
    #                      transpose=False, slicer='NONE')

    # cdap spec is same for all households of MAX_HHSIZE and greater
    hhsize = min(hhsize, MAX_HHSIZE)

    if cache:
        spec = get_cached_spec(hhsize)
        if spec is not None:
            return spec

    expression_name = "Expression"

    # generate a list of activity pattern alternatives for this hhsize
    # e.g. ['HH', 'HM', 'HN', 'MH', 'MM', 'MN', 'NH', 'NM', 'NN'] for hhsize=2
    alternatives = [''.join(tup) for tup in itertools.product('HMN', repeat=hhsize)]

    # spec df has expression column plus a column for each alternative
    spec = pd.DataFrame(columns=[expression_name] + alternatives)

    # Before processing the interaction_coefficients, we add rows to the spec to carry
    # the alternative utilities previously computed for each individual into all hh alternative
    # columns in which the individual is assigned that alternative. The Expression column contains
    # the name of the choosers column with that individual's utility for the individual alternative
    # and the hh alternative columns that should receive that utility are given a value of 1
    # e.g. M_p1 is a column in choosers with the individual utility to person p1 of alternative M
    #   Expression   MM   MN   MH   NM   NN   NH   HM   HN   HH
    #         M_p1  1.0  1.0  1.0  0.0  0.0  0.0  0.0  0.0  0.0
    #         N_p1  0.0  0.0  0.0  1.0  1.0  1.0  0.0  0.0  0.0
    for pnum in range(1, hhsize+1):
        for activity in ['M', 'N', 'H']:

            new_row_index = len(spec)
            spec.loc[new_row_index, expression_name] = add_pn(activity, pnum)

            # list of alternative columns where person pnum has expression activity
            # e.g. for M_p1 we want the columns where activity M is in position p1
            alternative_columns = [alt for alt in alternatives if alt[pnum - 1] == activity]
            spec.loc[new_row_index, alternative_columns] = 1

    # ignore rows whose cardinality exceeds hhsize
    relevant_rows = interaction_coefficients.cardinality <= hhsize

    # for each row in the interaction_coefficients table
    for row in interaction_coefficients[relevant_rows].itertuples():

        # if it is a wildcard all_people interaction
        if not row.interaction_ptypes:

            # wildcard interactions only apply if the interaction includes all household members
            # this will be the case if the cardinality of the wildcard equals the hhsize
            # conveniently, the slug is given the name of the alternative column (e.g. HHHH)

            # conveniently, for wildcards, the slug has been assigned the name of the alternative
            # (e.g. HHHH) that it applies to, since the interaction includes all household members
            # and there are no ptypes to append to it

            # FIXME - should we be doing this for greater than MAX_HHSIZE households?
            if row.slug in alternatives:
                spec.loc[len(spec), [expression_name, row.slug]] = ['1', row.slug]

            continue

        if not (0 <= row.cardinality <= MAX_INTERACTION_CARDINALITY):
            raise RuntimeError("Bad row cardinality %d for %s" % (row.cardinality, row.slug))

        # for all other interaction rules, we need to generate a row in the spec for each
        # possible combination of interacting persons
        # e.g. for (1, 2), (1,3), (2,3) for a coefficient with cardinality 2 in hhsize 3
        for tup in itertools.combinations(list(range(1, hhsize+1)), row.cardinality):

            # determine the name of the chooser column with the ptypes for this interaction
            if row.cardinality == 1:
                interaction_column = "ptype_p%d" % tup[0]
            else:
                # column named (e.g.) p1_p3 for an interaction between p1 and p3
                interaction_column = '_'.join(['p%s' % pnum for pnum in tup])

            # build expression that evaluates True iff the interaction is between specified ptypes
            # (e.g.) p1_p3==13 for an interaction between p1 and p3 of ptypes 1 and 3 (or 3 and 1)
            expression = "%s==%s" % (interaction_column, row.interaction_ptypes)

            # create list of columns with names matching activity for each of the persons in tup
            # e.g. ['MMM', 'MMN', 'MMH'] for an interaction between p1 and p3 with activity 'M'
            # alternative_columns = \
            #     filter(lambda alt: all([alt[p - 1] == row.activity for p in tup]), alternatives)
            alternative_columns = \
                [alt for alt in alternatives if all([alt[p - 1] == row.activity for p in tup])]

            # a row for this interaction may already exist,
            # e.g. if there are rules for both HH13 and MM13, we don't need to add rows for both
            # since they are triggered by the same expressions (e.g. p1_p2==13, p1_p3=13,...)
            existing_row_index = (spec[expression_name] == expression)
            if (existing_row_index).any():
                # if the rows exist, simply update the appropriate alternative columns in spec
                spec.loc[existing_row_index, alternative_columns] = row.slug
                spec.loc[existing_row_index, expression_name] = expression
            else:
                # otherwise, add a new row to spec
                new_row_index = len(spec)
                spec.loc[new_row_index, alternative_columns] = row.slug
                spec.loc[new_row_index, expression_name] = expression

    # eval expression goes in the index
    spec.set_index(expression_name, inplace=True)

    simulate.uniquify_spec_index(spec)

    if trace_spec:
        tracing.trace_df(spec, '%s.hhsize%d_spec' % (trace_label, hhsize),
                         transpose=False, slicer='NONE')

    # replace slug with coefficient
    d = interaction_coefficients.set_index('slug')['coefficient'].to_dict()
    for c in spec.columns:
        spec[c] =\
            spec[c].map(lambda x: d.get(x, x or 0.0)).fillna(0)

    if trace_spec:
        tracing.trace_df(spec, '%s.hhsize%d_spec_patched' % (trace_label, hhsize),
                         transpose=False, slicer='NONE')

    if cache:
        cache_spec(hhsize, spec)

    t0 = tracing.print_elapsed_time("build_cdap_spec hh_size %s" % hhsize, t0)

    return spec
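
To make the spec construction concrete, here is what the first stage of build_cdap_spec produces for hhsize=2: the nine activity-pattern alternatives and the person-level "carry" rows described in the comments. This re-runs the same itertools/pandas logic in isolation (add_pn is replaced by a simple inline format, so the snippet is self-contained):

import itertools
import pandas as pd

hhsize = 2
expression_name = 'Expression'

# same construction as in build_cdap_spec: 9 alternatives for hhsize 2
alternatives = [''.join(tup) for tup in itertools.product('HMN', repeat=hhsize)]
# ['HH', 'HM', 'HN', 'MH', 'MM', 'MN', 'NH', 'NM', 'NN']

spec = pd.DataFrame(columns=[expression_name] + alternatives)

# carry each person's individual utility into every hh alternative that assigns
# that person that activity (e.g. M_p1 -> alternatives whose 1st letter is 'M')
for pnum in range(1, hhsize + 1):
    for activity in ['M', 'N', 'H']:
        new_row_index = len(spec)
        spec.loc[new_row_index, expression_name] = '%s_p%d' % (activity, pnum)
        alternative_columns = [alt for alt in alternatives if alt[pnum - 1] == activity]
        spec.loc[new_row_index, alternative_columns] = 1

print(spec.fillna(0))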
Example 24
def run(args):
    """
    Run the models. Specify a project folder using the '--working_dir' option,
    or point to the config, data, and output folders directly with
    '--config', '--data', and '--output'. Both '--config' and '--data' can be
    specified multiple times. Directories listed first take precedence.

    returns:
        int: sys.exit exit code
    """

    # register abm steps and other abm-specific injectables
    # by default, assume we are running activitysim.abm
    # other callers (e.g. populationsim) will have to arrange to register their own steps and injectables
    # (presumably) in a custom run_simulation.py instead of using the 'activitysim run' command
    if not inject.is_injectable('preload_injectables'):
        from activitysim import abm  # register abm steps and other abm-specific injectables

    tracing.config_logger(basic=True)
    handle_standard_args(args)  # possibly update injectables

    # legacy support for run_list setting nested 'models' and 'resume_after' settings
    if config.setting('run_list'):
        warnings.warn(
            "Support for 'run_list' settings group will be removed.\n"
            "The run_list.steps setting is renamed 'models'.\n"
            "The run_list.resume_after setting is renamed 'resume_after'.\n"
            "Specify both 'models' and 'resume_after' directly in settings config file.",
            FutureWarning)
        run_list = config.setting('run_list')
        if 'steps' in run_list:
            assert not config.setting('models'), \
                f"Don't expect 'steps' in run_list and 'models' as stand-alone setting!"
            config.override_setting('models', run_list['steps'])

        if 'resume_after' in run_list:
            assert not config.setting('resume_after'), \
                f"Don't expect 'resume_after' both in run_list and as stand-alone setting!"
            config.override_setting('resume_after', run_list['resume_after'])

    # If you provide a resume_after argument to pipeline.run
    # the pipeline manager will attempt to load checkpointed tables from the checkpoint store
    # and resume pipeline processing on the next submodel step after the specified checkpoint
    resume_after = config.setting('resume_after', None)

    # cleanup if not resuming
    if not resume_after:
        cleanup_output_files()
    elif config.setting('cleanup_trace_files_on_resume', False):
        tracing.delete_trace_files()

    tracing.config_logger(
        basic=False)  # update using possibly new logging configs
    config.filter_warnings()
    logging.captureWarnings(capture=True)

    # directories
    for k in ['configs_dir', 'settings_file_name', 'data_dir', 'output_dir']:
        logger.info('SETTING %s: %s' % (k, inject.get_injectable(k, None)))

    log_settings = inject.get_injectable('log_settings', {})
    for k in log_settings:
        logger.info('SETTING %s: %s' % (k, config.setting(k)))

    t0 = tracing.print_elapsed_time()

    if config.setting('multiprocess', False):
        logger.info('run multiprocess simulation')

        from activitysim.core import mp_tasks
        run_list = mp_tasks.get_run_list()
        injectables = {k: inject.get_injectable(k) for k in INJECTABLES}
        mp_tasks.run_multiprocess(run_list, injectables)

        assert not pipeline.is_open()

        if config.setting('cleanup_pipeline_after_run', False):
            pipeline.cleanup_pipeline()

    else:
        logger.info('run single process simulation')

        pipeline.run(models=config.setting('models'),
                     resume_after=resume_after)

        if config.setting('cleanup_pipeline_after_run', False):
            pipeline.cleanup_pipeline(
            )  # has side effect of closing open pipeline
        else:
            pipeline.close_pipeline()

        chunk.log_write_hwm()

    tracing.print_elapsed_time('all models', t0)

    return 0
Example 25
#     --resume : resume_after
handle_standard_args()

tracing.config_logger()

warnings.simplefilter("always")

logging.captureWarnings(capture=True)

old_settings = np.seterr(divide='raise',
                         over='raise',
                         invalid='raise',
                         under='ignore')
print "numpy.geterr: %s" % np.geterr()

t0 = tracing.print_elapsed_time()

MODELS = setting('models')

# If you provide a resume_after argument to pipeline.run
# the pipeline manager will attempt to load checkpointed tables from the checkpoint store
# and resume pipeline processing on the next submodel step after the specified checkpoint
resume_after = setting('resume_after', None)

if resume_after:
    print "resume_after", resume_after

pipeline.run(models=MODELS, resume_after=resume_after)

# tables will no longer be available after pipeline is closed
pipeline.close_pipeline()
Example 26
import logging

from census_getter import steps

from activitysim.core import tracing
from activitysim.core import pipeline
from activitysim.core import inject

from activitysim.core.config import handle_standard_args
from activitysim.core.tracing import print_elapsed_time

from census_getter.util import setting

handle_standard_args()

tracing.config_logger()

t0 = print_elapsed_time()

logger = logging.getLogger('census_getter')

# get the run list (name was possibly specified on the command line with the -m option)
run_list_name = inject.get_injectable('run_list_name', 'run_list')

# run list from settings file is dict with list of 'steps' and optional 'resume_after'
run_list = setting(run_list_name)
assert 'steps' in run_list, "Did not find steps in run_list"

# list of steps and possible resume_after in run_list
steps = run_list.get('steps')
resume_after = run_list.get('resume_after', None)

if resume_after:
Example 27
from activitysim.core.tracing import print_elapsed_time
from activitysim.core.config import handle_standard_args
from activitysim.core.config import setting

from activitysim.core import pipeline
from activitysim.core import tracing
import extensions

handle_standard_args()

# comment out the line below to default base seed to 0 random seed
# so that run results are reproducible
# pipeline.set_rn_generator_base_seed(seed=None)

tracing.config_logger()

t0 = print_elapsed_time()

MODELS = setting('models')

# If you provide a resume_after argument to pipeline.run
# the pipeline manager will attempt to load checkpointed tables from the checkpoint store
# and resume pipeline processing on the next submodel step after the specified checkpoint
resume_after = setting('resume_after', None)

if resume_after:
    print "resume_after", resume_after

pipeline.run(models=MODELS, resume_after=resume_after)

# tables will no longer be available after pipeline is closed
pipeline.close_pipeline()
Example 28
def compute_logsums(alt_tdd, tours_merged, tour_purpose, model_settings, skims,
                    trace_label):
    """
    Compute logsums for the tour alt_tdds, which will differ based on their different start and
    stop times of day, which translate to different odt_skim out_period and in_periods.

    In mtctm1, tdds are hourly, but there are only 5 skim time periods, so some of the tdd_alts
    will be the same, once converted to skim time periods. With 5 skim time periods there are
    15 unique (out-period, in-period) pairs but 190 tdd alternatives.

    For efficiency, rather than compute a lot of redundant logsums, we compute logsums for the
    unique (out-period, in-period) pairs and then join them back to the alt_tdds.
    """

    trace_label = tracing.extend_trace_label(trace_label, 'compute_logsums')
    network_los = inject.get_injectable('network_los')

    # - in_period and out_period
    assert 'out_period' not in alt_tdd
    assert 'in_period' not in alt_tdd
    alt_tdd['out_period'] = network_los.skim_time_period_label(
        alt_tdd['start'])
    alt_tdd['in_period'] = network_los.skim_time_period_label(alt_tdd['end'])
    alt_tdd['duration'] = alt_tdd['end'] - alt_tdd['start']

    if USE_BRUTE_FORCE_TO_COMPUTE_LOGSUMS:
        # compute logsums for all the tour alt_tdds (inefficient)
        logsums = _compute_logsums(alt_tdd, tours_merged, tour_purpose,
                                   model_settings, network_los, skims,
                                   trace_label)
        return logsums

    index_name = alt_tdd.index.name
    deduped_alt_tdds, redupe_columns = dedupe_alt_tdd(alt_tdd, tour_purpose,
                                                      trace_label)

    logger.info(
        f"{trace_label} compute_logsums "
        f"deduped_alt_tdds reduced number of rows by "
        f"{round(100 * (len(alt_tdd) - len(deduped_alt_tdds)) / len(alt_tdd), 2)}% "
        f"from {len(alt_tdd)} to {len(deduped_alt_tdds)} compared to USE_BRUTE_FORCE_TO_COMPUTE_LOGSUMS"
    )

    t0 = tracing.print_elapsed_time()

    # - compute logsums for the alt_tdd_periods
    deduped_alt_tdds['logsums'] = \
        _compute_logsums(deduped_alt_tdds, tours_merged, tour_purpose, model_settings, network_los, skims, trace_label)

    # tracing.log_runtime(model_name=trace_label, start_time=t0)

    # redupe - join the alt_tdd_period logsums to alt_tdd to get logsums for alt_tdd
    logsums = pd.merge(alt_tdd.reset_index(),
                       deduped_alt_tdds.reset_index(),
                       on=[index_name] + redupe_columns,
                       how='left').set_index(index_name).logsums

    # this is really expensive
    TRACE = False
    if TRACE:
        trace_logsums_df = logsums.to_frame('representative_logsum')
        trace_logsums_df['brute_force_logsum'] = \
            _compute_logsums(alt_tdd, tours_merged, tour_purpose, model_settings, network_los, skims, trace_label)
        tracing.trace_df(trace_logsums_df,
                         label=tracing.extend_trace_label(
                             trace_label, 'representative_logsums'),
                         slicer='NONE',
                         transpose=False)

    return logsums
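
The dedupe/redupe pattern above (compute the expensive quantity once per unique key combination, then merge it back onto the full frame) can be shown standalone. This toy sketch skips dedupe_alt_tdd itself and just illustrates the merge-back step with made-up periods and a stand-in for _compute_logsums:

import pandas as pd

# toy alt_tdd-like frame: many rows, few unique (out_period, in_period) pairs
alt_tdd = pd.DataFrame({
    'out_period': ['AM', 'AM', 'MD', 'MD', 'AM'],
    'in_period':  ['MD', 'MD', 'PM', 'PM', 'PM'],
}, index=pd.Index([1, 1, 1, 2, 2], name='tour_id'))

redupe_columns = ['out_period', 'in_period']

# compute once per unique (index, out_period, in_period) combination ...
deduped = alt_tdd.reset_index().drop_duplicates(subset=['tour_id'] + redupe_columns)
deduped['logsums'] = range(len(deduped))   # stand-in for _compute_logsums()

# ... then join the values back so every original row gets its logsum
logsums = pd.merge(alt_tdd.reset_index(), deduped,
                   on=['tour_id'] + redupe_columns,
                   how='left').set_index('tour_id').logsums
print(logsums)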
Example 30
def choose_trip_destination(
        primary_purpose,
        trips,
        alternatives,
        tours_merged,
        model_settings,
        size_term_matrix, skims,
        chunk_size, trace_hh_id,
        trace_label):

    logger.info("choose_trip_destination %s with %d trips", trace_label, trips.shape[0])

    t0 = print_elapsed_time()

    # - trip_destination_sample
    destination_sample = trip_destination_sample(
        primary_purpose=primary_purpose,
        trips=trips,
        alternatives=alternatives,
        model_settings=model_settings,
        size_term_matrix=size_term_matrix, skims=skims,
        chunk_size=chunk_size, trace_hh_id=trace_hh_id,
        trace_label=trace_label)

    dropped_trips = ~trips.index.isin(destination_sample.index.unique())
    if dropped_trips.any():
        logger.warning("%s trip_destination_sample %s trips "
                       "without viable destination alternatives" %
                       (trace_label, dropped_trips.sum()))
        trips = trips[~dropped_trips]

    t0 = print_elapsed_time("%s.trip_destination_sample" % trace_label, t0)

    if trips.empty:
        return pd.Series(index=trips.index)

    # - compute logsums
    compute_logsums(
        primary_purpose=primary_purpose,
        trips=trips,
        destination_sample=destination_sample,
        tours_merged=tours_merged,
        model_settings=model_settings,
        skims=skims,
        chunk_size=chunk_size, trace_hh_id=trace_hh_id,
        trace_label=trace_label)

    t0 = print_elapsed_time("%s.compute_logsums" % trace_label, t0)

    # - trip_destination_simulate
    destinations = trip_destination_simulate(
        primary_purpose=primary_purpose,
        trips=trips,
        destination_sample=destination_sample,
        model_settings=model_settings,
        size_term_matrix=size_term_matrix, skims=skims,
        chunk_size=chunk_size, trace_hh_id=trace_hh_id,
        trace_label=trace_label)

    dropped_trips = ~trips.index.isin(destinations.index)
    if dropped_trips.any():
        logger.warning("%s trip_destination_simulate %s trips "
                       "without viable destination alternatives" %
                       (trace_label, dropped_trips.sum()))

    t0 = print_elapsed_time("%s.trip_destination_simulate" % trace_label, t0)

    return destinations
Example 31
def choose_trip_destination(primary_purpose, trips, alternatives, tours_merged,
                            model_settings, want_logsums, want_sample_table,
                            size_term_matrix, skim_hotel, estimator,
                            chunk_size, trace_hh_id, trace_label):

    logger.info("choose_trip_destination %s with %d trips", trace_label,
                trips.shape[0])

    t0 = print_elapsed_time()

    # - trip_destination_sample
    destination_sample = trip_destination_sample(
        primary_purpose=primary_purpose,
        trips=trips,
        alternatives=alternatives,
        model_settings=model_settings,
        size_term_matrix=size_term_matrix,
        skim_hotel=skim_hotel,
        estimator=estimator,
        chunk_size=chunk_size,
        trace_hh_id=trace_hh_id,
        trace_label=trace_label)

    dropped_trips = ~trips.index.isin(destination_sample.index.unique())
    if dropped_trips.any():
        logger.warning("%s trip_destination_sample %s trips "
                       "without viable destination alternatives" %
                       (trace_label, dropped_trips.sum()))
        trips = trips[~dropped_trips]

    t0 = print_elapsed_time("%s.trip_destination_sample" % trace_label, t0)

    if trips.empty:
        return pd.Series(index=trips.index).to_frame('choice'), None

    # - compute logsums
    destination_sample = compute_logsums(primary_purpose=primary_purpose,
                                         trips=trips,
                                         destination_sample=destination_sample,
                                         tours_merged=tours_merged,
                                         model_settings=model_settings,
                                         skim_hotel=skim_hotel,
                                         chunk_size=chunk_size,
                                         trace_label=trace_label)

    t0 = print_elapsed_time("%s.compute_logsums" % trace_label, t0)

    # - trip_destination_simulate
    destinations = trip_destination_simulate(
        primary_purpose=primary_purpose,
        trips=trips,
        destination_sample=destination_sample,
        model_settings=model_settings,
        want_logsums=want_logsums,
        size_term_matrix=size_term_matrix,
        skim_hotel=skim_hotel,
        estimator=estimator,
        chunk_size=chunk_size,
        trace_hh_id=trace_hh_id,
        trace_label=trace_label)

    dropped_trips = ~trips.index.isin(destinations.index)
    if dropped_trips.any():
        logger.warning("%s trip_destination_simulate %s trips "
                       "without viable destination alternatives" %
                       (trace_label, dropped_trips.sum()))

    if want_sample_table:
        # FIXME - sample_table
        destination_sample.set_index(model_settings['ALT_DEST_COL_NAME'],
                                     append=True,
                                     inplace=True)
    else:
        destination_sample = None

    t0 = print_elapsed_time("%s.trip_destination_simulate" % trace_label, t0)

    return destinations, destination_sample
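
Both versions of choose_trip_destination use the same guard to detect choosers that came back from sampling with no viable alternatives: compare the trips index against the unique index values of the sample. A toy illustration of that check, independent of the model tables:

import pandas as pd

trips = pd.DataFrame({'purpose': ['work', 'shop', 'eat']},
                     index=pd.Index([1, 2, 3], name='trip_id'))

# sample results only cover trips 1 and 3 (trip 2 had no viable alternatives)
destination_sample = pd.DataFrame({'alt_dest': [10, 11, 12]},
                                  index=pd.Index([1, 1, 3], name='trip_id'))

dropped_trips = ~trips.index.isin(destination_sample.index.unique())
if dropped_trips.any():
    print('%s trips without viable destination alternatives' % dropped_trips.sum())
    trips = trips[~dropped_trips]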