Example #1
def create_mandatory_tours():

    # FIXME - move this to body?

    persons = inject.get_table('persons')
    configs_dir = inject.get_injectable('configs_dir')

    persons = persons.to_frame(columns=[
        "mandatory_tour_frequency", "is_worker", "school_taz", "workplace_taz"
    ])
    persons = persons[~persons.mandatory_tour_frequency.isnull()]

    tour_frequency_alternatives = inject.get_injectable(
        'mandatory_tour_frequency_alternatives')

    tours = process_mandatory_tours(persons, tour_frequency_alternatives)

    expressions.assign_columns(df=tours,
                               model_settings='annotate_tours_with_dest',
                               configs_dir=configs_dir,
                               trace_label='create_mandatory_tours')

    pipeline.extend_table("tours", tours)
    tracing.register_traceable_table('tours', tours)
    pipeline.get_rn_generator().add_channel(tours, 'tours')
Example #2
def get_trace_target(df, slicer):
    """
    get target ids and column or index to identify target trace rows in df

    Parameters
    ----------
    df: pandas.DataFrame
        dataframe to slice
    slicer: str
        name of column or index to use for slicing

    Returns
    -------
    (target, column) tuple

    target : int or list of ints
        id or ids that identify tracer target rows
    column : str
        name of column to search for targets or None to search index
    """

    target_ids = None  # id or ids to slice by (e.g. hh_id or person_ids or tour_ids)
    column = None  # column name to slice on or None to slice on index

    # special do-not-slice code for dumping entire df
    if slicer == 'NONE':
        return target_ids, column

    if slicer is None:
        slicer = df.index.name

    if isinstance(df, pd.DataFrame):
        # always slice by household id if we can
        if 'household_id' in df.columns:
            slicer = 'household_id'
        if slicer in df.columns:
            column = slicer

    if column is None and df.index.name != slicer:
        raise RuntimeError("bad slicer '%s' for df with index '%s'" %
                           (slicer, df.index.name))

    traceable_table_indexes = inject.get_injectable('traceable_table_indexes',
                                                    {})
    traceable_table_ids = inject.get_injectable('traceable_table_ids', {})

    if df.empty:
        target_ids = None
    elif slicer in traceable_table_indexes:
        # maps 'person_id' to 'persons', etc
        table_name = traceable_table_indexes[slicer]
        target_ids = traceable_table_ids.get(table_name, [])
    elif slicer == 'TAZ':
        target_ids = inject.get_injectable('trace_od', [])

    return target_ids, column
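A minimal sketch (not from the source) of how the returned (target_ids, column) pair might be applied to pull trace rows out of a dataframe; slice_trace_rows is a hypothetical helper name.

import pandas as pd

def slice_trace_rows(df, target_ids, column):
    # hypothetical helper: apply get_trace_target's result to a dataframe
    if target_ids is None:
        return df  # 'NONE' slicer or nothing registered - dump everything
    if not isinstance(target_ids, (list, tuple)):
        target_ids = [target_ids]
    if column is None:
        return df[df.index.isin(target_ids)]   # search the index
    return df[df[column].isin(target_ids)]     # search the named column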
Example #3
def track_skim_usage(output_dir):
    """
    write statistics on skim usage (diagnostic to detect loading of un-needed skims)

    FIXME - have not yet implemented a facility to avoid loading of unused skims

    Parameters
    ----------
    output_dir: str

    """
    pd.options.display.max_columns = 500
    pd.options.display.max_rows = 100

    checkpoints = pipeline.get_checkpoints()
    tables = OrderedDict()

    skim_dict = inject.get_injectable('skim_dict')
    skim_stack = inject.get_injectable('skim_stack', None)

    mode = 'wb' if sys.version_info < (3,) else 'w'
    with open(config.output_file_path('skim_usage.txt'), mode) as output_file:

        print("\n### skim_dict usage", file=output_file)
        for key in skim_dict.usage:
            print(key, file=output_file)

        if skim_stack is None:

            unused_keys = {k for k in skim_dict.skim_info['omx_keys']} - \
                          {k for k in skim_dict.usage}

            print("\n### unused skim keys", file=output_file)
            for key in unused_keys:
                print(key, file=output_file)

        else:

            print("\n### skim_stack usage", file=output_file)
            for key in skim_stack.usage:
                print(key, file=output_file)

            unused = {k for k in skim_dict.skim_info['omx_keys'] if not isinstance(k, tuple)} - \
                     {k for k in skim_dict.usage if not isinstance(k, tuple)}
            print("\n### unused skim str keys", file=output_file)
            for key in unused:
                print(key, file=output_file)

            unused = {k[0] for k in skim_dict.skim_info['omx_keys'] if isinstance(k, tuple)} - \
                     {k[0] for k in skim_dict.usage if isinstance(k, tuple)} - \
                     {k for k in skim_stack.usage}
            print("\n### unused skim dim3 keys", file=output_file)
            for key in unused:
                print(key, file=output_file)
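For intuition, a toy illustration (made-up keys, not from the source) of the set differences above, which separate plain string skim keys from (name, time_period) tuple keys; the real code additionally subtracts skim_stack.usage for the dim3 case.

# toy data standing in for skim_dict.skim_info['omx_keys'] and skim_dict.usage
omx_keys = {'DIST', 'DISTWALK', ('SOV_TIME', 'AM'), ('SOV_TIME', 'PM')}
usage = {'DIST', ('SOV_TIME', 'AM')}

unused_str = {k for k in omx_keys if not isinstance(k, tuple)} - \
             {k for k in usage if not isinstance(k, tuple)}
# {'DISTWALK'}

unused_dim3 = {k[0] for k in omx_keys if isinstance(k, tuple)} - \
              {k[0] for k in usage if isinstance(k, tuple)}
# empty set - 'SOV_TIME' was used for at least one time period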
Example #4
def track_skim_usage(output_dir):
    """
    write statistics on skim usage (diagnostic to detect loading of un-needed skims)

    FIXME - have not yet implemented a facility to avoid loading of unused skims

    Parameters
    ----------
    output_dir: str

    """
    pd.options.display.max_columns = 500
    pd.options.display.max_rows = 100

    checkpoints = pipeline.get_checkpoints()
    tables = OrderedDict()

    skim_dict = inject.get_injectable('skim_dict')
    skim_stack = inject.get_injectable('skim_stack', None)

    mode = 'wb' if sys.version_info < (3, ) else 'w'
    with open(config.output_file_path('skim_usage.txt'), mode) as output_file:

        print("\n### skim_dict usage", file=output_file)
        for key in skim_dict.usage:
            print(key, file=output_file)

        if skim_stack is None:

            unused_keys = {k for k in skim_dict.skim_info['omx_keys']} - \
                          {k for k in skim_dict.usage}

            print("\n### unused skim keys", file=output_file)
            for key in unused_keys:
                print(key, file=output_file)

        else:

            print("\n### skim_stack usage", file=output_file)
            for key in skim_stack.usage:
                print(key, file=output_file)

            unused = {k for k in skim_dict.skim_info['omx_keys'] if not isinstance(k, tuple)} - \
                     {k for k in skim_dict.usage if not isinstance(k, tuple)}
            print("\n### unused skim str keys", file=output_file)
            for key in unused:
                print(key, file=output_file)

            unused = {k[0] for k in skim_dict.skim_info['omx_keys'] if isinstance(k, tuple)} - \
                     {k[0] for k in skim_dict.usage if isinstance(k, tuple)} - \
                     {k for k in skim_stack.usage}
            print("\n### unused skim dim3 keys", file=output_file)
            for key in unused:
                print(key, file=output_file)
Example #5
def get_trace_target(df, slicer):
    """
    get target ids and column or index to identify target trace rows in df

    Parameters
    ----------
    df: pandas.DataFrame
        dataframe to slice
    slicer: str
        name of column or index to use for slicing

    Returns
    -------
    (target, column) tuple

    target : int or list of ints
        id or ids that identify tracer target rows
    column : str
        name of column to search for targets or None to search index
    """

    target_ids = None  # id or ids to slice by (e.g. hh_id or person_ids or tour_ids)
    column = None  # column name to slice on or None to slice on index

    # special do-not-slice code for dumping entire df
    if slicer == 'NONE':
        return target_ids, column

    if slicer is None:
        slicer = df.index.name

    if isinstance(df, pd.DataFrame):
        # always slice by household id if we can
        if 'household_id' in df.columns:
            slicer = 'household_id'
        if slicer in df.columns:
            column = slicer

    if column is None and df.index.name != slicer:
        raise RuntimeError("bad slicer '%s' for df with index '%s'" % (slicer, df.index.name))

    traceable_table_indexes = inject.get_injectable('traceable_table_indexes', {})
    traceable_table_ids = inject.get_injectable('traceable_table_ids', {})

    if df.empty:
        target_ids = None
    elif slicer in traceable_table_indexes:
        # maps 'person_id' to 'persons', etc
        table_name = traceable_table_indexes[slicer]
        target_ids = traceable_table_ids.get(table_name, [])
    elif slicer == 'TAZ':
        target_ids = inject.get_injectable('trace_od', [])

    return target_ids, column
Example #6
def config_logger(custom_config_file=None, basic=False):
    """
    Configure logger

    if custom_config_file is not supplied then look for conf file in configs_dir

    if not found use basicConfig

    Parameters
    ----------
    custom_config_file: str
        custom config filename
    basic: boolean
        basic setup

    Returns
    -------
    Nothing
    """
    log_config_file = None

    if custom_config_file and os.path.isfile(custom_config_file):
        log_config_file = custom_config_file
    elif not basic:
        # look for conf file in configs_dir
        configs_dir = inject.get_injectable('configs_dir')
        default_config_file = os.path.join(configs_dir, LOGGING_CONF_FILE_NAME)
        if os.path.isfile(default_config_file):
            log_config_file = default_config_file

    if log_config_file:
        with open(log_config_file) as f:
            config_dict = yaml.load(f, Loader=yaml.SafeLoader)
            config_dict = config_dict['logging']
            config_dict.setdefault('version', 1)
            logging.config.dictConfig(config_dict)
    else:
        logging.basicConfig(level=logging.INFO, stream=sys.stdout)

    logger = logging.getLogger(ASIM_LOGGER)

    if custom_config_file and not os.path.isfile(custom_config_file):
        logger.error("#\n#\n#\nconfig_logger could not find conf file '%s'" %
                     custom_config_file)

    if log_config_file:
        logger.info("Read logging configuration from: %s" % log_config_file)
    else:
        print "Configured logging using basicConfig"
        logger.info("Configured logging using basicConfig")

    output_dir = inject.get_injectable('output_dir')
    logger.debug("Deleting files in output_dir %s" % output_dir)
    delete_csv_files(output_dir)
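The YAML file loaded above is expected to carry a top-level 'logging' key holding a logging.config.dictConfig dictionary. A minimal, hypothetical equivalent written directly in Python (handler and formatter values are placeholders, not the project's actual configuration):

import logging.config

config_dict = {
    'version': 1,
    'formatters': {
        'simple': {'format': '%(levelname)s - %(name)s - %(message)s'},
    },
    'handlers': {
        'console': {'class': 'logging.StreamHandler', 'formatter': 'simple'},
    },
    'root': {'level': 'INFO', 'handlers': ['console']},
}
logging.config.dictConfig(config_dict)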
Example #7
def initialize_tours(network_los, households, persons, trace_hh_id):

    trace_label = 'initialize_tours'

    tours = read_input_table("tours")

    # FIXME can't use households_sliced injectable as flag like persons table does in case of resume_after.
    # FIXME could just always slice...
    slice_happened = \
        inject.get_injectable('households_sample_size', 0) > 0 \
        or inject.get_injectable('households_sample_size', 0) > 0
    if slice_happened:
        logger.info("slicing tours %s" % (tours.shape,))
        # keep all persons in the sampled households
        tours = tours[tours.person_id.isin(persons.index)]

    # annotate before patching tour_id to allow addition of REQUIRED_TOUR_COLUMNS defined above
    model_settings = config.read_model_settings('initialize_tours.yaml', mandatory=True)
    expressions.assign_columns(
        df=tours,
        model_settings=model_settings.get('annotate_tours'),
        trace_label=tracing.extend_trace_label(trace_label, 'annotate_tours'))

    skip_patch_tour_ids = model_settings.get('skip_patch_tour_ids', False)
    if not skip_patch_tour_ids:
        tours = patch_tour_ids(tours)
    assert tours.index.name == 'tour_id'

    # replace table function with dataframe
    inject.add_table('tours', tours)

    pipeline.get_rn_generator().add_channel('tours', tours)

    tracing.register_traceable_table('tours', tours)

    logger.debug(f"{len(tours.household_id.unique())} unique household_ids in tours")
    logger.debug(f"{len(households.index.unique())} unique household_ids in households")
    assert not tours.index.duplicated().any()

    tours_without_persons = ~tours.person_id.isin(persons.index)
    if tours_without_persons.any():
        logger.error(f"{tours_without_persons.sum()} tours out of {len(persons)} without persons\n"
                     f"{pd.Series({'person_id': tours_without_persons.index.values})}")
        raise RuntimeError(f"{tours_without_persons.sum()} tours with bad person_id")

    if trace_hh_id:
        tracing.trace_df(tours,
                         label='initialize_tours',
                         warn_if_empty=True)
Example #8
def test_misc():

    inject.clear_cache()

    with pytest.raises(RuntimeError) as excinfo:
        inject.get_injectable("configs_dir")
    assert "directory does not exist" in str(excinfo.value)

    with pytest.raises(RuntimeError) as excinfo:
        inject.get_injectable("data_dir")
    assert "directory does not exist" in str(excinfo.value)

    with pytest.raises(RuntimeError) as excinfo:
        inject.get_injectable("output_dir")
    assert "directory does not exist" in str(excinfo.value)

    configs_dir = os.path.join(os.path.dirname(__file__), 'configs_test_misc')
    inject.add_injectable("configs_dir", configs_dir)

    settings = inject.get_injectable("settings")
    assert isinstance(settings, dict)

    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    inject.add_injectable("data_dir", data_dir)

    # default values if not specified in settings
    assert inject.get_injectable("chunk_size") == 0
Example #9
def preload_injectables():
    """
    preload bulky injectables up front - stuff that isn't inserted into the pipeline
    """

    logger.info("preload_injectables")

    t0 = tracing.print_elapsed_time()

    if inject.get_injectable('skim_dict', None) is not None:
        t0 = tracing.print_elapsed_time("preload skim_dict", t0, debug=True)

    if inject.get_injectable('skim_stack', None) is not None:
        t0 = tracing.print_elapsed_time("preload skim_stack", t0, debug=True)
Example #10
def test_misc():

    inject.clear_cache()

    with pytest.raises(RuntimeError) as excinfo:
        inject.get_injectable("configs_dir")
    assert "directory does not exist" in str(excinfo.value)

    with pytest.raises(RuntimeError) as excinfo:
        inject.get_injectable("data_dir")
    assert "directory does not exist" in str(excinfo.value)

    with pytest.raises(RuntimeError) as excinfo:
        inject.get_injectable("output_dir")
    assert "directory does not exist" in str(excinfo.value)

    configs_dir = os.path.join(os.path.dirname(__file__), 'configs_test_misc')
    inject.add_injectable("configs_dir", configs_dir)

    settings = inject.get_injectable("settings")
    assert isinstance(settings, dict)

    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    inject.add_injectable("data_dir", data_dir)

    # default values if not specified in settings
    assert inject.get_injectable("chunk_size") == 0
Example #11
def wrap_skims(model_settings):
    """
    wrap skims of trip destination using origin, dest column names from model settings.
    Various of these are used by destination_sample, compute_logsums, and destination_simulate
    so we create them all here with canonical names.

    Note that compute_logsums aliases their names so it can use the same equations to compute
    logsums from origin to alt_dest, and from alt_dest to primary destination

    odt_skims - SkimStackWrapper: trip origin, trip alt_dest, time_of_day
    dot_skims - SkimStackWrapper: trip alt_dest, trip origin, time_of_day
    dpt_skims - SkimStackWrapper: trip alt_dest, trip primary_dest, time_of_day
    pdt_skims - SkimStackWrapper: trip primary_dest, trip alt_dest, time_of_day
    od_skims - SkimDictWrapper: trip origin, trip alt_dest
    dp_skims - SkimDictWrapper: trip alt_dest, trip primary_dest

    Parameters
    ----------
    model_settings

    Returns
    -------
        dict containing skims, keyed by canonical names relative to tour orientation
    """

    skim_dict = inject.get_injectable('skim_dict')
    skim_stack = inject.get_injectable('skim_stack')

    o = model_settings['TRIP_ORIGIN']
    d = model_settings['ALT_DEST_COL_NAME']
    p = model_settings['PRIMARY_DEST']

    skims = {
        "odt_skims":
        skim_stack.wrap(left_key=o, right_key=d, skim_key='trip_period'),
        "dot_skims":
        skim_stack.wrap(left_key=d, right_key=o, skim_key='trip_period'),
        "dpt_skims":
        skim_stack.wrap(left_key=d, right_key=p, skim_key='trip_period'),
        "pdt_skims":
        skim_stack.wrap(left_key=p, right_key=d, skim_key='trip_period'),
        "od_skims":
        skim_dict.wrap(o, d),
        "dp_skims":
        skim_dict.wrap(d, p),
    }

    return skims
Example #12
def base_settings_file_path(file_name):
    """

    FIXME - should be in configs

    Parameters
    ----------
    file_name

    Returns
    -------
        path to base settings file (raises RuntimeError if no configs dir contains the file)
    """

    if not file_name.lower().endswith('.yaml'):
        file_name = '%s.yaml' % (file_name, )

    configs_dir = inject.get_injectable('configs_dir')
    configs_dir = [configs_dir] if isinstance(configs_dir,
                                              str) else configs_dir

    for dir in configs_dir:
        file_path = os.path.join(dir, file_name)
        if os.path.exists(file_path):
            return file_path

    raise RuntimeError("base_settings_file %s not found" % file_name)
Example #13
def log_file_path(file_name):

    output_dir = inject.get_injectable('output_dir')

    # - check for optional log subfolder
    if os.path.exists(os.path.join(output_dir, 'log')):
        output_dir = os.path.join(output_dir, 'log')

    # - check for optional process name prefix
    prefix = inject.get_injectable('log_file_prefix', None)
    if prefix:
        file_name = "%s-%s" % (prefix, file_name)

    file_path = os.path.join(output_dir, file_name)

    return file_path
Example #14
    def get_tvpb_best_transit_time(self, orig, dest, tod):

        # FIXME lots of pathological knowledge here as we are only called by accessibility directly from expressions

        trace_label = tracing.extend_trace_label(
            'accessibility.tvpb_best_time', tod)
        recipe = 'accessibility'
        path_type = 'WTW'

        with chunk.chunk_log(trace_label):
            result = \
                self.build_virtual_path(recipe, path_type, orig, dest, tod,
                                        demographic_segment=None, want_choices=False,
                                        trace_label=trace_label)

            trace_od = inject.get_injectable("trace_od", None)
            if trace_od:
                filter_targets = (orig == trace_od[0]) & (dest == trace_od[1])
                if filter_targets.any():
                    self.build_virtual_path(recipe,
                                            path_type,
                                            orig,
                                            dest,
                                            tod,
                                            demographic_segment=None,
                                            want_choices=False,
                                            trace_label=trace_label,
                                            filter_targets=filter_targets,
                                            trace=True)

        return result
Example #15
def log_file_path(file_name):

    output_dir = inject.get_injectable('output_dir')

    # - check for optional log subfolder
    if os.path.exists(os.path.join(output_dir, 'log')):
        output_dir = os.path.join(output_dir, 'log')

    # - check for optional process name prefix
    prefix = inject.get_injectable('log_file_prefix', None)
    if prefix:
        file_name = "%s-%s" % (prefix, file_name)

    file_path = os.path.join(output_dir, file_name)

    return file_path
Example #16
def track_skim_usage(output_dir):
    """
    write statistics on skim usage (diagnostic to detect loading of un-needed skims)

    FIXME - have not yet implemented a facility to avoid loading of unused skims

    FIXME - if resume_after, this will only reflect skims used after resume

    Parameters
    ----------
    output_dir: str

    """
    pd.options.display.max_columns = 500
    pd.options.display.max_rows = 100

    skim_dict = inject.get_injectable('skim_dict')

    mode = 'wb' if sys.version_info < (3, ) else 'w'
    with open(config.output_file_path('skim_usage.txt'), mode) as output_file:

        print("\n### skim_dict usage", file=output_file)
        for key in skim_dict.get_skim_usage():
            print(key, file=output_file)

        unused = set(skim_dict.skim_info.base_keys) - set(skim_dict.get_skim_usage())

        print("\n### unused skim keys", file=output_file)
        for key in unused:
            print(key, file=output_file)
Example #17
def test_full_run1():

    _MODELS = [
        'input_pre_processor',
        'setup_data_structures',
        'initial_seed_balancing',
        'meta_control_factoring',
        'final_seed_balancing',
        'integerize_final_seed_weights',
        'sub_balancing.geography=TRACT',
        'sub_balancing.geography=TAZ',
        'expand_households',
        'summarize',
        'write_tables',
        'write_synthetic_population',
    ]

    pipeline.run(models=_MODELS, resume_after=None)

    expanded_household_ids = pipeline.get_table('expanded_household_ids')
    assert isinstance(expanded_household_ids, pd.DataFrame)
    taz_hh_counts = expanded_household_ids.groupby('TAZ').size()
    assert len(taz_hh_counts) == TAZ_COUNT
    assert taz_hh_counts.loc[100] == TAZ_100_HH_COUNT

    # output_tables action: skip
    output_dir = inject.get_injectable('output_dir')
    assert not os.path.exists(os.path.join(output_dir, 'households.csv'))
    assert os.path.exists(os.path.join(output_dir, 'summary_DISTRICT_1.csv'))

    # tables will no longer be available after pipeline is closed
    pipeline.close_pipeline()

    inject.clear_cache()
Example #18
def skim_dict(data_dir, settings):

    omx_file_path = config.data_file_path(settings["skims_file"])
    tags_to_load = settings['skim_time_periods']['labels']

    logger.info("loading skim_dict from %s" % (omx_file_path, ))

    # select the skims to load
    skim_info = get_skim_info(omx_file_path, tags_to_load)

    logger.debug("omx_shape %s skim_dtype %s" %
                 (skim_info['omx_shape'], skim_info['dtype']))

    skim_buffers = inject.get_injectable('data_buffers', None)
    if skim_buffers:
        logger.info('Using existing skim_buffers for skims')
    else:
        skim_buffers = buffers_for_skims(skim_info, shared=False)
        load_skims(omx_file_path, skim_info, skim_buffers)

    skim_data = skim_data_from_buffers(skim_buffers, skim_info)

    block_names = list(skim_info['blocks'].keys())
    for i in range(len(skim_data)):
        block_name = block_names[i]
        block_data = skim_data[i]
        logger.info(
            "block_name %s bytes %s (%s)" %
            (block_name, block_data.nbytes, util.GB(block_data.nbytes)))

    # create skim dict
    skim_dict = skim.SkimDict(skim_data, skim_info)
    skim_dict.offset_mapper.set_offset_int(-1)

    return skim_dict
Example #19
def person_max_window(persons):

    timetable = inject.get_injectable("timetable")

    # ndarray with one row per person and one column per time period
    # array value of 1 where free periods and 0 elsewhere
    s = pd.Series(persons.index.values, index=persons.index)
    available = timetable.individually_available(s)

    row_ids, start_pos, run_length, run_val = rle(available)

    # rle returns all runs, but we only care about runs of available (run_val == 1)
    target_rows = np.where(run_val == 1)
    row_ids = row_ids[target_rows]
    run_length = run_length[target_rows]

    df = pd.DataFrame({'row_ids': row_ids, 'run_length': run_length})

    # groupby index of row_ids matches the numpy row indexes of the timetable.individually_available ndarray
    # but rows may be missing for persons with no available periods
    max_overlap = df.groupby('row_ids').run_length.max()
    # fill in any missing values to align with input arrays
    input_row_ids = np.arange(persons.shape[0])
    max_window = max_overlap.reindex(input_row_ids).fillna(0)

    # FIXME should we return series or ndarray?
    max_window.index = persons.index

    return max_window
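rle() is a project helper; judging from how its return values are unpacked above, it run-length-encodes each row of the availability array. A self-contained sketch of that idea (the name and return layout are assumptions):

import itertools
import numpy as np

def rle(a):
    # run-length encode each row of a 2D array; returns parallel arrays of
    # row id, run start position, run length and run value
    row_ids, start_pos, run_length, run_val = [], [], [], []
    for i, row in enumerate(np.asarray(a)):
        pos = 0
        for val, group in itertools.groupby(row):
            n = len(list(group))
            row_ids.append(i)
            start_pos.append(pos)
            run_length.append(n)
            run_val.append(val)
            pos += n
    return (np.array(row_ids), np.array(start_pos),
            np.array(run_length), np.array(run_val))

available = np.array([[1, 1, 0, 1],
                      [0, 0, 0, 0]])
row_ids, start_pos, run_length, run_val = rle(available)
# runs with run_val == 1: person 0 has runs of length 2 and 1, person 1 has none,
# so person_max_window would report 2 and 0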
Example #20
def test_create_input_store(seed_households, data_dir):

    settings_yaml = """
        create_input_store: True
        input_table_list:
          - tablename: households
            h5_tablename: seed_households
            filename: households.csv
            index_col: household_id
            rename_columns:
              HHID: household_id
    """

    settings = yaml.load(settings_yaml, Loader=yaml.SafeLoader)
    inject.add_injectable('settings', settings)

    hh_file = os.path.join(data_dir, 'households.csv')
    seed_households.to_csv(hh_file, index=False)

    assert os.path.isfile(hh_file)

    df = input.read_input_table('households')

    assert df.index.name == 'household_id'

    output_store = os.path.join(inject.get_injectable('output_dir'),
                                'input_data.h5')
    assert os.path.exists(output_store)

    store_df = pd.read_hdf(output_store, 'seed_households')
    assert store_df.equals(seed_households)
Example #21
def setting(key, default=None):

    settings = inject.get_injectable('settings')

    # explicit setting in settings file takes precedence
    s = settings.get(key, None)

    # if no setting, try injectable
    if s is None:
        s = inject.get_injectable(key, None)

    # otherwise fall back to supplied default
    if s is None:
        s = default

    return s
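A standalone illustration (hypothetical values, not from the source) of the precedence implemented above: an explicit entry in the settings file wins, then a same-named injectable, then the caller-supplied default.

def setting_lookup(key, settings, injectables, default=None):
    # hypothetical standalone mirror of setting(): settings file first,
    # then injectable, then the caller-supplied default
    s = settings.get(key)
    if s is None:
        s = injectables.get(key)
    if s is None:
        s = default
    return s

settings = {'chunk_size': 2_000_000_000}
injectables = {'trace_hh_id': 982875}

assert setting_lookup('chunk_size', settings, injectables) == 2_000_000_000
assert setting_lookup('trace_hh_id', settings, injectables) == 982875
assert setting_lookup('households_sample_size', settings, injectables, 100) == 100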
Example #22
def test_mp_run():

    mp_configs_dir = os.path.join(os.path.dirname(__file__), 'configs_mp')
    configs_dir = os.path.join(os.path.dirname(__file__), 'configs')
    inject.add_injectable('configs_dir', [mp_configs_dir, configs_dir])

    output_dir = os.path.join(os.path.dirname(__file__), 'output')
    inject.add_injectable("output_dir", output_dir)

    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    inject.add_injectable("data_dir", data_dir)

    tracing.config_logger()

    run_list = mp_tasks.get_run_list()
    mp_tasks.print_run_list(run_list)

    # do this after config.handle_standard_args, as command line args may override injectables
    injectables = ['data_dir', 'configs_dir', 'output_dir']
    injectables = {k: inject.get_injectable(k) for k in injectables}

    # pipeline.run(models=run_list['models'], resume_after=run_list['resume_after'])

    mp_tasks.run_multiprocess(run_list, injectables)
    pipeline.open_pipeline('_')
    regress_mini_auto()
    pipeline.close_pipeline()
Example #23
    def __init__(self, size_term_selector):

        # do this once so they can request size_terms for various segments (tour_type or purpose)
        land_use = inject.get_table('land_use')
        size_terms = inject.get_injectable('size_terms')
        self.destination_size_terms = \
            tour_destination_size_terms(land_use, size_terms, size_term_selector)
Example #24
    def get_tvpb_logsum(self, path_type, orig, dest, tod, demographic_segment, want_choices, trace_label=None):

        # assume they have given us a more specific name (since there may be more than one active wrapper)
        trace_label = trace_label or 'get_tvpb_logsum'
        trace_label = tracing.extend_trace_label(trace_label, path_type)

        recipe = 'tour_mode_choice'

        with chunk.chunk_log(trace_label):

            logsum_df = \
                self.build_virtual_path(recipe, path_type, orig, dest, tod, demographic_segment,
                                        want_choices=want_choices, trace_label=trace_label)

            trace_hh_id = inject.get_injectable("trace_hh_id", None)
            if trace_hh_id:
                filter_targets = tracing.trace_targets(orig)
                # choices from preceding run (because random numbers)
                override_choices = logsum_df['path_num'] if want_choices else None
                if filter_targets.any():
                    self.build_virtual_path(recipe, path_type, orig, dest, tod, demographic_segment,
                                            want_choices=want_choices, override_choices=override_choices,
                                            trace_label=trace_label, filter_targets=filter_targets, trace=True)

        return logsum_df
Example #25
    def __init__(self, size_term_selector):

        # do this once so they can request size_terms for various segments (tour_type or purpose)
        land_use = inject.get_table('land_use')
        size_terms = inject.get_injectable('size_terms')
        self.destination_size_terms = \
            tour_destination_size_terms(land_use, size_terms, size_term_selector)
Example #26
def skim_dict(data_dir, settings):

    omx_file_path = config.data_file_path(settings["skims_file"])
    tags_to_load = settings['skim_time_periods']['labels']

    logger.info("loading skim_dict from %s" % (omx_file_path, ))

    # select the skims to load
    skim_info = get_skim_info(omx_file_path, tags_to_load)

    logger.debug("omx_shape %s skim_dtype %s" % (skim_info['omx_shape'], skim_info['dtype']))

    skim_buffers = inject.get_injectable('data_buffers', None)
    if skim_buffers:
        logger.info('Using existing skim_buffers for skims')
    else:
        skim_buffers = buffers_for_skims(skim_info, shared=False)
        load_skims(omx_file_path, skim_info, skim_buffers)

    skim_data = skim_data_from_buffers(skim_buffers, skim_info)

    block_names = list(skim_info['blocks'].keys())
    for i in range(len(skim_data)):
        block_name = block_names[i]
        block_data = skim_data[i]
        logger.info("block_name %s bytes %s (%s)" %
                    (block_name, block_data.nbytes, util.GB(block_data.nbytes)))

    # create skim dict
    skim_dict = skim.SkimDict(skim_data, skim_info)
    skim_dict.offset_mapper.set_offset_int(-1)

    return skim_dict
Example #27
def preload_injectables():
    """
    preload bulky injectables up front - stuff that isn't inserted into the pipeline
    """

    logger.info("preload_injectables")

    inject.add_step('track_skim_usage', track_skim_usage)
    inject.add_step('write_data_dictionary', write_data_dictionary)
    inject.add_step('write_tables', write_tables)

    table_list = config.setting('input_table_list')

    # default ActivitySim table names and indices
    if table_list is None:
        logger.warn("No 'input_table_list' found in settings. This will be a "
                    "required setting in upcoming versions of ActivitySim.")

        new_settings = inject.get_injectable('settings')
        new_settings['input_table_list'] = DEFAULT_TABLE_LIST
        inject.add_injectable('settings', new_settings)

    t0 = tracing.print_elapsed_time()

    # FIXME - still want to do this?
    # if inject.get_injectable('skim_dict', None) is not None:
    #     t0 = tracing.print_elapsed_time("preload skim_dict", t0, debug=True)
    #
    # if inject.get_injectable('skim_stack', None) is not None:
    #     t0 = tracing.print_elapsed_time("preload skim_stack", t0, debug=True)

    return True
Example #28
def shared_memory_size(data_buffers=None):
    """
    return total size of the multiprocessing shared memory block in data_buffers

    Returns
    -------
    shared_size : int
        total size in bytes of the shared data buffers
    """

    shared_size = 0

    if data_buffers is None:
        data_buffers = inject.get_injectable('data_buffers', {})

    for k, data_buffer in data_buffers.items():
        try:
            obj = data_buffer.get_obj()
        except Exception:
            obj = data_buffer
        data = np.ctypeslib.as_array(obj)
        data_size = data.nbytes

        shared_size += data_size

    return shared_size
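A small self-contained sketch of the size calculation, assuming the data buffers are multiprocessing Array/RawArray objects as the get_obj() fallback suggests; buffer names and sizes are made up.

import multiprocessing
import numpy as np

# two hypothetical shared skim buffers
data_buffers = {
    'taz': multiprocessing.RawArray('d', 100 * 100),  # 100x100 float64 skim
    'maz': multiprocessing.Array('f', 50 * 50),       # 50x50 float32 skim (has get_obj)
}

shared_size = 0
for k, data_buffer in data_buffers.items():
    try:
        obj = data_buffer.get_obj()   # multiprocessing.Array wraps the raw ctypes array
    except AttributeError:
        obj = data_buffer             # RawArray can be used directly
    shared_size += np.ctypeslib.as_array(obj).nbytes

print(shared_size)  # 100*100*8 + 50*50*4 = 90_000 bytes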
Example #29
def register_traceable_table(table_name, df):
    """
    Register traceable table

    Parameters
    ----------
    table_name: str
        name of the traced table ('households', 'persons', 'trips', or 'tours')
    df: pandas.DataFrame
        traced dataframe

    Returns
    -------
    Nothing
    """

    trace_hh_id = inject.get_injectable("trace_hh_id", None)

    if trace_hh_id is None:
        return

    if table_name == 'households':
        register_households(df, trace_hh_id)
    elif table_name == 'persons':
        register_persons(df, trace_hh_id)
    elif table_name == 'trips':
        register_trips(df, trace_hh_id)
    elif table_name == 'tours':
        register_tours(df, trace_hh_id)
Example #30
    def get_skim_data(self, skim_tag, skim_info):
        """
        Read skim data from backing store and return it as a 3D ndarray quack-alike SkimData object

        Parameters
        ----------
        skim_tag: str
        skim_info: string

        Returns
        -------
        SkimData
        """

        data_buffers = inject.get_injectable('data_buffers', None)
        if data_buffers:
            # we assume any existing skim buffers will already have skim data loaded into them
            logger.info(
                f"get_skim_data {skim_tag} using existing shared skim_buffers for skims"
            )
            skim_buffer = data_buffers[skim_tag]
        else:
            skim_buffer = self.allocate_skim_buffer(skim_info, shared=False)
            self.load_skims_to_buffer(skim_info, skim_buffer)

        skim_data = SkimData(
            self._skim_data_from_buffer(skim_info, skim_buffer))

        logger.info(
            f"get_skim_data {skim_tag} {type(skim_data).__name__} shape {skim_data.shape}"
        )

        return skim_data
Example #31
def test_mp_run():

    mp_configs_dir = os.path.join(os.path.dirname(__file__), 'configs_mp')
    configs_dir = os.path.join(os.path.dirname(__file__), 'configs')
    inject.add_injectable('configs_dir', [mp_configs_dir, configs_dir])

    output_dir = os.path.join(os.path.dirname(__file__), 'output')
    inject.add_injectable("output_dir", output_dir)

    data_dir = os.path.join(os.path.dirname(__file__), 'data')
    inject.add_injectable("data_dir", data_dir)

    tracing.config_logger()

    run_list = mp_tasks.get_run_list()
    mp_tasks.print_run_list(run_list)

    # do this after config.handle_standard_args, as command line args may override injectables
    injectables = ['data_dir', 'configs_dir', 'output_dir']
    injectables = {k: inject.get_injectable(k) for k in injectables}

    # pipeline.run(models=run_list['models'], resume_after=run_list['resume_after'])

    mp_tasks.run_multiprocess(run_list, injectables)
    pipeline.open_pipeline('_')
    regress_mini_auto()
    pipeline.close_pipeline()
Example #32
def person_max_window(persons):

    timetable = inject.get_injectable("timetable")

    # ndarray with one row per person and one column per time period
    # array value of 1 where free periods and 0 elsewhere
    s = pd.Series(persons.index.values, index=persons.index)
    available = timetable.individually_available(s)

    row_ids, start_pos, run_length, run_val = rle(available)

    # rle returns all runs, but we only care about runs of available (run_val == 1)
    target_rows = np.where(run_val == 1)
    row_ids = row_ids[target_rows]
    run_length = run_length[target_rows]

    df = pd.DataFrame({'row_ids': row_ids, 'run_length': run_length})

    # groupby index of row_ids matches the numpy row indexes of the timetable.individually_available ndarray
    # but rows may be missing for persons with no available periods
    max_overlap = df.groupby('row_ids').run_length.max()
    # fill in any missing values to align with input arrays
    input_row_ids = np.arange(persons.shape[0])
    max_window = max_overlap.reindex(input_row_ids).fillna(0)

    # FIXME should we return series or ndarray?
    max_window.index = persons.index

    return max_window
Example #33
def preload_injectables():
    """
    called after pipeline is
    """

    # could simply list injectables as arguments, but this way we can report timing...

    logger.info("preload_injectables")

    t0 = tracing.print_elapsed_time()

    if inject.get_injectable('skim_dict', None) is not None:
        t0 = tracing.print_elapsed_time("preload skim_dict")

    if inject.get_injectable('skim_stack', None) is not None:
        t0 = tracing.print_elapsed_time("preload skim_stack")
Example #34
def atwork_subtour_destination_sample(tours,
                                      persons_merged,
                                      atwork_subtour_destination_sample_spec,
                                      skim_dict,
                                      destination_size_terms,
                                      chunk_size,
                                      trace_hh_id):

    trace_label = 'atwork_subtour_location_sample'
    model_settings = inject.get_injectable('atwork_subtour_destination_settings')

    persons_merged = persons_merged.to_frame()

    tours = tours.to_frame()
    tours = tours[tours.tour_category == 'subtour']

    # merge persons into tours
    choosers = pd.merge(tours, persons_merged, left_on='person_id', right_index=True)

    alternatives = destination_size_terms.to_frame()

    constants = config.get_model_constants(model_settings)

    sample_size = model_settings["SAMPLE_SIZE"]
    alt_col_name = model_settings["ALT_COL_NAME"]
    chooser_col_name = 'workplace_taz'

    logger.info("Running atwork_subtour_location_sample with %d persons" % len(choosers))

    # create wrapper with keys for this lookup - in this case there is a workplace_taz
    # in the choosers and a TAZ in the alternatives which get merged during interaction
    # the skims will be available under the name "skims" for any @ expressions
    skims = skim_dict.wrap(chooser_col_name, 'TAZ')

    locals_d = {
        'skims': skims
    }
    if constants is not None:
        locals_d.update(constants)

    # FIXME - MEMORY HACK - only include columns actually used in spec
    chooser_columns = model_settings['SIMULATE_CHOOSER_COLUMNS']
    choosers = choosers[chooser_columns]

    choices = interaction_sample(
        choosers,
        alternatives,
        sample_size=sample_size,
        alt_col_name=alt_col_name,
        spec=atwork_subtour_destination_sample_spec,
        skims=skims,
        locals_d=locals_d,
        chunk_size=chunk_size,
        trace_label=trace_label)

    choices['person_id'] = choosers.person_id
    choices['workplace_taz'] = choosers.workplace_taz

    inject.add_table('atwork_subtour_destination_sample', choices)
Example #35
def skims_for_logsums(tour_purpose, model_settings, trace_label):

    assert 'LOGSUM_SETTINGS' in model_settings

    network_los = inject.get_injectable('network_los')

    skim_dict = network_los.get_default_skim_dict()

    orig_col_name = 'home_zone_id'
    dest_col_name = model_settings.get('DESTINATION_FOR_TOUR_PURPOSE').get(
        tour_purpose)

    odt_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=orig_col_name,
                                               dest_key=dest_col_name,
                                               dim3_key='out_period')
    dot_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=dest_col_name,
                                               dest_key=orig_col_name,
                                               dim3_key='in_period')
    odr_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=orig_col_name,
                                               dest_key=dest_col_name,
                                               dim3_key='in_period')
    dor_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=dest_col_name,
                                               dest_key=orig_col_name,
                                               dim3_key='out_period')
    od_skim_stack_wrapper = skim_dict.wrap(orig_col_name, dest_col_name)

    skims = {
        "odt_skims": odt_skim_stack_wrapper,
        "dot_skims": dot_skim_stack_wrapper,
        "odr_skims": odr_skim_stack_wrapper,
        "dor_skims": dor_skim_stack_wrapper,
        "od_skims": od_skim_stack_wrapper,
        'orig_col_name': orig_col_name,
        'dest_col_name': dest_col_name,
    }

    if network_los.zone_system == los.THREE_ZONE:
        # fixme - is this a lightweight object?
        tvpb = network_los.tvpb

        tvpb_logsum_odt = tvpb.wrap_logsum(orig_key=orig_col_name,
                                           dest_key=dest_col_name,
                                           tod_key='out_period',
                                           segment_key='demographic_segment',
                                           trace_label=trace_label,
                                           tag='tvpb_logsum_odt')
        tvpb_logsum_dot = tvpb.wrap_logsum(orig_key=dest_col_name,
                                           dest_key=orig_col_name,
                                           tod_key='in_period',
                                           segment_key='demographic_segment',
                                           trace_label=trace_label,
                                           tag='tvpb_logsum_dot')

        skims.update({
            'tvpb_logsum_odt': tvpb_logsum_odt,
            'tvpb_logsum_dot': tvpb_logsum_dot
        })

    return skims
Example #36
def initialize_traceable_tables():

    traceable_table_ids = inject.get_injectable('traceable_table_ids', {})
    if len(traceable_table_ids) > 0:
        logger.debug(
            f"initialize_traceable_tables resetting table_ids for {list(traceable_table_ids.keys())}"
        )
    inject.add_injectable('traceable_table_ids', {})
Example #37
def test_missing_table_list(data_dir):

    settings = inject.get_injectable('settings')
    assert isinstance(settings, dict)

    with pytest.raises(AssertionError) as excinfo:
        input.read_input_table('households')
    assert 'no input_table_list found' in str(excinfo.value)
Example #38
def test_vts():

    inject.add_injectable("settings", {})

    # note: need 0 duration tour on one end of day to guarantee at least one available tour
    alts = pd.DataFrame({"start": [1, 1, 2, 3], "end": [1, 4, 5, 6]})
    alts['duration'] = alts.end - alts.start
    inject.add_injectable("tdd_alts", alts)

    current_tour_person_ids = pd.Series(['b', 'c'], index=['d', 'e'])

    previous_tour_by_personid = pd.Series([2, 2, 1], index=['a', 'b', 'c'])

    prev_tour_attrs = get_previous_tour_by_tourid(current_tour_person_ids,
                                                  previous_tour_by_personid,
                                                  alts)

    pdt.assert_series_equal(
        prev_tour_attrs.start_previous,
        pd.Series([2, 1], index=['d', 'e'], name='start_previous'))

    pdt.assert_series_equal(
        prev_tour_attrs.end_previous,
        pd.Series([5, 4], index=['d', 'e'], name='end_previous'))

    tours = pd.DataFrame({
        "person_id": [1, 1, 2, 3, 3],
        "tour_num": [1, 2, 1, 1, 2],
        "tour_type": ['x', 'x', 'x', 'x', 'x']
    })

    persons = pd.DataFrame({"income": [20, 30, 25]}, index=[1, 2, 3])

    inject.add_table('persons', persons)

    spec = pd.DataFrame({"Coefficient": [1.2]}, index=["income"])
    spec.index.name = "Expression"

    inject.add_injectable("check_for_variability", True)

    timetable = inject.get_injectable("timetable")

    tdd_choices = vectorize_tour_scheduling(tours,
                                            persons,
                                            alts,
                                            timetable,
                                            tour_segments={'spec': spec},
                                            tour_segment_col=None,
                                            model_settings={},
                                            chunk_size=0,
                                            trace_label='test_vts')

    # FIXME - dead reckoning regression
    # there's no real logic here - this is just what came out of the monte carlo
    # note that the result comes out ordered by the nth trips and not ordered
    # by the trip index.  shrug?
    expected = [2, 2, 2, 0, 0]
    assert (tdd_choices.values == expected).all()
Example #39
def load_shadow_price_calculator(model_settings):
    """
    Initialize ShadowPriceCalculator for model_selector (e.g. school or workplace)

    If multiprocessing, get the shared_data buffer to coordinate global_desired_size
    calculation across sub-processes

    Parameters
    ----------
    model_settings : dict

    Returns
    -------
    spc : ShadowPriceCalculator
    """

    num_processes = inject.get_injectable('num_processes', 1)

    model_selector = model_settings['MODEL_SELECTOR']

    # - get shared_data from data_buffers (if multiprocessing)
    data_buffers = inject.get_injectable('data_buffers', None)
    if data_buffers is not None:
        logger.info('Using existing data_buffers for shadow_price')

        # - shadow_pricing_info
        shadow_pricing_info = inject.get_injectable('shadow_pricing_info', None)
        if shadow_pricing_info is None:
            shadow_pricing_info = get_shadow_pricing_info()
            inject.add_injectable('shadow_pricing_info', shadow_pricing_info)

        # - extract data buffer and reshape as numpy array
        data, lock = \
            shadow_price_data_from_buffers(data_buffers, shadow_pricing_info, model_selector)
    else:
        assert num_processes == 1
        data = None  # ShadowPriceCalculator will allocate its own data
        lock = None

    # - ShadowPriceCalculator
    spc = ShadowPriceCalculator(
        model_settings,
        num_processes, data, lock)

    return spc
Example #40
def read_raw_persons(households):

    df = read_input_table("persons")

    if inject.get_injectable('households_sliced', False):
        # keep all persons in the sampled households
        df = df[df.household_id.isin(households.index)]

    return df
Example #41
def build_output_file_path(file_name, use_prefix=None):
    output_dir = inject.get_injectable('output_dir')

    if use_prefix:
        file_name = "%s-%s" % (use_prefix, file_name)

    file_path = os.path.join(output_dir, file_name)

    return file_path
Example #42
def wrap_skims(model_settings):
    """
    wrap skims of trip destination using origin, dest column names from model settings.
    Various of these are used by destination_sample, compute_logsums, and destination_simulate
    so we create them all here with canonical names.

    Note that compute_logsums aliases their names so it can use the same equations to compute
    logsums from origin to alt_dest, and from alt_dest to primary destination

    odt_skims - SkimStackWrapper: trip origin, trip alt_dest, time_of_day
    dot_skims - SkimStackWrapper: trip alt_dest, trip origin, time_of_day
    dpt_skims - SkimStackWrapper: trip alt_dest, trip primary_dest, time_of_day
    pdt_skims - SkimStackWrapper: trip primary_dest, trip alt_dest, time_of_day
    od_skims - SkimDictWrapper: trip origin, trip alt_dest
    dp_skims - SkimDictWrapper: trip alt_dest, trip primary_dest

    Parameters
    ----------
    model_settings

    Returns
    -------
        dict containing skims, keyed by canonical names relative to tour orientation
    """

    skim_dict = inject.get_injectable('skim_dict')
    skim_stack = inject.get_injectable('skim_stack')

    o = model_settings['TRIP_ORIGIN']
    d = model_settings['ALT_DEST']
    p = model_settings['PRIMARY_DEST']

    skims = {
        "odt_skims": skim_stack.wrap(left_key=o, right_key=d, skim_key='trip_period'),
        "dot_skims": skim_stack.wrap(left_key=d, right_key=o, skim_key='trip_period'),
        "dpt_skims": skim_stack.wrap(left_key=d, right_key=p, skim_key='trip_period'),
        "pdt_skims": skim_stack.wrap(left_key=p, right_key=d, skim_key='trip_period'),
        "od_skims": skim_dict.wrap(o, d),
        "dp_skims": skim_dict.wrap(d, p),
    }

    return skims
Example #43
def trace_file_path(file_name):

    output_dir = inject.get_injectable('output_dir')

    # - check for optional trace subfolder
    if os.path.exists(os.path.join(output_dir, 'trace')):
        output_dir = os.path.join(output_dir, 'trace')
    else:
        file_name = "trace.%s" % (file_name,)

    file_path = os.path.join(output_dir, file_name)
    return file_path
Example #44
def test_mini_pipeline_run2():

    # the important thing here is that we should get
    # exactly the same results as for test_mini_pipeline_run
    # when we restart pipeline

    configs_dir = os.path.join(os.path.dirname(__file__), 'configs')

    setup_dirs(configs_dir)

    inject_settings(configs_dir, households_sample_size=HOUSEHOLDS_SAMPLE_SIZE)

    # should be able to get this BEFORE pipeline is opened
    checkpoints_df = pipeline.get_checkpoints()
    prev_checkpoint_count = len(checkpoints_df.index)

    # print "checkpoints_df\n", checkpoints_df[['checkpoint_name']]
    assert prev_checkpoint_count == 8

    pipeline.open_pipeline('auto_ownership_simulate')

    regress_mini_auto()

    # try to run a model already in pipeline
    with pytest.raises(RuntimeError) as excinfo:
        pipeline.run_model('auto_ownership_simulate')
    assert "run model 'auto_ownership_simulate' more than once" in str(excinfo.value)

    # and these new ones
    pipeline.run_model('cdap_simulate')
    pipeline.run_model('mandatory_tour_frequency')

    regress_mini_mtf()

    # should be able to get this before pipeline is closed (from existing open store)
    checkpoints_df = pipeline.get_checkpoints()
    assert len(checkpoints_df.index) == prev_checkpoint_count

    # - write list of override_hh_ids to override_hh_ids.csv in data for use in next test
    num_hh_ids = 10
    hh_ids = pipeline.get_table("households").head(num_hh_ids).index.values
    hh_ids = pd.DataFrame({'household_id': hh_ids})

    data_dir = inject.get_injectable('data_dir')
    hh_ids.to_csv(os.path.join(data_dir, 'override_hh_ids.csv'), index=False, header=True)

    pipeline.close_pipeline()
    inject.clear_cache()
    close_handlers()
Example #45
def log_settings(injectables):

    settings = [
        'households_sample_size',
        'chunk_size',
        'multiprocess',
        'num_processes',
        'resume_after',
    ]

    for k in settings:
        logger.info("setting %s: %s" % (k, config.setting(k)))

    for k in injectables:
        logger.info("injectable %s: %s" % (k, inject.get_injectable(k)))
Example #46
def delete_output_files(file_type, ignore=None, subdir=None):
    """
    Delete files in output directory of specified type

    Parameters
    ----------
    file_type: str
        file extension of the files to delete (e.g. 'csv')
    ignore: list of str
        optional list of file paths to leave in place
    subdir: str
        optional subdirectory (unused; '', 'log' and 'trace' are always scanned)

    Returns
    -------
    Nothing
    """

    output_dir = inject.get_injectable('output_dir')

    directories = ['', 'log', 'trace']

    for subdir in directories:

        dir = os.path.join(output_dir, subdir) if subdir else output_dir

        if not os.path.exists(dir):
            continue

        if ignore:
            ignore = [os.path.realpath(p) for p in ignore]

        # logger.debug("Deleting %s files in output dir %s" % (file_type, dir))

        for the_file in os.listdir(dir):
            if the_file.endswith(file_type):
                file_path = os.path.join(dir, the_file)

                if ignore and os.path.realpath(file_path) in ignore:
                    logger.debug("delete_output_files ignoring %s" % file_path)
                    continue

                try:
                    if os.path.isfile(file_path):
                        os.unlink(file_path)
                except Exception as e:
                    print(e)
Example #47
def p2p_time_window_overlap(p1_ids, p2_ids):
    """

    Parameters
    ----------
    p1_ids : pandas.Series
        person ids of the first person in each pair
    p2_ids : pandas.Series
        person ids of the second person in each pair (same index as p1_ids)

    Returns
    -------
    max_overlap : pandas.Series
        longest run of jointly available time periods for each pair, indexed like p1_ids
    """

    timetable = inject.get_injectable("timetable")

    assert len(p1_ids) == len(p2_ids)
    # if series, ought to have same index
    assert (p1_ids.index == p2_ids.index).all()

    # ndarray with one row per p2p and one column per time period
    # array value of 1 where overlapping free periods and 0 elsewhere
    available = timetable.pairwise_available(p1_ids, p2_ids)

    row_ids, start_pos, run_length, run_val = rle(available)

    # rle returns all runs, but we only care about runs of available (run_val == 1)
    target_rows = np.where(run_val == 1)
    row_ids = row_ids[target_rows]
    run_length = run_length[target_rows]

    df = pd.DataFrame({'row_ids': row_ids, 'run_length': run_length})

    # groupby index of row_ids matches the numpy row indexes of the timetable.pairwise_available ndarray
    # but rows may be missing for any person pairs with no overlap
    max_overlap = df.groupby('row_ids').run_length.max()
    # fill in any missing values to align with input arrays
    input_row_ids = np.arange(len(p1_ids))
    max_overlap = max_overlap.reindex(input_row_ids).fillna(0)

    # FIXME should we return series or ndarray?
    max_overlap.index = p1_ids.index

    return max_overlap
Example #48
def get_shadow_pricing_info():
    """
    return dict with info about dtype and shapes of desired and modeled size tables

    block shape is (num_zones, num_segments + 1)


    Returns
    -------
    shadow_pricing_info: dict
        dtype: <sp_dtype>,
        block_shapes: dict {<model_selector>: <block_shape>}
    """

    land_use = inject.get_table('land_use')
    size_terms = inject.get_injectable('size_terms')

    shadow_settings = config.read_model_settings('shadow_pricing.yaml')

    # shadow_pricing_models is dict of {<model_selector>: <model_name>}
    shadow_pricing_models = shadow_settings['shadow_pricing_models']

    blocks = OrderedDict()
    for model_selector in shadow_pricing_models:

        sp_rows = len(land_use)
        sp_cols = len(size_terms[size_terms.model_selector == model_selector])

        # extra tally column for TALLY_CHECKIN and TALLY_CHECKOUT semaphores
        blocks[block_name(model_selector)] = (sp_rows, sp_cols + 1)

    sp_dtype = np.int64

    shadow_pricing_info = {
        'dtype': sp_dtype,
        'block_shapes': blocks,
    }

    for k in shadow_pricing_info:
        logger.debug("shadow_pricing_info %s: %s" % (k, shadow_pricing_info.get(k)))

    return shadow_pricing_info
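For intuition, a toy version (made-up zone and segment counts) of the block-shape computation described in the docstring: one row per zone and one column per segment, plus the extra tally column.

from collections import OrderedDict

num_zones = 25                      # stand-in for len(land_use)
segments = {'workplace': 4,         # hypothetical number of workplace segments
            'school': 3}            # hypothetical number of school segments

blocks = OrderedDict()
for model_selector, num_segments in segments.items():
    # extra tally column for the TALLY_CHECKIN / TALLY_CHECKOUT semaphores
    blocks[model_selector] = (num_zones, num_segments + 1)

# blocks == OrderedDict([('workplace', (25, 5)), ('school', (25, 4))])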
Example #49
def get_cached_spec(hhsize):

    spec_name = cached_spec_name(hhsize)

    spec = inject.get_injectable(spec_name, None)
    if spec is not None:
        logger.info("build_cdap_spec returning cached injectable spec %s", spec_name)
        return spec

    # this is problematic for multiprocessing and since we delete csv files in output_dir
    # at the start of every run, doesn't provide any benefit in single-processing as the
    # cached spec will be available as an injectable to subsequent chunks

    # # try data dir
    # if os.path.exists(config.output_file_path(spec_name)):
    #     spec_path = config.output_file_path(spec_name)
    #     logger.info("build_cdap_spec reading cached spec %s from %s", spec_name, spec_path)
    #     return pd.read_csv(spec_path, index_col='Expression')

    return None
Example #50
def cascading_input_file_path(file_name, dir_list_injectable_name, mandatory=True):

    dir_list = inject.get_injectable(dir_list_injectable_name)

    if isinstance(dir_list, str):
        dir_list = [dir_list]

    assert isinstance(dir_list, list)

    file_path = None
    for dir in dir_list:
        p = os.path.join(dir, file_name)
        if os.path.isfile(p):
            file_path = p
            break

    if mandatory and not file_path:
        raise RuntimeError("file_path %s: file '%s' not in %s" %
                           (dir_list_injectable_name, file_path, dir_list))

    return file_path
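A hedged, standalone mirror of the cascade (directory names are hypothetical): the first directory in the list that contains the file wins.

import os

def find_in_dirs(file_name, dir_list):
    # standalone sketch of the cascade: earlier directories take precedence
    for d in dir_list:
        p = os.path.join(d, file_name)
        if os.path.isfile(p):
            return p
    return None

# e.g. an overlay configs dir searched ahead of the base configs dir
path = find_in_dirs('settings.yaml', ['configs_mp', 'configs'])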
Example #51
def read_settings_file(file_name, mandatory=True):

    def backfill_settings(settings, backfill):
        new_settings = backfill.copy()
        new_settings.update(settings)
        return new_settings

    configs_dir = inject.get_injectable('configs_dir')

    if isinstance(configs_dir, str):
        configs_dir = [configs_dir]

    assert isinstance(configs_dir, list)

    settings = {}
    for dir in configs_dir:
        file_path = os.path.join(dir, file_name)
        if os.path.exists(file_path):
            if settings:
                logger.debug("read settings for %s from %s" % (file_name, file_path))

            with open(file_path) as f:
                s = yaml.load(f, Loader=yaml.SafeLoader)
            settings = backfill_settings(settings, s)

            if s.get('inherit_settings', False):
                logger.debug("inherit_settings flag set for %s in %s" % (file_name, file_path))
                continue
            else:
                break

    if mandatory and not settings:
        raise RuntimeError("read_settings_file: no settings for '%s' in %s" %
                           (file_name, configs_dir))

    return settings
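A standalone illustration of the backfill step above (invented settings values): keys read from an earlier, higher-priority configs dir are kept, and later dirs only fill in keys not yet seen.

def backfill_settings(settings, backfill):
    new_settings = backfill.copy()
    new_settings.update(settings)
    return new_settings

# settings.yaml found in an overlay dir (read first) and in the base dir (read second)
overlay = {'households_sample_size': 100, 'inherit_settings': True}
base = {'households_sample_size': 0, 'chunk_size': 0}

merged = backfill_settings(overlay, base)
assert merged == {'households_sample_size': 100, 'inherit_settings': True, 'chunk_size': 0}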
Exemple #52
0
def interaction_trace_rows(interaction_df, choosers, sample_size=None):
    """
    Trace model design for interaction_simulate

    Parameters
    ----------
    interaction_df: pandas.DataFrame
        traced model_design dataframe
    choosers: pandas.DataFrame
        interaction_simulate choosers
        (needed to filter the model_design dataframe by traced hh or person id)
    sample_size : int or None
        int for constant sample size, or None if choosers have different numbers of alternatives

    Returns
    -------
    trace_rows : numpy.ndarray
        array of booleans to flag which rows in interaction_df to trace

    trace_ids : tuple (str,  numpy.ndarray)
        column name and array of trace_ids mapping trace_rows to their target_id
        for use by trace_interaction_eval_results which needs to know target_id
        so it can create separate tables for each distinct target for readability
    """

    # slicer column name and id targets to use for chooser id added to model_design dataframe
    # currently we only ever slice by person_id, but that could change, so we check here...

    traceable_table_ids = inject.get_injectable('traceable_table_ids', {})

    if choosers.index.name == 'person_id' and 'persons' in traceable_table_ids:
        slicer_column_name = choosers.index.name
        targets = traceable_table_ids['persons']
    elif 'household_id' in choosers.columns and 'households' in traceable_table_ids:
        slicer_column_name = 'household_id'
        targets = traceable_table_ids['households']
    elif 'person_id' in choosers.columns and 'persons' in traceable_table_ids:
        slicer_column_name = 'person_id'
        targets = traceable_table_ids['persons']
    else:
        print(choosers.columns)
        raise RuntimeError("interaction_trace_rows don't know how to slice index '%s'"
                           % choosers.index.name)

    if sample_size is None:
        # if sample size not constant, we count on either
        # slicer column being in interaction_df
        # or index of interaction_df being same as choosers
        if slicer_column_name in interaction_df.columns:
            trace_rows = np.in1d(interaction_df[slicer_column_name], targets)
            trace_ids = interaction_df.loc[trace_rows, slicer_column_name].values
        else:
            assert interaction_df.index.name == choosers.index.name
            trace_rows = np.in1d(interaction_df.index, targets)
            trace_ids = interaction_df[trace_rows].index.values

    else:

        if slicer_column_name == choosers.index.name:
            trace_rows = np.in1d(choosers.index, targets)
            trace_ids = np.asanyarray(choosers[trace_rows].index)
        elif slicer_column_name == 'person_id':
            trace_rows = np.in1d(choosers['person_id'], targets)
            trace_ids = np.asanyarray(choosers[trace_rows].person_id)
        elif slicer_column_name == 'household_id':
            trace_rows = np.in1d(choosers['household_id'], targets)
            trace_ids = np.asanyarray(choosers[trace_rows].household_id)
        else:
            assert False

        # simply repeat if sample size is constant across choosers
        assert sample_size == len(interaction_df.index) / len(choosers.index)
        trace_rows = np.repeat(trace_rows, sample_size)
        trace_ids = np.repeat(trace_ids, sample_size)

    assert type(trace_rows) == np.ndarray
    assert type(trace_ids) == np.ndarray

    trace_ids = (slicer_column_name, trace_ids)

    return trace_rows, trace_ids
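
A self-contained illustration (with made-up ids) of the constant sample_size branch: each chooser contributes sample_size rows to interaction_df, so the per-chooser flags and ids are simply repeated once per alternative row.

import numpy as np
import pandas as pd

choosers = pd.DataFrame(index=pd.Index([101, 102, 103], name='person_id'))
targets = [102]        # traceable person ids
sample_size = 2        # alternatives sampled per chooser

trace_rows = np.in1d(choosers.index, targets)           # [False, True, False]
trace_ids = np.asanyarray(choosers[trace_rows].index)   # [102]

trace_rows = np.repeat(trace_rows, sample_size)         # [F, F, T, T, F, F]
trace_ids = np.repeat(trace_ids, sample_size)           # [102, 102]
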
def vectorize_joint_tour_scheduling(
        joint_tours, joint_tour_participants,
        persons_merged, alts, spec,
        model_settings,
        chunk_size=0, trace_label=None):
    """
    Like vectorize_tour_scheduling but specifically for joint tours

    joint tours have a few peculiarities necessitating separate treatment:

    Timetable has to be initialized to set all timeperiods...

    Parameters
    ----------
    joint_tours : DataFrame
        DataFrame of joint tours containing tour attributes, including household_id,
        tour_num, and tour_type.
    joint_tour_participants : DataFrame
        DataFrame of joint tour participants, with person_id and tour_id columns.
    persons_merged : DataFrame
        DataFrame of persons containing attributes referenced by expressions in spec
    alts : DataFrame
        DataFrame of alternatives which represent time slots.  Will be passed to
        interaction_simulate in batches for each nth tour.
    spec : DataFrame
        The spec which will be passed to interaction_simulate.
        (or dict of specs keyed on tour_type if tour_types is not None)
    model_settings : dict

    Returns
    -------
    choices : Series
        A Series of choices where the index is the index of the tours
        DataFrame and the values are the index of the alts DataFrame.
    persons_timetable : TimeTable
        timetable updated with joint tours (caller should replace_table for it to persist)
    """

    trace_label = tracing.extend_trace_label(trace_label, 'vectorize_joint_tour_scheduling')

    assert len(joint_tours.index) > 0
    assert 'tour_num' in joint_tours.columns
    assert 'tour_type' in joint_tours.columns

    timetable_window_id_col = None
    tour_owner_id_col = 'household_id'
    segment = None

    persons_timetable = inject.get_injectable("timetable")
    choice_list = []

    # keep a series of the most recent tour for each household
    # initialize with the first alternative from alts
    previous_tour_by_householdid = pd.Series(alts.index[0], index=joint_tours.household_id.unique())

    # tours must be scheduled in increasing trip_num order
    # second trip of type must be in group immediately following first
    # this ought to have been ensured when tours are created (tour_frequency.process_tours)

    # print "participant windows before scheduling\n", \
    #     persons_timetable.slice_windows_by_row_id(joint_tour_participants.person_id)

    for tour_num, nth_tours in joint_tours.groupby('tour_num', sort=True):

        tour_trace_label = tracing.extend_trace_label(trace_label, 'tour_%s' % (tour_num,))

        # no more than one tour per household per call to schedule_tours
        assert not nth_tours.household_id.duplicated().any()

        nth_participants = \
            joint_tour_participants[joint_tour_participants.tour_id.isin(nth_tours.index)]

        timetable = build_joint_tour_timetables(
            nth_tours, nth_participants,
            persons_timetable, alts)

        choices = \
            schedule_tours(nth_tours,
                           persons_merged, alts,
                           spec, segment,
                           model_settings,
                           timetable, timetable_window_id_col,
                           previous_tour_by_householdid, tour_owner_id_col,
                           chunk_size, tour_trace_label)

        # - update timetables of all joint tour participants
        persons_timetable.assign(
            nth_participants.person_id,
            reindex(choices, nth_participants.tour_id))

        choice_list.append(choices)

    choices = pd.concat(choice_list)

    # add the start, end, and duration from tdd_alts
    # assert (alts.index == list(range(alts.shape[0]))).all()
    tdd = pd.DataFrame(data=alts.values[choices.values],
                       columns=alts.columns,
                       index=choices.index)

    # tdd = alts.loc[choices]
    # tdd.index = choices.index

    # include the index of the choice in the tdd alts table
    tdd['tdd'] = choices

    # print "participant windows after scheduling\n", \
    #     persons_timetable.slice_windows_by_row_id(joint_tour_participants.person_id)

    return tdd, persons_timetable
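
The timetable update above broadcasts each joint tour's chosen time window to every participant. A small stand-alone illustration with made-up ids; reindex here is a simplified stand-in for the reindex helper the source imports:

import pandas as pd

def reindex(series, new_row_ids):
    # simplified stand-in: look up series at new_row_ids' values, keep new_row_ids' index
    return pd.Series(series.loc[new_row_ids.values].values, index=new_row_ids.index)

# chosen tdd alternative per joint tour, indexed by tour_id
choices = pd.Series([5, 9], index=pd.Index([7001, 7002], name='tour_id'))

# one row per (participant, tour)
participants = pd.DataFrame({'person_id': [11, 12, 21],
                             'tour_id':   [7001, 7001, 7002]})

# per-participant copy of the chosen window, ready to assign into the persons timetable
participant_choices = reindex(choices, participants.tour_id)   # [5, 5, 9]
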
def vectorize_tour_scheduling(tours, persons_merged, alts,
                              spec, segment_col,
                              model_settings,
                              chunk_size=0, trace_label=None):
    """
    The purpose of this method is fairly straightforward - it takes tours
    and schedules them into time slots.  Alternatives should be specified so
    as to define those time slots (usually with start and end times).

    schedule_tours adds variables that can be used in the spec which have
    to do with the previous tours per person.  Every column in the
    alternatives table is appended with the suffix "_previous" and made
    available.  So if your alternatives table has columns for start and end,
    then start_previous and end_previous will be set to the start and end of
    the most recent tour for a person.  The first time through,
    start_previous and end_previous are undefined, so make sure to protect
    with a tour_num >= 2 in the variable computation.

    Parameters
    ----------
    tours : DataFrame
        DataFrame of tours containing tour attributes, as well as a person_id
        column to define the nth tour for each person.
    persons_merged : DataFrame
        DataFrame of persons containing attributes referenced by expressions in spec
    alts : DataFrame
        DataFrame of alternatives which represent time slots.  Will be passed to
        interaction_simulate in batches for each nth tour.
    spec : DataFrame
        The spec which will be passed to interaction_simulate.
        (or dict of specs keyed on tour_type if tour_types is not None)
    model_settings : dict

    Returns
    -------
    choices : Series
        A Series of choices where the index is the index of the tours
        DataFrame and the values are the index of the alts DataFrame.
    timetable : TimeTable
        persons timetable updated with tours (caller should replace_table for it to persist)
    """

    trace_label = tracing.extend_trace_label(trace_label, 'vectorize_tour_scheduling')

    assert len(tours.index) > 0
    assert 'tour_num' in tours.columns
    assert 'tour_type' in tours.columns

    # tours must be scheduled in increasing trip_num order
    # second trip of type must be in group immediately following first
    # this ought to have been ensured when tours are created (tour_frequency.process_tours)

    timetable = inject.get_injectable("timetable")
    choice_list = []

    # keep a series of the most recent tour for each person
    # initialize with the first alternative from alts
    previous_tour_by_personid = pd.Series(alts.index[0], index=tours.person_id.unique())

    timetable_window_id_col = 'person_id'
    tour_owner_id_col = 'person_id'

    # no more than one tour per person per call to schedule_tours
    # tours must be scheduled in increasing trip_num order
    # second trip of type must be in group immediately following first
    # segregate scheduling by tour_type if multiple specs passed in dict keyed by tour_type

    for tour_num, nth_tours in tours.groupby('tour_num', sort=True):

        tour_trace_label = tracing.extend_trace_label(trace_label, 'tour_%s' % (tour_num,))

        if isinstance(spec, dict):

            assert segment_col is not None

            for spec_segment in spec:

                segment_trace_label = tracing.extend_trace_label(tour_trace_label, spec_segment)

                in_segment = nth_tours[segment_col] == spec_segment

                if not in_segment.any():
                    logger.info("skipping empty segment %s")
                    continue

                # assume segmentation of spec and logsum coefficients are aligned
                logsum_tour_purpose = spec_segment

                choices = \
                    schedule_tours(nth_tours[in_segment],
                                   persons_merged, alts,
                                   spec[spec_segment], logsum_tour_purpose,
                                   model_settings,
                                   timetable, timetable_window_id_col,
                                   previous_tour_by_personid, tour_owner_id_col,
                                   chunk_size,
                                   segment_trace_label)

                choice_list.append(choices)

        else:

            # an unsegmented spec (not a dict) indicates no logsums
            # caller could use a single-element spec dict if logsum support is desired,
            # but that case is not required for mtctm1
            assert segment_col is None
            logsum_segment = None

            choices = \
                schedule_tours(nth_tours,
                               persons_merged, alts,
                               spec, logsum_segment,
                               model_settings,
                               timetable, timetable_window_id_col,
                               previous_tour_by_personid, tour_owner_id_col,
                               chunk_size,
                               tour_trace_label)

            choice_list.append(choices)

    choices = pd.concat(choice_list)

    # add the start, end, and duration from tdd_alts
    # use np instead of (slower) loc[] since alts has rangeindex
    tdd = pd.DataFrame(data=alts.values[choices.values],
                       columns=alts.columns,
                       index=choices.index)

    # tdd = alts.loc[choices]
    # tdd.index = choices.index

    # include the index of the choice in the tdd alts table
    tdd['tdd'] = choices

    return tdd, timetable
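
A worked illustration (made-up alternatives) of the positional tdd lookup above: because alts has a RangeIndex of 0..n-1, alts.values[choices.values] selects the same rows as alts.loc[choices], just without the slower label lookup.

import pandas as pd

alts = pd.DataFrame({'start': [5, 5, 6], 'end': [9, 10, 10], 'duration': [4, 5, 4]})
choices = pd.Series([2, 0], index=pd.Index([42, 43], name='tour_id'))

tdd = pd.DataFrame(data=alts.values[choices.values],
                   columns=alts.columns,
                   index=choices.index)
tdd['tdd'] = choices
# tour 42 -> start 6, end 10, duration 4, tdd 2; tour 43 -> start 5, end 9, duration 4, tdd 0
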
def _compute_logsums(alt_tdd, tours_merged, tour_purpose, model_settings, trace_label):
    """
    compute logsums for tours using skims for alt_tdd out_period and in_period
    """

    trace_label = tracing.extend_trace_label(trace_label, 'logsums')

    logsum_settings = config.read_model_settings(model_settings['LOGSUM_SETTINGS'])

    choosers = alt_tdd.join(tours_merged, how='left', rsuffix='_chooser')
    logger.info("%s compute_logsums for %d choosers%s alts" %
                (trace_label, choosers.shape[0], alt_tdd.shape[0]))

    # - setup skims

    skim_dict = inject.get_injectable('skim_dict')
    skim_stack = inject.get_injectable('skim_stack')

    orig_col_name = 'TAZ'
    dest_col_name = model_settings.get('DESTINATION_FOR_TOUR_PURPOSE').get(tour_purpose)

    odt_skim_stack_wrapper = skim_stack.wrap(left_key=orig_col_name, right_key=dest_col_name,
                                             skim_key='out_period')
    dot_skim_stack_wrapper = skim_stack.wrap(left_key=dest_col_name, right_key=orig_col_name,
                                             skim_key='in_period')
    od_skim_stack_wrapper = skim_dict.wrap(orig_col_name, dest_col_name)

    skims = {
        "odt_skims": odt_skim_stack_wrapper,
        "dot_skims": dot_skim_stack_wrapper,
        "od_skims": od_skim_stack_wrapper,
        'orig_col_name': orig_col_name,
        'dest_col_name': dest_col_name,
    }

    # - locals_dict
    constants = config.get_model_constants(logsum_settings)

    omnibus_coefficient_spec = get_coeffecients_spec(logsum_settings)
    coefficient_spec = omnibus_coefficient_spec[tour_purpose]
    coefficients = assign.evaluate_constants(coefficient_spec, constants=constants)

    locals_dict = {}
    locals_dict.update(coefficients)
    locals_dict.update(constants)
    locals_dict.update(skims)

    # - run preprocessor to annotate choosers
    # allow specification of alternate preprocessor for nontour choosers
    preprocessor = model_settings.get('LOGSUM_PREPROCESSOR', 'preprocessor')
    preprocessor_settings = logsum_settings[preprocessor]

    if preprocessor_settings:

        simulate.set_skim_wrapper_targets(choosers, skims)

        expressions.assign_columns(
            df=choosers,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)

    # - compute logsums
    logsum_spec = get_logsum_spec(logsum_settings)
    nest_spec = config.get_logit_model_settings(logsum_settings)

    logsums = simulate.simple_simulate_logsums(
        choosers,
        logsum_spec,
        nest_spec,
        skims=skims,
        locals_d=locals_dict,
        chunk_size=0,
        trace_label=trace_label)

    return logsums
Exemple #56
0
def run_trip_destination(
        trips,
        tours_merged,
        chunk_size, trace_hh_id,
        trace_label):
    """
    trip destination - main functionality separated from model step so it can be called iteratively

    Run the trip_destination model, assigning destinations for each (intermediate) trip
    (last trips already have a destination - either the tour primary destination or Home)

    Set trip destination and origin columns, and a boolean failed flag for any failed trips
    (destination for flagged failed trips will be set to -1)

    Parameters
    ----------
    trips
    tours_merged
    chunk_size
    trace_hh_id
    trace_label

    Returns
    -------

    """

    model_settings = config.read_model_settings('trip_destination.yaml')
    preprocessor_settings = model_settings.get('preprocessor', None)
    logsum_settings = config.read_model_settings(model_settings['LOGSUM_SETTINGS'])

    land_use = inject.get_table('land_use')
    size_terms = inject.get_injectable('size_terms')

    # - initialize trip origin and destination to those of half-tour
    # (we will sequentially adjust intermediate trips origin and destination as we choose them)
    tour_destination = reindex(tours_merged.destination, trips.tour_id).astype(int)
    tour_origin = reindex(tours_merged.origin, trips.tour_id).astype(int)
    trips['destination'] = np.where(trips.outbound, tour_destination, tour_origin)
    trips['origin'] = np.where(trips.outbound, tour_origin, tour_destination)
    trips['failed'] = False

    trips = trips.sort_index()
    trips['next_trip_id'] = np.roll(trips.index, -1)
    trips.next_trip_id = trips.next_trip_id.where(trips.trip_num < trips.trip_count, 0)

    # - filter tours_merged (AFTER copying destination and origin columns to trips)
    # tours_merged is used for logsums, we filter it here upfront to save space and time
    tours_merged_cols = logsum_settings['TOURS_MERGED_CHOOSER_COLUMNS']
    if 'REDUNDANT_TOURS_MERGED_CHOOSER_COLUMNS' in model_settings:
        redundant_cols = model_settings['REDUNDANT_TOURS_MERGED_CHOOSER_COLUMNS']
        tours_merged_cols = [c for c in tours_merged_cols if c not in redundant_cols]
    tours_merged = tours_merged[tours_merged_cols]

    # - skims
    skims = wrap_skims(model_settings)

    # - size_terms and alternatives
    alternatives = tour_destination_size_terms(land_use, size_terms, 'trip')

    # DataFrameMatrix allows us to treat the dataframe as a virtual 2-D array, indexed by TAZ and purpose
    # e.g. size_terms.get(df.dest_taz, df.purpose)
    # returns a series of size_terms for each chooser's dest_taz and purpose with chooser index
    size_term_matrix = DataFrameMatrix(alternatives)

    # don't need size terms in alternatives, just TAZ index
    alternatives = alternatives.drop(alternatives.columns, axis=1)
    alternatives.index.name = model_settings['ALT_DEST']

    # - process intermediate trips in ascending trip_num order
    intermediate = trips.trip_num < trips.trip_count
    if intermediate.any():

        first_trip_num = trips[intermediate].trip_num.min()
        last_trip_num = trips[intermediate].trip_num.max()

        # iterate over trips in ascending trip_num order
        for trip_num in range(first_trip_num, last_trip_num + 1):

            nth_trips = trips[intermediate & (trips.trip_num == trip_num)]
            nth_trace_label = tracing.extend_trace_label(trace_label, 'trip_num_%s' % trip_num)

            # - annotate nth_trips
            if preprocessor_settings:
                expressions.assign_columns(
                    df=nth_trips,
                    model_settings=preprocessor_settings,
                    locals_dict=config.get_model_constants(model_settings),
                    trace_label=nth_trace_label)

            logger.info("Running %s with %d trips", nth_trace_label, nth_trips.shape[0])

            # - choose destination for nth_trips, segmented by primary_purpose
            choices_list = []
            for primary_purpose, trips_segment in nth_trips.groupby('primary_purpose'):
                choices = choose_trip_destination(
                    primary_purpose,
                    trips_segment,
                    alternatives,
                    tours_merged,
                    model_settings,
                    size_term_matrix, skims,
                    chunk_size, trace_hh_id,
                    trace_label=tracing.extend_trace_label(nth_trace_label, primary_purpose))

                choices_list.append(choices)

            destinations = pd.concat(choices_list)

            failed_trip_ids = nth_trips.index.difference(destinations.index)
            if failed_trip_ids.any():
                logger.warning("%s sidelining %s trips without viable destination alternatives" %
                               (nth_trace_label, failed_trip_ids.shape[0]))
                next_trip_ids = nth_trips.next_trip_id.reindex(failed_trip_ids)
                trips.loc[failed_trip_ids, 'failed'] = True
                trips.loc[failed_trip_ids, 'destination'] = -1
                trips.loc[next_trip_ids, 'origin'] = trips.loc[failed_trip_ids].origin.values

            # - assign choices to these trips destinations and to next trips origin
            assign_in_place(trips, destinations.to_frame('destination'))
            destinations.index = nth_trips.next_trip_id.reindex(destinations.index)
            assign_in_place(trips, destinations.to_frame('origin'))

    del trips['next_trip_id']

    return trips
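
A stand-alone illustration (made-up trip ids) of the next_trip_id bookkeeping used above: rolling the sorted index up by one pairs each trip with its successor, and the last trip of each tour (trip_num == trip_count) gets 0 as a no-successor flag.

import numpy as np
import pandas as pd

trips = pd.DataFrame(
    {'trip_num':   [1, 2, 3, 1, 2],
     'trip_count': [3, 3, 3, 2, 2]},
    index=pd.Index([501, 502, 503, 601, 602], name='trip_id')).sort_index()

trips['next_trip_id'] = np.roll(trips.index, -1)
trips.next_trip_id = trips.next_trip_id.where(trips.trip_num < trips.trip_count, 0)
# next_trip_id -> [502, 503, 0, 602, 0]
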
Exemple #57
0
def override_setting(key, value):
    new_settings = inject.get_injectable('settings')
    new_settings[key] = value
    inject.add_injectable('settings', new_settings)
Exemple #58
0
def setting(key, default=None):

    return inject.get_injectable('settings').get(key, default)
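
Illustrative use of the two settings helpers above (the setting names are hypothetical): override_setting rewrites the injected settings dict, and setting reads it back with an optional default.

override_setting('households_sample_size', 100)
assert setting('households_sample_size') == 100
assert setting('no_such_key', default='fallback') == 'fallback'
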
Exemple #59
0
def add_size_tables():
    """
    inject tour_destination_size_terms tables for each model_selector (e.g. school, workplace)

    Size tables are pandas dataframes with location counts for model_selector by zone
    and segment (see tour_destination_size_terms).

    if using shadow pricing, we scale size_table counts to sample population
    (in which case, they have to be created while single-process)

    Scaling is problematic as it breaks household result replicability across sample sizes
    It also changes the magnitude of the size terms so if they are used as utilities in
    expression files, their importance will diminish relative to other utilities as the sample
    size decreases.

    Scaling makes most sense for a full sample in conjunction with shadow pricing, where
    shadow prices can be adjusted iteratively to bring modelled counts into line with desired
    (size table) counts.
    """

    use_shadow_pricing = bool(config.setting('use_shadow_pricing'))

    shadow_settings = config.read_model_settings('shadow_pricing.yaml')
    shadow_pricing_models = shadow_settings['shadow_pricing_models']

    # probably ought not scale if not shadow_pricing (breaks partial sample replicability)
    # but this allows compatibility with existing CTRAMP behavior...
    scale_size_table = shadow_settings.get('SCALE_SIZE_TABLE', False)

    if shadow_pricing_models is None:
        logger.warning('shadow_pricing_models list not found in shadow_pricing settings')
        return

    # shadow_pricing_models is dict of {<model_selector>: <model_name>}
    # since these are scaled to model size, they have to be created while single-process

    for model_selector, model_name in iteritems(shadow_pricing_models):

        model_settings = config.read_model_settings(model_name)

        assert model_selector == model_settings['MODEL_SELECTOR']

        segment_ids = model_settings['SEGMENT_IDS']
        chooser_table_name = model_settings['CHOOSER_TABLE_NAME']
        chooser_segment_column = model_settings['CHOOSER_SEGMENT_COLUMN_NAME']

        choosers_df = inject.get_table(chooser_table_name).to_frame()
        if 'CHOOSER_FILTER_COLUMN_NAME' in model_settings:
            choosers_df = \
                choosers_df[choosers_df[model_settings['CHOOSER_FILTER_COLUMN_NAME']] != 0]

        # - raw_desired_size
        land_use = inject.get_table('land_use')
        size_terms = inject.get_injectable('size_terms')
        raw_size = tour_destination_size_terms(land_use, size_terms, model_selector)
        assert set(raw_size.columns) == set(segment_ids.keys())

        if use_shadow_pricing or scale_size_table:

            inject.add_table('raw_' + size_table_name(model_selector), raw_size)

            # - scale size_table counts to sample population
            # scaled_size = zone_size * (total_segment_modeled / total_segment_desired)

            # segment scale factor (modeled / desired) keyed by segment_name
            segment_scale_factors = {}
            for c in raw_size:
                # number of zone demographics desired destination choices
                segment_desired_size = raw_size[c].astype(np.float64).sum()

                # number of synthetic population choosers in segment
                segment_chooser_count = \
                    (choosers_df[chooser_segment_column] == segment_ids[c]).sum()

                segment_scale_factors[c] = \
                    segment_chooser_count / np.maximum(segment_desired_size, 1)

                logger.info("add_desired_size_tables %s segment %s "
                            "desired %s modeled %s scale_factor %s" %
                            (chooser_table_name, c,
                             segment_desired_size,
                             segment_chooser_count,
                             segment_scale_factors[c]))

            # FIXME - should we be rounding?
            scaled_size = (raw_size * segment_scale_factors).round()
        else:
            scaled_size = raw_size

        inject.add_table(size_table_name(model_selector), scaled_size)
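
A worked numeric sketch (made-up counts) of the scaling step above: a segment with 1,000 desired destination slots region-wide but only 250 modeled choosers in the sample gets a scale factor of 0.25, applied zone by zone and then rounded.

import numpy as np
import pandas as pd

raw_size = pd.Series([400.0, 350.0, 250.0])                  # desired size by zone for one segment
segment_desired_size = raw_size.astype(np.float64).sum()     # 1000.0
segment_chooser_count = 250                                  # synthetic-population choosers in segment

scale_factor = segment_chooser_count / np.maximum(segment_desired_size, 1)   # 0.25
scaled_size = (raw_size * scale_factor).round()              # [100.0, 88.0, 62.0]
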
Exemple #60
0
def pipeline_file_path(file_name):

    prefix = inject.get_injectable('pipeline_file_prefix', None)
    return build_output_file_path(file_name, use_prefix=prefix)