def get_alts_from_segmented_nested_logit(model_settings, segment_name,
                                         trace_label):
    """Collect the alternative names found at the leaves of a nest spec.

    The nest spec is read from ``model_settings`` and its coefficients are
    evaluated with the coefficients of ``segment_name`` before the nests
    are walked.

    Parameters
    ----------
    model_settings : dict
    segment_name : str
    trace_label : str

    Returns
    -------
    list
        names of the leaf nests, in the order ``logit.each_nest`` yields them
    """

    raw_nest_spec = config.get_logit_model_settings(model_settings)
    segment_coefficients = simulate.get_segment_coefficients(
        model_settings, segment_name)
    evaluated_nest_spec = simulate.eval_nest_coefficients(
        raw_nest_spec, segment_coefficients, trace_label)

    # only leaves are alternatives; intermediate nests are skipped
    return [
        nest.name
        for nest in logit.each_nest(evaluated_nest_spec)
        if nest.is_leaf
    ]
Beispiel #2
0
def compute_logsums(primary_purpose, trips, destination_sample, tours_merged,
                    model_settings, skim_hotel, chunk_size, trace_label):
    """
    Compute the two out-of-direction mode choice logsums for every sampled
    alternative destination:

    - od: trip origin to alt_dest
    - dp: alt_dest to the primary (half-tour) destination

    Both legs go through compute_ood_logsums with the spec, nest spec and
    coefficients named by model_settings['LOGSUM_SETTINGS'] for the
    primary_purpose segment.

    Returns
    -------
        destination_sample with od_logsum and dp_logsum columns added
        (the frame passed in is the one that is modified and returned)
    """
    trace_label = tracing.extend_trace_label(trace_label, 'compute_logsums')
    logger.info("Running %s with %d samples", trace_label,
                destination_sample.shape[0])

    # both legs share one tag because chunk usage is uniform
    chunk_tag = 'trip_destination.compute_logsums'

    # FIXME should pass this in?
    network_los = inject.get_injectable('network_los')

    # - trips_merged: trips with the columns of their tour attached
    trips_merged = pd.merge(
        trips, tours_merged,
        left_on='tour_id', right_index=True, how="left")
    assert trips_merged.index.equals(trips.index)

    # - choosers: one row per sampled alternative, carrying the trip columns.
    # pandas merge does not preserve the left index when it has duplicate
    # values, so merge on the trip_id column and restore the index afterwards
    trips_flat = trips_merged.reset_index()
    sample_with_trips = pd.merge(
        destination_sample, trips_flat,
        left_index=True, right_on='trip_id', how="left",
        suffixes=('', '_r'))
    choosers = sample_with_trips.set_index('trip_id')
    assert choosers.index.equals(destination_sample.index)

    logsum_settings = config.read_model_settings(
        model_settings['LOGSUM_SETTINGS'])
    coefficients = simulate.get_segment_coefficients(
        logsum_settings, primary_purpose)

    nest_spec = simulate.eval_nest_coefficients(
        config.get_logit_model_settings(logsum_settings),
        coefficients,
        trace_label)

    logsum_spec = simulate.eval_coefficients(
        simulate.read_model_spec(file_name=logsum_settings['SPEC']),
        coefficients,
        estimator=None)

    # expression namespace: model constants first, then coefficients
    # (coefficients can appear in expressions)
    locals_dict = {}
    locals_dict.update(config.get_model_constants(logsum_settings))
    locals_dict.update(coefficients)

    skims = skim_hotel.logsum_skims()
    three_zone = network_los.zone_system == los.THREE_ZONE
    if three_zone:
        # TVPB constants can appear in expressions
        locals_dict.update(
            network_los.setting('TVPB_SETTINGS.tour_mode_choice.CONSTANTS'))

    def leg_logsums(leg_skims, leg_tag):
        # same choosers/spec/locals for both legs; only the skims differ
        return compute_ood_logsums(
            choosers,
            logsum_settings,
            nest_spec,
            logsum_spec,
            leg_skims,
            locals_dict,
            chunk_size,
            trace_label=tracing.extend_trace_label(trace_label, leg_tag),
            chunk_tag=chunk_tag)

    # - od_logsums
    od_skims = {
        'ORIGIN': model_settings['TRIP_ORIGIN'],
        'DESTINATION': model_settings['ALT_DEST_COL_NAME'],
        "odt_skims": skims['odt_skims'],
        "dot_skims": skims['dot_skims'],
        "od_skims": skims['od_skims'],
    }
    if three_zone:
        od_skims['tvpb_logsum_odt'] = skims['tvpb_logsum_odt']
        od_skims['tvpb_logsum_dot'] = skims['tvpb_logsum_dot']
    destination_sample['od_logsum'] = leg_logsums(od_skims, 'od')

    # - dp_logsums (the dp skims are presented under the od key names)
    dp_skims = {
        'ORIGIN': model_settings['ALT_DEST_COL_NAME'],
        'DESTINATION': model_settings['PRIMARY_DEST'],
        "odt_skims": skims['dpt_skims'],
        "dot_skims": skims['pdt_skims'],
        "od_skims": skims['dp_skims'],
    }
    if three_zone:
        dp_skims['tvpb_logsum_odt'] = skims['tvpb_logsum_dpt']
        dp_skims['tvpb_logsum_dot'] = skims['tvpb_logsum_pdt']
    destination_sample['dp_logsum'] = leg_logsums(dp_skims, 'dp')

    return destination_sample
Beispiel #3
0
def run_tour_mode_choice_simulate(choosers,
                                  tour_purpose,
                                  model_settings,
                                  mode_column_name,
                                  logsum_column_name,
                                  network_los,
                                  skims,
                                  constants,
                                  estimator,
                                  chunk_size,
                                  trace_label=None,
                                  trace_choice_name=None):
    """
    Run the mode choice model for one segment (segments are usually
    tour/trip purposes).

    The spec and nest spec named by model_settings are evaluated with the
    coefficients of tour_purpose, choosers is annotated (in_period,
    out_period, preprocessor columns, and a tour_id column if missing), and
    the result of mode_choice_simulate is returned.

    choosers must be indexed by 'tour_id' and must not already have
    in_period / out_period columns.
    """

    raw_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients = simulate.get_segment_coefficients(
        model_settings, tour_purpose)
    spec = simulate.eval_coefficients(raw_spec, coefficients, estimator)

    nest_spec = simulate.eval_nest_coefficients(
        config.get_logit_model_settings(model_settings),
        coefficients,
        trace_label)

    # expression namespace; later updates win on name clashes.
    # coefficients go last because they can appear in expressions
    locals_dict = {}
    for mapping in (constants, skims, coefficients):
        locals_dict.update(mapping)

    assert not ('in_period' in choosers or 'out_period' in choosers)
    in_time_col = skims['in_time_col_name']
    out_time_col = skims['out_time_col_name']
    choosers['in_period'] = network_los.skim_time_period_label(
        choosers[in_time_col])
    choosers['out_period'] = network_los.skim_time_period_label(
        choosers[out_time_col])

    expressions.annotate_preprocessors(
        choosers, locals_dict, skims, model_settings, trace_label)

    # expose the index as a regular column as well, for tracing
    trace_column_names = choosers.index.name
    assert trace_column_names == 'tour_id'
    if trace_column_names not in choosers:
        choosers[trace_column_names] = choosers.index

    if estimator:
        # write choosers after annotation
        estimator.write_choosers(choosers)

    return mode_choice_simulate(
        choosers=choosers,
        spec=spec,
        nest_spec=nest_spec,
        skims=skims,
        locals_d=locals_dict,
        chunk_size=chunk_size,
        mode_column_name=mode_column_name,
        logsum_column_name=logsum_column_name,
        trace_label=trace_label,
        trace_choice_name=trace_choice_name,
        trace_column_names=trace_column_names,
        estimator=estimator)
Beispiel #4
0
def _compute_logsums(alt_tdd, tours_merged, tour_purpose, model_settings,
                     network_los, skims, trace_label):
    """
    Compute logsums for the alt_tdd alternatives joined to their tours,
    using the caller-supplied skims and the logsum settings named by
    model_settings['LOGSUM_SETTINGS'] for the tour_purpose segment.
    """

    trace_label = tracing.extend_trace_label(trace_label, 'logsums')

    logsum_settings = config.read_model_settings(
        model_settings['LOGSUM_SETTINGS'])

    # one chooser row per alternative, with the tour columns attached
    choosers = alt_tdd.join(tours_merged, how='left', rsuffix='_chooser')
    logger.info(
        f"{trace_label} compute_logsums for {choosers.shape[0]} choosers {alt_tdd.shape[0]} alts"
    )

    # - locals_dict: later updates win on name clashes
    locals_dict = {}
    locals_dict.update(config.get_model_constants(logsum_settings))

    if network_los.zone_system == los.THREE_ZONE:
        # TVPB constants can appear in expressions
        tvpb_constants = network_los.setting(
            'TVPB_SETTINGS.tour_mode_choice.CONSTANTS')
        locals_dict.update(tvpb_constants)

    locals_dict.update(skims)

    # constrained coefficients can appear in expressions
    coefficients = simulate.get_segment_coefficients(
        logsum_settings, tour_purpose)
    locals_dict.update(coefficients)

    # - run preprocessor to annotate choosers
    # allow specification of alternate preprocessor for nontour choosers
    preprocessor_key = model_settings.get('LOGSUM_PREPROCESSOR',
                                          'preprocessor')
    preprocessor_settings = logsum_settings[preprocessor_key]

    if preprocessor_settings:
        simulate.set_skim_wrapper_targets(choosers, skims)
        expressions.assign_columns(
            df=choosers,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)

    # - compute logsums
    logsum_spec = simulate.eval_coefficients(
        simulate.read_model_spec(file_name=logsum_settings['SPEC']),
        coefficients,
        estimator=None)

    nest_spec = simulate.eval_nest_coefficients(
        config.get_logit_model_settings(logsum_settings),
        coefficients,
        trace_label)

    return simulate.simple_simulate_logsums(
        choosers,
        logsum_spec,
        nest_spec,
        skims=skims,
        locals_d=locals_dict,
        chunk_size=0,
        trace_label=trace_label)
Beispiel #5
0
def _compute_logsums(alt_tdd, tours_merged, tour_purpose, model_settings,
                     trace_label):
    """
    compute logsums for tours using skims for alt_tdd out_period and in_period

    Parameters
    ----------
    alt_tdd
        alternatives; left-joined to tours_merged to form the choosers
    tours_merged
        tour columns joined onto alt_tdd (clashing names get a
        '_chooser' suffix)
    tour_purpose
        segment used to pick the coefficients and the destination column
    model_settings
        must provide 'LOGSUM_SETTINGS' and 'DESTINATION_FOR_TOUR_PURPOSE';
        may provide 'LOGSUM_PREPROCESSOR'
    trace_label

    Returns
    -------
    the result of simulate.simple_simulate_logsums for the choosers
    """

    trace_label = tracing.extend_trace_label(trace_label, 'logsums')

    logsum_settings = config.read_model_settings(
        model_settings['LOGSUM_SETTINGS'])

    choosers = alt_tdd.join(tours_merged, how='left', rsuffix='_chooser')
    # the two counts are separated by a space so they do not run together
    logger.info("%s compute_logsums for %d choosers %d alts",
                trace_label, choosers.shape[0], alt_tdd.shape[0])

    # - setup skims

    skim_dict = inject.get_injectable('skim_dict')
    skim_stack = inject.get_injectable('skim_stack')

    orig_col_name = 'TAZ'
    # NOTE(review): the inner .get() yields None when tour_purpose is not in
    # the mapping, and the wrappers below would then be keyed on None -
    # confirm callers guarantee every purpose is configured
    dest_col_name = model_settings.get('DESTINATION_FOR_TOUR_PURPOSE').get(
        tour_purpose)

    # odt/dot pair each direction with the period of that leg;
    # odr/dor pair each direction with the other leg's period
    odt_skim_stack_wrapper = skim_stack.wrap(left_key=orig_col_name,
                                             right_key=dest_col_name,
                                             skim_key='out_period')
    dot_skim_stack_wrapper = skim_stack.wrap(left_key=dest_col_name,
                                             right_key=orig_col_name,
                                             skim_key='in_period')
    odr_skim_stack_wrapper = skim_stack.wrap(left_key=orig_col_name,
                                             right_key=dest_col_name,
                                             skim_key='in_period')
    dor_skim_stack_wrapper = skim_stack.wrap(left_key=dest_col_name,
                                             right_key=orig_col_name,
                                             skim_key='out_period')
    od_skim_stack_wrapper = skim_dict.wrap(orig_col_name, dest_col_name)

    skims = {
        "odt_skims": odt_skim_stack_wrapper,
        "dot_skims": dot_skim_stack_wrapper,
        "odr_skims": odr_skim_stack_wrapper,
        "dor_skims": dor_skim_stack_wrapper,
        "od_skims": od_skim_stack_wrapper,
        'orig_col_name': orig_col_name,
        'dest_col_name': dest_col_name,
    }

    # - locals_dict
    constants = config.get_model_constants(logsum_settings)

    locals_dict = {}
    locals_dict.update(constants)
    locals_dict.update(skims)

    # - run preprocessor to annotate choosers
    # allow specification of alternate preprocessor for nontour choosers
    preprocessor = model_settings.get('LOGSUM_PREPROCESSOR', 'preprocessor')
    preprocessor_settings = logsum_settings[preprocessor]

    if preprocessor_settings:

        simulate.set_skim_wrapper_targets(choosers, skims)

        expressions.assign_columns(df=choosers,
                                   model_settings=preprocessor_settings,
                                   locals_dict=locals_dict,
                                   trace_label=trace_label)

    # - compute logsums

    coefficients = simulate.get_segment_coefficients(logsum_settings,
                                                     tour_purpose)
    logsum_spec = simulate.read_model_spec(file_name=logsum_settings['SPEC'])
    logsum_spec = simulate.eval_coefficients(logsum_spec,
                                             coefficients,
                                             estimator=None)

    nest_spec = config.get_logit_model_settings(logsum_settings)
    nest_spec = simulate.eval_nest_coefficients(nest_spec, coefficients)

    # constrained coefficients can appear in expressions; they are added only
    # now, so the preprocessor above ran without them
    locals_dict.update(coefficients)

    logsums = simulate.simple_simulate_logsums(choosers,
                                               logsum_spec,
                                               nest_spec,
                                               skims=skims,
                                               locals_d=locals_dict,
                                               chunk_size=0,
                                               trace_label=trace_label)

    return logsums
def trip_mode_choice(trips, network_los, chunk_size, trace_hh_id):
    """
    Trip mode choice - compute trip_mode (same values as for tour_mode) for each trip.

    Modes for each primary tour purpose are calculated separately because they have different
    coefficient values (stored in trip_mode_choice_coefficients.csv coefficient file.)

    Adds trip_mode column to trip table

    Parameters
    ----------
    trips
        trips table wrapper; read with ``to_frame()``
    network_los
        supplies the skim time period labels, the default skim dict and,
        for the three-zone system, the tvpb object and its settings
    chunk_size
        passed through to mode_choice_simulate
    trace_hh_id
        when truthy, the per-segment constants and choices and the final
        trips table are written with tracing.trace_df

    Returns
    -------
    None
        the updated trips frame is stored with pipeline.replace_table
    """

    trace_label = 'trip_mode_choice'
    model_settings_file_name = 'trip_mode_choice.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)

    logsum_column_name = model_settings.get('MODE_CHOICE_LOGSUM_COLUMN_NAME')
    mode_column_name = 'trip_mode'

    trips_df = trips.to_frame()
    logger.info("Running %s with %d trips", trace_label, trips_df.shape[0])

    # give trip mode choice the option to run without calling tours_merged. Useful for xborder
    # model where tour_od_choice needs trip mode choice logsums before some of the join keys
    # needed by tour_merged (e.g. home_zone_id) exist
    tours_cols = [
        col for col in model_settings['TOURS_MERGED_CHOOSER_COLUMNS']
        if col not in trips_df.columns
    ]
    if tours_cols:
        tours_merged = inject.get_table('tours_merged').to_frame(
            columns=tours_cols)
    else:
        tours_merged = pd.DataFrame()

    # - trips_merged - merge trips and tours_merged
    trips_merged = pd.merge(trips_df,
                            tours_merged,
                            left_on='tour_id',
                            right_index=True,
                            how="left")
    assert trips_merged.index.equals(trips.index)

    tracing.print_summary('primary_purpose',
                          trips_df.primary_purpose,
                          value_counts=True)

    # setup skim keys
    assert ('trip_period' not in trips_merged)
    trips_merged['trip_period'] = network_los.skim_time_period_label(
        trips_merged.depart)

    orig_col = 'origin'
    dest_col = 'destination'
    min_per_period = network_los.skim_time_periods['period_minutes']
    periods_per_hour = 60 / min_per_period

    constants = {}
    constants.update(config.get_model_constants(model_settings))
    constants.update({
        'ORIGIN': orig_col,
        'DESTINATION': dest_col,
        'MIN_PER_PERIOD': min_per_period,
        'PERIODS_PER_HOUR': periods_per_hour
    })

    skim_dict = network_los.get_default_skim_dict()

    odt_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=orig_col,
                                               dest_key=dest_col,
                                               dim3_key='trip_period')
    dot_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=dest_col,
                                               dest_key=orig_col,
                                               dim3_key='trip_period')
    od_skim_wrapper = skim_dict.wrap('origin', 'destination')

    skims = {
        "odt_skims": odt_skim_stack_wrapper,
        "dot_skims": dot_skim_stack_wrapper,
        "od_skims": od_skim_wrapper,
    }

    if network_los.zone_system == los.THREE_ZONE:
        # fixme - is this a lightweight object?
        tvpb = network_los.tvpb
        tvpb_recipe = model_settings.get('TVPB_recipe', 'tour_mode_choice')
        tvpb_logsum_odt = tvpb.wrap_logsum(orig_key=orig_col,
                                           dest_key=dest_col,
                                           tod_key='trip_period',
                                           segment_key='demographic_segment',
                                           recipe=tvpb_recipe,
                                           cache_choices=True,
                                           trace_label=trace_label,
                                           tag='tvpb_logsum_odt')
        skims.update({
            'tvpb_logsum_odt': tvpb_logsum_odt,
        })

        # This if-clause gives the user the option of NOT inheriting constants
        # from the tvpb settings. previously, these constants were inherited
        # automatically, which had the undesirable effect of overwriting any
        # trip mode choice model constants/coefficients that shared the same
        # name. The default behavior is still the same (True), but the user
        # can now avoid any chance of squashing these local variables by
        # adding `use_TVPB_constants: False` to the trip_mode_choice.yaml file.
        # the tvpb will still use the constants as defined in the recipe
        # specified above in `tvpb.wrap_logsum()` but they will not be used
        # in the trip mode choice expressions.
        if model_settings.get('use_TVPB_constants', True):
            constants.update(
                network_los.setting(
                    'TVPB_SETTINGS.tour_mode_choice.CONSTANTS'))

    # don't create estimation data bundle if trip mode choice is being called
    # from another model step (e.g. tour mode choice logsum creation)
    if pipeline._PIPELINE.rng().step_name != 'trip_mode_choice':
        estimator = None
    else:
        estimator = estimation.manager.begin_estimation('trip_mode_choice')
    if estimator:
        estimator.write_coefficients(model_settings=model_settings)
        estimator.write_coefficients_template(model_settings=model_settings)
        estimator.write_spec(model_settings)
        estimator.write_model_settings(model_settings,
                                       model_settings_file_name)

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    nest_spec = config.get_logit_model_settings(model_settings)

    choices_list = []
    for primary_purpose, trips_segment in trips_merged.groupby(
            'primary_purpose'):

        segment_trace_label = tracing.extend_trace_label(
            trace_label, primary_purpose)

        logger.info("trip_mode_choice tour_type '%s' (%s trips)",
                    primary_purpose, len(trips_segment.index))

        # name index so tracing knows how to slice
        assert trips_segment.index.name == 'trip_id'

        if network_los.zone_system == los.THREE_ZONE:
            tvpb_logsum_odt.extend_trace_label(primary_purpose)

        coefficients = simulate.get_segment_coefficients(
            model_settings, primary_purpose)

        locals_dict = {}
        locals_dict.update(constants)

        # coefficients are applied after constants, so a shared name is
        # silently replaced by the coefficient value - warn about it
        if any(coeff in constants for coeff in coefficients.keys()):
            logger.warning(
                "coefficients are obscuring constants in locals_dict")
        locals_dict.update(coefficients)

        # have to initialize chunker for preprocessing in order to access
        # tvpb logsum terms in preprocessor expressions.
        with chunk.chunk_log(tracing.extend_trace_label(
                trace_label, 'preprocessing'),
                             base=True):
            expressions.annotate_preprocessors(trips_segment, locals_dict,
                                               skims, model_settings,
                                               segment_trace_label)

        if estimator:
            # write choosers after annotation
            estimator.write_choosers(trips_segment)

        # skims join the expression namespace only after preprocessing
        locals_dict.update(skims)

        choices = mode_choice_simulate(
            choosers=trips_segment,
            spec=simulate.eval_coefficients(model_spec, coefficients,
                                            estimator),
            nest_spec=simulate.eval_nest_coefficients(nest_spec, coefficients,
                                                      segment_trace_label),
            skims=skims,
            locals_d=locals_dict,
            chunk_size=chunk_size,
            mode_column_name=mode_column_name,
            logsum_column_name=logsum_column_name,
            trace_label=segment_trace_label,
            trace_choice_name='trip_mode_choice',
            estimator=estimator)

        if trace_hh_id:
            # trace the coefficients
            tracing.trace_df(pd.Series(locals_dict),
                             label=tracing.extend_trace_label(
                                 segment_trace_label, 'constants'),
                             transpose=False,
                             slicer='NONE')

            # so we can trace with annotations
            assign_in_place(trips_segment, choices)

            tracing.trace_df(trips_segment,
                             label=tracing.extend_trace_label(
                                 segment_trace_label, 'trip_mode'),
                             slicer='tour_id',
                             index_label='tour_id',
                             warn_if_empty=True)

        choices_list.append(choices)

    choices_df = pd.concat(choices_list)

    # add cached tvpb_logsum tap choices for modes specified in tvpb_mode_path_types
    if network_los.zone_system == los.THREE_ZONE:

        tvpb_mode_path_types = model_settings.get('tvpb_mode_path_types')
        for mode, path_type in tvpb_mode_path_types.items():

            skim_cache = tvpb_logsum_odt.cache[path_type]

            for cache_col in skim_cache:
                if cache_col not in choices_df:
                    # start from an all-missing column of a matching kind
                    choices_df[cache_col] = (
                        np.nan if pd.api.types.is_numeric_dtype(
                            skim_cache[cache_col]) else '')
                # keep the current value for rows of other modes and take the
                # cached value for rows that chose this mode. The result is
                # assigned back rather than using where(inplace=True) on the
                # selected column, which does not update choices_df when
                # pandas copy-on-write is active
                choices_df[cache_col] = choices_df[cache_col].where(
                    choices_df[mode_column_name] != mode,
                    skim_cache[cache_col])

    if estimator:
        estimator.write_choices(choices_df.trip_mode)
        choices_df.trip_mode = estimator.get_survey_values(
            choices_df.trip_mode, 'trips', 'trip_mode')
        estimator.write_override_choices(choices_df.trip_mode)
        estimator.end_estimation()
    trips_df = trips.to_frame()
    assign_in_place(trips_df, choices_df)

    tracing.print_summary('trip_modes',
                          trips_merged.tour_mode,
                          value_counts=True)

    tracing.print_summary('trip_mode_choice choices',
                          trips_df[mode_column_name],
                          value_counts=True)

    assert not trips_df[mode_column_name].isnull().any()

    pipeline.replace_table("trips", trips_df)

    if trace_hh_id:
        tracing.trace_df(trips_df,
                         label=tracing.extend_trace_label(
                             trace_label, 'trip_mode'),
                         slicer='trip_id',
                         index_label='trip_id',
                         warn_if_empty=True)
Beispiel #7
0
def compute_logsums(choosers,
                    tour_purpose,
                    logsum_settings, model_settings,
                    network_los,
                    chunk_size,
                    chunk_tag,
                    trace_label):
    """
    Compute mode choice logsums for choosers, using fixed in/out periods
    taken from model_settings.

    choosers is modified: in_period, out_period and duration columns are
    added (none of them may exist beforehand), plus whatever the logsum
    preprocessor assigns.

    Parameters
    ----------
    choosers
    tour_purpose
    logsum_settings
    model_settings
    network_los
    chunk_size
    chunk_tag
    trace_label

    Returns
    -------
    logsums: pandas series
        computed logsums with same index as choosers
    """

    trace_label = tracing.extend_trace_label(trace_label, 'compute_logsums')
    logger.debug("Running compute_logsums with %d choosers" % choosers.shape[0])

    # compute_logsums needs to know name of dest column in interaction_sample
    origin_col = model_settings['CHOOSER_ORIG_COL_NAME']
    destination_col = model_settings['ALT_DEST_COL_NAME']

    # FIXME - are we ok with altering choosers (so caller doesn't have to set these)?
    assert not ('in_period' in choosers or 'out_period' in choosers)
    choosers['in_period'] = network_los.skim_time_period_label(
        model_settings['IN_PERIOD'])
    choosers['out_period'] = network_los.skim_time_period_label(
        model_settings['OUT_PERIOD'])

    assert 'duration' not in choosers
    choosers['duration'] = \
        model_settings['IN_PERIOD'] - model_settings['OUT_PERIOD']

    raw_spec = simulate.read_model_spec(file_name=logsum_settings['SPEC'])
    coefficients = simulate.get_segment_coefficients(
        logsum_settings, tour_purpose)
    logsum_spec = simulate.eval_coefficients(
        raw_spec, coefficients, estimator=None)

    nest_spec = simulate.eval_nest_coefficients(
        config.get_logit_model_settings(logsum_settings),
        coefficients,
        trace_label)

    # expression namespace: model constants, then constrained coefficients
    # (both can appear in expressions); later updates win on name clashes
    locals_dict = {}
    locals_dict.update(config.get_model_constants(logsum_settings))
    locals_dict.update(coefficients)

    # setup skim keys
    skim_dict = network_los.get_default_skim_dict()

    # odt/dot pair each direction with the period of that leg;
    # odr/dor pair each direction with the other leg's period
    skims = {
        "odt_skims": skim_dict.wrap_3d(orig_key=origin_col,
                                       dest_key=destination_col,
                                       dim3_key='out_period'),
        "dot_skims": skim_dict.wrap_3d(orig_key=destination_col,
                                       dest_key=origin_col,
                                       dim3_key='in_period'),
        "odr_skims": skim_dict.wrap_3d(orig_key=origin_col,
                                       dest_key=destination_col,
                                       dim3_key='in_period'),
        "dor_skims": skim_dict.wrap_3d(orig_key=destination_col,
                                       dest_key=origin_col,
                                       dim3_key='out_period'),
        "od_skims": skim_dict.wrap(origin_col, destination_col),
        'orig_col_name': origin_col,
        'dest_col_name': destination_col
    }

    if network_los.zone_system == los.THREE_ZONE:
        # fixme - is this a lightweight object?
        tvpb = network_los.tvpb

        skims['tvpb_logsum_odt'] = tvpb.wrap_logsum(
            orig_key=origin_col, dest_key=destination_col,
            tod_key='out_period', segment_key='demographic_segment',
            trace_label=trace_label, tag='tvpb_logsum_odt')
        skims['tvpb_logsum_dot'] = tvpb.wrap_logsum(
            orig_key=destination_col, dest_key=origin_col,
            tod_key='in_period', segment_key='demographic_segment',
            trace_label=trace_label, tag='tvpb_logsum_dot')

        # TVPB constants can appear in expressions
        locals_dict.update(
            network_los.setting('TVPB_SETTINGS.tour_mode_choice.CONSTANTS'))

    locals_dict.update(skims)

    # - run preprocessor to annotate choosers
    # allow specification of alternate preprocessor for nontour choosers
    preprocessor_key = model_settings.get('LOGSUM_PREPROCESSOR',
                                          'preprocessor')
    preprocessor_settings = logsum_settings[preprocessor_key]

    if preprocessor_settings:
        simulate.set_skim_wrapper_targets(choosers, skims)
        expressions.assign_columns(
            df=choosers,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)

    return simulate.simple_simulate_logsums(
        choosers,
        logsum_spec,
        nest_spec,
        skims=skims,
        locals_d=locals_dict,
        chunk_size=chunk_size,
        chunk_tag=chunk_tag,
        trace_label=trace_label)
def trip_mode_choice(trips, tours_merged, network_los, chunk_size,
                     trace_hh_id):
    """
    Trip mode choice - compute trip_mode (same values as for tour_mode) for each trip.

    Modes for each primary tour purpose are calculated separately because they have different
    coefficient values (stored in trip_mode_choice_coefficients.csv coefficient file.)

    Adds trip_mode column to trip table

    Parameters
    ----------
    trips : table wrapper
        trips table; only ``to_frame()`` and ``index`` are used here
    tours_merged : table wrapper
        merged tours table; ``to_frame()`` is called and the result is reduced to the
        TOURS_MERGED_CHOOSER_COLUMNS listed in trip_mode_choice.yaml
    network_los : object
        supplies skim_time_period_label, get_default_skim_dict, zone_system,
        and (for THREE_ZONE) tvpb and setting
    chunk_size : int
        passed through to mode_choice_simulate
    trace_hh_id : int or None
        when truthy, per-segment and final trace tables are written

    Returns
    -------
    None
        the 'trips' pipeline table is replaced with the updated trips dataframe
    """
    trace_label = 'trip_mode_choice'
    model_settings_file_name = 'trip_mode_choice.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)

    logsum_column_name = model_settings.get('MODE_CHOICE_LOGSUM_COLUMN_NAME')
    mode_column_name = 'trip_mode'

    trips_df = trips.to_frame()
    logger.info("Running %s with %d trips", trace_label, trips_df.shape[0])

    tours_merged = tours_merged.to_frame()
    tours_merged = tours_merged[model_settings['TOURS_MERGED_CHOOSER_COLUMNS']]

    tracing.print_summary('primary_purpose',
                          trips_df.primary_purpose,
                          value_counts=True)

    # - trips_merged - merge trips and tours_merged
    trips_merged = pd.merge(trips_df,
                            tours_merged,
                            left_on='tour_id',
                            right_index=True,
                            how="left")
    # the left merge must neither drop, duplicate, nor reorder trips
    assert trips_merged.index.equals(trips.index)

    # setup skim keys
    assert ('trip_period' not in trips_merged)
    trips_merged['trip_period'] = network_los.skim_time_period_label(
        trips_merged.depart)

    orig_col = 'origin'
    dest_col = 'destination'

    constants = {}
    constants.update(config.get_model_constants(model_settings))
    constants.update({'ORIGIN': orig_col, 'DESTINATION': dest_col})

    skim_dict = network_los.get_default_skim_dict()

    odt_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=orig_col,
                                               dest_key=dest_col,
                                               dim3_key='trip_period')
    dot_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=dest_col,
                                               dest_key=orig_col,
                                               dim3_key='trip_period')
    od_skim_wrapper = skim_dict.wrap('origin', 'destination')

    skims = {
        "odt_skims": odt_skim_stack_wrapper,
        "dot_skims": dot_skim_stack_wrapper,
        "od_skims": od_skim_wrapper,
    }

    if network_los.zone_system == los.THREE_ZONE:
        # fixme - is this a lightweight object?
        tvpb = network_los.tvpb

        # cache_choices=True so the cached choices can be copied onto the
        # chosen trips after simulation (see the tvpb_mode_path_types loop below)
        tvpb_logsum_odt = tvpb.wrap_logsum(orig_key=orig_col,
                                           dest_key=dest_col,
                                           tod_key='trip_period',
                                           segment_key='demographic_segment',
                                           cache_choices=True,
                                           trace_label=trace_label,
                                           tag='tvpb_logsum_odt')
        skims.update({
            'tvpb_logsum_odt': tvpb_logsum_odt,
            # 'tvpb_logsum_dot': tvpb_logsum_dot
        })

        # TVPB constants can appear in expressions
        constants.update(
            network_los.setting('TVPB_SETTINGS.tour_mode_choice.CONSTANTS'))

    estimator = estimation.manager.begin_estimation('trip_mode_choice')
    if estimator:
        estimator.write_coefficients(model_settings=model_settings)
        estimator.write_coefficients_template(model_settings=model_settings)
        estimator.write_spec(model_settings)
        estimator.write_model_settings(model_settings,
                                       model_settings_file_name)

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    nest_spec = config.get_logit_model_settings(model_settings)

    choices_list = []
    for primary_purpose, trips_segment in trips_merged.groupby(
            'primary_purpose'):

        segment_trace_label = tracing.extend_trace_label(
            trace_label, primary_purpose)

        logger.info("trip_mode_choice tour_type '%s' (%s trips)",
                    primary_purpose, len(trips_segment.index))

        # name index so tracing knows how to slice
        assert trips_segment.index.name == 'trip_id'

        if network_los.zone_system == los.THREE_ZONE:
            tvpb_logsum_odt.extend_trace_label(primary_purpose)
            # tvpb_logsum_dot.extend_trace_label(primary_purpose)

        # coefficients are specific to the primary purpose segment
        coefficients = simulate.get_segment_coefficients(
            model_settings, primary_purpose)

        locals_dict = {}
        locals_dict.update(constants)
        locals_dict.update(coefficients)

        expressions.annotate_preprocessors(trips_segment, locals_dict, skims,
                                           model_settings, segment_trace_label)

        if estimator:
            # write choosers after annotation
            estimator.write_choosers(trips_segment)

        locals_dict.update(skims)

        # NOTE(review): the un-segmented trace_label is passed here while the
        # nest spec uses segment_trace_label - confirm this is intended
        choices = mode_choice_simulate(
            choosers=trips_segment,
            spec=simulate.eval_coefficients(model_spec, coefficients,
                                            estimator),
            nest_spec=simulate.eval_nest_coefficients(nest_spec, coefficients,
                                                      segment_trace_label),
            skims=skims,
            locals_d=locals_dict,
            chunk_size=chunk_size,
            mode_column_name=mode_column_name,
            logsum_column_name=logsum_column_name,
            trace_label=trace_label,
            trace_choice_name='trip_mode_choice',
            estimator=estimator)

        if trace_hh_id:
            # trace the coefficients
            tracing.trace_df(pd.Series(locals_dict),
                             label=tracing.extend_trace_label(
                                 segment_trace_label, 'constants'),
                             transpose=False,
                             slicer='NONE')

            # so we can trace with annotations
            assign_in_place(trips_segment, choices)

            tracing.trace_df(trips_segment,
                             label=tracing.extend_trace_label(
                                 segment_trace_label, 'trip_mode'),
                             slicer='tour_id',
                             index_label='tour_id',
                             warn_if_empty=True)

        choices_list.append(choices)

        # FIXME - force garbage collection
        force_garbage_collect()

    choices_df = pd.concat(choices_list)

    # add cached tvpb_logsum tap choices for modes specified in tvpb_mode_path_types
    if network_los.zone_system == los.THREE_ZONE:

        # NOTE(review): a missing 'tvpb_mode_path_types' setting yields None and
        # fails on .items() - presumably the setting is required for THREE_ZONE
        tvpb_mode_path_types = model_settings.get('tvpb_mode_path_types')
        for mode, path_type in tvpb_mode_path_types.items():

            skim_cache = tvpb_logsum_odt.cache[path_type]

            for cache_col in skim_cache:
                cached_values = skim_cache[cache_col]
                if cache_col not in choices_df:
                    # placeholder matching the cached column's kind (numeric vs not)
                    choices_df[cache_col] = \
                        np.nan if pd.api.types.is_numeric_dtype(cached_values) else ''
                # keep existing values except where this mode was chosen.
                # Assign the result back rather than calling where(inplace=True)
                # on the column selection, which does not reliably update the
                # parent frame (and never does under pandas Copy-on-Write).
                choices_df[cache_col] = choices_df[cache_col].where(
                    choices_df[mode_column_name] != mode,
                    cached_values)

    if estimator:
        estimator.write_choices(choices_df.trip_mode)
        choices_df.trip_mode = estimator.get_survey_values(
            choices_df.trip_mode, 'trips', 'trip_mode')
        estimator.write_override_choices(choices_df.trip_mode)
        estimator.end_estimation()

    # update trips table with choices (and optionally logsums)
    trips_df = trips.to_frame()
    assign_in_place(trips_df, choices_df)

    tracing.print_summary('trip_modes',
                          trips_merged.tour_mode,
                          value_counts=True)

    tracing.print_summary('trip_mode_choice choices',
                          trips_df[mode_column_name],
                          value_counts=True)

    # every trip must have been assigned a mode
    assert not trips_df[mode_column_name].isnull().any()

    pipeline.replace_table("trips", trips_df)

    if trace_hh_id:
        tracing.trace_df(trips_df,
                         label=tracing.extend_trace_label(
                             trace_label, 'trip_mode'),
                         slicer='trip_id',
                         index_label='trip_id',
                         warn_if_empty=True)
Beispiel #9
0
def run_od_logsums(spec_segment_name, tours_merged_df, od_sample,
                   model_settings, network_los, estimator, chunk_size,
                   trace_hh_id, trace_label):
    """
    add logsum column to existing tour_destination_sample table

    logsum is calculated by running the mode_choice model for each sample
    (person, OD_id) pair in od_sample, and computing the logsum of all the utilities

    When the logsum settings enable COMPUTE_TRIP_MODE_CHOICE_LOGSUMS, the
    trip_mode_choice step is first run on pseudo-trips built from the sampled
    ODs (one set per tour mode alternative), and the resulting per-mode,
    per-direction logsums are added to both choosers and od_sample.

    Parameters
    ----------
    spec_segment_name : str
        segment name passed to logsum.compute_logsums
    tours_merged_df : pandas.DataFrame
        chooser tours; joined onto od_sample on the index
    od_sample : pandas.DataFrame
        sampled origin-destination alternatives; modified in place
    model_settings : dict
        must provide LOGSUM_SETTINGS, ORIG_COL_NAME and DEST_COL_NAME
    network_los : object
        passed through to logsum.compute_logsums
    estimator : object
        not used in this function
    chunk_size : int
        passed through to logsum.compute_logsums
    trace_hh_id : int
        not used in this function
    trace_label : str

    Returns
    -------
    pandas.DataFrame
        od_sample with an added 'tour_mode_choice_logsum' column (plus
        'logsum_<mode>_<direction>' columns when trip mode choice logsums are computed)
    """
    chunk_tag = 'tour_od.logsums'
    logsum_settings = config.read_model_settings(
        model_settings['LOGSUM_SETTINGS'])
    origin_id_col = model_settings['ORIG_COL_NAME']
    dest_id_col = model_settings['DEST_COL_NAME']
    tour_od_id_col = get_od_id_col(origin_id_col, dest_id_col)

    # FIXME - MEMORY HACK - only include columns actually used in spec
    tours_merged_df = \
        logsum.filter_chooser_columns(tours_merged_df, logsum_settings, model_settings)

    # merge ods into choosers table
    choosers = od_sample.join(tours_merged_df, how='left')
    # composite '<origin>_<destination>' id for each sampled OD pair
    choosers[tour_od_id_col] = choosers[origin_id_col].astype(
        str) + '_' + choosers[dest_id_col].astype(str)

    logger.info("Running %s with %s rows", trace_label, len(choosers))

    tracing.dump_df(DUMP, choosers, trace_label, 'choosers')

    # run trip mode choice to compute tour mode choice logsums
    if logsum_settings.get('COMPUTE_TRIP_MODE_CHOICE_LOGSUMS', False):

        pseudo_tours = choosers.copy()
        trip_mode_choice_settings = config.read_model_settings(
            'trip_mode_choice')

        # tours_merged table doesn't yet have all the cols it needs to be called (e.g.
        # home_zone_id), so in order to compute tour mode choice/trip mode choice logsums
        # in this step we have to pass all tour-level attributes in with the main trips
        # table. see trip_mode_choice.py L56-61 for more details.
        # copy the list so that appending does not mutate the settings dict
        tour_cols_needed = list(trip_mode_choice_settings.get(
            'TOURS_MERGED_CHOOSER_COLUMNS', []))
        tour_cols_needed.append(tour_od_id_col)

        # from tour_mode_choice.py
        not_university = (pseudo_tours.tour_type !=
                          'school') | ~pseudo_tours.is_university
        pseudo_tours['tour_purpose'] = \
            pseudo_tours.tour_type.where(not_university, 'univ')

        pseudo_tours['stop_frequency'] = '0out_0in'
        pseudo_tours['primary_purpose'] = pseudo_tours['tour_purpose']
        # remember the original index name so it can be restored after the merge below
        choosers_og_index = choosers.index.name
        pseudo_tours.reset_index(inplace=True)
        pseudo_tours.index.name = 'unique_id'

        # need dest_id_col to create dest col in trips, but need to preserve
        # tour dest as separate column in the trips table bc the trip mode choice
        # preprocessor isn't able to get the tour dest from the tours table bc the
        # tours don't yet have ODs.
        stop_frequency_alts = inject.get_injectable('stop_frequency_alts')
        pseudo_tours['tour_destination'] = pseudo_tours[dest_id_col]
        trips = trip.initialize_from_tours(
            pseudo_tours, stop_frequency_alts,
            [origin_id_col, dest_id_col, 'tour_destination', 'unique_id'])
        outbound = trips['outbound']
        # depart at tour start, except non-outbound trips which depart at tour end
        trips['depart'] = reindex(pseudo_tours.start, trips.unique_id)
        trips.loc[~outbound,
                  'depart'] = reindex(pseudo_tours.end, trips.loc[~outbound,
                                                                  'unique_id'])

        logsum_trips = pd.DataFrame()
        nest_spec = config.get_logit_model_settings(logsum_settings)

        # actual coeffs dont matter here, just need them to load the nest structure
        coefficients = simulate.get_segment_coefficients(
            logsum_settings, pseudo_tours.iloc[0]['tour_purpose'])
        nest_spec = simulate.eval_nest_coefficients(nest_spec, coefficients,
                                                    trace_label)
        # leaf nests are the tour mode alternatives
        tour_mode_alts = []
        for nest in logit.each_nest(nest_spec):
            if nest.is_leaf:
                tour_mode_alts.append(nest.name)

        # repeat rows from the trips table iterating over tour mode
        for tour_mode in tour_mode_alts:
            trips['tour_mode'] = tour_mode
            logsum_trips = pd.concat((logsum_trips, trips), ignore_index=True)
        assert len(logsum_trips) == len(trips) * len(tour_mode_alts)
        logsum_trips.index.name = 'trip_id'

        # bring over tour-level columns the trips table does not already carry
        for col in tour_cols_needed:
            if col not in trips:
                logsum_trips[col] = reindex(pseudo_tours[col],
                                            logsum_trips.unique_id)

        pipeline.replace_table('trips', logsum_trips)
        tracing.register_traceable_table('trips', logsum_trips)
        pipeline.get_rn_generator().add_channel('trips', logsum_trips)

        # run trip mode choice on pseudo-trips. use orca instead of pipeline to
        # execute the step because pipeline can only handle one open step at a time
        orca.run(['trip_mode_choice'])

        # grab trip mode choice logsums and pivot by tour mode and direction, index
        # on tour_id to enable merge back to choosers table
        trips = inject.get_table('trips').to_frame()
        trip_dir_mode_logsums = trips.pivot(index=['tour_id', tour_od_id_col],
                                            columns=['tour_mode', 'outbound'],
                                            values='trip_mode_choice_logsum')
        # flatten the (tour_mode, outbound) columns to 'logsum_<mode>_<direction>'
        new_cols = [
            '_'.join(['logsum', mode, 'outbound' if outbound else 'inbound'])
            for mode, outbound in trip_dir_mode_logsums.columns
        ]
        trip_dir_mode_logsums.columns = new_cols

        choosers.reset_index(inplace=True)
        choosers.set_index(['tour_id', tour_od_id_col], inplace=True)
        choosers = pd.merge(choosers,
                            trip_dir_mode_logsums,
                            left_index=True,
                            right_index=True)
        choosers.reset_index(inplace=True)
        choosers.set_index(choosers_og_index, inplace=True)

        # undo the temporary registration of the pseudo-trips table
        pipeline.get_rn_generator().drop_channel('trips')
        tracing.deregister_traceable_table('trips')

        # column assignment below relies on choosers still lining up with od_sample
        assert (od_sample.index == choosers.index).all()
        for col in new_cols:
            od_sample[col] = choosers[col]

    logsums = logsum.compute_logsums(choosers, spec_segment_name,
                                     logsum_settings, model_settings,
                                     network_los, chunk_size, chunk_tag,
                                     trace_label, 'end', 'start', 'duration')

    assert (od_sample.index == logsums.index).all()
    od_sample['tour_mode_choice_logsum'] = logsums

    return od_sample
Beispiel #10
0
def compute_logsums(choosers,
                    tour_purpose,
                    logsum_settings, model_settings,
                    skim_dict, skim_stack,
                    chunk_size, trace_label):
    """
    Compute mode choice logsums for each row of choosers.

    Note that choosers is modified in place: 'in_period', 'out_period' and
    'duration' columns are added (and must not already be present).

    Parameters
    ----------
    choosers : pandas.DataFrame
    tour_purpose : str
        segment used to look up the coefficients
    logsum_settings : dict
        supplies SPEC, the nest spec, constants and the preprocessor settings
    model_settings : dict
        supplies CHOOSER_ORIG_COL_NAME, ALT_DEST_COL_NAME, IN_PERIOD,
        OUT_PERIOD and, optionally, LOGSUM_PREPROCESSOR
    skim_dict
        wrapped on (origin, destination)
    skim_stack
        wrapped on (origin, destination, period) in both directions
    chunk_size
    trace_label

    Returns
    -------
    logsums: pandas series
        computed logsums with same index as choosers
    """

    def wrap_stack(left, right, period_col):
        # skim stack wrapper keyed on the given chooser columns
        return skim_stack.wrap(left_key=left, right_key=right,
                               skim_key=period_col)

    trace_label = tracing.extend_trace_label(trace_label, 'compute_logsums')

    # names of the origin column and of the dest column in interaction_sample
    orig_col_name = model_settings['CHOOSER_ORIG_COL_NAME']
    dest_col_name = model_settings['ALT_DEST_COL_NAME']

    # FIXME - are we ok with altering choosers (so caller doesn't have to set these)?
    assert 'in_period' not in choosers
    assert 'out_period' not in choosers
    in_period = model_settings['IN_PERIOD']
    choosers['in_period'] = expressions.skim_time_period_label(in_period)
    out_period = model_settings['OUT_PERIOD']
    choosers['out_period'] = expressions.skim_time_period_label(out_period)

    assert 'duration' not in choosers
    choosers['duration'] = in_period - out_period

    # utility spec with the segment's coefficient values substituted
    coefficients = simulate.get_segment_coefficients(logsum_settings, tour_purpose)
    logsum_spec = simulate.eval_coefficients(
        simulate.read_model_spec(file_name=logsum_settings['SPEC']),
        coefficients,
        estimator=None)

    nest_spec = simulate.eval_nest_coefficients(
        config.get_logit_model_settings(logsum_settings),
        coefficients)

    constants = config.get_model_constants(logsum_settings)

    logger.debug("Running compute_logsums with %d choosers", choosers.shape[0])

    # setup skim keys
    skims = {
        "odt_skims": wrap_stack(orig_col_name, dest_col_name, 'out_period'),
        "dot_skims": wrap_stack(dest_col_name, orig_col_name, 'in_period'),
        "odr_skims": wrap_stack(orig_col_name, dest_col_name, 'in_period'),
        "dor_skims": wrap_stack(dest_col_name, orig_col_name, 'out_period'),
        "od_skims": skim_dict.wrap(orig_col_name, dest_col_name),
        'orig_col_name': orig_col_name,
        'dest_col_name': dest_col_name
    }

    # later sources take precedence; coefficients go last because
    # constrained coefficients can appear in expressions
    locals_dict = {}
    for extra_locals in (constants, skims, coefficients):
        locals_dict.update(extra_locals)

    # - run preprocessor to annotate choosers
    # allow specification of alternate preprocessor for nontour choosers
    preprocessor_key = model_settings.get('LOGSUM_PREPROCESSOR', 'preprocessor')
    preprocessor_settings = logsum_settings[preprocessor_key]

    if preprocessor_settings:
        simulate.set_skim_wrapper_targets(choosers, skims)
        expressions.assign_columns(
            df=choosers,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)

    return simulate.simple_simulate_logsums(
        choosers,
        logsum_spec,
        nest_spec,
        skims=skims,
        locals_d=locals_dict,
        chunk_size=chunk_size,
        trace_label=trace_label,
        alt_col_name=dest_col_name)