def get_alts_from_segmented_nested_logit(model_settings, segment_name, trace_label):
    """Infer alts from logit spec

    The nest spec read from ``model_settings`` is evaluated with the
    coefficients of ``segment_name`` and the names of its leaf nests are
    collected.

    Parameters
    ----------
    model_settings : dict
    segment_name : str
        segment whose coefficients are used to evaluate the nest spec
    trace_label : str

    Returns
    -------
    list
        name of every leaf nest, in the order yielded by ``logit.each_nest``
    """
    raw_nest_spec = config.get_logit_model_settings(model_settings)
    segment_coefficients = simulate.get_segment_coefficients(model_settings, segment_name)
    evaluated_nest_spec = simulate.eval_nest_coefficients(
        raw_nest_spec, segment_coefficients, trace_label)

    return [nest.name for nest in logit.each_nest(evaluated_nest_spec) if nest.is_leaf]
def compute_logsums(primary_purpose, trips, destination_sample, tours_merged,
                    model_settings, skim_hotel, chunk_size, trace_label):
    """
    Calculate mode choice logsums using the same recipe as for trip_mode_choice, but do it
    twice for each alternative since we need the out-of-direction logsum
    (i.e. origin to alt_dest, and alt_dest to half-tour destination)

    Parameters
    ----------
    primary_purpose
        segment name used to look up the logsum coefficients
    trips
        trips table; merged with tours_merged through its 'tour_id' column
    destination_sample
        sampled alternatives, merged with the trips through its index; modified in place
    tours_merged
        tours table, joined to trips on its index
    model_settings : dict
        read for LOGSUM_SETTINGS, TRIP_ORIGIN, ALT_DEST_COL_NAME and PRIMARY_DEST
    skim_hotel
        object whose logsum_skims() supplies the skim wrappers
    chunk_size
    trace_label : str

    Returns
    -------
    destination_sample
        the object passed in, with od_logsum and dp_logsum columns added
    """
    trace_label = tracing.extend_trace_label(trace_label, 'compute_logsums')
    logger.info("Running %s with %d samples", trace_label, destination_sample.shape[0])

    # chunk usage is uniform so better to combine
    chunk_tag = 'trip_destination.compute_logsums'

    # FIXME should pass this in?
    network_los = inject.get_injectable('network_los')

    # - attach tour attributes to every trip
    trips_with_tours = pd.merge(
        trips, tours_merged,
        left_on='tour_id', right_index=True, how="left")
    assert trips_with_tours.index.equals(trips.index)

    # - choosers: one row per sampled destination
    # pandas merge does not preserve the left index if it has duplicate values,
    # so merge against the trip_id column and restore it as the index afterwards
    sample_with_trips = pd.merge(
        destination_sample, trips_with_tours.reset_index(),
        left_index=True, right_on='trip_id', how="left", suffixes=('', '_r'))
    choosers = sample_with_trips.set_index('trip_id')
    assert choosers.index.equals(destination_sample.index)

    logsum_settings = config.read_model_settings(model_settings['LOGSUM_SETTINGS'])
    coefficients = simulate.get_segment_coefficients(logsum_settings, primary_purpose)

    nest_spec = simulate.eval_nest_coefficients(
        config.get_logit_model_settings(logsum_settings), coefficients, trace_label)

    logsum_spec = simulate.eval_coefficients(
        simulate.read_model_spec(file_name=logsum_settings['SPEC']),
        coefficients, estimator=None)

    # model constants first, then coefficients (which can appear in expressions)
    locals_dict = {}
    locals_dict.update(config.get_model_constants(logsum_settings))
    locals_dict.update(coefficients)

    skims = skim_hotel.logsum_skims()

    three_zone = network_los.zone_system == los.THREE_ZONE
    if three_zone:
        # TVPB constants can appear in expressions
        locals_dict.update(network_los.setting('TVPB_SETTINGS.tour_mode_choice.CONSTANTS'))

    # - od_logsums: trip origin to alternative destination
    od_skims = {
        'ORIGIN': model_settings['TRIP_ORIGIN'],
        'DESTINATION': model_settings['ALT_DEST_COL_NAME'],
        'odt_skims': skims['odt_skims'],
        'dot_skims': skims['dot_skims'],
        'od_skims': skims['od_skims'],
    }
    if three_zone:
        od_skims['tvpb_logsum_odt'] = skims['tvpb_logsum_odt']
        od_skims['tvpb_logsum_dot'] = skims['tvpb_logsum_dot']

    destination_sample['od_logsum'] = compute_ood_logsums(
        choosers, logsum_settings, nest_spec, logsum_spec,
        od_skims, locals_dict, chunk_size,
        trace_label=tracing.extend_trace_label(trace_label, 'od'),
        chunk_tag=chunk_tag)

    # - dp_logsums: alternative destination to primary destination
    dp_skims = {
        'ORIGIN': model_settings['ALT_DEST_COL_NAME'],
        'DESTINATION': model_settings['PRIMARY_DEST'],
        'odt_skims': skims['dpt_skims'],
        'dot_skims': skims['pdt_skims'],
        'od_skims': skims['dp_skims'],
    }
    if three_zone:
        dp_skims['tvpb_logsum_odt'] = skims['tvpb_logsum_dpt']
        dp_skims['tvpb_logsum_dot'] = skims['tvpb_logsum_pdt']

    destination_sample['dp_logsum'] = compute_ood_logsums(
        choosers, logsum_settings, nest_spec, logsum_spec,
        dp_skims, locals_dict, chunk_size,
        trace_label=tracing.extend_trace_label(trace_label, 'dp'),
        chunk_tag=chunk_tag)

    return destination_sample
def run_tour_mode_choice_simulate(choosers, tour_purpose, model_settings,
                                  mode_column_name, logsum_column_name,
                                  network_los, skims, constants, estimator,
                                  chunk_size, trace_label=None, trace_choice_name=None):
    """
    Run the mode choice model for a single segment (usually segments are tour/trip
    purposes).

    Pass in the tours/trips that need a mode, the skims, the model settings naming the
    spec to evaluate, and any additional constants to make available to the expressions.

    ``choosers`` is modified in place: 'in_period' and 'out_period' columns are added (they
    must not already exist), the preprocessors are run on it, and the 'tour_id' index is
    copied into a column of the same name when that column is missing.

    Returns the result of ``mode_choice_simulate``.
    """
    raw_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients = simulate.get_segment_coefficients(model_settings, tour_purpose)
    spec = simulate.eval_coefficients(raw_spec, coefficients, estimator)

    raw_nest_spec = config.get_logit_model_settings(model_settings)
    nest_spec = simulate.eval_nest_coefficients(raw_nest_spec, coefficients, trace_label)

    # later sources win on name clashes; coefficients can appear in expressions
    locals_dict = {}
    for source in (constants, skims, coefficients):
        locals_dict.update(source)

    assert not ('in_period' in choosers or 'out_period' in choosers)
    in_time_col = skims['in_time_col_name']
    out_time_col = skims['out_time_col_name']
    choosers['in_period'] = network_los.skim_time_period_label(choosers[in_time_col])
    choosers['out_period'] = network_los.skim_time_period_label(choosers[out_time_col])

    expressions.annotate_preprocessors(
        choosers, locals_dict, skims, model_settings, trace_label)

    # the index name doubles as the trace column, so make sure it also exists as a column
    trace_column_names = choosers.index.name
    assert trace_column_names == 'tour_id'
    if trace_column_names not in choosers:
        choosers[trace_column_names] = choosers.index

    if estimator:
        # write choosers after annotation
        estimator.write_choosers(choosers)

    return mode_choice_simulate(
        choosers=choosers,
        spec=spec,
        nest_spec=nest_spec,
        skims=skims,
        locals_d=locals_dict,
        chunk_size=chunk_size,
        mode_column_name=mode_column_name,
        logsum_column_name=logsum_column_name,
        trace_label=trace_label,
        trace_choice_name=trace_choice_name,
        trace_column_names=trace_column_names,
        estimator=estimator)
def _compute_logsums(alt_tdd, tours_merged, tour_purpose, model_settings,
                     network_los, skims, trace_label):
    """
    compute logsums for tours using skims for alt_tdd out_period and in_period

    alt_tdd is left-joined with tours_merged to build the choosers, which are optionally
    annotated by the logsum preprocessor and then passed to
    simulate.simple_simulate_logsums, whose result is returned.
    """
    trace_label = tracing.extend_trace_label(trace_label, 'logsums')

    logsum_settings = config.read_model_settings(model_settings['LOGSUM_SETTINGS'])

    choosers = alt_tdd.join(tours_merged, how='left', rsuffix='_chooser')
    logger.info(f"{trace_label} compute_logsums for {choosers.shape[0]} choosers {alt_tdd.shape[0]} alts")

    # - locals_dict (later updates win on name clashes)
    locals_dict = {}
    locals_dict.update(config.get_model_constants(logsum_settings))

    if network_los.zone_system == los.THREE_ZONE:
        # TVPB constants can appear in expressions
        tvpb_constants = network_los.setting('TVPB_SETTINGS.tour_mode_choice.CONSTANTS')
        locals_dict.update(tvpb_constants)

    locals_dict.update(skims)

    # constrained coefficients can appear in expressions
    coefficients = simulate.get_segment_coefficients(logsum_settings, tour_purpose)
    locals_dict.update(coefficients)

    # - run preprocessor to annotate choosers
    # allow specification of alternate preprocessor for nontour choosers
    preprocessor_key = model_settings.get('LOGSUM_PREPROCESSOR', 'preprocessor')
    preprocessor_settings = logsum_settings[preprocessor_key]

    if preprocessor_settings:
        simulate.set_skim_wrapper_targets(choosers, skims)
        expressions.assign_columns(
            df=choosers,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)

    # - compute logsums
    logsum_spec = simulate.eval_coefficients(
        simulate.read_model_spec(file_name=logsum_settings['SPEC']),
        coefficients, estimator=None)

    nest_spec = simulate.eval_nest_coefficients(
        config.get_logit_model_settings(logsum_settings), coefficients, trace_label)

    return simulate.simple_simulate_logsums(
        choosers,
        logsum_spec,
        nest_spec,
        skims=skims,
        locals_d=locals_dict,
        chunk_size=0,
        trace_label=trace_label)
def _compute_logsums(alt_tdd, tours_merged, tour_purpose, model_settings, trace_label):
    """
    compute logsums for tours using skims for alt_tdd out_period and in_period

    Parameters
    ----------
    alt_tdd
        alternatives table; left-joined with tours_merged to build the choosers
    tours_merged
        tours table; clashing column names get a '_chooser' suffix in the join
    tour_purpose
        key into model_settings['DESTINATION_FOR_TOUR_PURPOSE'] and segment name used
        to look up the logsum coefficients
    model_settings : dict
        read for LOGSUM_SETTINGS, DESTINATION_FOR_TOUR_PURPOSE and (optionally)
        LOGSUM_PREPROCESSOR
    trace_label : str

    Returns
    -------
    logsums
        result of simulate.simple_simulate_logsums for the joined choosers
    """
    trace_label = tracing.extend_trace_label(trace_label, 'logsums')

    logsum_settings = config.read_model_settings(model_settings['LOGSUM_SETTINGS'])

    choosers = alt_tdd.join(tours_merged, how='left', rsuffix='_chooser')
    # lazy logger arguments; the two counts are separated and both formatted as integers
    logger.info("%s compute_logsums for %d choosers %d alts",
                trace_label, choosers.shape[0], alt_tdd.shape[0])

    # - setup skims
    skim_dict = inject.get_injectable('skim_dict')
    skim_stack = inject.get_injectable('skim_stack')

    orig_col_name = 'TAZ'
    dest_col_name = model_settings.get('DESTINATION_FOR_TOUR_PURPOSE').get(tour_purpose)

    # odt/dot are keyed on the period of the leg itself, odr/dor on the period of the
    # opposite leg
    odt_skim_stack_wrapper = skim_stack.wrap(left_key=orig_col_name, right_key=dest_col_name,
                                             skim_key='out_period')
    dot_skim_stack_wrapper = skim_stack.wrap(left_key=dest_col_name, right_key=orig_col_name,
                                             skim_key='in_period')
    odr_skim_stack_wrapper = skim_stack.wrap(left_key=orig_col_name, right_key=dest_col_name,
                                             skim_key='in_period')
    dor_skim_stack_wrapper = skim_stack.wrap(left_key=dest_col_name, right_key=orig_col_name,
                                             skim_key='out_period')
    od_skim_stack_wrapper = skim_dict.wrap(orig_col_name, dest_col_name)

    skims = {
        "odt_skims": odt_skim_stack_wrapper,
        "dot_skims": dot_skim_stack_wrapper,
        "odr_skims": odr_skim_stack_wrapper,
        "dor_skims": dor_skim_stack_wrapper,
        "od_skims": od_skim_stack_wrapper,
        'orig_col_name': orig_col_name,
        'dest_col_name': dest_col_name,
    }

    # - locals_dict
    constants = config.get_model_constants(logsum_settings)

    locals_dict = {}
    locals_dict.update(constants)
    locals_dict.update(skims)

    # - run preprocessor to annotate choosers
    # allow specification of alternate preprocessor for nontour choosers
    preprocessor = model_settings.get('LOGSUM_PREPROCESSOR', 'preprocessor')
    preprocessor_settings = logsum_settings[preprocessor]

    if preprocessor_settings:

        simulate.set_skim_wrapper_targets(choosers, skims)

        expressions.assign_columns(
            df=choosers,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)

    # - compute logsums
    coefficients = simulate.get_segment_coefficients(logsum_settings, tour_purpose)
    logsum_spec = simulate.read_model_spec(file_name=logsum_settings['SPEC'])
    logsum_spec = simulate.eval_coefficients(logsum_spec, coefficients, estimator=None)

    nest_spec = config.get_logit_model_settings(logsum_settings)
    nest_spec = simulate.eval_nest_coefficients(nest_spec, coefficients)

    # constrained coefficients can appear in expressions
    # (they are added only after the preprocessor has run)
    locals_dict.update(coefficients)

    logsums = simulate.simple_simulate_logsums(
        choosers,
        logsum_spec,
        nest_spec,
        skims=skims,
        locals_d=locals_dict,
        chunk_size=0,
        trace_label=trace_label)

    return logsums
def trip_mode_choice(trips, network_los, chunk_size, trace_hh_id):
    """
    Trip mode choice - compute trip_mode (same values as for tour_mode) for each trip.

    Modes for each primary tour purpose are calculated separately because they have
    different coefficient values (stored in trip_mode_choice_coefficients.csv coefficient
    file.)

    Adds trip_mode column to trip table

    Parameters
    ----------
    trips
        trips table wrapper (provides to_frame() and index)
    network_los
        level-of-service object supplying skims, time period labels and settings
    chunk_size
        passed through to mode_choice_simulate
    trace_hh_id
        when truthy, per-segment and final trace tables are written
    """
    trace_label = 'trip_mode_choice'
    model_settings_file_name = 'trip_mode_choice.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)

    logsum_column_name = model_settings.get('MODE_CHOICE_LOGSUM_COLUMN_NAME')
    mode_column_name = 'trip_mode'

    trips_df = trips.to_frame()
    logger.info("Running %s with %d trips", trace_label, trips_df.shape[0])

    # give trip mode choice the option to run without calling tours_merged. Useful for xborder
    # model where tour_od_choice needs trip mode choice logsums before some of the join keys
    # needed by tour_merged (e.g. home_zone_id) exist
    tours_cols = [
        col for col in model_settings['TOURS_MERGED_CHOOSER_COLUMNS']
        if col not in trips_df.columns
    ]
    if tours_cols:
        tours_merged = inject.get_table('tours_merged').to_frame(columns=tours_cols)
    else:
        tours_merged = pd.DataFrame()

    # - trips_merged - merge trips and tours_merged
    trips_merged = pd.merge(
        trips_df,
        tours_merged,
        left_on='tour_id',
        right_index=True,
        how="left")
    assert trips_merged.index.equals(trips.index)

    tracing.print_summary('primary_purpose', trips_df.primary_purpose, value_counts=True)

    # setup skim keys
    assert ('trip_period' not in trips_merged)
    trips_merged['trip_period'] = network_los.skim_time_period_label(trips_merged.depart)

    orig_col = 'origin'
    dest_col = 'destination'
    min_per_period = network_los.skim_time_periods['period_minutes']
    periods_per_hour = 60 / min_per_period

    constants = {}
    constants.update(config.get_model_constants(model_settings))
    constants.update({
        'ORIGIN': orig_col,
        'DESTINATION': dest_col,
        'MIN_PER_PERIOD': min_per_period,
        'PERIODS_PER_HOUR': periods_per_hour
    })

    skim_dict = network_los.get_default_skim_dict()

    odt_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=orig_col, dest_key=dest_col,
                                               dim3_key='trip_period')
    dot_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=dest_col, dest_key=orig_col,
                                               dim3_key='trip_period')
    od_skim_wrapper = skim_dict.wrap('origin', 'destination')

    skims = {
        "odt_skims": odt_skim_stack_wrapper,
        "dot_skims": dot_skim_stack_wrapper,
        "od_skims": od_skim_wrapper,
    }

    if network_los.zone_system == los.THREE_ZONE:
        # fixme - is this a lightweight object?
        tvpb = network_los.tvpb
        tvpb_recipe = model_settings.get('TVPB_recipe', 'tour_mode_choice')
        tvpb_logsum_odt = tvpb.wrap_logsum(orig_key=orig_col, dest_key=dest_col,
                                           tod_key='trip_period', segment_key='demographic_segment',
                                           recipe=tvpb_recipe,
                                           cache_choices=True,
                                           trace_label=trace_label, tag='tvpb_logsum_odt')
        skims.update({
            'tvpb_logsum_odt': tvpb_logsum_odt,
        })

        # This if-clause gives the user the option of NOT inheriting constants
        # from the tvpb settings. previously, these constants were inherited
        # automatically, which had the undesirable effect of overwriting any
        # trip mode choice model constants/coefficients that shared the same
        # name. The default behavior is still the same (True), but the user
        # can now avoid any chance of squashing these local variables by
        # adding `use_TVPB_constants: False` to the trip_mode_choice.yaml file.
        # the tvpb will still use the constants as defined in the recipe
        # specified above in `tvpb.wrap_logsum()` but they will not be used
        # in the trip mode choice expressions.
        if model_settings.get('use_TVPB_constants', True):
            constants.update(
                network_los.setting('TVPB_SETTINGS.tour_mode_choice.CONSTANTS'))

    # don't create estimation data bundle if trip mode choice is being called
    # from another model step (e.g. tour mode choice logsum creation)
    if pipeline._PIPELINE.rng().step_name != 'trip_mode_choice':
        estimator = None
    else:
        estimator = estimation.manager.begin_estimation('trip_mode_choice')
    if estimator:
        estimator.write_coefficients(model_settings=model_settings)
        estimator.write_coefficients_template(model_settings=model_settings)
        estimator.write_spec(model_settings)
        estimator.write_model_settings(model_settings, model_settings_file_name)

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    nest_spec = config.get_logit_model_settings(model_settings)

    choices_list = []
    for primary_purpose, trips_segment in trips_merged.groupby('primary_purpose'):

        segment_trace_label = tracing.extend_trace_label(trace_label, primary_purpose)

        logger.info("trip_mode_choice tour_type '%s' (%s trips)",
                    primary_purpose, len(trips_segment.index))

        # name index so tracing knows how to slice
        assert trips_segment.index.name == 'trip_id'

        if network_los.zone_system == los.THREE_ZONE:
            tvpb_logsum_odt.extend_trace_label(primary_purpose)

        coefficients = simulate.get_segment_coefficients(model_settings, primary_purpose)

        locals_dict = {}
        locals_dict.update(constants)

        # coefficients are applied after constants, so a shared name hides the constant
        if any(coeff in constants for coeff in coefficients.keys()):
            logger.warning("coefficients are obscuring constants in locals_dict")
        locals_dict.update(coefficients)

        # have to initialize chunker for preprocessing in order to access
        # tvpb logsum terms in preprocessor expressions.
        with chunk.chunk_log(tracing.extend_trace_label(trace_label, 'preprocessing'), base=True):
            expressions.annotate_preprocessors(
                trips_segment, locals_dict, skims,
                model_settings, segment_trace_label)

        if estimator:
            # write choosers after annotation
            estimator.write_choosers(trips_segment)

        locals_dict.update(skims)

        choices = mode_choice_simulate(
            choosers=trips_segment,
            spec=simulate.eval_coefficients(model_spec, coefficients, estimator),
            nest_spec=simulate.eval_nest_coefficients(nest_spec, coefficients, segment_trace_label),
            skims=skims,
            locals_d=locals_dict,
            chunk_size=chunk_size,
            mode_column_name=mode_column_name,
            logsum_column_name=logsum_column_name,
            trace_label=segment_trace_label,
            trace_choice_name='trip_mode_choice',
            estimator=estimator)

        if trace_hh_id:
            # trace the coefficients
            tracing.trace_df(pd.Series(locals_dict),
                             label=tracing.extend_trace_label(segment_trace_label, 'constants'),
                             transpose=False,
                             slicer='NONE')

            # so we can trace with annotations
            assign_in_place(trips_segment, choices)

            tracing.trace_df(trips_segment,
                             label=tracing.extend_trace_label(segment_trace_label, 'trip_mode'),
                             slicer='tour_id',
                             index_label='tour_id',
                             warn_if_empty=True)

        choices_list.append(choices)

    choices_df = pd.concat(choices_list)

    # add cached tvpb_logsum tap choices for modes specified in tvpb_mode_path_types
    if network_los.zone_system == los.THREE_ZONE:

        tvpb_mode_path_types = model_settings.get('tvpb_mode_path_types')
        for mode, path_type in tvpb_mode_path_types.items():

            skim_cache = tvpb_logsum_odt.cache[path_type]

            for cache_col in skim_cache:
                cached_values = skim_cache[cache_col]

                if cache_col not in choices_df:
                    choices_df[cache_col] = \
                        np.nan if pd.api.types.is_numeric_dtype(cached_values) else ''

                # keep the current value except on rows that chose this mode.
                # assigned back explicitly: an in-place where() on a selected column
                # does not update the frame under pandas Copy-on-Write
                choices_df[cache_col] = choices_df[cache_col].where(
                    choices_df[mode_column_name] != mode, cached_values)

    if estimator:
        estimator.write_choices(choices_df.trip_mode)
        choices_df.trip_mode = estimator.get_survey_values(choices_df.trip_mode, 'trips', 'trip_mode')
        estimator.write_override_choices(choices_df.trip_mode)
        estimator.end_estimation()

    # update trips table with choices
    trips_df = trips.to_frame()
    assign_in_place(trips_df, choices_df)

    tracing.print_summary('trip_modes', trips_merged.tour_mode, value_counts=True)

    tracing.print_summary('trip_mode_choice choices',
                          trips_df[mode_column_name], value_counts=True)

    assert not trips_df[mode_column_name].isnull().any()

    pipeline.replace_table("trips", trips_df)

    if trace_hh_id:
        tracing.trace_df(trips_df,
                         label=tracing.extend_trace_label(trace_label, 'trip_mode'),
                         slicer='trip_id',
                         index_label='trip_id',
                         warn_if_empty=True)
def compute_logsums(choosers, tour_purpose, logsum_settings, model_settings,
                    network_los, chunk_size, chunk_tag, trace_label):
    """
    Compute mode choice logsums for the choosers.

    Adds 'in_period', 'out_period' and 'duration' columns to choosers (none of which may
    already be present), runs the optional logsum preprocessor on them and evaluates the
    logsum spec.

    Parameters
    ----------
    choosers
        modified in place
    tour_purpose
        segment name used to look up the coefficients
    logsum_settings
        read for SPEC and the preprocessor settings
    model_settings
        read for CHOOSER_ORIG_COL_NAME, ALT_DEST_COL_NAME, IN_PERIOD, OUT_PERIOD and
        (optionally) LOGSUM_PREPROCESSOR
    network_los
    chunk_size
    chunk_tag
    trace_label

    Returns
    -------
    logsums: pandas series
        computed logsums with same index as choosers
    """
    trace_label = tracing.extend_trace_label(trace_label, 'compute_logsums')
    logger.debug("Running compute_logsums with %d choosers", choosers.shape[0])

    # compute_logsums needs to know name of dest column in interaction_sample
    orig_col_name = model_settings['CHOOSER_ORIG_COL_NAME']
    dest_col_name = model_settings['ALT_DEST_COL_NAME']

    # FIXME - are we ok with altering choosers (so caller doesn't have to set these)?
    assert not ('in_period' in choosers or 'out_period' in choosers)
    choosers['in_period'] = network_los.skim_time_period_label(model_settings['IN_PERIOD'])
    choosers['out_period'] = network_los.skim_time_period_label(model_settings['OUT_PERIOD'])

    assert 'duration' not in choosers
    choosers['duration'] = model_settings['IN_PERIOD'] - model_settings['OUT_PERIOD']

    raw_logsum_spec = simulate.read_model_spec(file_name=logsum_settings['SPEC'])
    coefficients = simulate.get_segment_coefficients(logsum_settings, tour_purpose)
    logsum_spec = simulate.eval_coefficients(raw_logsum_spec, coefficients, estimator=None)

    raw_nest_spec = config.get_logit_model_settings(logsum_settings)
    nest_spec = simulate.eval_nest_coefficients(raw_nest_spec, coefficients, trace_label)

    # model_constants and constrained coefficients can appear in expressions
    locals_dict = {}
    locals_dict.update(config.get_model_constants(logsum_settings))
    locals_dict.update(coefficients)

    # setup skim keys
    skim_dict = network_los.get_default_skim_dict()

    skims = {
        "odt_skims": skim_dict.wrap_3d(orig_key=orig_col_name, dest_key=dest_col_name,
                                       dim3_key='out_period'),
        "dot_skims": skim_dict.wrap_3d(orig_key=dest_col_name, dest_key=orig_col_name,
                                       dim3_key='in_period'),
        "odr_skims": skim_dict.wrap_3d(orig_key=orig_col_name, dest_key=dest_col_name,
                                       dim3_key='in_period'),
        "dor_skims": skim_dict.wrap_3d(orig_key=dest_col_name, dest_key=orig_col_name,
                                       dim3_key='out_period'),
        "od_skims": skim_dict.wrap(orig_col_name, dest_col_name),
        'orig_col_name': orig_col_name,
        'dest_col_name': dest_col_name
    }

    if network_los.zone_system == los.THREE_ZONE:
        # fixme - is this a lightweight object?
        tvpb = network_los.tvpb

        skims['tvpb_logsum_odt'] = tvpb.wrap_logsum(
            orig_key=orig_col_name, dest_key=dest_col_name,
            tod_key='out_period', segment_key='demographic_segment',
            trace_label=trace_label, tag='tvpb_logsum_odt')
        skims['tvpb_logsum_dot'] = tvpb.wrap_logsum(
            orig_key=dest_col_name, dest_key=orig_col_name,
            tod_key='in_period', segment_key='demographic_segment',
            trace_label=trace_label, tag='tvpb_logsum_dot')

        # TVPB constants can appear in expressions
        locals_dict.update(network_los.setting('TVPB_SETTINGS.tour_mode_choice.CONSTANTS'))

    locals_dict.update(skims)

    # - run preprocessor to annotate choosers
    # allow specification of alternate preprocessor for nontour choosers
    preprocessor_key = model_settings.get('LOGSUM_PREPROCESSOR', 'preprocessor')
    preprocessor_settings = logsum_settings[preprocessor_key]

    if preprocessor_settings:
        simulate.set_skim_wrapper_targets(choosers, skims)
        expressions.assign_columns(
            df=choosers,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)

    return simulate.simple_simulate_logsums(
        choosers,
        logsum_spec,
        nest_spec,
        skims=skims,
        locals_d=locals_dict,
        chunk_size=chunk_size,
        chunk_tag=chunk_tag,
        trace_label=trace_label)
def trip_mode_choice(trips, tours_merged, network_los, chunk_size, trace_hh_id):
    """
    Trip mode choice - compute trip_mode (same values as for tour_mode) for each trip.

    Modes for each primary tour purpose are calculated separately because they have
    different coefficient values (stored in trip_mode_choice_coefficients.csv coefficient
    file.)

    Adds trip_mode column to trip table

    Parameters
    ----------
    trips
        trips table wrapper (provides to_frame() and index)
    tours_merged
        tours_merged table wrapper; only the TOURS_MERGED_CHOOSER_COLUMNS are used
    network_los
        level-of-service object supplying skims, time period labels and settings
    chunk_size
        passed through to mode_choice_simulate
    trace_hh_id
        when truthy, per-segment and final trace tables are written
    """
    trace_label = 'trip_mode_choice'
    model_settings_file_name = 'trip_mode_choice.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)

    logsum_column_name = model_settings.get('MODE_CHOICE_LOGSUM_COLUMN_NAME')
    mode_column_name = 'trip_mode'

    trips_df = trips.to_frame()
    logger.info("Running %s with %d trips", trace_label, trips_df.shape[0])

    tours_merged = tours_merged.to_frame()
    tours_merged = tours_merged[model_settings['TOURS_MERGED_CHOOSER_COLUMNS']]

    tracing.print_summary('primary_purpose', trips_df.primary_purpose, value_counts=True)

    # - trips_merged - merge trips and tours_merged
    trips_merged = pd.merge(
        trips_df,
        tours_merged,
        left_on='tour_id',
        right_index=True,
        how="left")
    assert trips_merged.index.equals(trips.index)

    # setup skim keys
    assert ('trip_period' not in trips_merged)
    trips_merged['trip_period'] = network_los.skim_time_period_label(trips_merged.depart)

    orig_col = 'origin'
    dest_col = 'destination'

    constants = {}
    constants.update(config.get_model_constants(model_settings))
    constants.update({
        'ORIGIN': orig_col,
        'DESTINATION': dest_col
    })

    skim_dict = network_los.get_default_skim_dict()

    odt_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=orig_col, dest_key=dest_col,
                                               dim3_key='trip_period')
    dot_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=dest_col, dest_key=orig_col,
                                               dim3_key='trip_period')
    od_skim_wrapper = skim_dict.wrap('origin', 'destination')

    skims = {
        "odt_skims": odt_skim_stack_wrapper,
        "dot_skims": dot_skim_stack_wrapper,
        "od_skims": od_skim_wrapper,
    }

    if network_los.zone_system == los.THREE_ZONE:
        # fixme - is this a lightweight object?
        tvpb = network_los.tvpb

        tvpb_logsum_odt = tvpb.wrap_logsum(orig_key=orig_col, dest_key=dest_col,
                                           tod_key='trip_period', segment_key='demographic_segment',
                                           cache_choices=True,
                                           trace_label=trace_label, tag='tvpb_logsum_odt')
        skims.update({
            'tvpb_logsum_odt': tvpb_logsum_odt,
        })

        # TVPB constants can appear in expressions
        constants.update(network_los.setting('TVPB_SETTINGS.tour_mode_choice.CONSTANTS'))

    estimator = estimation.manager.begin_estimation('trip_mode_choice')
    if estimator:
        estimator.write_coefficients(model_settings=model_settings)
        estimator.write_coefficients_template(model_settings=model_settings)
        estimator.write_spec(model_settings)
        estimator.write_model_settings(model_settings, model_settings_file_name)

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    nest_spec = config.get_logit_model_settings(model_settings)

    choices_list = []
    for primary_purpose, trips_segment in trips_merged.groupby('primary_purpose'):

        segment_trace_label = tracing.extend_trace_label(trace_label, primary_purpose)

        logger.info("trip_mode_choice tour_type '%s' (%s trips)",
                    primary_purpose, len(trips_segment.index))

        # name index so tracing knows how to slice
        assert trips_segment.index.name == 'trip_id'

        if network_los.zone_system == los.THREE_ZONE:
            tvpb_logsum_odt.extend_trace_label(primary_purpose)

        coefficients = simulate.get_segment_coefficients(model_settings, primary_purpose)

        # coefficients are applied after constants, so they win on name clashes
        locals_dict = {}
        locals_dict.update(constants)
        locals_dict.update(coefficients)

        expressions.annotate_preprocessors(
            trips_segment, locals_dict, skims,
            model_settings, segment_trace_label)

        if estimator:
            # write choosers after annotation
            estimator.write_choosers(trips_segment)

        locals_dict.update(skims)

        # NOTE(review): the simulation runs under the step-level trace_label while the
        # nest coefficients are evaluated with segment_trace_label - confirm intended
        choices = mode_choice_simulate(
            choosers=trips_segment,
            spec=simulate.eval_coefficients(model_spec, coefficients, estimator),
            nest_spec=simulate.eval_nest_coefficients(nest_spec, coefficients, segment_trace_label),
            skims=skims,
            locals_d=locals_dict,
            chunk_size=chunk_size,
            mode_column_name=mode_column_name,
            logsum_column_name=logsum_column_name,
            trace_label=trace_label,
            trace_choice_name='trip_mode_choice',
            estimator=estimator)

        if trace_hh_id:
            # trace the coefficients
            tracing.trace_df(pd.Series(locals_dict),
                             label=tracing.extend_trace_label(segment_trace_label, 'constants'),
                             transpose=False,
                             slicer='NONE')

            # so we can trace with annotations
            assign_in_place(trips_segment, choices)

            tracing.trace_df(trips_segment,
                             label=tracing.extend_trace_label(segment_trace_label, 'trip_mode'),
                             slicer='tour_id',
                             index_label='tour_id',
                             warn_if_empty=True)

        choices_list.append(choices)

        # FIXME - force garbage collection
        force_garbage_collect()

    choices_df = pd.concat(choices_list)

    # add cached tvpb_logsum tap choices for modes specified in tvpb_mode_path_types
    if network_los.zone_system == los.THREE_ZONE:

        tvpb_mode_path_types = model_settings.get('tvpb_mode_path_types')
        for mode, path_type in tvpb_mode_path_types.items():

            skim_cache = tvpb_logsum_odt.cache[path_type]

            for cache_col in skim_cache:
                cached_values = skim_cache[cache_col]

                if cache_col not in choices_df:
                    choices_df[cache_col] = \
                        np.nan if pd.api.types.is_numeric_dtype(cached_values) else ''

                # keep the current value except on rows that chose this mode.
                # assigned back explicitly: an in-place where() on a selected column
                # does not update the frame under pandas Copy-on-Write
                choices_df[cache_col] = choices_df[cache_col].where(
                    choices_df[mode_column_name] != mode, cached_values)

    if estimator:
        estimator.write_choices(choices_df.trip_mode)
        choices_df.trip_mode = estimator.get_survey_values(choices_df.trip_mode, 'trips', 'trip_mode')
        estimator.write_override_choices(choices_df.trip_mode)
        estimator.end_estimation()

    # update trips table with choices (and potentially logsums)
    trips_df = trips.to_frame()
    assign_in_place(trips_df, choices_df)

    tracing.print_summary('trip_modes', trips_merged.tour_mode, value_counts=True)

    tracing.print_summary('trip_mode_choice choices',
                          trips_df[mode_column_name], value_counts=True)

    assert not trips_df[mode_column_name].isnull().any()

    pipeline.replace_table("trips", trips_df)

    if trace_hh_id:
        tracing.trace_df(trips_df,
                         label=tracing.extend_trace_label(trace_label, 'trip_mode'),
                         slicer='trip_id',
                         index_label='trip_id',
                         warn_if_empty=True)
def run_od_logsums(spec_segment_name, tours_merged_df, od_sample, model_settings,
                   network_los, estimator, chunk_size, trace_hh_id, trace_label):
    """
    add logsum column to existing tour_destination_sample table

    logsum is calculated by running the mode_choice model for each sample (person, OD_id)
    pair in od_sample, and computing the logsum of all the utilities

    When the logsum settings enable COMPUTE_TRIP_MODE_CHOICE_LOGSUMS, the trip_mode_choice
    step is first run on pseudo-trips built from the sampled ODs (one copy of the trips per
    tour mode alternative) and the resulting trip logsums are added to od_sample as
    'logsum_<mode>_<outbound|inbound>' columns.

    Parameters
    ----------
    spec_segment_name
        segment passed to logsum.compute_logsums
    tours_merged_df
        tours table; filtered to the columns used by the logsum spec
    od_sample
        sampled origin-destination pairs; modified in place
    model_settings : dict
        read for LOGSUM_SETTINGS, ORIG_COL_NAME and DEST_COL_NAME
    network_los
    estimator
        not used by this function
    chunk_size
    trace_hh_id
        not used by this function
    trace_label : str

    Returns
    -------
    od_sample
        the object passed in, with a tour_mode_choice_logsum column added
    """
    chunk_tag = 'tour_od.logsums'
    logsum_settings = config.read_model_settings(model_settings['LOGSUM_SETTINGS'])
    origin_id_col = model_settings['ORIG_COL_NAME']
    dest_id_col = model_settings['DEST_COL_NAME']
    tour_od_id_col = get_od_id_col(origin_id_col, dest_id_col)

    # FIXME - MEMORY HACK - only include columns actually used in spec
    tours_merged_df = \
        logsum.filter_chooser_columns(tours_merged_df, logsum_settings, model_settings)

    # merge ods into choosers table
    choosers = od_sample.join(tours_merged_df, how='left')
    choosers[tour_od_id_col] = \
        choosers[origin_id_col].astype(str) + '_' + choosers[dest_id_col].astype(str)

    logger.info("Running %s with %s rows", trace_label, len(choosers))

    tracing.dump_df(DUMP, choosers, trace_label, 'choosers')

    # run trip mode choice to compute tour mode choice logsums
    if logsum_settings.get('COMPUTE_TRIP_MODE_CHOICE_LOGSUMS', False):

        pseudo_tours = choosers.copy()
        trip_mode_choice_settings = config.read_model_settings('trip_mode_choice')

        # tours_merged table doesn't yet have all the cols it needs to be called (e.g.
        # home_zone_id), so in order to compute tour mode choice/trip mode choice logsums
        # in this step we have to pass all tour-level attributes in with the main trips
        # table. see trip_mode_choice.py L56-61 for more details.
        # work on a copy so the list held by the settings dict is not modified
        tour_cols_needed = list(
            trip_mode_choice_settings.get('TOURS_MERGED_CHOOSER_COLUMNS', []))
        tour_cols_needed.append(tour_od_id_col)

        # from tour_mode_choice.py
        not_university = (pseudo_tours.tour_type != 'school') | ~pseudo_tours.is_university
        pseudo_tours['tour_purpose'] = \
            pseudo_tours.tour_type.where(not_university, 'univ')

        pseudo_tours['stop_frequency'] = '0out_0in'
        pseudo_tours['primary_purpose'] = pseudo_tours['tour_purpose']
        choosers_og_index = choosers.index.name
        pseudo_tours.reset_index(inplace=True)
        pseudo_tours.index.name = 'unique_id'

        # need dest_id_col to create dest col in trips, but need to preserve
        # tour dest as separate column in the trips table bc the trip mode choice
        # preprocessor isn't able to get the tour dest from the tours table bc the
        # tours don't yet have ODs.
        stop_frequency_alts = inject.get_injectable('stop_frequency_alts')
        pseudo_tours['tour_destination'] = pseudo_tours[dest_id_col]
        trips = trip.initialize_from_tours(
            pseudo_tours, stop_frequency_alts,
            [origin_id_col, dest_id_col, 'tour_destination', 'unique_id'])

        # outbound trips depart at the tour start, inbound trips at the tour end
        outbound = trips['outbound']
        trips['depart'] = reindex(pseudo_tours.start, trips.unique_id)
        trips.loc[~outbound, 'depart'] = reindex(
            pseudo_tours.end, trips.loc[~outbound, 'unique_id'])

        nest_spec = config.get_logit_model_settings(logsum_settings)

        # actual coeffs dont matter here, just need them to load the nest structure
        coefficients = simulate.get_segment_coefficients(
            logsum_settings, pseudo_tours.iloc[0]['tour_purpose'])
        nest_spec = simulate.eval_nest_coefficients(nest_spec, coefficients, trace_label)
        tour_mode_alts = [nest.name for nest in logit.each_nest(nest_spec) if nest.is_leaf]

        # repeat rows from the trips table iterating over tour mode
        # (a single concat instead of repeatedly growing an initially empty frame)
        logsum_trips = pd.concat(
            [trips.assign(tour_mode=tour_mode) for tour_mode in tour_mode_alts],
            ignore_index=True)
        assert len(logsum_trips) == len(trips) * len(tour_mode_alts)
        logsum_trips.index.name = 'trip_id'

        # copy over any tour-level columns the pseudo-trips do not have yet
        for col in tour_cols_needed:
            if col not in logsum_trips:
                logsum_trips[col] = reindex(pseudo_tours[col], logsum_trips.unique_id)

        pipeline.replace_table('trips', logsum_trips)
        tracing.register_traceable_table('trips', logsum_trips)
        pipeline.get_rn_generator().add_channel('trips', logsum_trips)

        # run trip mode choice on pseudo-trips. use orca instead of pipeline to
        # execute the step because pipeline can only handle one open step at a time
        orca.run(['trip_mode_choice'])

        # grab trip mode choice logsums and pivot by tour mode and direction, index
        # on tour_id to enable merge back to choosers table
        trips = inject.get_table('trips').to_frame()
        trip_dir_mode_logsums = trips.pivot(
            index=['tour_id', tour_od_id_col],
            columns=['tour_mode', 'outbound'],
            values='trip_mode_choice_logsum')
        new_cols = [
            '_'.join(['logsum', mode, 'outbound' if outbound else 'inbound'])
            for mode, outbound in trip_dir_mode_logsums.columns]
        trip_dir_mode_logsums.columns = new_cols

        choosers.reset_index(inplace=True)
        choosers.set_index(['tour_id', tour_od_id_col], inplace=True)
        choosers = pd.merge(
            choosers, trip_dir_mode_logsums, left_index=True, right_index=True)
        choosers.reset_index(inplace=True)
        choosers.set_index(choosers_og_index, inplace=True)

        pipeline.get_rn_generator().drop_channel('trips')
        tracing.deregister_traceable_table('trips')

        assert (od_sample.index == choosers.index).all()
        for col in new_cols:
            od_sample[col] = choosers[col]

    logsums = logsum.compute_logsums(
        choosers,
        spec_segment_name,
        logsum_settings,
        model_settings,
        network_los,
        chunk_size,
        chunk_tag,
        trace_label,
        'end', 'start', 'duration')

    assert (od_sample.index == logsums.index).all()
    od_sample['tour_mode_choice_logsum'] = logsums

    return od_sample
def compute_logsums(choosers, tour_purpose, logsum_settings, model_settings,
                    skim_dict, skim_stack, chunk_size, trace_label):
    """
    Compute mode choice logsums for each row of choosers.

    Note that choosers is modified in place: 'in_period', 'out_period' and
    'duration' columns are added (they must not already be present), and the
    optional preprocessor may assign further columns.

    Parameters
    ----------
    choosers : pandas.DataFrame
        one row per chooser/alternative pair; must not already contain
        'in_period', 'out_period' or 'duration' columns
    tour_purpose : str
        segment name used to look up the coefficients in logsum_settings
    logsum_settings : dict
        logsum model settings; 'SPEC' and the preprocessor key are read here
    model_settings : dict
        settings of the calling model; 'CHOOSER_ORIG_COL_NAME',
        'ALT_DEST_COL_NAME', 'IN_PERIOD' and 'OUT_PERIOD' are required,
        'LOGSUM_PREPROCESSOR' is optional (defaults to 'preprocessor')
    skim_dict
        object providing wrap(orig_col_name, dest_col_name)
    skim_stack
        object providing wrap(left_key=, right_key=, skim_key=)
    chunk_size
        passed through to simulate.simple_simulate_logsums
    trace_label : str

    Returns
    -------
    logsums: pandas series
        computed logsums with same index as choosers
    """

    trace_label = tracing.extend_trace_label(trace_label, 'compute_logsums')

    # compute_logsums needs to know name of dest column in interaction_sample
    orig_col_name = model_settings['CHOOSER_ORIG_COL_NAME']
    dest_col_name = model_settings['ALT_DEST_COL_NAME']

    # FIXME - are we ok with altering choosers (so caller doesn't have to set these)?
    assert ('in_period' not in choosers) and ('out_period' not in choosers)
    choosers['in_period'] = expressions.skim_time_period_label(model_settings['IN_PERIOD'])
    choosers['out_period'] = expressions.skim_time_period_label(model_settings['OUT_PERIOD'])

    # the same scalar duration (IN_PERIOD - OUT_PERIOD) is assigned to every chooser
    assert ('duration' not in choosers)
    choosers['duration'] = model_settings['IN_PERIOD'] - model_settings['OUT_PERIOD']

    logsum_spec = simulate.read_model_spec(file_name=logsum_settings['SPEC'])
    coefficients = simulate.get_segment_coefficients(logsum_settings, tour_purpose)
    logsum_spec = simulate.eval_coefficients(logsum_spec, coefficients, estimator=None)

    nest_spec = config.get_logit_model_settings(logsum_settings)
    # pass trace_label, as every other eval_nest_coefficients call in this file does
    nest_spec = simulate.eval_nest_coefficients(nest_spec, coefficients, trace_label)

    constants = config.get_model_constants(logsum_settings)

    # lazy %-args: the message is only formatted if debug logging is enabled
    logger.debug("Running compute_logsums with %d choosers", choosers.shape[0])

    # setup skim keys
    # 't' wrappers use the period of the direction travelled (o->d in the
    # out_period, d->o in the in_period); 'r' wrappers use the opposite period
    odt_skim_stack_wrapper = skim_stack.wrap(left_key=orig_col_name, right_key=dest_col_name,
                                             skim_key='out_period')
    dot_skim_stack_wrapper = skim_stack.wrap(left_key=dest_col_name, right_key=orig_col_name,
                                             skim_key='in_period')
    odr_skim_stack_wrapper = skim_stack.wrap(left_key=orig_col_name, right_key=dest_col_name,
                                             skim_key='in_period')
    dor_skim_stack_wrapper = skim_stack.wrap(left_key=dest_col_name, right_key=orig_col_name,
                                             skim_key='out_period')
    od_skim_stack_wrapper = skim_dict.wrap(orig_col_name, dest_col_name)

    skims = {
        "odt_skims": odt_skim_stack_wrapper,
        "dot_skims": dot_skim_stack_wrapper,
        "odr_skims": odr_skim_stack_wrapper,
        "dor_skims": dor_skim_stack_wrapper,
        "od_skims": od_skim_stack_wrapper,
        'orig_col_name': orig_col_name,
        'dest_col_name': dest_col_name
    }

    locals_dict = {}
    locals_dict.update(constants)
    locals_dict.update(skims)
    # constrained coefficients can appear in expressions
    locals_dict.update(coefficients)

    # - run preprocessor to annotate choosers
    # allow specification of alternate preprocessor for nontour choosers
    preprocessor = model_settings.get('LOGSUM_PREPROCESSOR', 'preprocessor')
    preprocessor_settings = logsum_settings[preprocessor]

    if preprocessor_settings:

        simulate.set_skim_wrapper_targets(choosers, skims)

        expressions.assign_columns(
            df=choosers,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)

    logsums = simulate.simple_simulate_logsums(
        choosers,
        logsum_spec,
        nest_spec,
        skims=skims,
        locals_d=locals_dict,
        chunk_size=chunk_size,
        trace_label=trace_label,
        alt_col_name=dest_col_name)

    return logsums