def compute_ood_logsums(
        choosers,
        logsum_settings,
        nest_spec,
        logsum_spec,
        od_skims,
        locals_dict,
        chunk_size,
        trace_label,
        chunk_tag):
    """
    Compute one (of two) out-of-direction logsums for destination alternatives.

    The result will be either trip_origin -> alt_dest or alt_dest -> primary_dest,
    depending on the skims supplied by the caller.

    Parameters
    ----------
    choosers : table handed to the preprocessor and to the logsum simulation
    logsum_settings : settings handed to ``expressions.annotate_preprocessors``
    nest_spec, logsum_spec : specs forwarded to ``simulate.simple_simulate_logsums``
    od_skims : dict-like of skim wrappers; merged into ``locals_dict`` and also
        passed as ``skims``
    locals_dict : dict of expression locals (NOTE: updated in place with ``od_skims``)
    chunk_size, trace_label, chunk_tag : forwarded unchanged

    Returns
    -------
    The logsums returned by ``simulate.simple_simulate_logsums``; their index is
    asserted to equal ``choosers.index``.
    """
    # make the skim wrappers visible to expressions (mutates the caller's dict)
    locals_dict.update(od_skims)

    expressions.annotate_preprocessors(
        choosers,
        locals_dict,
        od_skims,
        logsum_settings,
        trace_label)

    ood_logsums = simulate.simple_simulate_logsums(
        choosers,
        logsum_spec,
        nest_spec,
        skims=od_skims,
        locals_d=locals_dict,
        chunk_size=chunk_size,
        trace_label=trace_label,
        chunk_tag=chunk_tag)

    assert ood_logsums.index.equals(choosers.index)

    # FIXME not strictly necessary, but replacing -inf with a large negative
    # value (e.g. -999) might make trace files more legible

    return ood_logsums
def annotate_trips(trips, network_los, model_settings):
    """
    Add columns to a local trips table and return it as a dataframe.

    The annotator has access to the origin/destination skims and everything
    defined in the model settings CONSTANTS. Pipeline tables can also be
    accessed by listing them under TABLES in the preprocessor settings.

    Parameters
    ----------
    trips : table wrapper exposing ``to_frame()``
    network_los : provides the default skim dict and ``skim_time_period_label``
    model_settings : dict-like of settings; ``HH_EXPANSION_WEIGHT_COL`` is optional

    Returns
    -------
    The dataframe from ``trips.to_frame()`` after annotation, with the household
    expansion weight column added when it is configured and not already present.
    """
    trace_label = 'trip_matrices'

    trips_df = trips.to_frame()
    skim_dict = network_los.get_default_skim_dict()

    # setup skim keys
    if 'trip_period' not in trips_df:
        trips_df['trip_period'] = \
            network_los.skim_time_period_label(trips_df.depart)

    skims = {
        'od_skims': skim_dict.wrap('origin', 'destination'),
        "odt_skims": skim_dict.wrap_3d(
            orig_key='origin',
            dest_key='destination',
            dim3_key='trip_period'),
    }

    # expression locals start from the model constants (if there are any)
    locals_dict = {}
    model_constants = config.get_model_constants(model_settings)
    if model_constants is not None:
        locals_dict.update(model_constants)

    expressions.annotate_preprocessors(
        trips_df, locals_dict, skims, model_settings, trace_label)

    # Data will be expanded by an expansion weight column from
    # the households pipeline table, if specified in the model settings.
    weight_col = model_settings.get('HH_EXPANSION_WEIGHT_COL')
    if not weight_col or weight_col in trips_df:
        return trips_df

    logger.info("adding '%s' from households to trips table" % weight_col)
    weights_by_household = pipeline.get_table('households')[weight_col]
    trips_df[weight_col] = trips_df.household_id.map(weights_by_household)

    return trips_df
def run_tour_mode_choice_simulate(
        choosers,
        tour_purpose,
        model_settings,
        mode_column_name,
        logsum_column_name,
        network_los,
        skims,
        constants,
        estimator,
        chunk_size,
        trace_label=None,
        trace_choice_name=None):
    """
    Run the mode choice model for a single segment.

    This is a utility to run a mode choice model for each segment (usually
    segments are tour/trip purposes). Pass in the tours/trips that need a mode,
    the skims, the settings naming the spec to evaluate, and any additional
    constants you want available when the expressions are evaluated.

    Notes
    -----
    ``choosers`` is modified in place: ``in_period`` and ``out_period`` columns
    are added (they must not already exist), and a ``tour_id`` column mirroring
    the index is added when missing. The index must be named ``tour_id``.

    Returns
    -------
    Whatever ``mode_choice_simulate`` returns for these choosers.
    """
    segment_coefficients = simulate.get_segment_coefficients(model_settings, tour_purpose)

    spec = simulate.eval_coefficients(
        simulate.read_model_spec(file_name=model_settings['SPEC']),
        segment_coefficients,
        estimator)

    nest_spec = simulate.eval_nest_coefficients(
        config.get_logit_model_settings(model_settings),
        segment_coefficients,
        trace_label)

    # later sources override earlier ones; coefficients can appear in expressions
    locals_dict = {}
    for extra_locals in (constants, skims, segment_coefficients):
        locals_dict.update(extra_locals)

    assert not ('in_period' in choosers or 'out_period' in choosers)
    in_time_col = skims['in_time_col_name']
    out_time_col = skims['out_time_col_name']
    choosers['in_period'] = network_los.skim_time_period_label(choosers[in_time_col])
    choosers['out_period'] = network_los.skim_time_period_label(choosers[out_time_col])

    expressions.annotate_preprocessors(
        choosers, locals_dict, skims, model_settings, trace_label)

    # tracing slices on the index name, so it must also exist as a column
    trace_column_names = choosers.index.name
    assert trace_column_names == 'tour_id'
    if trace_column_names not in choosers:
        choosers[trace_column_names] = choosers.index

    if estimator:
        # write choosers after annotation
        estimator.write_choosers(choosers)

    return mode_choice_simulate(
        choosers=choosers,
        spec=spec,
        nest_spec=nest_spec,
        skims=skims,
        locals_d=locals_dict,
        chunk_size=chunk_size,
        mode_column_name=mode_column_name,
        logsum_column_name=logsum_column_name,
        trace_label=trace_label,
        trace_choice_name=trace_choice_name,
        trace_column_names=trace_column_names,
        estimator=estimator)
def trip_mode_choice(
        trips,
        tours_merged,
        network_los,
        chunk_size, trace_hh_id):
    """
    Trip mode choice - compute trip_mode (same values as for tour_mode) for each trip.

    Modes for each primary tour purpose are calculated separately because they have
    different coefficient values (stored in trip_mode_choice_coeffs.csv coefficient file.)

    Adds trip_mode column to trip table

    Parameters
    ----------
    trips : table wrapper exposing ``to_frame()`` and ``index``
    tours_merged : table wrapper exposing ``to_frame()``; only the columns listed
        under ``TOURS_MERGED_CHOOSER_COLUMNS`` in the settings are merged onto trips
    network_los : provides skims, time period labels and (three zone) the tvpb
    chunk_size : forwarded to ``mode_choice_simulate``
    trace_hh_id : truthy to write trace output

    Returns
    -------
    None. The "trips" pipeline table is replaced with the updated dataframe.
    """
    trace_label = 'trip_mode_choice'
    model_settings = config.read_model_settings('trip_mode_choice.yaml')

    logsum_column_name = model_settings.get('MODE_CHOICE_LOGSUM_COLUMN_NAME')
    mode_column_name = 'trip_mode'

    model_spec = \
        simulate.read_model_spec(file_name=model_settings['SPEC'])
    omnibus_coefficients = \
        assign.read_constant_spec(config.config_file_path(model_settings['COEFFICIENTS']))

    trips_df = trips.to_frame()
    logger.info("Running %s with %d trips", trace_label, trips_df.shape[0])

    tours_merged = tours_merged.to_frame()
    tours_merged = tours_merged[model_settings['TOURS_MERGED_CHOOSER_COLUMNS']]

    nest_spec = config.get_logit_model_settings(model_settings)

    tracing.print_summary('primary_purpose',
                          trips_df.primary_purpose, value_counts=True)

    # - trips_merged - merge trips and tours_merged
    trips_merged = pd.merge(
        trips_df,
        tours_merged,
        left_on='tour_id',
        right_index=True,
        how="left")
    assert trips_merged.index.equals(trips.index)

    # setup skim keys
    assert ('trip_period' not in trips_merged)
    trips_merged['trip_period'] = network_los.skim_time_period_label(trips_merged.depart)

    orig_col = 'origin'
    dest_col = 'destination'

    constants = {}
    constants.update(config.get_model_constants(model_settings))
    constants.update({
        'ORIGIN': orig_col,
        'DESTINATION': dest_col
    })

    skim_dict = network_los.get_default_skim_dict()

    odt_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=orig_col, dest_key=dest_col,
                                               dim3_key='trip_period')
    dot_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=dest_col, dest_key=orig_col,
                                               dim3_key='trip_period')
    od_skim_wrapper = skim_dict.wrap('origin', 'destination')

    skims = {
        "odt_skims": odt_skim_stack_wrapper,
        "dot_skims": dot_skim_stack_wrapper,
        "od_skims": od_skim_wrapper,
    }

    three_zone = network_los.zone_system == los.THREE_ZONE

    if three_zone:
        # fixme - is this a lightweight object?
        tvpb = network_los.tvpb

        tvpb_logsum_odt = tvpb.wrap_logsum(orig_key=orig_col, dest_key=dest_col,
                                           tod_key='trip_period', segment_key='demographic_segment',
                                           cache_choices=True,
                                           trace_label=trace_label, tag='tvpb_logsum_odt')
        skims.update({
            'tvpb_logsum_odt': tvpb_logsum_odt,
        })

        # TVPB constants can appear in expressions
        constants.update(network_los.setting('TVPB_SETTINGS.tour_mode_choice.CONSTANTS'))

    choices_list = []
    for primary_purpose, trips_segment in trips_merged.groupby('primary_purpose'):

        segment_trace_label = tracing.extend_trace_label(trace_label, primary_purpose)

        logger.info("trip_mode_choice tour_type '%s' (%s trips)",
                    primary_purpose, len(trips_segment.index))

        # name index so tracing knows how to slice
        assert trips_segment.index.name == 'trip_id'

        if three_zone:
            tvpb_logsum_odt.extend_trace_label(primary_purpose)

        locals_dict = assign.evaluate_constants(omnibus_coefficients[primary_purpose],
                                                constants=constants)
        locals_dict.update(constants)

        expressions.annotate_preprocessors(
            trips_segment, locals_dict, skims,
            model_settings, segment_trace_label)

        locals_dict.update(skims)

        # use the per-segment label so trace output of one purpose is not
        # written under the same label as every other purpose
        choices = mode_choice_simulate(
            choosers=trips_segment,
            spec=model_spec,
            nest_spec=nest_spec,
            skims=skims,
            locals_d=locals_dict,
            chunk_size=chunk_size,
            mode_column_name=mode_column_name,
            logsum_column_name=logsum_column_name,
            trace_label=segment_trace_label,
            trace_choice_name='trip_mode_choice')

        if trace_hh_id:
            # trace the coefficients
            tracing.trace_df(pd.Series(locals_dict),
                             label=tracing.extend_trace_label(segment_trace_label, 'constants'),
                             transpose=False,
                             slicer='NONE')

            # so we can trace with annotations
            assign_in_place(trips_segment, choices)

            tracing.trace_df(trips_segment,
                             label=tracing.extend_trace_label(segment_trace_label, 'trip_mode'),
                             slicer='tour_id',
                             index_label='tour_id',
                             warn_if_empty=True)

        choices_list.append(choices)

        # FIXME - force garbage collection
        force_garbage_collect()

    choices_df = pd.concat(choices_list)

    # add cached tvpb_logsum tap choices for modes specified in tvpb_mode_path_types
    if three_zone:

        # the setting is optional: nothing is added when it is absent or empty
        tvpb_mode_path_types = model_settings.get('tvpb_mode_path_types') or {}
        for mode, path_type in tvpb_mode_path_types.items():

            skim_cache = tvpb_logsum_odt.cache[path_type]

            logger.debug("mode %s path_type %s", mode, path_type)

            for cache_col in skim_cache:

                if cache_col not in choices_df:
                    choices_df[cache_col] = np.nan

                # keep existing values except on rows that chose this mode, which
                # take the cached value. Assign the result back explicitly: an
                # inplace where() on a column selection does not reliably update
                # the parent frame (it never does under pandas copy-on-write).
                choices_df[cache_col] = choices_df[cache_col].where(
                    choices_df[mode_column_name] != mode, skim_cache[cache_col])

    # update trips table with choices (and optionally logsums)
    trips_df = trips.to_frame()
    assign_in_place(trips_df, choices_df)

    tracing.print_summary('tour_modes',
                          trips_merged.tour_mode, value_counts=True)

    tracing.print_summary('trip_mode_choice choices',
                          trips_df[mode_column_name], value_counts=True)

    assert not trips_df[mode_column_name].isnull().any()

    pipeline.replace_table("trips", trips_df)

    if trace_hh_id:
        tracing.trace_df(trips_df,
                         label=tracing.extend_trace_label(trace_label, 'trip_mode'),
                         slicer='trip_id',
                         index_label='trip_id',
                         warn_if_empty=True)
def atwork_subtour_scheduling(
        tours,
        persons_merged,
        tdd_alts,
        skim_dict,
        chunk_size,
        trace_hh_id):
    """
    This model predicts the departure time and duration of each activity for
    at-work subtours.

    Tours whose ``tour_category`` is 'atwork' are scheduled via
    ``vectorize_subtour_scheduling`` against their parent tours; the chosen tdd
    ids are joined to ``tdd_alts`` and assigned back into the tours table, which
    then replaces the "tours" pipeline table.

    Returns
    -------
    None. When there are no at-work subtours the function reports that through
    ``tracing.no_results`` and returns early without touching the pipeline.
    """
    trace_label = 'atwork_subtour_scheduling'
    model_settings_file_name = 'tour_scheduling_atwork.yaml'

    tours_df = tours.to_frame()
    subtours = tours_df[tours_df.tour_category == 'atwork']

    # - if no atwork subtours
    if len(subtours) == 0:
        tracing.no_results(trace_label)
        return

    model_settings = config.read_model_settings(model_settings_file_name)
    estimator = estimation.manager.begin_estimation('atwork_subtour_scheduling')

    coefficients_df = simulate.read_model_coefficients(model_settings) \
        if False else None  # placeholder replaced below to preserve call order
    raw_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients_df = simulate.read_model_coefficients(model_settings)
    model_spec = simulate.eval_coefficients(raw_spec, coefficients_df, estimator)

    persons_df = persons_merged.to_frame()

    logger.info("Running %s with %d tours", trace_label, len(subtours))

    # preprocessor
    constants = config.get_model_constants(model_settings)
    skims = {
        "od_skims": skim_dict.wrap('origin', 'destination'),
    }
    expressions.annotate_preprocessors(
        subtours, constants, skims, model_settings, trace_label)

    # parent_tours table with columns ['tour_id', 'tdd'] index = tour_id
    parent_ids = subtours.parent_tour_id.astype(np.int64).unique()
    parent_tours = pd.merge(
        pd.DataFrame({'tour_id': parent_ids}, index=parent_ids),
        tours_df[['tdd']],
        left_index=True,
        right_index=True)

    if estimator:
        estimator.write_model_settings(model_settings, model_settings_file_name)
        estimator.write_spec(model_settings)
        estimator.write_coefficients(coefficients_df, model_settings)

    # we don't need to update timetable because subtours are scheduled inside work trip windows
    choices = vectorize_subtour_scheduling(
        parent_tours,
        subtours,
        persons_df,
        tdd_alts,
        model_spec,
        model_settings,
        estimator=estimator,
        chunk_size=chunk_size,
        trace_label=trace_label)

    if estimator:
        estimator.write_choices(choices)
        choices = estimator.get_survey_values(choices, 'tours', 'tdd')
        estimator.write_override_choices(choices)
        estimator.end_estimation()

    # choices are tdd alternative ids
    # we want to add start, end, and duration columns to tours, which we have in tdd_alts table
    tdd_choices = choices.to_frame('tdd').merge(
        tdd_alts, left_on=['tdd'], right_index=True, how='left')

    assign_in_place(tours_df, tdd_choices)
    pipeline.replace_table("tours", tours_df)

    if trace_hh_id:
        tracing.trace_df(
            tours_df[tours_df.tour_category == 'atwork'],
            label="atwork_subtour_scheduling",
            slicer='person_id',
            index_label='tour_id',
            columns=None)

    if DUMP:
        # re-select from the updated tours so the dumps include the new schedule columns
        dumped_subtours = tours_df[tours_df.tour_category == 'atwork']
        dumped_parents = tours_df[tours_df.index.isin(dumped_subtours.parent_tour_id)]

        tracing.dump_df(DUMP, dumped_subtours, trace_label, 'sub_tours')
        tracing.dump_df(DUMP, dumped_parents, trace_label, 'parent_tours')

        # parents are keyed by their own id so they line up with their subtours
        dumped_parents['parent_tour_id'] = dumped_parents.index
        parents_and_subtours = pd.concat([dumped_parents, dumped_subtours])
        tour_map = tt.tour_map(
            dumped_parents, parents_and_subtours, tdd_alts,
            persons_id_col='parent_tour_id')
        tracing.dump_df(DUMP, tour_map, trace_label, 'tour_map')
def trip_mode_choice(trips, network_los, chunk_size, trace_hh_id):
    """
    Trip mode choice - compute trip_mode (same values as for tour_mode) for each trip.

    Modes for each primary tour purpose are calculated separately because they have
    different coefficient values (stored in trip_mode_choice_coefficients.csv
    coefficient file.)

    Adds trip_mode column to trip table

    Parameters
    ----------
    trips : table wrapper exposing ``to_frame()`` and ``index``
    network_los : provides skims, time period labels and (three zone) the tvpb
    chunk_size : forwarded to ``mode_choice_simulate``
    trace_hh_id : truthy to write trace output

    Settings read from trip_mode_choice.yaml (as used below)
    --------------------------------------------------------
    SPEC, TOURS_MERGED_CHOOSER_COLUMNS : required
    MODE_CHOICE_LOGSUM_COLUMN_NAME, TVPB_recipe, use_TVPB_constants,
    tvpb_mode_path_types : optional

    Returns
    -------
    None. The "trips" pipeline table is replaced with the updated dataframe.
    """
    trace_label = 'trip_mode_choice'
    model_settings_file_name = 'trip_mode_choice.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)

    logsum_column_name = model_settings.get('MODE_CHOICE_LOGSUM_COLUMN_NAME')
    mode_column_name = 'trip_mode'

    trips_df = trips.to_frame()
    logger.info("Running %s with %d trips", trace_label, trips_df.shape[0])

    # give trip mode choice the option to run without calling tours_merged. Useful for xborder
    # model where tour_od_choice needs trip mode choice logsums before some of the join keys
    # needed by tour_merged (e.g. home_zone_id) exist
    tours_cols = [
        col for col in model_settings['TOURS_MERGED_CHOOSER_COLUMNS']
        if col not in trips_df.columns
    ]
    if tours_cols:
        tours_merged = inject.get_table('tours_merged').to_frame(columns=tours_cols)
    else:
        tours_merged = pd.DataFrame()

    # - trips_merged - merge trips and tours_merged
    trips_merged = pd.merge(
        trips_df,
        tours_merged,
        left_on='tour_id',
        right_index=True,
        how="left")
    assert trips_merged.index.equals(trips.index)

    tracing.print_summary('primary_purpose',
                          trips_df.primary_purpose, value_counts=True)

    # setup skim keys
    assert ('trip_period' not in trips_merged)
    trips_merged['trip_period'] = network_los.skim_time_period_label(trips_merged.depart)

    orig_col = 'origin'
    dest_col = 'destination'
    min_per_period = network_los.skim_time_periods['period_minutes']
    periods_per_hour = 60 / min_per_period

    constants = {}
    constants.update(config.get_model_constants(model_settings))
    constants.update({
        'ORIGIN': orig_col,
        'DESTINATION': dest_col,
        'MIN_PER_PERIOD': min_per_period,
        'PERIODS_PER_HOUR': periods_per_hour
    })

    skim_dict = network_los.get_default_skim_dict()

    odt_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=orig_col, dest_key=dest_col,
                                               dim3_key='trip_period')
    dot_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=dest_col, dest_key=orig_col,
                                               dim3_key='trip_period')
    od_skim_wrapper = skim_dict.wrap('origin', 'destination')

    skims = {
        "odt_skims": odt_skim_stack_wrapper,
        "dot_skims": dot_skim_stack_wrapper,
        "od_skims": od_skim_wrapper,
    }

    three_zone = network_los.zone_system == los.THREE_ZONE

    if three_zone:
        # fixme - is this a lightweight object?
        tvpb = network_los.tvpb
        tvpb_recipe = model_settings.get('TVPB_recipe', 'tour_mode_choice')
        tvpb_logsum_odt = tvpb.wrap_logsum(orig_key=orig_col, dest_key=dest_col,
                                           tod_key='trip_period', segment_key='demographic_segment',
                                           recipe=tvpb_recipe,
                                           cache_choices=True,
                                           trace_label=trace_label, tag='tvpb_logsum_odt')
        skims.update({
            'tvpb_logsum_odt': tvpb_logsum_odt,
        })

        # This if-clause gives the user the option of NOT inheriting constants
        # from the tvpb settings. previously, these constants were inherited
        # automatically, which had the undesirable effect of overwriting any
        # trip mode choice model constants/coefficients that shared the same
        # name. The default behavior is still the same (True), but the user
        # can now avoid any chance of squashing these local variables by
        # adding `use_TVPB_constants: False` to the trip_mode_choice.yaml file.
        # the tvpb will still use the constants as defined in the recipe
        # specified above in `tvpb.wrap_logsum()` but they will not be used
        # in the trip mode choice expressions.
        if model_settings.get('use_TVPB_constants', True):
            constants.update(
                network_los.setting('TVPB_SETTINGS.tour_mode_choice.CONSTANTS'))

    # don't create estimation data bundle if trip mode choice is being called
    # from another model step (e.g. tour mode choice logsum creation)
    if pipeline._PIPELINE.rng().step_name != 'trip_mode_choice':
        estimator = None
    else:
        estimator = estimation.manager.begin_estimation('trip_mode_choice')

    if estimator:
        estimator.write_coefficients(model_settings=model_settings)
        estimator.write_coefficients_template(model_settings=model_settings)
        estimator.write_spec(model_settings)
        estimator.write_model_settings(model_settings, model_settings_file_name)

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    nest_spec = config.get_logit_model_settings(model_settings)

    choices_list = []
    for primary_purpose, trips_segment in trips_merged.groupby('primary_purpose'):

        segment_trace_label = tracing.extend_trace_label(trace_label, primary_purpose)

        logger.info("trip_mode_choice tour_type '%s' (%s trips)",
                    primary_purpose, len(trips_segment.index))

        # name index so tracing knows how to slice
        assert trips_segment.index.name == 'trip_id'

        if three_zone:
            tvpb_logsum_odt.extend_trace_label(primary_purpose)

        coefficients = simulate.get_segment_coefficients(model_settings, primary_purpose)

        locals_dict = {}
        locals_dict.update(constants)

        # coefficients are applied last, so a shared name hides the constant
        if any(coeff in constants for coeff in coefficients.keys()):
            logger.warning("coefficients are obscuring constants in locals_dict")
        locals_dict.update(coefficients)

        # have to initialize chunker for preprocessing in order to access
        # tvpb logsum terms in preprocessor expressions.
        with chunk.chunk_log(tracing.extend_trace_label(trace_label, 'preprocessing'),
                             base=True):
            expressions.annotate_preprocessors(
                trips_segment, locals_dict, skims,
                model_settings, segment_trace_label)

        if estimator:
            # write choosers after annotation
            estimator.write_choosers(trips_segment)

        locals_dict.update(skims)

        choices = mode_choice_simulate(
            choosers=trips_segment,
            spec=simulate.eval_coefficients(model_spec, coefficients, estimator),
            nest_spec=simulate.eval_nest_coefficients(nest_spec, coefficients,
                                                      segment_trace_label),
            skims=skims,
            locals_d=locals_dict,
            chunk_size=chunk_size,
            mode_column_name=mode_column_name,
            logsum_column_name=logsum_column_name,
            trace_label=segment_trace_label,
            trace_choice_name='trip_mode_choice',
            estimator=estimator)

        if trace_hh_id:
            # trace the coefficients
            tracing.trace_df(pd.Series(locals_dict),
                             label=tracing.extend_trace_label(segment_trace_label, 'constants'),
                             transpose=False,
                             slicer='NONE')

            # so we can trace with annotations
            assign_in_place(trips_segment, choices)

            tracing.trace_df(trips_segment,
                             label=tracing.extend_trace_label(segment_trace_label, 'trip_mode'),
                             slicer='tour_id',
                             index_label='tour_id',
                             warn_if_empty=True)

        choices_list.append(choices)

    choices_df = pd.concat(choices_list)

    # add cached tvpb_logsum tap choices for modes specified in tvpb_mode_path_types
    if three_zone:

        # the setting is optional: nothing is added when it is absent or empty
        tvpb_mode_path_types = model_settings.get('tvpb_mode_path_types') or {}
        for mode, path_type in tvpb_mode_path_types.items():

            skim_cache = tvpb_logsum_odt.cache[path_type]

            for cache_col in skim_cache:

                if cache_col not in choices_df:
                    # placeholder matching the kind of data held in the cache column
                    choices_df[cache_col] = \
                        np.nan if pd.api.types.is_numeric_dtype(skim_cache[cache_col]) else ''

                # keep existing values except on rows that chose this mode, which
                # take the cached value. Assign the result back explicitly: an
                # inplace where() on a column selection does not reliably update
                # the parent frame (it never does under pandas copy-on-write).
                choices_df[cache_col] = choices_df[cache_col].where(
                    choices_df[mode_column_name] != mode, skim_cache[cache_col])

    if estimator:
        estimator.write_choices(choices_df.trip_mode)
        choices_df.trip_mode = estimator.get_survey_values(
            choices_df.trip_mode, 'trips', 'trip_mode')
        estimator.write_override_choices(choices_df.trip_mode)
        estimator.end_estimation()

    trips_df = trips.to_frame()
    assign_in_place(trips_df, choices_df)

    tracing.print_summary('trip_modes',
                          trips_merged.tour_mode, value_counts=True)

    tracing.print_summary('trip_mode_choice choices',
                          trips_df[mode_column_name], value_counts=True)

    assert not trips_df[mode_column_name].isnull().any()

    pipeline.replace_table("trips", trips_df)

    if trace_hh_id:
        tracing.trace_df(trips_df,
                         label=tracing.extend_trace_label(trace_label, 'trip_mode'),
                         slicer='trip_id',
                         index_label='trip_id',
                         warn_if_empty=True)