def write_coefficients(self, coefficients_df=None, model_settings=None, file_name=None):
    """
    Write the coefficients table to the estimation output under the base
    name of its original config file.

    Estimation exists to produce new coefficient values, so the output keeps
    the same file name as the config file it came from, which makes it easy
    to put the re-estimated file back in configs.

    Parameters
    ----------
    coefficients_df : optional
        table to write; when None it is read with
        simulate.read_model_coefficients(file_name=...)
    model_settings : optional
        mapping whose 'COEFFICIENTS' entry supplies the file name
        (file_name must then be None)
    file_name : optional
        coefficients file name, used when model_settings is not given

    Raises
    ------
    AssertionError
        if both model_settings and file_name are given, if no file name can
        be determined, or if self.estimating is falsy
    """
    if model_settings is None:
        source_name = file_name
    else:
        # the two ways of naming the file are mutually exclusive
        assert file_name is None
        source_name = model_settings['COEFFICIENTS']

    assert source_name is not None

    table = coefficients_df
    if table is None:
        table = simulate.read_model_coefficients(file_name=source_name)

    # keep only the file name part of the original config path
    output_name = os.path.basename(source_name)

    assert self.estimating
    self.write_table(table, output_name, append=False)
def auto_ownership_simulate(households, households_merged, chunk_size, trace_hh_id):
    """
    Auto ownership model: predicts how many cars a household with given
    characteristics owns.

    Settings are read from 'auto_ownership.yaml'. Every row of
    households_merged is a chooser; the result of simple_simulate is stored
    in the 'auto_ownership' column of the households table, which is then
    replaced in the pipeline. When an estimator is active, the model inputs,
    the simulated choices and the survey override choices are written to it,
    and the override choices are the ones stored.
    """
    label = 'auto_ownership_simulate'
    settings_file = 'auto_ownership.yaml'

    settings = config.read_model_settings(settings_file)
    estimator = estimation.manager.begin_estimation('auto_ownership')

    raw_spec = simulate.read_model_spec(file_name=settings['SPEC'])
    coefficients = simulate.read_model_coefficients(settings)
    spec = simulate.eval_coefficients(raw_spec, coefficients, estimator)

    nests = config.get_logit_model_settings(settings)
    model_constants = config.get_model_constants(settings)

    chooser_df = households_merged.to_frame()

    logger.info("Running %s with %d households", label, len(chooser_df))

    if estimator:
        estimator.write_model_settings(settings, settings_file)
        estimator.write_spec(settings)
        estimator.write_coefficients(coefficients)
        estimator.write_choosers(chooser_df)

    result = simulate.simple_simulate(
        choosers=chooser_df,
        spec=spec,
        nest_spec=nests,
        locals_d=model_constants,
        chunk_size=chunk_size,
        trace_label=label,
        trace_choice_name='auto_ownership',
        estimator=estimator)

    if estimator:
        estimator.write_choices(result)
        result = estimator.get_survey_values(result, 'households', 'auto_ownership')
        estimator.write_override_choices(result)
        estimator.end_estimation()

    households_df = households.to_frame()

    # all households were choosers, so the choices need no reindexing
    households_df['auto_ownership'] = result

    pipeline.replace_table("households", households_df)

    tracing.print_summary('auto_ownership', households_df.auto_ownership,
                          value_counts=True)

    if trace_hh_id:
        tracing.trace_df(households_df,
                         label='auto_ownership',
                         warn_if_empty=True)
def atwork_subtour_destination_sample(tours, persons_merged, model_settings, network_los,
                                      destination_size_terms, estimator, chunk_size,
                                      trace_label):
    """
    Sample alternative destinations for at-work subtours.

    tours are merged with persons_merged on person_id and reduced to the
    columns listed under SIMULATE_CHOOSER_COLUMNS; the resulting choosers are
    passed to interaction_sample with destination_size_terms as alternatives.
    When an estimator is active the sample size is forced to 0.

    Returns
    -------
    choices
        the result of interaction_sample with a 'person_id' column added
    """
    sample_spec = simulate.eval_coefficients(
        simulate.read_model_spec(file_name=model_settings['SAMPLE_SPEC']),
        simulate.read_model_coefficients(model_settings),
        estimator)

    # attach person attributes to each tour
    tours_with_persons = pd.merge(tours, persons_merged,
                                  left_on='person_id', right_index=True)

    # FIXME - MEMORY HACK - only include columns actually used in spec
    choosers = tours_with_persons[model_settings['SIMULATE_CHOOSER_COLUMNS']]

    model_constants = config.get_model_constants(model_settings)

    n_samples = model_settings['SAMPLE_SIZE']
    if estimator:
        # FIXME interaction_sample will return unsampled complete alternatives
        # with probs and pick_count
        logger.info("Estimation mode for %s using unsampled alternatives short_circuit_choices" % (trace_label, ))
        n_samples = 0

    alt_col = model_settings['ALT_DEST_COL_NAME']

    logger.info("Running atwork_subtour_location_sample with %d tours", len(choosers))

    # Skim wrapper keyed on workplace_zone_id (in the choosers) and the index
    # name of the alternatives, which get merged during interaction.
    # It is made available to @ expressions under the name "skims".
    zone_col = destination_size_terms.index.name
    skims = network_los.get_default_skim_dict().wrap('workplace_zone_id', zone_col)

    expression_locals = {'skims': skims}
    if constants_present(model_constants):
        expression_locals.update(model_constants)

    sampled = interaction_sample(
        choosers,
        alternatives=destination_size_terms,
        sample_size=n_samples,
        alt_col_name=alt_col,
        spec=sample_spec,
        skims=skims,
        locals_d=expression_locals,
        chunk_size=chunk_size,
        trace_label=trace_label)

    # remember person_id in chosen alts so we can merge with persons in subsequent steps
    sampled['person_id'] = choosers.person_id

    return sampled


def constants_present(model_constants):
    """Return True when a constants mapping was actually provided (is not None)."""
    return model_constants is not None
def write_coefficient_template(model_settings):
    """
    Refactor the model coefficients into a name template plus a flat
    name/value table and write both to csv in the output directory.

    The coefficients table is transposed and processed one column at a time.
    Each distinct value in a column gets a generated coefficient name: the
    column name plus '_all' when the column holds a single distinct value,
    otherwise the column name plus the '_'-joined index labels of the rows
    holding that value. The template is a copy of the table in which every
    value is replaced by its generated name.

    Writes 'tour_mode_choice_coefficients_template.csv' (template, transposed
    back) and 'tour_mode_choice_refactored_coefficients.csv' (generated names
    with their float32 values), and prints the latter table.
    """
    coefficients = simulate.read_model_coefficients(model_settings).transpose()
    coefficients.columns.name = None

    template = coefficients.copy()

    names = []
    numbers = []

    for column in coefficients.columns:
        column_values = coefficients[column]
        distinct = column_values.unique()
        single_valued = len(distinct) == 1

        for value in distinct:
            if single_valued:
                generated_name = column + '_all'
            else:
                holders = column_values[column_values == value].index.values
                generated_name = column + '_' + '_'.join(holders)

            names.append(generated_name)
            numbers.append(value)

            # swap the value for its generated name wherever it occurs
            template[column] = template[column].where(column_values != value, generated_name)

    refactored = pd.DataFrame({
        'coefficient_name': names,
        'value': numbers
    })
    refactored['value'] = refactored['value'].astype(np.float32)
    print(refactored)

    template = template.transpose()

    template_path = config.output_file_path('tour_mode_choice_coefficients_template.csv')
    template.to_csv(template_path, mode='w', index=True, header=True)

    refactored_path = config.output_file_path('tour_mode_choice_refactored_coefficients.csv')
    refactored.to_csv(refactored_path, mode='w', index=False, header=True)
def write_estimation_specs(estimator, model_settings, settings_file):
    """
    Write the model settings, spec, coefficients, size terms and land use
    table to the estimation data bundle.

    Parameters
    ----------
    estimator
        object receiving the write_* calls
    model_settings
        settings passed to the spec and coefficient writers
    settings_file
        name under which the model settings are written
    """
    estimator.write_model_settings(model_settings, settings_file)

    # estimator.write_spec(model_settings, tag='SAMPLE_SPEC')
    estimator.write_spec(model_settings, tag='SPEC')

    coefficients = simulate.read_model_coefficients(model_settings)
    estimator.write_coefficients(coefficients)

    size_terms = inject.get_injectable('size_terms')
    estimator.write_table(size_terms, 'size_terms', append=False)

    land_use = inject.get_table('land_use').to_frame()
    estimator.write_table(land_use, 'landuse', append=False)
def joint_tour_composition(tours, households, persons, chunk_size, trace_hh_id):
    """
    Joint tour composition model: predicts the makeup of the travel party
    (adults, children, or mixed) of each joint tour.

    Choosers are the joint tours merged with their households. The chosen
    alternative name is stored in the 'composition' column of the tours
    table (empty string for tours that were not simulated) and the tours
    table is replaced in the pipeline. If there are no joint tours,
    add_null_results is called and the function returns early.
    """
    label = 'joint_tour_composition'
    settings_file = 'joint_tour_composition.yaml'

    tours = tours.to_frame()
    joint_tours = tours[tours.tour_category == 'joint']

    # - nothing to do without joint tours
    if len(joint_tours) == 0:
        add_null_results(label, tours)
        return

    settings = config.read_model_settings(settings_file)
    estimator = estimation.manager.begin_estimation('joint_tour_composition')

    # - restrict to households that have joint tours, and to their members
    hh_df = households.to_frame()
    hh_df = hh_df[hh_df.num_hh_joint_tours > 0]

    persons_df = persons.to_frame()
    persons_df = persons_df[persons_df.household_id.isin(hh_df.index)]

    logger.info("Running joint_tour_composition with %d joint tours" % joint_tours.shape[0])

    # - preprocessor annotates the household table
    preprocessor = settings.get('preprocessor', None)
    if preprocessor:
        expressions.assign_columns(
            df=hh_df,
            model_settings=preprocessor,
            locals_dict={
                'persons': persons_df,
                'hh_time_window_overlap': hh_time_window_overlap
            },
            trace_label=label)

    choosers = pd.merge(joint_tours, hh_df,
                        left_on='household_id', right_index=True, how='left')

    # - simple_simulate
    raw_spec = simulate.read_model_spec(file_name=settings['SPEC'])
    coefficients = simulate.read_model_coefficients(settings)
    spec = simulate.eval_coefficients(raw_spec, coefficients, estimator)

    nests = config.get_logit_model_settings(settings)
    model_constants = config.get_model_constants(settings)

    if estimator:
        estimator.write_spec(settings)
        estimator.write_model_settings(settings, settings_file)
        estimator.write_coefficients(coefficients, settings)
        estimator.write_choosers(choosers)

    alt_positions = simulate.simple_simulate(
        choosers=choosers,
        spec=spec,
        nest_spec=nests,
        locals_d=model_constants,
        chunk_size=chunk_size,
        trace_label=label,
        trace_choice_name='composition',
        estimator=estimator)

    # map positional choices to the alternative names (the spec columns)
    composition = pd.Series(spec.columns[alt_positions.values], index=alt_positions.index)

    if estimator:
        estimator.write_choices(composition)
        composition = estimator.get_survey_values(composition, 'tours', 'composition')
        estimator.write_override_choices(composition)
        estimator.end_estimation()

    # composition on the joint tours subset is only needed for tracing
    joint_tours['composition'] = composition

    # the model ran on a subset of tours, so align to the full tours index
    tours['composition'] = composition.reindex(tours.index).fillna('').astype(str)
    pipeline.replace_table("tours", tours)

    tracing.print_summary('joint_tour_composition', joint_tours.composition,
                          value_counts=True)

    if trace_hh_id:
        tracing.trace_df(joint_tours,
                         label="joint_tour_composition.joint_tours",
                         slicer='household_id')
def joint_tour_scheduling(tours, persons_merged, tdd_alts, chunk_size, trace_hh_id):
    """
    Joint tour scheduling model: predicts the departure time and duration of
    each joint tour.

    The chosen tdd alternative ids are merged with tdd_alts and assigned in
    place into the tours table, which is then replaced in the pipeline.
    Returns early (after tracing.no_results) when there are no joint tours.
    """
    label = 'joint_tour_scheduling'
    settings_file = 'joint_tour_scheduling.yaml'
    settings = config.read_model_settings(settings_file)

    tours = tours.to_frame()
    joint_tours = tours[tours.tour_category == 'joint']

    # - nothing to schedule without joint tours
    if len(joint_tours) == 0:
        tracing.no_results(label)
        return

    # use inject.get_table as this won't exist if there are no joint_tours
    participants = inject.get_table('joint_tour_participants').to_frame()

    persons_df = persons_merged.to_frame()

    logger.info("Running %s with %d joint tours", label, joint_tours.shape[0])

    # It may seem peculiar that we work with persons rather than households,
    # but every joint tour is (somewhat arbitrarily) assigned a "primary person"
    # some of whose characteristics are used in the spec, and persons_merged
    # carries household attributes along with person attributes.
    persons_df = persons_df[persons_df.num_hh_joint_tours > 0]

    # Since a household's joint tours may each have different participants,
    # they may also have different joint tour masks (free time of all participants),
    # so we have to either chunk processing by joint_tour_num and build timetable
    # by household, or build timetables by unique joint_tour.

    model_constants = config.get_model_constants(settings)

    # - preprocessor annotates the choosers (joint tours)
    preprocessor = settings.get('preprocessor', None)
    if preprocessor:
        expression_locals = {}
        if model_constants is not None:
            expression_locals.update(model_constants)

        expressions.assign_columns(
            df=joint_tours,
            model_settings=preprocessor,
            locals_dict=expression_locals,
            trace_label=label)

    timetable = inject.get_injectable("timetable")

    estimator = estimation.manager.begin_estimation('joint_tour_scheduling')

    raw_spec = simulate.read_model_spec(file_name=settings['SPEC'])
    coefficients = simulate.read_model_coefficients(settings)
    spec = simulate.eval_coefficients(raw_spec, coefficients, estimator)

    if estimator:
        estimator.write_model_settings(settings, settings_file)
        estimator.write_spec(settings)
        estimator.write_coefficients(coefficients)
        timetable.begin_transaction(estimator)

    tdd_choices = vectorize_joint_tour_scheduling(
        joint_tours, participants,
        persons_df,
        tdd_alts, timetable,
        spec=spec,
        model_settings=settings,
        estimator=estimator,
        chunk_size=chunk_size,
        trace_label=label)

    if estimator:
        estimator.write_choices(tdd_choices)
        tdd_choices = estimator.get_survey_values(tdd_choices, 'tours', 'tdd')
        estimator.write_override_choices(tdd_choices)
        estimator.end_estimation()

        # update timetable to reflect the override choices (assign tours in tour_num order)
        timetable.rollback()
        for tour_num, tours_n in joint_tours.groupby('tour_num', sort=True):
            in_these_tours = participants.tour_id.isin(tours_n.index)
            participants_n = participants[in_these_tours]

            estimator.log("assign timetable for %s participants in %s tours with tour_num %s" %
                          (len(participants_n), len(tours_n), tour_num))

            # - update timetables of all joint tour participants
            timetable.assign(
                participants_n.person_id,
                reindex(tdd_choices, participants_n.tour_id))

    timetable.replace_table()

    # choices are tdd alternative ids; merging with tdd_alts brings in that
    # table's columns (the original comment names start, end, and duration)
    tdd_frame = tdd_choices.to_frame('tdd')
    scheduled = pd.merge(tdd_frame, tdd_alts,
                         left_on=['tdd'], right_index=True, how='left')

    assign_in_place(tours, scheduled)
    pipeline.replace_table("tours", tours)

    # re-select joint tours from the updated table for tracing
    joint_tours = tours[tours.tour_category == 'joint']

    if trace_hh_id:
        tracing.trace_df(joint_tours,
                         label="joint_tour_scheduling",
                         slicer='household_id')
def atwork_subtour_mode_choice(tours, persons_merged, skim_dict, skim_stack, chunk_size, trace_hh_id):
    """
    At-work subtour mode choice simulate.

    Selects tours whose tour_category is 'atwork', merges them with
    persons_merged, runs run_tour_mode_choice_simulate with
    tour_purpose='atwork', assigns the resulting columns in place into the
    tours table and replaces that table in the pipeline. Returns early
    (after tracing.no_results) when there are no at-work subtours.
    """
    label = 'atwork_subtour_mode_choice'
    settings_file = 'tour_mode_choice.yaml'
    settings = config.read_model_settings(settings_file)

    logsum_col = settings.get('MODE_CHOICE_LOGSUM_COLUMN_NAME')
    mode_col = 'tour_mode'

    tours = tours.to_frame()
    subtours = tours[tours.tour_category == 'atwork']

    # - nothing to do without atwork subtours
    if len(subtours) == 0:
        tracing.no_results(label)
        return

    choosers = pd.merge(subtours, persons_merged.to_frame(),
                        left_on='person_id', right_index=True, how='left')

    model_constants = config.get_model_constants(settings)

    logger.info("Running %s with %d subtours" % (label, choosers.shape[0]))

    tracing.print_summary('%s tour_type' % label,
                          choosers.tour_type, value_counts=True)

    # chooser columns used as skim keys
    origin = 'workplace_taz'
    destination = 'destination'
    depart = 'start'
    arrive = 'end'

    odt = skim_stack.wrap(left_key=origin, right_key=destination, skim_key='out_period')
    dot = skim_stack.wrap(left_key=destination, right_key=origin, skim_key='in_period')
    odr = skim_stack.wrap(left_key=origin, right_key=destination, skim_key='in_period')
    dor = skim_stack.wrap(left_key=destination, right_key=origin, skim_key='out_period')
    od = skim_dict.wrap(origin, destination)

    skims = {
        "odt_skims": odt,
        "dot_skims": dot,
        "odr_skims": odr,
        "dor_skims": dor,
        "od_skims": od,
        'orig_col_name': origin,
        'dest_col_name': destination,
        'out_time_col_name': depart,
        'in_time_col_name': arrive
    }

    estimator = estimation.manager.begin_estimation('atwork_subtour_mode_choice')
    if estimator:
        coefficients = simulate.read_model_coefficients(settings)
        estimator.write_coefficients(coefficients)
        template = simulate.read_model_coefficient_template(settings)
        estimator.write_coefficients_template(template)
        estimator.write_spec(settings)
        estimator.write_model_settings(settings, settings_file)
        # FIXME run_tour_mode_choice_simulate writes choosers post-annotation

    choices_df = run_tour_mode_choice_simulate(
        choosers,
        tour_purpose='atwork',
        model_settings=settings,
        mode_column_name=mode_col,
        logsum_column_name=logsum_col,
        skims=skims,
        constants=model_constants,
        estimator=estimator,
        chunk_size=chunk_size,
        trace_label=label,
        trace_choice_name='tour_mode_choice')

    if estimator:
        estimator.write_choices(choices_df[mode_col])
        choices_df[mode_col] = estimator.get_survey_values(
            choices_df[mode_col], 'tours', mode_col)
        estimator.write_override_choices(choices_df[mode_col])
        estimator.end_estimation()

    tracing.print_summary('%s choices' % label,
                          choices_df[mode_col], value_counts=True)

    assign_in_place(tours, choices_df)
    pipeline.replace_table("tours", tours)

    if trace_hh_id:
        traced_label = tracing.extend_trace_label(label, mode_col)
        tracing.trace_df(tours[tours.tour_category == 'atwork'],
                         label=traced_label,
                         slicer='tour_id',
                         index_label='tour_id')

    force_garbage_collect()
def atwork_subtour_mode_choice(tours, persons_merged, network_los, chunk_size, trace_hh_id):
    """
    At-work subtour mode choice simulate

    Selects tours whose tour_category is 'atwork', merges them with
    persons_merged, runs run_tour_mode_choice_simulate with
    tour_purpose='atwork', assigns the resulting columns in place into the
    tours table and replaces that table in the pipeline.

    When network_los.zone_system is los.THREE_ZONE, tvpb logsum wrappers are
    added to the skims, the TVPB tour mode choice constants are added to the
    constants, and after simulation the cached values of those wrappers are
    copied into choices_df for the modes listed under 'tvpb_mode_path_types'.

    Parameters
    ----------
    tours, persons_merged
        table wrappers providing to_frame()
    network_los
        provides the default skim dict, zone_system, tvpb and settings
    chunk_size
        passed through to run_tour_mode_choice_simulate
    trace_hh_id
        when truthy, the at-work tours are traced

    Returns
    -------
    None
        returns early, after tracing.no_results, when there are no at-work
        subtours
    """
    trace_label = 'atwork_subtour_mode_choice'

    model_settings_file_name = 'tour_mode_choice.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)

    logsum_column_name = model_settings.get('MODE_CHOICE_LOGSUM_COLUMN_NAME')
    mode_column_name = 'tour_mode'

    tours = tours.to_frame()
    subtours = tours[tours.tour_category == 'atwork']

    # - if no atwork subtours
    if subtours.shape[0] == 0:
        tracing.no_results(trace_label)
        return

    subtours_merged = pd.merge(subtours, persons_merged.to_frame(),
                               left_on='person_id', right_index=True, how='left')

    logger.info("Running %s with %d subtours", trace_label, subtours_merged.shape[0])

    tracing.print_summary('%s tour_type' % trace_label,
                          subtours_merged.tour_type, value_counts=True)

    # work on a local dict so the TVPB constants added below stay local
    constants = {}
    constants.update(config.get_model_constants(model_settings))

    skim_dict = network_los.get_default_skim_dict()

    # setup skim keys
    orig_col_name = 'workplace_zone_id'
    dest_col_name = 'destination'
    out_time_col_name = 'start'
    in_time_col_name = 'end'
    odt_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=orig_col_name, dest_key=dest_col_name,
                                               dim3_key='out_period')
    dot_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=dest_col_name, dest_key=orig_col_name,
                                               dim3_key='in_period')
    odr_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=orig_col_name, dest_key=dest_col_name,
                                               dim3_key='in_period')
    dor_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=dest_col_name, dest_key=orig_col_name,
                                               dim3_key='out_period')
    od_skim_stack_wrapper = skim_dict.wrap(orig_col_name, dest_col_name)

    skims = {
        "odt_skims": odt_skim_stack_wrapper,
        "dot_skims": dot_skim_stack_wrapper,
        "odr_skims": odr_skim_stack_wrapper,
        "dor_skims": dor_skim_stack_wrapper,
        "od_skims": od_skim_stack_wrapper,
        'orig_col_name': orig_col_name,
        'dest_col_name': dest_col_name,
        'out_time_col_name': out_time_col_name,
        'in_time_col_name': in_time_col_name
    }

    if network_los.zone_system == los.THREE_ZONE:
        # fixme - is this a lightweight object?
        tvpb = network_los.tvpb

        tvpb_logsum_odt = tvpb.wrap_logsum(orig_key=orig_col_name, dest_key=dest_col_name,
                                           tod_key='out_period', segment_key='demographic_segment',
                                           cache_choices=True,
                                           trace_label=trace_label, tag='tvpb_logsum_odt')
        tvpb_logsum_dot = tvpb.wrap_logsum(orig_key=dest_col_name, dest_key=orig_col_name,
                                           tod_key='in_period', segment_key='demographic_segment',
                                           cache_choices=True,
                                           trace_label=trace_label, tag='tvpb_logsum_dot')

        skims.update({
            'tvpb_logsum_odt': tvpb_logsum_odt,
            'tvpb_logsum_dot': tvpb_logsum_dot
        })

        # TVPB constants can appear in expressions
        constants.update(network_los.setting('TVPB_SETTINGS.tour_mode_choice.CONSTANTS'))

    estimator = estimation.manager.begin_estimation('atwork_subtour_mode_choice')
    if estimator:
        estimator.write_coefficients(simulate.read_model_coefficients(model_settings))
        estimator.write_coefficients_template(
            simulate.read_model_coefficient_template(model_settings))
        estimator.write_spec(model_settings)
        estimator.write_model_settings(model_settings, model_settings_file_name)
        # FIXME run_tour_mode_choice_simulate writes choosers post-annotation

    choices_df = run_tour_mode_choice_simulate(
        subtours_merged,
        tour_purpose='atwork',
        model_settings=model_settings,
        mode_column_name=mode_column_name,
        logsum_column_name=logsum_column_name,
        network_los=network_los,
        skims=skims,
        constants=constants,
        estimator=estimator,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='tour_mode_choice')

    # add cached tvpb_logsum tap choices for modes specified in tvpb_mode_path_types
    if network_los.zone_system == los.THREE_ZONE:

        tvpb_mode_path_types = model_settings.get('tvpb_mode_path_types')
        for mode, path_types in tvpb_mode_path_types.items():

            for direction, skim in zip(['od', 'do'], [tvpb_logsum_odt, tvpb_logsum_dot]):

                path_type = path_types[direction]
                skim_cache = skim.cache[path_type]

                print(f"mode {mode} direction {direction} path_type {path_type}")

                for c in skim_cache:

                    dest_col = f'{direction}_{c}'

                    if dest_col not in choices_df:
                        choices_df[dest_col] = \
                            0 if pd.api.types.is_numeric_dtype(skim_cache[c]) else ''

                    # Assign the result back explicitly: an in-place where() on
                    # the column selection is a chained update that is not
                    # guaranteed to reach choices_df (it does not under pandas
                    # copy-on-write).
                    choices_df[dest_col] = choices_df[dest_col].where(
                        choices_df[mode_column_name] != mode, skim_cache[c])

    if estimator:
        estimator.write_choices(choices_df[mode_column_name])
        choices_df[mode_column_name] = \
            estimator.get_survey_values(choices_df[mode_column_name], 'tours', mode_column_name)
        estimator.write_override_choices(choices_df[mode_column_name])
        estimator.end_estimation()

    tracing.print_summary('%s choices' % trace_label,
                          choices_df[mode_column_name], value_counts=True)

    assign_in_place(tours, choices_df)
    pipeline.replace_table("tours", tours)

    if trace_hh_id:
        tracing.trace_df(tours[tours.tour_category == 'atwork'],
                         label=tracing.extend_trace_label(trace_label, mode_column_name),
                         slicer='tour_id',
                         index_label='tour_id')

    force_garbage_collect()
def work_from_home(persons_merged, persons, chunk_size, trace_hh_id):
    """
    This model predicts whether a person (worker) works from home. The output
    from this model is TRUE (if works from home) or FALSE (works away from home).
    The workplace location choice is overridden for workers who work from home
    and set to -1.

    Choosers are the rows of persons_merged with workplace_zone_id > -1.
    The boolean result is stored in the 'work_from_home' column of the persons
    table (False for persons who were not choosers), the column named by the
    DEST_CHOICE_COLUMN_NAME setting is set to -1 where work_from_home is True,
    and the persons table is replaced in the pipeline.

    Iterative calibration (optional): when WORK_FROM_HOME_TARGET_PERCENT is
    set, the simulation is repeated up to WORK_FROM_HOME_ITERATIONS times.
    After each run, if the simulated share is outside the target +/-
    WORK_FROM_HOME_TARGET_PERCENT_TOLERANCE, the coefficient named by
    WORK_FROM_HOME_COEFFICIENT_CONSTANT is increased by
    log(target / max(share, 0.0001)) before the next run.

    Parameters
    ----------
    persons_merged, persons
        table wrappers providing to_frame()
    chunk_size
        passed through to simple_simulate
    trace_hh_id
        when truthy, the persons table is traced
    """
    trace_label = 'work_from_home'
    model_settings_file_name = 'work_from_home.yaml'

    choosers = persons_merged.to_frame()
    choosers = choosers[choosers.workplace_zone_id > -1]
    logger.info("Running %s with %d persons", trace_label, len(choosers))

    model_settings = config.read_model_settings(model_settings_file_name)
    estimator = estimation.manager.begin_estimation('work_from_home')

    constants = config.get_model_constants(model_settings)
    work_from_home_alt = model_settings['WORK_FROM_HOME_ALT']

    # - preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_d = {}
        if constants is not None:
            locals_d.update(constants)

        expressions.assign_columns(
            df=choosers,
            model_settings=preprocessor_settings,
            locals_dict=locals_d,
            trace_label=trace_label)

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients_df = simulate.read_model_coefficients(model_settings)

    nest_spec = config.get_logit_model_settings(model_settings)

    if estimator:
        estimator.write_model_settings(model_settings, model_settings_file_name)
        estimator.write_spec(model_settings)
        estimator.write_coefficients(coefficients_df)
        estimator.write_choosers(choosers)

    # - iterative what-if if specified
    iterations = model_settings.get('WORK_FROM_HOME_ITERATIONS', 1)
    iterations_coefficient_constant = model_settings.get('WORK_FROM_HOME_COEFFICIENT_CONSTANT', None)
    iterations_target_percent = model_settings.get('WORK_FROM_HOME_TARGET_PERCENT', None)
    iterations_target_percent_tolerance = \
        model_settings.get('WORK_FROM_HOME_TARGET_PERCENT_TOLERANCE', None)

    for iteration in range(iterations):

        logger.info("Running %s with %d persons iteration %d",
                    trace_label, len(choosers), iteration)

        # re-read spec to reset substitution
        model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
        model_spec = simulate.eval_coefficients(model_spec, coefficients_df, estimator)

        choices = simulate.simple_simulate(
            choosers=choosers,
            spec=model_spec,
            nest_spec=nest_spec,
            locals_d=constants,
            chunk_size=chunk_size,
            trace_label=trace_label,
            trace_choice_name='work_from_home',
            estimator=estimator)

        if iterations_target_percent is not None:
            current_percent = (choices == work_from_home_alt).sum() / len(choices)
            current_coefficient = \
                coefficients_df.loc[iterations_coefficient_constant, 'value']

            logger.info("Running %s iteration %i current percent %f target percent %f",
                        trace_label, iteration, current_percent, iterations_target_percent)

            lower_bound = iterations_target_percent - iterations_target_percent_tolerance
            upper_bound = iterations_target_percent + iterations_target_percent_tolerance

            if lower_bound <= current_percent <= upper_bound:
                logger.info("Running %s iteration %i converged with coefficient %f",
                            trace_label, iteration, current_coefficient)
                break

            # floor the simulated share so the log adjustment stays finite
            new_value = np.log(
                iterations_target_percent / np.maximum(current_percent, 0.0001)
            ) + current_coefficient

            # write through .loc: assigning via coefficients_df.value[...] is a
            # chained assignment that may not update the frame itself
            coefficients_df.loc[iterations_coefficient_constant, 'value'] = new_value

            logger.info("Running %s iteration %i new coefficient for next iteration %f",
                        trace_label, iteration, new_value)

    choices = (choices == work_from_home_alt)

    dest_choice_column_name = model_settings['DEST_CHOICE_COLUMN_NAME']

    if estimator:
        estimator.write_choices(choices)
        choices = estimator.get_survey_values(choices, 'persons', 'work_from_home')
        estimator.write_override_choices(choices)
        estimator.end_estimation()

    persons = persons.to_frame()
    persons['work_from_home'] = choices.reindex(persons.index).fillna(0).astype(bool)

    # Use the boolean column itself as the mask. The previous
    # `persons.work_from_home is True` compared the Series object to True by
    # identity, which is always False, so the destination was never overridden.
    persons[dest_choice_column_name] = np.where(
        persons.work_from_home, -1, persons[dest_choice_column_name])

    pipeline.replace_table("persons", persons)

    tracing.print_summary('work_from_home', persons.work_from_home, value_counts=True)

    if trace_hh_id:
        tracing.trace_df(persons,
                         label=trace_label,
                         warn_if_empty=True)
def joint_tour_participation(tours, persons_merged, chunk_size, trace_hh_id):
    """
    Predicts for each eligible person to participate or not participate in each joint tour.

    Builds the candidates table from the joint tours and persons_merged,
    simulates a participation choice for each candidate (chunked by
    household), and writes the chosen candidates to the
    'joint_tour_participants' table with a participant_num per tour.
    The joint tours get person_id (the participant with participant_num == 1)
    and number_of_participants, which are assigned in place into the tours
    table before it is replaced in the pipeline.

    Parameters
    ----------
    tours, persons_merged
        table wrappers providing to_frame()
    chunk_size
        passed through to simple_simulate_by_chunk_id
    trace_hh_id
        when truthy, participants and joint tours are traced

    Returns
    -------
    None
        returns early, after add_null_results, when there are no joint tours

    Raises
    ------
    AssertionError
        if candidates already has a 'chunk_id' column, if the participation
        choice column is not in the spec, or if any tour is not satisfied
    """
    trace_label = 'joint_tour_participation'
    model_settings_file_name = 'joint_tour_participation.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)

    tours = tours.to_frame()
    joint_tours = tours[tours.tour_category == 'joint']

    # - if no joint tours
    if joint_tours.shape[0] == 0:
        add_null_results(model_settings, trace_label)
        return

    persons_merged = persons_merged.to_frame()

    # - create joint_tour_participation_candidates table
    candidates = joint_tour_participation_candidates(joint_tours, persons_merged)
    tracing.register_traceable_table('joint_tour_participants', candidates)
    pipeline.get_rn_generator().add_channel('joint_tour_participants', candidates)

    logger.info("Running joint_tours_participation with %d potential participants (candidates)",
                candidates.shape[0])

    # - preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_dict = {
            'person_time_window_overlap': person_time_window_overlap,
            'persons': persons_merged
        }

        expressions.assign_columns(
            df=candidates,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)

    # - simple_simulate

    estimator = estimation.manager.begin_estimation('joint_tour_participation')

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients_df = simulate.read_model_coefficients(model_settings)
    model_spec = simulate.eval_coefficients(model_spec, coefficients_df, estimator)

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    if estimator:
        estimator.write_model_settings(model_settings, model_settings_file_name)
        estimator.write_spec(model_settings)
        estimator.write_coefficients(coefficients_df, model_settings)
        estimator.write_choosers(candidates)

    # add household-based chunk_id so all candidates of a household are chunked together
    assert 'chunk_id' not in candidates.columns
    unique_household_ids = candidates.household_id.unique()
    household_chunk_ids = pd.Series(range(len(unique_household_ids)), index=unique_household_ids)
    candidates['chunk_id'] = reindex(household_chunk_ids, candidates.household_id)

    choices = simulate.simple_simulate_by_chunk_id(
        choosers=candidates,
        spec=model_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='participation',
        custom_chooser=participants_chooser,
        estimator=estimator)

    # choice is boolean (participate or not)
    choice_col = model_settings.get('participation_choice', 'participate')
    # interpolate the column name: the message previously left '%s' unformatted
    assert choice_col in model_spec.columns, \
        "couldn't find participation choice column '%s' in spec" % choice_col
    PARTICIPATE_CHOICE = model_spec.columns.get_loc(choice_col)

    participate = (choices == PARTICIPATE_CHOICE)

    if estimator:
        estimator.write_choices(choices)

        # we override the 'participate' boolean series, instead of raw alternative index
        # in 'choices' series; its value depends on whether the candidate's
        # 'participant_id' is in the joint_tour_participant index
        survey_participants_df = estimator.get_survey_table('joint_tour_participants')
        participate = pd.Series(choices.index.isin(survey_participants_df.index.values),
                                index=choices.index)

        # but estimation software wants to know the choices value (alternative index)
        choices = participate.replace({True: PARTICIPATE_CHOICE, False: 1-PARTICIPATE_CHOICE})
        # estimator.write_override_choices(participate)  # write choices as boolean participate
        estimator.write_override_choices(choices)  # write choices as int alt indexes

        estimator.end_estimation()

    # satisfaction indexed by tour_id
    tour_satisfaction = get_tour_satisfaction(candidates, participate)

    assert tour_satisfaction.all()

    candidates['satisfied'] = reindex(tour_satisfaction, candidates.tour_id)

    PARTICIPANT_COLS = ['tour_id', 'household_id', 'person_id']
    participants = candidates[participate][PARTICIPANT_COLS].copy()

    # assign participant_num
    # FIXME do we want something smarter than the participant with the lowest person_id?
    participants['participant_num'] = \
        participants.sort_values(by=['tour_id', 'person_id']).\
        groupby('tour_id').cumcount() + 1

    pipeline.replace_table("joint_tour_participants", participants)

    # drop channel as we aren't using any more (and it has candidates that weren't chosen)
    pipeline.get_rn_generator().drop_channel('joint_tour_participants')

    # - assign joint tour 'point person' (participant_num == 1)
    point_persons = participants[participants.participant_num == 1]
    joint_tours['person_id'] = point_persons.set_index('tour_id').person_id

    # update number_of_participants which was initialized to 1
    joint_tours['number_of_participants'] = participants.groupby('tour_id').size()

    assign_in_place(tours, joint_tours[['person_id', 'number_of_participants']])

    pipeline.replace_table("tours", tours)

    # - run annotations
    annotate_jtp(model_settings, trace_label)

    if trace_hh_id:
        tracing.trace_df(participants,
                         label="joint_tour_participation.participants")

        tracing.trace_df(joint_tours,
                         label="joint_tour_participation.joint_tours")
def joint_tour_destination(
        tours,
        persons_merged,
        households_merged,
        network_los,
        chunk_size,
        trace_hh_id):
    """
    Given the tour generation from the above, each tour needs to have a
    destination, so in this case tours are the choosers (with the associated
    person that's making the tour).

    Parameters
    ----------
    tours, persons_merged, households_merged :
        pipeline tables; each is converted with ``to_frame()``
    network_los, chunk_size :
        handed through unchanged to ``run_joint_tour_destination``
    trace_hh_id :
        when truthy, the joint tours (with their chosen destination) are traced

    Returns
    -------
    None
        the result is stored in the "tours" pipeline table: a 'destination'
        column and, when DEST_CHOICE_LOGSUM_COLUMN_NAME is configured,
        a logsum column of that name.
    """

    trace_label = 'joint_tour_destination'
    model_settings_file_name = 'joint_tour_destination.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)

    # logsums are only kept when the settings name a column to hold them
    logsum_column_name = model_settings.get('DEST_CHOICE_LOGSUM_COLUMN_NAME')
    want_logsums = logsum_column_name is not None

    sample_table_name = model_settings.get('DEST_CHOICE_SAMPLE_TABLE_NAME')
    want_sample_table = config.setting(
        'want_dest_choice_sample_tables') and sample_table_name is not None

    # choosers are tours - in a sense tours are choosing their destination
    tours = tours.to_frame()
    joint_tours = tours[tours.tour_category == 'joint']

    persons_merged = persons_merged.to_frame()
    households_merged = households_merged.to_frame()

    # - if no joint tours
    if joint_tours.shape[0] == 0:
        tracing.no_results('joint_tour_destination')
        return

    estimator = estimation.manager.begin_estimation('joint_tour_destination')
    if estimator:
        # NOTE(review): write_coefficients as shown at the top of this file asserts
        # that a file name is available - confirm this one-argument call is supported
        estimator.write_coefficients(
            simulate.read_model_coefficients(model_settings))
        # the sample spec (tag='SAMPLE_SPEC') is deliberately not written
        estimator.write_spec(model_settings, tag='SPEC')
        estimator.set_alt_id(model_settings["ALT_DEST_COL_NAME"])
        estimator.write_table(inject.get_injectable('size_terms'), 'size_terms', append=False)
        estimator.write_table(inject.get_table('land_use').to_frame(), 'landuse', append=False)
        estimator.write_model_settings(model_settings, model_settings_file_name)

    # run_destination_simulate writes choosers because tours are merged just-in-time with persons
    # to reduce memory overhead (the full tours_merged table is only created for one segment at a time)
    # NOTE(review): save_sample_df is requested through want_sample_table but never
    # stored here, unlike non_mandatory_tour_destination - confirm whether it should
    # be added to the pipeline under sample_table_name
    choices_df, save_sample_df = run_joint_tour_destination(
        tours,
        persons_merged,
        households_merged,
        want_logsums,
        want_sample_table,
        model_settings,
        network_los,
        estimator,
        chunk_size, trace_hh_id, trace_label)

    if estimator:
        estimator.write_choices(choices_df.choice)
        choices_df.choice = estimator.get_survey_values(
            choices_df.choice, 'tours', 'destination')
        estimator.write_override_choices(choices_df.choice)
        estimator.end_estimation()

    # add column as we want joint_tours table for tracing.
    joint_tours['destination'] = choices_df.choice
    assign_in_place(tours, joint_tours[['destination']])

    if want_logsums:
        joint_tours[logsum_column_name] = choices_df['logsum']
        assign_in_place(tours, joint_tours[[logsum_column_name]])

    # store the table only once every new column is in place, so the logsum
    # column is part of what gets stored (same order as the other destination models)
    pipeline.replace_table("tours", tours)

    tracing.print_summary('destination', joint_tours.destination, describe=True)

    if trace_hh_id:
        tracing.trace_df(joint_tours,
                         label="joint_tour_destination.joint_tours")
def atwork_subtour_destination(
        tours,
        persons_merged,
        skim_dict,
        skim_stack,
        land_use, size_terms,
        chunk_size, trace_hh_id):
    """
    Choose a destination for every at-work subtour and write it to the tours table.

    Runs in three stages (sample, logsums, simulate), each under its own
    extended trace label. The chosen destination - and the logsum, when
    DEST_CHOICE_LOGSUM_COLUMN_NAME is configured - are assigned onto the full
    tours frame, which then replaces the "tours" pipeline table. Returns None;
    if there are no at-work subtours the step reports no results and stops.
    """

    trace_label = 'atwork_subtour_destination'
    model_settings_file_name = 'atwork_subtour_destination.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)

    destination_column_name = 'destination'

    # a logsum column name in the settings switches logsum output on
    logsum_column_name = model_settings.get('DEST_CHOICE_LOGSUM_COLUMN_NAME')
    want_logsums = logsum_column_name is not None

    sample_table_name = model_settings.get('DEST_CHOICE_SAMPLE_TABLE_NAME')
    want_sample_table = \
        config.setting('want_dest_choice_sample_tables') and sample_table_name is not None

    persons_merged = persons_merged.to_frame()
    tours = tours.to_frame()

    # interaction_sample_simulate insists choosers appear in same order as alts
    subtours = tours[tours.tour_category == 'atwork'].sort_index()

    # - nothing to do without atwork subtours
    if len(subtours) == 0:
        tracing.no_results('atwork_subtour_destination')
        return

    estimator = estimation.manager.begin_estimation('atwork_subtour_destination')
    if estimator:
        # NOTE(review): write_coefficients as shown at the top of this file asserts
        # that a file name is available - confirm this one-argument call is supported
        estimator.write_coefficients(simulate.read_model_coefficients(model_settings))
        # the sample spec (tag='SAMPLE_SPEC') is deliberately not written
        estimator.write_spec(model_settings, tag='SPEC')
        estimator.set_alt_id(model_settings["ALT_DEST_COL_NAME"])
        estimator.write_table(inject.get_injectable('size_terms'), 'size_terms', append=False)
        estimator.write_table(inject.get_table('land_use').to_frame(), 'landuse', append=False)
        estimator.write_model_settings(model_settings, model_settings_file_name)

    dest_size_terms = tour_destination_size_terms(land_use, size_terms, 'atwork')

    # stage 1: sample candidate destinations
    sample_df = atwork_subtour_destination_sample(
        subtours,
        persons_merged,
        model_settings,
        skim_dict,
        dest_size_terms,
        estimator=estimator,
        chunk_size=chunk_size,
        trace_label=tracing.extend_trace_label(trace_label, 'sample'))

    # stage 2: annotate the sample with logsums
    sample_df = atwork_subtour_destination_logsums(
        persons_merged,
        sample_df,
        model_settings,
        skim_dict, skim_stack,
        chunk_size=chunk_size,
        trace_label=tracing.extend_trace_label(trace_label, 'logsums'))

    # stage 3: choose among the sampled destinations
    dest_choices = atwork_subtour_destination_simulate(
        subtours,
        persons_merged,
        sample_df,
        want_logsums,
        model_settings,
        skim_dict,
        dest_size_terms,
        estimator=estimator,
        chunk_size=chunk_size,
        trace_label=tracing.extend_trace_label(trace_label, 'simulate'))

    if estimator:
        estimator.write_choices(dest_choices['choice'])
        dest_choices['choice'] = \
            estimator.get_survey_values(dest_choices['choice'], 'tours', 'destination')
        estimator.write_override_choices(dest_choices['choice'])
        estimator.end_estimation()

    subtours[destination_column_name] = dest_choices['choice']
    assign_in_place(tours, subtours[[destination_column_name]])

    if want_logsums:
        subtours[logsum_column_name] = dest_choices['logsum']
        assign_in_place(tours, subtours[[logsum_column_name]])

    pipeline.replace_table("tours", tours)

    if want_sample_table:
        # FIXME - sample_table
        assert len(sample_df.index.unique()) == len(dest_choices)
        sample_df.set_index(model_settings['ALT_DEST_COL_NAME'],
                            append=True, inplace=True)
        pipeline.extend_table(sample_table_name, sample_df)

    tracing.print_summary(destination_column_name,
                          subtours[destination_column_name],
                          describe=True)

    if trace_hh_id:
        tracing.trace_df(tours,
                         label='atwork_subtour_destination',
                         columns=['destination'])
def atwork_subtour_destination_simulate(
        subtours, persons_merged, destination_sample,
        want_logsums,
        model_settings,
        skim_dict,
        destination_size_terms,
        estimator,
        chunk_size, trace_label):
    """
    atwork_subtour_destination model on atwork_subtour_destination_sample
    annotated with mode_choice logsum to select a destination from sample alternatives

    Returns the result of interaction_sample_simulate; when logsums are not
    requested the Series it returns is wrapped into a one-column DataFrame
    named 'choice', so callers always get a DataFrame with that column.
    """

    spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients = simulate.read_model_coefficients(model_settings)
    spec = simulate.eval_coefficients(spec, coefficients, estimator)

    # interaction_sample_simulate insists choosers appear in same order as alts
    ordered_subtours = subtours.sort_index()

    # attach person attributes to each subtour
    merged = pd.merge(ordered_subtours, persons_merged,
                      left_on='person_id', right_index=True)

    # FIXME - MEMORY HACK - only include columns actually used in spec
    choosers = merged[model_settings['SIMULATE_CHOOSER_COLUMNS']]

    if estimator:
        estimator.write_choosers(choosers)

    alt_dest_col_name = model_settings['ALT_DEST_COL_NAME']
    chooser_col_name = 'workplace_taz'

    # alternatives are pre-sampled and annotated with logsums and pick_count
    # but we have to merge destination_size_terms columns into alt sample list
    alternatives = pd.merge(
        destination_sample,
        destination_size_terms,
        left_on=alt_dest_col_name,
        right_index=True,
        how="left")

    tracing.dump_df(DUMP, alternatives, trace_label, 'alternatives')

    constants = config.get_model_constants(model_settings)

    logger.info("Running atwork_subtour_destination_simulate with %d persons", len(choosers))

    # skim wrapper keyed on the chooser's zone column and the alternative's zone column,
    # which get merged during interaction; exposed to @ expressions as "skims"
    skims = skim_dict.wrap(chooser_col_name, alt_dest_col_name)

    locals_d = dict(skims=skims)
    if constants is not None:
        locals_d.update(constants)

    tracing.dump_df(DUMP, choosers, trace_label, 'choosers')

    choices = interaction_sample_simulate(
        choosers,
        alternatives,
        spec=spec,
        choice_column=alt_dest_col_name,
        want_logsums=want_logsums,
        skims=skims,
        locals_d=locals_d,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='workplace_location',
        estimator=estimator)

    if want_logsums:
        return choices

    # for consistency, always return a dataframe with canonical column name
    assert isinstance(choices, pd.Series)
    return choices.to_frame('choice')
def transit_pass_subsidy(
        persons_merged, persons,
        chunk_size,
        trace_hh_id):
    """
    Transit pass subsidy model.

    Simulates a choice for every row of persons_merged and stores it, reindexed
    to the persons table, in the 'transit_pass_subsidy' column of the "persons"
    pipeline table. Returns None.
    """

    trace_label = 'transit_pass_subsidy'
    model_settings_file_name = 'transit_pass_subsidy.yaml'

    choosers = persons_merged.to_frame()
    logger.info("Running %s with %d persons", trace_label, len(choosers))

    model_settings = config.read_model_settings(model_settings_file_name)
    estimator = estimation.manager.begin_estimation('transit_pass_subsidy')

    constants = config.get_model_constants(model_settings)

    # - optional preprocessor annotates the choosers in place
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:
        preprocessor_locals = dict(constants) if constants is not None else {}
        expressions.assign_columns(
            df=choosers,
            model_settings=preprocessor_settings,
            locals_dict=preprocessor_locals,
            trace_label=trace_label)

    spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients_df = simulate.read_model_coefficients(model_settings)
    spec = simulate.eval_coefficients(spec, coefficients_df, estimator)

    nest_spec = config.get_logit_model_settings(model_settings)

    if estimator:
        estimator.write_model_settings(model_settings, model_settings_file_name)
        estimator.write_spec(model_settings)
        # NOTE(review): write_coefficients as shown at the top of this file asserts
        # that a file name is available - confirm this one-argument call is supported
        estimator.write_coefficients(coefficients_df)
        estimator.write_choosers(choosers)

    choices = simulate.simple_simulate(
        choosers=choosers,
        spec=spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='transit_pass_subsidy',
        estimator=estimator)

    if estimator:
        # record the simulated choices, then continue with the survey values
        estimator.write_choices(choices)
        choices = estimator.get_survey_values(choices, 'persons', 'transit_pass_subsidy')
        estimator.write_override_choices(choices)
        estimator.end_estimation()

    persons_df = persons.to_frame()
    persons_df['transit_pass_subsidy'] = choices.reindex(persons_df.index)

    pipeline.replace_table("persons", persons_df)

    tracing.print_summary('transit_pass_subsidy',
                          persons_df.transit_pass_subsidy,
                          value_counts=True)

    if trace_hh_id:
        tracing.trace_df(persons_df,
                         label=trace_label,
                         warn_if_empty=True)
def telecommute_frequency(
        persons_merged, persons,
        chunk_size,
        trace_hh_id):
    """
    This model predicts the frequency of telecommute for a person (worker) who
    does not work from home. The alternatives of this model are 'No Telecommute',
    '1 day per week', '2 to 3 days per week' and '4 days per week'. This model
    reflects the choices of people who prefer a combination of working from home and
    office during a week.

    Only persons with workplace_zone_id > -1 are choosers. The chosen alternative
    name is stored in the 'telecommute_frequency' column of the "persons" pipeline
    table; persons who were not choosers get an empty string. Returns None.
    """

    trace_label = 'telecommute_frequency'
    model_settings_file_name = 'telecommute_frequency.yaml'

    all_persons_merged = persons_merged.to_frame()
    choosers = all_persons_merged[all_persons_merged.workplace_zone_id > -1]
    logger.info("Running %s with %d persons", trace_label, len(choosers))

    model_settings = config.read_model_settings(model_settings_file_name)
    estimator = estimation.manager.begin_estimation('telecommute_frequency')

    constants = config.get_model_constants(model_settings)

    # - optional preprocessor annotates the choosers
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:
        preprocessor_locals = dict(constants) if constants is not None else {}
        expressions.assign_columns(
            df=choosers,
            model_settings=preprocessor_settings,
            locals_dict=preprocessor_locals,
            trace_label=trace_label)

    spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients_df = simulate.read_model_coefficients(model_settings)
    spec = simulate.eval_coefficients(spec, coefficients_df, estimator)

    nest_spec = config.get_logit_model_settings(model_settings)

    if estimator:
        estimator.write_model_settings(model_settings, model_settings_file_name)
        estimator.write_spec(model_settings)
        # NOTE(review): write_coefficients as shown at the top of this file asserts
        # that a file name is available - confirm this one-argument call is supported
        estimator.write_coefficients(coefficients_df)
        estimator.write_choosers(choosers)

    alt_positions = simulate.simple_simulate(
        choosers=choosers,
        spec=spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='telecommute_frequency',
        estimator=estimator)

    # translate positional choices into the spec's column (alternative) names
    choices = pd.Series(spec.columns[alt_positions.values], index=alt_positions.index)

    if estimator:
        estimator.write_choices(choices)
        choices = estimator.get_survey_values(choices, 'persons', 'telecommute_frequency')
        estimator.write_override_choices(choices)
        estimator.end_estimation()

    persons_df = persons.to_frame()
    # non-choosers have no choice: backfill with an empty string
    persons_df['telecommute_frequency'] = \
        choices.reindex(persons_df.index).fillna('').astype(str)

    pipeline.replace_table("persons", persons_df)

    tracing.print_summary('telecommute_frequency',
                          persons_df.telecommute_frequency,
                          value_counts=True)

    if trace_hh_id:
        tracing.trace_df(persons_df,
                         label=trace_label,
                         warn_if_empty=True)
def mandatory_tour_frequency(persons_merged,
                             chunk_size,
                             trace_hh_id):
    """
    This model predicts the frequency of making mandatory trips (see the
    alternatives above) - these trips include work and school in some combination.

    Choosers are the persons whose cdap_activity is 'M'. The chosen alternative
    name is turned into mandatory tours (appended to the "tours" pipeline table)
    and is also written to the 'mandatory_tour_frequency' column of the "persons"
    table, with an empty string for persons who were not choosers. Returns None.
    """

    trace_label = 'mandatory_tour_frequency'
    model_settings_file_name = 'mandatory_tour_frequency.yaml'

    model_settings = config.read_model_settings(model_settings_file_name)

    # filter based on results of CDAP
    merged = persons_merged.to_frame()
    choosers = merged[merged.cdap_activity == 'M']
    logger.info("Running mandatory_tour_frequency with %d persons", len(choosers))

    # - if no mandatory tours
    if len(choosers) == 0:
        add_null_results(trace_label, model_settings)
        return

    # - preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:
        expressions.assign_columns(
            df=choosers,
            model_settings=preprocessor_settings,
            locals_dict={},
            trace_label=trace_label)

    estimator = estimation.manager.begin_estimation('mandatory_tour_frequency')

    spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients_df = simulate.read_model_coefficients(model_settings)
    spec = simulate.eval_coefficients(spec, coefficients_df, estimator)

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    if estimator:
        estimator.write_spec(model_settings)
        estimator.write_model_settings(model_settings, model_settings_file_name)
        # NOTE(review): write_coefficients as shown at the top of this file asserts
        # that a file name is available - confirm this one-argument call is supported
        estimator.write_coefficients(coefficients_df)
        estimator.write_choosers(choosers)

    alt_positions = simulate.simple_simulate(
        choosers=choosers,
        spec=spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='mandatory_tour_frequency',
        estimator=estimator)

    # convert indexes to alternative names
    choices = pd.Series(spec.columns[alt_positions.values], index=alt_positions.index)

    if estimator:
        estimator.write_choices(choices)
        choices = estimator.get_survey_values(choices, 'persons', 'mandatory_tour_frequency')
        estimator.write_override_choices(choices)
        estimator.end_estimation()

    # - create mandatory tours
    # This reprocesses the choice of the mandatory tour frequency alternative
    # into an actual dataframe of tours. Ending format is the same as for
    # non_mandatory_tours except trip types are "work" and "school"
    alternatives = simulate.read_model_alts(
        'mandatory_tour_frequency_alternatives.csv', set_index='alt')
    choosers['mandatory_tour_frequency'] = choices.reindex(choosers.index)

    mandatory_tours = process_mandatory_tours(
        persons=choosers,
        mandatory_tour_frequency_alts=alternatives)

    pipeline.extend_table("tours", mandatory_tours)
    tracing.register_traceable_table('tours', mandatory_tours)
    pipeline.get_rn_generator().add_channel('tours', mandatory_tours)

    # - annotate persons
    persons = inject.get_table('persons').to_frame()

    # need to reindex as we only handled persons with cdap_activity == 'M'
    frequency_for_all = choices.reindex(persons.index)
    persons['mandatory_tour_frequency'] = frequency_for_all.fillna('').astype(str)

    expressions.assign_columns(
        df=persons,
        model_settings=model_settings.get('annotate_persons'),
        trace_label=tracing.extend_trace_label(trace_label, 'annotate_persons'))

    pipeline.replace_table("persons", persons)

    tracing.print_summary('mandatory_tour_frequency',
                          persons.mandatory_tour_frequency,
                          value_counts=True)

    if trace_hh_id:
        tracing.trace_df(mandatory_tours,
                         label="mandatory_tour_frequency.mandatory_tours",
                         warn_if_empty=True)

        tracing.trace_df(persons,
                         label="mandatory_tour_frequency.persons",
                         warn_if_empty=True)
def non_mandatory_tour_scheduling(tours,
                                  persons_merged,
                                  tdd_alts,
                                  chunk_size,
                                  trace_hh_id):
    """
    This model predicts the departure time and duration of each activity for
    non-mandatory tours.

    Parameters
    ----------
    tours, persons_merged :
        pipeline tables; each is converted with ``to_frame()``
    tdd_alts :
        table of tour departure/duration alternatives, merged onto the choices
        by tdd alternative id
    chunk_size :
        handed through to ``vectorize_tour_scheduling``
    trace_hh_id :
        when truthy, the scheduled non-mandatory tours are traced

    Returns
    -------
    None
        the tdd choice and the matching tdd_alts columns are assigned onto the
        tours frame, which replaces the "tours" pipeline table; the injected
        timetable is updated as well.
    """

    trace_label = 'non_mandatory_tour_scheduling'
    model_settings_file_name = 'non_mandatory_tour_scheduling.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)

    tours = tours.to_frame()
    non_mandatory_tours = tours[tours.tour_category == 'non_mandatory']

    # report the number of tours actually scheduled here, not the size of the whole table
    logger.info("Running non_mandatory_tour_scheduling with %d tours",
                len(non_mandatory_tours))

    persons_merged = persons_merged.to_frame()

    if 'SIMULATE_CHOOSER_COLUMNS' in model_settings:
        persons_merged = expressions.filter_chooser_columns(
            persons_merged, model_settings['SIMULATE_CHOOSER_COLUMNS'])

    constants = config.get_model_constants(model_settings)

    # - run preprocessor to annotate choosers
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_d = {}
        if constants is not None:
            locals_d.update(constants)

        expressions.assign_columns(
            df=non_mandatory_tours,
            model_settings=preprocessor_settings,
            locals_dict=locals_d,
            trace_label=trace_label)

    timetable = inject.get_injectable("timetable")

    estimator = estimation.manager.begin_estimation('non_mandatory_tour_scheduling')

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients_df = simulate.read_model_coefficients(model_settings)
    model_spec = simulate.eval_coefficients(model_spec, coefficients_df, estimator)

    if estimator:
        estimator.write_model_settings(model_settings, model_settings_file_name)
        estimator.write_spec(model_settings)
        # NOTE(review): write_coefficients as shown at the top of this file asserts
        # that a file name is available - confirm this one-argument call is supported
        estimator.write_coefficients(coefficients_df)
        # opened so the simulated assignments can be rolled back below
        timetable.begin_transaction(estimator)

    # - non_mandatory tour scheduling is not segmented by tour type
    spec_info = {
        'spec': model_spec,
        'estimator': estimator
    }

    choices = vectorize_tour_scheduling(
        non_mandatory_tours, persons_merged,
        tdd_alts, timetable,
        tour_segments=spec_info,
        tour_segment_col=None,
        model_settings=model_settings,
        chunk_size=chunk_size,
        trace_label=trace_label)

    if estimator:
        estimator.write_choices(choices)
        choices = estimator.get_survey_values(choices, 'tours', 'tdd')
        estimator.write_override_choices(choices)
        estimator.end_estimation()

        # update timetable to reflect the override choices (assign tours in tour_num order)
        timetable.rollback()
        for _, nth_tours in non_mandatory_tours.groupby('tour_num', sort=True):
            timetable.assign(window_row_ids=nth_tours['person_id'],
                             tdds=choices.reindex(nth_tours.index))

    timetable.replace_table()

    # choices are tdd alternative ids
    # we want to add start, end, and duration columns to tours, which we have in tdd_alts table
    choices = pd.merge(choices.to_frame('tdd'), tdd_alts,
                       left_on=['tdd'], right_index=True, how='left')

    assign_in_place(tours, choices)
    pipeline.replace_table("tours", tours)

    # updated df for tracing
    non_mandatory_tours = tours[tours.tour_category == 'non_mandatory']

    tracing.dump_df(DUMP,
                    tt.tour_map(persons_merged, non_mandatory_tours, tdd_alts),
                    trace_label, 'tour_map')

    if trace_hh_id:
        tracing.trace_df(non_mandatory_tours,
                         label="non_mandatory_tour_scheduling",
                         slicer='person_id',
                         index_label='tour_id',
                         columns=None,
                         warn_if_empty=True)
def tour_mode_choice_simulate(tours, persons_merged,
                              skim_dict, skim_stack,
                              chunk_size,
                              trace_hh_id):
    """
    Tour mode choice simulate

    Tours are merged with persons, labelled with a primary purpose ('univ' for
    school tours of university students, otherwise the tour type) and simulated
    one purpose at a time. The combined choices are applied to the tours table,
    which replaces the "tours" pipeline table. Returns None.
    """
    trace_label = 'tour_mode_choice'
    model_settings_file_name = 'tour_mode_choice.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)

    logsum_column_name = model_settings.get('MODE_CHOICE_LOGSUM_COLUMN_NAME')
    mode_column_name = 'tour_mode'  # FIXME - should be passed in?

    primary_tours = tours.to_frame()
    # at-work subtours are not handled by this step
    assert not (primary_tours.tour_category == 'atwork').any()

    persons_merged = persons_merged.to_frame()

    constants = config.get_model_constants(model_settings)

    logger.info("Running %s with %d tours", trace_label, primary_tours.shape[0])

    tracing.print_summary('tour_types',
                          primary_tours.tour_type, value_counts=True)

    primary_tours_merged = pd.merge(
        primary_tours, persons_merged,
        left_on='person_id', right_index=True,
        how='left', suffixes=('', '_r'))

    # setup skim keys
    orig_col_name = 'TAZ'
    dest_col_name = 'destination'
    out_time_col_name = 'start'
    in_time_col_name = 'end'

    # skim wrappers for each direction / period combination, plus the column
    # names they are keyed on
    skims = {
        "odt_skims": skim_stack.wrap(left_key=orig_col_name, right_key=dest_col_name,
                                     skim_key='out_period'),
        "dot_skims": skim_stack.wrap(left_key=dest_col_name, right_key=orig_col_name,
                                     skim_key='in_period'),
        "odr_skims": skim_stack.wrap(left_key=orig_col_name, right_key=dest_col_name,
                                     skim_key='in_period'),
        "dor_skims": skim_stack.wrap(left_key=dest_col_name, right_key=orig_col_name,
                                     skim_key='out_period'),
        "od_skims": skim_dict.wrap(orig_col_name, dest_col_name),
        'orig_col_name': orig_col_name,
        'dest_col_name': dest_col_name,
        'out_time_col_name': out_time_col_name,
        'in_time_col_name': in_time_col_name
    }

    estimator = estimation.manager.begin_estimation('tour_mode_choice')
    if estimator:
        # NOTE(review): write_coefficients as shown at the top of this file asserts
        # that a file name is available - confirm this one-argument call is supported
        estimator.write_coefficients(simulate.read_model_coefficients(model_settings))
        estimator.write_coefficients_template(
            simulate.read_model_coefficient_template(model_settings))
        estimator.write_spec(model_settings)
        estimator.write_model_settings(model_settings, model_settings_file_name)
        # FIXME run_tour_mode_choice_simulate writes choosers post-annotation

    # keep the tour type as the purpose unless it is a school tour by a university student
    tour_type = primary_tours_merged.tour_type
    keeps_tour_type = (tour_type != 'school') | ~primary_tours_merged.is_university
    primary_tours_merged['primary_purpose'] = tour_type.where(keeps_tour_type, 'univ')

    choices_list = []
    for primary_purpose, tours_segment in primary_tours_merged.groupby('primary_purpose'):

        logger.info("tour_mode_choice_simulate primary_purpose '%s' (%s tours)",
                    primary_purpose, len(tours_segment.index))

        # name index so tracing knows how to slice
        assert tours_segment.index.name == 'tour_id'

        segment_choices = run_tour_mode_choice_simulate(
            tours_segment,
            primary_purpose, model_settings,
            mode_column_name=mode_column_name,
            logsum_column_name=logsum_column_name,
            skims=skims,
            constants=constants,
            estimator=estimator,
            chunk_size=chunk_size,
            trace_label=tracing.extend_trace_label(trace_label, primary_purpose),
            trace_choice_name='tour_mode_choice')

        tracing.print_summary('tour_mode_choice_simulate %s choices_df' % primary_purpose,
                              segment_choices.tour_mode, value_counts=True)

        choices_list.append(segment_choices)

        # FIXME - force garbage collection
        force_garbage_collect()

    choices_df = pd.concat(choices_list)

    if estimator:
        estimator.write_choices(choices_df.tour_mode)
        choices_df.tour_mode = \
            estimator.get_survey_values(choices_df.tour_mode, 'tours', 'tour_mode')
        estimator.write_override_choices(choices_df.tour_mode)
        estimator.end_estimation()

    tracing.print_summary('tour_mode_choice_simulate all tour type choices',
                          choices_df.tour_mode, value_counts=True)

    # so we can trace with annotations
    assign_in_place(primary_tours, choices_df)

    # apply the choices to a fresh frame of the tours table and store that
    all_tours = tours.to_frame()
    assign_in_place(all_tours, choices_df)

    pipeline.replace_table("tours", all_tours)

    if trace_hh_id:
        tracing.trace_df(primary_tours,
                         label=tracing.extend_trace_label(trace_label, mode_column_name),
                         slicer='tour_id',
                         index_label='tour_id',
                         warn_if_empty=True)
def joint_tour_frequency(households, persons,
                         chunk_size,
                         trace_hh_id):
    """
    This model predicts the frequency of making fully joint trips (see the
    alternatives above).

    Parameters
    ----------
    households, persons :
        pipeline tables; each is converted with ``to_frame()``
    chunk_size :
        handed through to ``simulate.simple_simulate``
    trace_hh_id :
        when truthy, the annotated households and the new joint tours are traced

    Returns
    -------
    None
        joint tours are appended to the "tours" pipeline table and the
        "households" table gains 'joint_tour_frequency' and 'num_hh_joint_tours'.

    Raises
    ------
    AssertionError
        if the alternatives table has no alternative with zero tours, or (when
        estimating) if the generated joint tours and the survey joint tours do
        not have the same index.
    """
    trace_label = 'joint_tour_frequency'
    model_settings_file_name = 'joint_tour_frequency.yaml'

    estimator = estimation.manager.begin_estimation('joint_tour_frequency')

    model_settings = config.read_model_settings(model_settings_file_name)

    alternatives = simulate.read_model_alts(
        'joint_tour_frequency_alternatives.csv', set_index='alt')

    # - only interested in households with more than one cdap travel_active person and
    # - at least one non-preschooler
    households = households.to_frame()
    multi_person_households = households[households.participates_in_jtf_model].copy()

    # - only interested in persons in multi_person_households
    # FIXME - gratuitous pathological efficiency move, just let yaml specify persons?
    persons = persons.to_frame()
    persons = persons[persons.household_id.isin(multi_person_households.index)]

    logger.info("Running joint_tour_frequency with %d multi-person households",
                multi_person_households.shape[0])

    # - preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_dict = {
            'persons': persons,
            'hh_time_window_overlap': hh_time_window_overlap
        }

        expressions.assign_columns(
            df=multi_person_households,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients_df = simulate.read_model_coefficients(model_settings)
    model_spec = simulate.eval_coefficients(model_spec, coefficients_df, estimator)

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    if estimator:
        estimator.write_spec(model_settings)
        estimator.write_model_settings(model_settings, model_settings_file_name)
        estimator.write_coefficients(coefficients_df, model_settings)
        estimator.write_choosers(multi_person_households)

    choices = simulate.simple_simulate(
        choosers=multi_person_households,
        spec=model_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='joint_tour_frequency',
        estimator=estimator)

    # convert indexes to alternative names
    choices = pd.Series(model_spec.columns[choices.values], index=choices.index)

    if estimator:
        estimator.write_choices(choices)
        choices = estimator.get_survey_values(choices, 'households', 'joint_tour_frequency')
        estimator.write_override_choices(choices)
        estimator.end_estimation()

    # - create joint_tours based on joint_tour_frequency choices

    # - we need a person_id in order to generate the tour index (and for register_traceable_table)
    # - but we don't know the tour participants yet
    # - so we arbitrarily choose the first person in the household
    # - to be point person for the purpose of generating an index and setting origin
    # (explicit copy: a column is added below and must not be written onto a slice of persons)
    temp_point_persons = persons.loc[persons.PNUM == 1].copy()
    temp_point_persons['person_id'] = temp_point_persons.index
    temp_point_persons = temp_point_persons.set_index('household_id')
    temp_point_persons = temp_point_persons[['person_id', 'home_zone_id']]

    joint_tours = process_joint_tours(choices, alternatives, temp_point_persons)

    pipeline.extend_table("tours", joint_tours)

    tracing.register_traceable_table('tours', joint_tours)
    pipeline.get_rn_generator().add_channel('tours', joint_tours)

    # - annotate households

    # we expect there to be an alt with no tours - which we can use to backfill non-travelers
    # select it through the mask: taking .index[0] of the boolean series itself
    # would return the first alternative whether or not its row sums to zero
    is_no_tours_alt = (alternatives.sum(axis=1) == 0).values
    no_tours_alts = alternatives.index[is_no_tours_alt]
    assert len(no_tours_alts) > 0, \
        "expected an alternative with no tours in joint_tour_frequency alternatives"
    no_tours_alt = no_tours_alts[0]

    households['joint_tour_frequency'] = \
        choices.reindex(households.index).fillna(no_tours_alt).astype(str)

    households['num_hh_joint_tours'] = joint_tours.groupby('household_id').size().\
        reindex(households.index).fillna(0).astype(np.int8)

    pipeline.replace_table("households", households)

    tracing.print_summary('joint_tour_frequency', households.joint_tour_frequency,
                          value_counts=True)

    if trace_hh_id:
        tracing.trace_df(households,
                         label="joint_tour_frequency.households")

        tracing.trace_df(joint_tours,
                         label="joint_tour_frequency.joint_tours",
                         slicer='household_id')

    if estimator:
        # sanity check: the generated joint tours must match the survey joint tours
        survey_tours = estimation.manager.get_survey_table('tours')
        survey_tours = survey_tours[survey_tours.tour_category == 'joint']

        print(f"len(survey_tours) {len(survey_tours)}")
        print(f"len(joint_tours) {len(joint_tours)}")

        different = False

        survey_tours_not_in_tours = \
            survey_tours[~survey_tours.index.isin(joint_tours.index)]
        if len(survey_tours_not_in_tours) > 0:
            print(f"survey_tours_not_in_tours\n{survey_tours_not_in_tours}")
            different = True

        tours_not_in_survey_tours = \
            joint_tours[~joint_tours.index.isin(survey_tours.index)]
        # test the frame computed just above (the other frame was already checked)
        if len(tours_not_in_survey_tours) > 0:
            print(f"tours_not_in_survey_tours\n{tours_not_in_survey_tours}")
            different = True

        assert not different
def non_mandatory_tour_destination(
        tours,
        persons_merged,
        skim_dict, skim_stack,
        chunk_size,
        trace_hh_id):
    """
    Given the tour generation from the above, each tour needs to have a
    destination, so in this case tours are the choosers (with the associated
    person that's making the tour)

    The chosen destination - and the logsum, when DEST_CHOICE_LOGSUM_COLUMN_NAME
    is configured - are assigned onto the tours frame, which replaces the "tours"
    pipeline table. When sample tables are wanted, the returned sample is added
    to the pipeline under DEST_CHOICE_SAMPLE_TABLE_NAME. Returns None.
    """

    trace_label = 'non_mandatory_tour_destination'
    model_settings_file_name = 'non_mandatory_tour_destination.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)

    # a logsum column name in the settings switches logsum output on
    logsum_column_name = model_settings.get('DEST_CHOICE_LOGSUM_COLUMN_NAME')
    want_logsums = logsum_column_name is not None

    sample_table_name = model_settings.get('DEST_CHOICE_SAMPLE_TABLE_NAME')
    want_sample_table = \
        config.setting('want_dest_choice_sample_tables') and sample_table_name is not None

    tours = tours.to_frame()
    persons_merged = persons_merged.to_frame()

    # choosers are tours - in a sense tours are choosing their destination
    is_non_mandatory = tours.tour_category == 'non_mandatory'
    non_mandatory_tours = tours[is_non_mandatory]

    # - nothing to do without non-mandatory tours
    if len(non_mandatory_tours) == 0:
        tracing.no_results(trace_label)
        return

    estimator = estimation.manager.begin_estimation('non_mandatory_tour_destination')
    if estimator:
        # NOTE(review): write_coefficients as shown at the top of this file asserts
        # that a file name is available - confirm this one-argument call is supported
        estimator.write_coefficients(simulate.read_model_coefficients(model_settings))
        # the sample spec (tag='SAMPLE_SPEC') is deliberately not written
        estimator.write_spec(model_settings, tag='SPEC')
        estimator.set_alt_id(model_settings["ALT_DEST_COL_NAME"])
        estimator.write_table(inject.get_injectable('size_terms'), 'size_terms', append=False)
        estimator.write_table(inject.get_table('land_use').to_frame(), 'landuse', append=False)
        estimator.write_model_settings(model_settings, model_settings_file_name)

    choices_df, save_sample_df = tour_destination.run_tour_destination(
        non_mandatory_tours,
        persons_merged,
        want_logsums,
        want_sample_table,
        model_settings,
        skim_dict,
        skim_stack,
        estimator,
        chunk_size, trace_hh_id, trace_label)

    if estimator:
        estimator.write_choices(choices_df.choice)
        choices_df.choice = \
            estimator.get_survey_values(choices_df.choice, 'tours', 'destination')
        estimator.write_override_choices(choices_df.choice)
        estimator.end_estimation()

    non_mandatory_tours['destination'] = choices_df.choice
    assign_in_place(tours, non_mandatory_tours[['destination']])

    if want_logsums:
        non_mandatory_tours[logsum_column_name] = choices_df['logsum']
        assign_in_place(tours, non_mandatory_tours[[logsum_column_name]])

    pipeline.replace_table("tours", tours)

    if want_sample_table:
        # one distinct first-level index value is expected per row of choices_df
        sampled_choosers = save_sample_df.index.get_level_values(0).unique()
        assert len(sampled_choosers) == len(choices_df)
        pipeline.extend_table(sample_table_name, save_sample_df)

    if trace_hh_id:
        tracing.trace_df(tours[tours.tour_category == 'non_mandatory'],
                         label="non_mandatory_tour_destination",
                         slicer='person_id',
                         index_label='tour',
                         columns=None,
                         warn_if_empty=True)
def free_parking(
        persons_merged, persons, households,
        skim_dict, skim_stack,
        chunk_size, trace_hh_id, locutor):
    """
    Simulate whether each person with a workplace has free parking at work.

    Choosers are the persons_merged rows with workplace_taz > -1. The result
    is stored as the boolean column 'free_parking_at_work' on the pipeline
    "persons" table; persons that were not choosers get False.
    """

    trace_label = 'free_parking'
    settings_file_name = 'free_parking.yaml'

    all_persons_merged = persons_merged.to_frame()
    choosers = all_persons_merged[all_persons_merged.workplace_taz > -1]
    logger.info("Running %s with %d persons", trace_label, len(choosers))

    model_settings = config.read_model_settings(settings_file_name)
    estimator = estimation.manager.begin_estimation('free_parking')

    constants = config.get_model_constants(model_settings)

    # - preprocessor (optional): annotate the choosers in place
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:
        locals_d = dict(constants) if constants is not None else {}
        expressions.assign_columns(
            df=choosers,
            model_settings=preprocessor_settings,
            locals_dict=locals_d,
            trace_label=trace_label)

    raw_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients_df = simulate.read_model_coefficients(model_settings)
    model_spec = simulate.eval_coefficients(raw_spec, coefficients_df, estimator)

    nest_spec = config.get_logit_model_settings(model_settings)

    if estimator:
        estimator.write_model_settings(model_settings, settings_file_name)
        estimator.write_spec(model_settings)
        estimator.write_coefficients(coefficients_df)
        estimator.write_choosers(choosers)

    chosen_alts = simulate.simple_simulate(
        choosers=choosers,
        spec=model_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='free_parking_at_work',
        estimator=estimator)

    # convert the chosen alternative into a has-free-parking flag
    choices = (chosen_alts == model_settings['FREE_PARKING_ALT'])

    if estimator:
        estimator.write_choices(choices)
        choices = estimator.get_survey_values(choices, 'persons', 'free_parking_at_work')
        estimator.write_override_choices(choices)
        estimator.end_estimation()

    persons = persons.to_frame()

    # reindex since only persons with a workplace were choosers
    has_free_parking = choices.reindex(persons.index)
    persons['free_parking_at_work'] = has_free_parking.fillna(0).astype(bool)

    pipeline.replace_table("persons", persons)

    tracing.print_summary('free_parking',
                          persons.free_parking_at_work,
                          value_counts=True)

    if trace_hh_id:
        tracing.trace_df(persons,
                         label=trace_label,
                         warn_if_empty=True)
def mandatory_tour_scheduling(tours,
                              persons_merged,
                              tdd_alts,
                              chunk_size,
                              trace_hh_id):
    """
    This model predicts the departure time and duration of each activity for
    mandatory tours.

    Parameters
    ----------
    tours : table wrapper providing ``to_frame()``
        all tours; only rows with tour_category == 'mandatory' are scheduled
    persons_merged : table wrapper providing ``to_frame()``
    tdd_alts : pandas.DataFrame
        tour departure/duration alternatives, indexed by tdd alternative id
        and merged onto the choices to add its columns to tours
    chunk_size : passed through to vectorize_tour_scheduling
    trace_hh_id : truthy to trace the scheduled mandatory tours

    Returns
    -------
    None
        The pipeline "tours" table and the timetable are replaced as a side
        effect. Returns early (after tracing.no_results) when there are no
        mandatory tours.
    """
    trace_label = 'mandatory_tour_scheduling'
    model_settings_file_name = 'mandatory_tour_scheduling.yaml'

    model_settings = config.read_model_settings(model_settings_file_name)
    logsum_settings = config.read_model_settings(model_settings['LOGSUM_SETTINGS'])

    tours = tours.to_frame()
    mandatory_tours = tours[tours.tour_category == 'mandatory']

    # - if no mandatory_tours
    if mandatory_tours.shape[0] == 0:
        tracing.no_results(trace_label)
        return

    persons_merged = persons_merged.to_frame()

    # - filter chooser columns for both logsums and simulate
    logsum_columns = logsum_settings.get('LOGSUM_CHOOSER_COLUMNS', [])
    model_columns = model_settings.get('SIMULATE_CHOOSER_COLUMNS', [])
    chooser_columns = logsum_columns + [c for c in model_columns if c not in logsum_columns]
    persons_merged = expressions.filter_chooser_columns(persons_merged, chooser_columns)

    # - add tour segmentation column
    # mtctm1 segments mandatory_scheduling spec by tour_type
    # (i.e. there are different specs for work and school tour_types)
    # mtctm1 logsum coefficients are segmented by primary_purpose
    # (i.e. there are different logsum coefficients for work, school, univ primary_purposes)
    # for simplicity managing these different segmentation schemes,
    # we conflate them by segmenting the skims to align with primary_purpose
    tour_segment_col = 'mandatory_tour_seg'
    assert tour_segment_col not in mandatory_tours
    is_university_tour = \
        (mandatory_tours.tour_type == 'school') & \
        reindex(persons_merged.is_university, mandatory_tours.person_id)
    mandatory_tours[tour_segment_col] = \
        mandatory_tours.tour_type.where(~is_university_tour, 'univ')

    # load specs
    spec_segment_settings = model_settings.get('SPEC_SEGMENTS', {})
    specs = {}
    estimators = {}  # active estimators keyed by spec_segment_name
    for spec_segment_name, spec_settings in spec_segment_settings.items():

        # estimator for this tour_segment
        estimator = estimation.manager.begin_estimation(
            model_name='mandatory_tour_scheduling_%s' % spec_segment_name,
            bundle_name='mandatory_tour_scheduling')

        spec_file_name = spec_settings['SPEC']
        model_spec = simulate.read_model_spec(file_name=spec_file_name)
        coefficients_df = simulate.read_model_coefficients(spec_settings)
        specs[spec_segment_name] = simulate.eval_coefficients(model_spec, coefficients_df, estimator)

        if estimator:
            estimators[spec_segment_name] = estimator  # add to local list
            estimator.write_model_settings(model_settings, model_settings_file_name)
            estimator.write_spec(spec_settings)
            estimator.write_coefficients(coefficients_df)

    # - spec dict segmented by primary_purpose
    tour_segment_settings = model_settings.get('TOUR_SPEC_SEGMENTS', {})
    tour_segments = {}
    for tour_segment_name, spec_segment_name in tour_segment_settings.items():
        tour_segments[tour_segment_name] = {
            'spec_segment_name': spec_segment_name,
            'spec': specs[spec_segment_name],
            'estimator': estimators.get(spec_segment_name),
        }

    timetable = inject.get_injectable("timetable")

    if estimators:
        # so the modeled assignments can be rolled back and replaced by overrides
        timetable.begin_transaction(list(estimators.values()))

    # report the number of tours actually being scheduled (not all tours)
    logger.info("Running mandatory_tour_scheduling with %d tours", len(mandatory_tours))
    choices = vts.vectorize_tour_scheduling(
        mandatory_tours, persons_merged,
        tdd_alts, timetable,
        tour_segments=tour_segments,
        tour_segment_col=tour_segment_col,
        model_settings=model_settings,
        chunk_size=chunk_size,
        trace_label=trace_label)

    if estimators:
        # override choices for all estimators
        choices_list = []
        for spec_segment_name, estimator in estimators.items():

            model_choices = choices[(mandatory_tours.tour_type == spec_segment_name)]

            # FIXME vectorize_tour_scheduling calls used to write_choices but perhaps shouldn't
            estimator.write_choices(model_choices)
            override_choices = estimator.get_survey_values(model_choices, 'tours', 'tdd')
            estimator.write_override_choices(override_choices)

            choices_list.append(override_choices)
            estimator.end_estimation()
        choices = pd.concat(choices_list)

        # update timetable to reflect the override choices (assign tours in tour_num order)
        # iterate only over the tours that were scheduled here: choices holds no
        # entry for any other tour, so reindexing against them would yield NaN tdds
        timetable.rollback()
        for tour_num, nth_tours in mandatory_tours.groupby('tour_num', sort=True):
            timetable.assign(window_row_ids=nth_tours['person_id'],
                             tdds=choices.reindex(nth_tours.index))

    # choices are tdd alternative ids
    # we want to add start, end, and duration columns to tours, which we have in tdd_alts table
    choices = pd.merge(choices.to_frame('tdd'), tdd_alts,
                       left_on=['tdd'], right_index=True, how='left')

    assign_in_place(tours, choices)
    pipeline.replace_table("tours", tours)

    timetable.replace_table()

    # updated df for tracing
    mandatory_tours = tours[tours.tour_category == 'mandatory']

    tracing.dump_df(DUMP,
                    tt.tour_map(persons_merged, mandatory_tours, tdd_alts),
                    trace_label, 'tour_map')

    if trace_hh_id:
        tracing.trace_df(mandatory_tours,
                         label="mandatory_tour_scheduling",
                         slicer='person_id',
                         index_label='tour',
                         columns=None,
                         warn_if_empty=True)
def atwork_subtour_scheduling(
        tours,
        persons_merged,
        tdd_alts,
        skim_dict,
        chunk_size,
        trace_hh_id):
    """
    Schedule at-work subtours: predict the departure time and duration of
    each tour whose tour_category is 'atwork'.

    The chosen tdd alternative ids are merged with tdd_alts and assigned into
    the tours dataframe, which then replaces the pipeline "tours" table.
    Returns None (early, after tracing.no_results, if there are no subtours).
    """

    trace_label = 'atwork_subtour_scheduling'
    settings_file_name = 'tour_scheduling_atwork.yaml'

    tours = tours.to_frame()
    subtours = tours[tours.tour_category == 'atwork']

    # - if no atwork subtours
    if len(subtours) == 0:
        tracing.no_results(trace_label)
        return

    model_settings = config.read_model_settings(settings_file_name)
    estimator = estimation.manager.begin_estimation('atwork_subtour_scheduling')

    raw_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients_df = simulate.read_model_coefficients(model_settings)
    model_spec = simulate.eval_coefficients(raw_spec, coefficients_df, estimator)

    persons_merged = persons_merged.to_frame()

    logger.info("Running %s with %d tours", trace_label, len(subtours))

    # preprocessor
    constants = config.get_model_constants(model_settings)
    skims = {
        "od_skims": skim_dict.wrap('origin', 'destination'),
        "do_skims": skim_dict.wrap('destination', 'origin'),
    }
    annotate_preprocessors(
        subtours, constants, skims,
        model_settings, trace_label)

    # parent_tours table with columns ['tour_id', 'tdd'] index = tour_id
    parent_ids = subtours.parent_tour_id.astype(np.int64).unique()
    parent_tours = pd.DataFrame({'tour_id': parent_ids}, index=parent_ids)
    parent_tours = parent_tours.merge(tours[['tdd']], left_index=True, right_index=True)

    if estimator:
        estimator.write_model_settings(model_settings, settings_file_name)
        estimator.write_spec(model_settings)
        estimator.write_coefficients(coefficients_df)
        # we don't need to update timetable because subtours are scheduled inside work trip windows

    choices = vectorize_subtour_scheduling(
        parent_tours,
        subtours,
        persons_merged,
        tdd_alts, model_spec,
        model_settings,
        estimator=estimator,
        chunk_size=chunk_size,
        trace_label=trace_label)

    if estimator:
        estimator.write_choices(choices)
        choices = estimator.get_survey_values(choices, 'tours', 'tdd')
        estimator.write_override_choices(choices)
        estimator.end_estimation()

    # choices are tdd alternative ids
    # attach the tdd_alts columns for each chosen alternative before assigning to tours
    tdd_choices = pd.merge(choices.to_frame('tdd'), tdd_alts,
                           left_on=['tdd'], right_index=True, how='left')

    assign_in_place(tours, tdd_choices)
    pipeline.replace_table("tours", tours)

    if trace_hh_id:
        tracing.trace_df(tours[tours.tour_category == 'atwork'],
                         label="atwork_subtour_scheduling",
                         slicer='person_id',
                         index_label='tour_id',
                         columns=None)

    if DUMP:
        subtours = tours[tours.tour_category == 'atwork']
        parent_tours = tours[tours.index.isin(subtours.parent_tour_id)]

        tracing.dump_df(DUMP, subtours, trace_label, 'sub_tours')
        tracing.dump_df(DUMP, parent_tours, trace_label, 'parent_tours')

        # stack parents and subtours so tour_map can group them by parent_tour_id
        parent_tours['parent_tour_id'] = parent_tours.index
        subtours = pd.concat([parent_tours, subtours])
        tour_map = tt.tour_map(parent_tours, subtours, tdd_alts,
                               persons_id_col='parent_tour_id')
        tracing.dump_df(DUMP, tour_map, trace_label, 'tour_map')
def atwork_subtour_frequency(tours,
                             persons_merged,
                             chunk_size,
                             trace_hh_id):
    """
    Predict, for each work tour, the frequency of at-work subtours
    (the alternatives come from a separate csv file which is configured by
    the user), then create the corresponding subtours.

    Adds an 'atwork_subtour_frequency' column to the pipeline "tours" table
    and extends that table with the generated subtours. Returns None.
    """

    trace_label = 'atwork_subtour_frequency'
    settings_file_name = 'atwork_subtour_frequency.yaml'

    tours = tours.to_frame()
    work_tours = tours[tours.tour_type == 'work']

    # - if no work_tours
    if not len(work_tours):
        add_null_results(trace_label, tours)
        return

    model_settings = config.read_model_settings(settings_file_name)
    estimator = estimation.manager.begin_estimation('atwork_subtour_frequency')

    raw_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients_df = simulate.read_model_coefficients(model_settings)
    model_spec = simulate.eval_coefficients(raw_spec, coefficients_df, estimator)

    alternatives = simulate.read_model_alts(
        'atwork_subtour_frequency_alternatives.csv', set_index='alt')

    # merge persons into work_tours
    persons_merged = persons_merged.to_frame()
    work_tours = pd.merge(work_tours, persons_merged,
                          left_on='person_id', right_index=True)

    logger.info("Running atwork_subtour_frequency with %d work tours", len(work_tours))

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    # - preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:
        assign_columns(
            df=work_tours,
            model_settings=preprocessor_settings,
            trace_label=trace_label)

    if estimator:
        estimator.write_spec(model_settings)
        estimator.write_model_settings(model_settings, settings_file_name)
        estimator.write_coefficients(coefficients_df)
        estimator.write_choosers(work_tours)

    alt_positions = simulate.simple_simulate(
        choosers=work_tours,
        spec=model_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='atwork_subtour_frequency',
        estimator=estimator)

    # convert indexes to alternative names
    alt_names = model_spec.columns[alt_positions.values]
    choices = pd.Series(alt_names, index=alt_positions.index)

    if estimator:
        estimator.write_choices(choices)
        choices = estimator.get_survey_values(choices, 'tours', 'atwork_subtour_frequency')
        estimator.write_override_choices(choices)
        estimator.end_estimation()

    # add atwork_subtour_frequency column to tours
    # reindex since we are working with a subset of tours
    tours['atwork_subtour_frequency'] = choices.reindex(tours.index)
    pipeline.replace_table("tours", tours)

    # - create atwork_subtours based on atwork_subtour_frequency choice names
    work_tours = tours[tours.tour_type == 'work']
    assert not work_tours.atwork_subtour_frequency.isnull().any()

    subtours = process_atwork_subtours(work_tours, alternatives)

    tours = pipeline.extend_table("tours", subtours)

    tracing.register_traceable_table('tours', subtours)
    pipeline.get_rn_generator().add_channel('tours', subtours)

    tracing.print_summary('atwork_subtour_frequency',
                          tours.atwork_subtour_frequency,
                          value_counts=True)

    if trace_hh_id:
        tracing.trace_df(tours, label='atwork_subtour_frequency.tours')
def non_mandatory_tour_frequency(persons, persons_merged,
                                 chunk_size,
                                 trace_hh_id):
    """
    This model predicts the frequency of making non-mandatory trips
    (alternatives for this model come from a separate csv file which is
    configured by the user) - these trips include escort, shopping, othmaint,
    othdiscr, eatout, and social trips in various combination.

    Parameters
    ----------
    persons : table wrapper providing ``to_frame()``
    persons_merged : table wrapper providing ``to_frame()``
        choosers are its rows with cdap_activity in ['M', 'N']
    chunk_size : passed through to interaction_simulate
    trace_hh_id : truthy to trace the tours, choosers and annotated persons

    Returns
    -------
    None
        Adds 'non_mandatory_tour_frequency' to the pipeline "persons" table
        and extends the pipeline "tours" table with the generated tours.
    """

    trace_label = 'non_mandatory_tour_frequency'
    model_settings_file_name = 'non_mandatory_tour_frequency.yaml'

    model_settings = config.read_model_settings(model_settings_file_name)

    # FIXME kind of tacky both that we know to add this here and del it below
    # 'tot_tours' is used in model_spec expressions
    alternatives = simulate.read_model_alts(
        'non_mandatory_tour_frequency_alternatives.csv', set_index=None)
    alternatives['tot_tours'] = alternatives.sum(axis=1)

    # filter based on results of CDAP
    choosers = persons_merged.to_frame()
    choosers = choosers[choosers.cdap_activity.isin(['M', 'N'])]

    # - preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_dict = {
            'person_max_window': person_max_window
        }

        expressions.assign_columns(
            df=choosers,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)

    logger.info("Running non_mandatory_tour_frequency with %d persons", len(choosers))

    constants = config.get_model_constants(model_settings)

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    spec_segments = model_settings.get('SPEC_SEGMENTS', {})

    # the estimation checks after the loop test `estimator`; bind it up front
    # so they are skipped (rather than raising NameError) when no segment runs
    estimator = None

    # segment by person type and pick the right spec for each person type
    choices_list = []
    for segment_settings in spec_segments:

        segment_name = segment_settings['NAME']
        ptype = segment_settings['PTYPE']

        # pick the spec column for the segment
        segment_spec = model_spec[[segment_name]]

        chooser_segment = choosers[choosers.ptype == ptype]

        logger.info("Running segment '%s' of size %d", segment_name, len(chooser_segment))

        if len(chooser_segment) == 0:
            # skip empty segments
            continue

        estimator = \
            estimation.manager.begin_estimation(model_name=segment_name,
                                                bundle_name='non_mandatory_tour_frequency')

        coefficients_df = simulate.read_model_coefficients(segment_settings)
        segment_spec = simulate.eval_coefficients(segment_spec, coefficients_df, estimator)

        if estimator:
            estimator.write_spec(model_settings, bundle_directory=True)
            estimator.write_model_settings(model_settings, model_settings_file_name,
                                           bundle_directory=True)
            # preserving coefficients file name makes bringing back updated
            # coefficients more straightforward
            estimator.write_coefficients(coefficients_df, segment_settings)
            estimator.write_choosers(chooser_segment)
            estimator.write_alternatives(alternatives, bundle_directory=True)

            # FIXME #interaction_simulate_estimation_requires_chooser_id_in_df_column
            #  should we do it here or have interaction_simulate do it?
            # chooser index must be duplicated in column or it will be omitted
            # from interaction_dataset
            # estimation requires that chooser_id is either in index or a column
            # of interaction_dataset so it can be reformatted (melted) and
            # indexed by chooser_id and alt_id
            assert chooser_segment.index.name == 'person_id'
            assert 'person_id' not in chooser_segment.columns
            chooser_segment['person_id'] = chooser_segment.index

            # FIXME set_alt_id - do we need this for interaction_simulate
            #  estimation bundle tables?
            estimator.set_alt_id('alt_id')

            estimator.set_chooser_id(chooser_segment.index.name)

        choices = interaction_simulate(
            chooser_segment,
            alternatives,
            spec=segment_spec,
            locals_d=constants,
            chunk_size=chunk_size,
            trace_label='non_mandatory_tour_frequency.%s' % segment_name,
            trace_choice_name='non_mandatory_tour_frequency',
            estimator=estimator)

        if estimator:
            estimator.write_choices(choices)
            choices = estimator.get_survey_values(
                choices, 'persons', 'non_mandatory_tour_frequency')
            estimator.write_override_choices(choices)
            estimator.end_estimation()

        choices_list.append(choices)

        # FIXME - force garbage collection?
        force_garbage_collect()

    del alternatives['tot_tours']  # del tot_tours column we added above

    # The choice value 'non_mandatory_tour_frequency' assigned by interaction_simulate
    # is the index value of the chosen alternative in the alternatives table.
    choices = pd.concat(choices_list).sort_index()

    # add non_mandatory_tour_frequency column to persons
    persons = persons.to_frame()

    # we expect there to be an alt with no tours - which we can use to backfill non-travelers
    # select the first alternative whose tour counts really sum to zero, rather
    # than assuming it is the first row of the alternatives table
    zero_tour_alts = alternatives.index[(alternatives.sum(axis=1) == 0).values]
    assert len(zero_tour_alts) > 0, \
        "expected a non_mandatory_tour_frequency alternative with no tours"
    no_tours_alt = zero_tour_alts[0]

    # need to reindex as we only handled persons with cdap_activity in ['M', 'N']
    persons['non_mandatory_tour_frequency'] = \
        choices.reindex(persons.index).fillna(no_tours_alt).astype(np.int8)

    # We have now generated non-mandatory tour frequencies, but they are
    # attributes of the person table. Now we create a "tours" table which has
    # one row per tour that has been generated (and the person id it is
    # associated with).
    #
    # But before we do that, we run an additional probabilistic step to
    # extend/increase tour counts beyond the strict limits of the
    # tour_frequency alternatives chosen above (which are currently limited to
    # at most 2 escort tours and 1 each of shopping, othmaint, othdiscr,
    # eatout, and social tours).
    #
    # The choice value 'non_mandatory_tour_frequency' assigned by
    # interaction_simulate is simply the index value of the chosen alternative
    # in the alternatives table.
    #
    # get counts of each of the tour type alternatives (so we can extend)
    #
    #                escort  shopping  othmaint  othdiscr  eatout  social
    #     parent_id
    #     2588676         2         0         0         1       1       0
    #     2588677         0         1         0         1       0       0

    # counts of each of the tour type alternatives (so we can extend)
    modeled_tour_counts = alternatives.loc[choices]
    modeled_tour_counts.index = choices.index  # assign person ids to the index

    # - extend_tour_counts - probabilistic
    extended_tour_counts = \
        extend_tour_counts(choosers, modeled_tour_counts.copy(), alternatives,
                           trace_hh_id,
                           tracing.extend_trace_label(trace_label, 'extend_tour_counts'))

    num_modeled_tours = modeled_tour_counts.sum().sum()
    num_extended_tours = extended_tour_counts.sum().sum()
    logger.info("extend_tour_counts increased tour count by %s from %s to %s",
                num_extended_tours - num_modeled_tours,
                num_modeled_tours, num_extended_tours)

    # when estimating, replace the extended tour counts with the survey values
    # (read from persons columns named '_<tour type>')
    if estimator:
        override_tour_counts = \
            estimation.manager.get_survey_values(
                extended_tour_counts,
                table_name='persons',
                column_names=['_%s' % c for c in extended_tour_counts.columns])
        override_tour_counts = \
            override_tour_counts.rename(
                columns={('_%s' % c): c for c in extended_tour_counts.columns})
        logger.info("estimation get_survey_values override_tour_counts %s changed cells" %
                    (override_tour_counts != extended_tour_counts).sum().sum())
        extended_tour_counts = override_tour_counts

    # create the non_mandatory tours based on extended_tour_counts
    non_mandatory_tours = process_non_mandatory_tours(persons, extended_tour_counts)
    assert len(non_mandatory_tours) == extended_tour_counts.sum().sum()

    if estimator:

        # make sure they created the right tours
        survey_tours = estimation.manager.get_survey_table('tours').sort_index()
        non_mandatory_survey_tours = \
            survey_tours[survey_tours.tour_category == 'non_mandatory']
        assert len(non_mandatory_survey_tours) == len(non_mandatory_tours)
        assert non_mandatory_survey_tours.index.equals(
            non_mandatory_tours.sort_index().index)

        # make sure they created tours with the expected tour_ids
        columns = ['person_id', 'household_id', 'tour_type', 'tour_category']
        survey_tours = \
            estimation.manager.get_survey_values(non_mandatory_tours,
                                                 table_name='tours',
                                                 column_names=columns)

        tours_differ = (non_mandatory_tours[columns] != survey_tours[columns]).any(axis=1)

        if tours_differ.any():
            print("tours_differ\n%s" % tours_differ)
            print("%s of %s tours differ" % (tours_differ.sum(), len(tours_differ)))
            print("differing survey_tours\n%s" % survey_tours[tours_differ])
            print("differing modeled_tours\n%s" % non_mandatory_tours[columns][tours_differ])

        assert (not tours_differ.any())

    pipeline.extend_table("tours", non_mandatory_tours)

    tracing.register_traceable_table('tours', non_mandatory_tours)
    pipeline.get_rn_generator().add_channel('tours', non_mandatory_tours)

    expressions.assign_columns(
        df=persons,
        model_settings=model_settings.get('annotate_persons'),
        trace_label=trace_label)

    pipeline.replace_table("persons", persons)

    tracing.print_summary('non_mandatory_tour_frequency',
                          persons.non_mandatory_tour_frequency, value_counts=True)

    if trace_hh_id:
        tracing.trace_df(non_mandatory_tours,
                         label="non_mandatory_tour_frequency.non_mandatory_tours",
                         warn_if_empty=True)

        tracing.trace_df(choosers,
                         label="non_mandatory_tour_frequency.choosers",
                         warn_if_empty=True)

        tracing.trace_df(persons,
                         label="non_mandatory_tour_frequency.annotated_persons",
                         warn_if_empty=True)
def stop_frequency(
        tours, tours_merged,
        stop_frequency_alts,
        network_los,
        chunk_size,
        trace_hh_id):
    """
    stop frequency model

    For each tour, choose a number of intermediate inbound stops and outbound
    stops. Create a trip table with inbound and outbound trips.

    Thus, a tour with stop_frequency '2out_0in' will have two outbound and
    zero inbound stops, and four corresponding trips: three outbound, and one
    inbound.

    Adds stop_frequency str column to tours and extends the pipeline "trips"
    table with the trips created by process_trips, documented upstream as
    having the columns:

    ::

        - person_id
        - household_id
        - tour_id
        - primary_purpose
        - atwork
        - trip_num
        - outbound
        - trip_count

    Parameters
    ----------
    tours, tours_merged : table wrappers providing ``to_frame()``
    stop_frequency_alts : passed through to process_trips
    network_los : provides the default skim dict used by the preprocessor
    chunk_size : passed through to simple_simulate
    trace_hh_id : truthy to trace tours, trips, annotations and tours_merged

    Returns
    -------
    None
    """

    trace_label = 'stop_frequency'
    model_settings_file_name = 'stop_frequency.yaml'

    model_settings = config.read_model_settings(model_settings_file_name)

    tours = tours.to_frame()
    tours_merged = tours_merged.to_frame()

    assert not tours_merged.household_id.isnull().any()
    assert not (tours_merged.origin == -1).any()
    assert not (tours_merged.destination == -1).any()

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    # only produced when a preprocessor is configured; traced at the end if present
    annotations = None

    # - run preprocessor to annotate tours_merged
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        # hack: preprocessor adds origin column in place if it does not exist already
        assert 'origin' in tours_merged
        assert 'destination' in tours_merged
        od_skim_stack_wrapper = network_los.get_default_skim_dict().wrap('origin', 'destination')
        skims = [od_skim_stack_wrapper]

        locals_dict = {
            "od_skims": od_skim_stack_wrapper,
            'network_los': network_los
        }
        locals_dict.update(constants)

        simulate.set_skim_wrapper_targets(tours_merged, skims)

        # this should be pre-slice as some expressions may count tours by type
        annotations = expressions.compute_columns(
            df=tours_merged,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)

        assign_in_place(tours_merged, annotations)

    tracing.print_summary('stop_frequency segments',
                          tours_merged.primary_purpose, value_counts=True)

    spec_segments = model_settings.get('SPEC_SEGMENTS')
    assert spec_segments is not None, \
        f"SPEC_SEGMENTS setting not found in model settings: {model_settings_file_name}"
    segment_col = model_settings.get('SEGMENT_COL')
    assert segment_col is not None, \
        f"SEGMENT_COL setting not found in model settings: {model_settings_file_name}"

    # the post-loop estimation checks test `estimator`; bind it up front so they
    # are skipped (rather than raising NameError) when every segment is empty
    estimator = None

    choices_list = []
    for segment_settings in spec_segments:

        # the segment is both named by, and selected on, its SEGMENT_COL value
        segment_name = segment_settings[segment_col]
        segment_value = segment_settings[segment_col]

        chooser_segment = tours_merged[tours_merged[segment_col] == segment_value]

        if len(chooser_segment) == 0:
            logging.info(f"{trace_label} skipping empty segment {segment_name}")
            continue

        logging.info(f"{trace_label} running segment {segment_name} with {chooser_segment.shape[0]} chooser rows")

        estimator = estimation.manager.begin_estimation(model_name=segment_name,
                                                        bundle_name='stop_frequency')

        segment_spec = simulate.read_model_spec(file_name=segment_settings['SPEC'])
        assert segment_spec is not None, "spec for segment_type %s not found" % segment_name

        coefficients_file_name = segment_settings['COEFFICIENTS']
        coefficients_df = simulate.read_model_coefficients(file_name=coefficients_file_name)
        segment_spec = simulate.eval_coefficients(segment_spec, coefficients_df, estimator)

        if estimator:
            estimator.write_spec(segment_settings, bundle_directory=False)
            estimator.write_model_settings(model_settings, model_settings_file_name,
                                           bundle_directory=True)
            estimator.write_coefficients(coefficients_df, segment_settings)
            estimator.write_choosers(chooser_segment)

            estimator.set_chooser_id(chooser_segment.index.name)

        choices = simulate.simple_simulate(
            choosers=chooser_segment,
            spec=segment_spec,
            nest_spec=nest_spec,
            locals_d=constants,
            chunk_size=chunk_size,
            trace_label=tracing.extend_trace_label(trace_label, segment_name),
            trace_choice_name='stops',
            estimator=estimator)

        # convert indexes to alternative names
        choices = pd.Series(segment_spec.columns[choices.values], index=choices.index)

        if estimator:
            estimator.write_choices(choices)
            choices = estimator.get_survey_values(choices, 'tours', 'stop_frequency')  # override choices
            estimator.write_override_choices(choices)
            estimator.end_estimation()

        choices_list.append(choices)

    choices = pd.concat(choices_list)

    tracing.print_summary('stop_frequency', choices, value_counts=True)

    # add stop_frequency choices to tours table
    assign_in_place(tours, choices.to_frame('stop_frequency'))

    # FIXME should have added this when tours created?
    # NOTE(review): the assert makes the membership test below always true;
    # kept as-is since it is unclear which of the two is intended
    assert 'primary_purpose' not in tours
    if 'primary_purpose' not in tours.columns:
        # if not already there, then it will have been added by annotate tours preprocessor
        assign_in_place(tours, tours_merged[['primary_purpose']])

    pipeline.replace_table("tours", tours)

    # create trips table
    trips = process_trips(tours, stop_frequency_alts)
    trips = pipeline.extend_table("trips", trips)
    tracing.register_traceable_table('trips', trips)
    pipeline.get_rn_generator().add_channel('trips', trips)

    if estimator:
        # make sure they created trips with the expected tour_ids
        columns = ['person_id', 'household_id', 'tour_id', 'outbound']

        survey_trips = estimation.manager.get_survey_table(table_name='trips')
        different = False
        survey_trips_not_in_trips = survey_trips[~survey_trips.index.isin(trips.index)]
        if len(survey_trips_not_in_trips) > 0:
            print(f"survey_trips_not_in_trips\n{survey_trips_not_in_trips}")
            different = True
        trips_not_in_survey_trips = trips[~trips.index.isin(survey_trips.index)]
        # test the set this branch reports on (not survey_trips_not_in_trips again)
        if len(trips_not_in_survey_trips) > 0:
            print(f"trips_not_in_survey_trips\n{trips_not_in_survey_trips}")
            different = True
        assert not different

        survey_trips = \
            estimation.manager.get_survey_values(trips,
                                                 table_name='trips',
                                                 column_names=columns)

        trips_differ = (trips[columns] != survey_trips[columns]).any(axis=1)

        if trips_differ.any():
            print("trips_differ\n%s" % trips_differ)
            print("%s of %s trips differ" % (trips_differ.sum(), len(trips_differ)))
            print("differing survey_trips\n%s" % survey_trips[trips_differ])
            print("differing modeled_trips\n%s" % trips[columns][trips_differ])

        assert (not trips_differ.any())

    if trace_hh_id:
        tracing.trace_df(tours,
                         label="stop_frequency.tours",
                         slicer='person_id',
                         columns=None)

        tracing.trace_df(trips,
                         label="stop_frequency.trips",
                         slicer='person_id',
                         columns=None)

        # annotations exist only if the preprocessor ran
        if annotations is not None:
            tracing.trace_df(annotations,
                             label="stop_frequency.annotations",
                             columns=None)

        tracing.trace_df(tours_merged,
                         label="stop_frequency.tours_merged",
                         slicer='person_id',
                         columns=None)
def run_tour_scheduling(
        model_name,
        chooser_tours, persons_merged, tdd_alts,
        tour_segment_col,
        chunk_size,
        trace_hh_id):
    """
    Shared driver for the tour scheduling models.

    Reads '<model_name>.yaml', loads either segmented specs (when
    TOUR_SPEC_SEGMENTS is present in the settings) or a single unsegmented
    spec, runs vts.vectorize_tour_scheduling on chooser_tours and, when
    estimating, replaces the modeled choices with survey values and replays
    them into the timetable.

    Returns the chosen tdd alternative ids (column 'tdd') left-merged with
    tdd_alts. The timetable table is replaced as a side effect.
    """

    trace_label = model_name
    model_settings_file_name = f'{model_name}.yaml'

    model_settings = config.read_model_settings(model_settings_file_name)

    logsum_columns = []
    if 'LOGSUM_SETTINGS' in model_settings:
        logsum_settings = config.read_model_settings(model_settings['LOGSUM_SETTINGS'])
        logsum_columns = logsum_settings.get('LOGSUM_CHOOSER_COLUMNS', [])

    # - filter chooser columns for both logsums and simulate
    model_columns = model_settings.get('SIMULATE_CHOOSER_COLUMNS', [])
    extra_model_columns = [c for c in model_columns if c not in logsum_columns]
    chooser_columns = logsum_columns + extra_model_columns

    persons_merged = expressions.filter_chooser_columns(persons_merged, chooser_columns)

    timetable = inject.get_injectable("timetable")

    # - run preprocessor to annotate choosers
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:
        locals_d = {'tt': timetable}
        locals_d.update(config.get_model_constants(model_settings))

        expressions.assign_columns(
            df=chooser_tours,
            model_settings=preprocessor_settings,
            locals_dict=locals_d,
            trace_label=trace_label)

    # active estimators keyed by spec segment name (None for the unsegmented case)
    estimators = {}

    if 'TOUR_SPEC_SEGMENTS' in model_settings:

        # load segmented specs
        specs = {}
        for spec_segment_name, spec_settings in model_settings.get('SPEC_SEGMENTS', {}).items():

            bundle_name = f'{model_name}_{spec_segment_name}'

            # estimator for this tour_segment
            estimator = estimation.manager.begin_estimation(
                model_name=bundle_name, bundle_name=bundle_name)

            raw_spec = simulate.read_model_spec(file_name=spec_settings['SPEC'])
            coefficients_df = simulate.read_model_coefficients(spec_settings)
            specs[spec_segment_name] = \
                simulate.eval_coefficients(raw_spec, coefficients_df, estimator)

            if estimator:
                estimators[spec_segment_name] = estimator  # add to local list
                estimator.write_model_settings(model_settings, model_settings_file_name)
                estimator.write_spec(spec_settings)
                estimator.write_coefficients(coefficients_df, spec_settings)

        # - spec dict segmented by primary_purpose
        tour_segment_settings = model_settings.get('TOUR_SPEC_SEGMENTS', {})
        tour_segments = {
            tour_segment_name: {
                'spec_segment_name': spec_segment_name,
                'spec': specs[spec_segment_name],
                'estimator': estimators.get(spec_segment_name),
            }
            for tour_segment_name, spec_segment_name in tour_segment_settings.items()
        }

        # default tour_segment_col to 'tour_type' if segmented spec and
        # tour_segment_col not specified
        if tour_segment_col is None and tour_segments:
            tour_segment_col = 'tour_type'

    else:
        # unsegmented spec
        assert 'SPEC_SEGMENTS' not in model_settings
        assert 'TOUR_SPEC_SEGMENTS' not in model_settings
        assert tour_segment_col is None

        estimator = estimation.manager.begin_estimation(model_name)

        raw_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
        coefficients_df = simulate.read_model_coefficients(model_settings)
        model_spec = simulate.eval_coefficients(raw_spec, coefficients_df, estimator)

        if estimator:
            estimators[None] = estimator  # add to local list
            estimator.write_model_settings(model_settings, model_settings_file_name)
            estimator.write_spec(model_settings)
            estimator.write_coefficients(coefficients_df, model_settings)

        # - non_mandatory tour scheduling is not segmented by tour type
        tour_segments = {
            'spec': model_spec,
            'estimator': estimator
        }

    if estimators:
        timetable.begin_transaction(list(estimators.values()))

    logger.info(f"Running {model_name} with %d tours", len(chooser_tours))
    choices = vts.vectorize_tour_scheduling(
        chooser_tours, persons_merged,
        tdd_alts, timetable,
        tour_segments=tour_segments,
        tour_segment_col=tour_segment_col,
        model_settings=model_settings,
        chunk_size=chunk_size,
        trace_label=trace_label)

    if estimators:
        # override choices for all estimators
        overridden = []
        for spec_segment_name, estimator in estimators.items():
            # a falsy segment name (None) marks the unsegmented estimator
            if spec_segment_name:
                in_segment = (chooser_tours.tour_type == spec_segment_name)
                model_choices = choices[in_segment]
            else:
                model_choices = choices

            estimator.write_choices(model_choices)
            override_choices = estimator.get_survey_values(model_choices, 'tours', 'tdd')
            estimator.write_override_choices(override_choices)

            overridden.append(override_choices)
            estimator.end_estimation()
        choices = pd.concat(overridden)

        # update timetable to reflect the override choices (assign tours in tour_num order)
        timetable.rollback()
        for tour_num, nth_tours in chooser_tours.groupby('tour_num', sort=True):
            timetable.assign(window_row_ids=nth_tours['person_id'],
                             tdds=choices.reindex(nth_tours.index))

    timetable.replace_table()

    # choices are tdd alternative ids
    # attach the tdd_alts columns for each chosen alternative
    choices = pd.merge(choices.to_frame('tdd'), tdd_alts,
                       left_on=['tdd'], right_index=True, how='left')

    return choices
def tour_mode_choice_simulate(tours, persons_merged, network_los, chunk_size,
                              trace_hh_id):
    """
    Tour mode choice simulate

    Runs mode choice once per ``tour_purpose`` segment, concatenates the
    per-segment choices and writes them back to the pipeline "tours" table.

    Parameters
    ----------
    tours : table wrapper exposing ``to_frame()``
        Must not contain any tours with tour_category 'atwork'.
    persons_merged : table wrapper exposing ``to_frame()``
        Left-joined onto tours via the tours 'person_id' column.
    network_los : object providing ``get_default_skim_dict()``,
        ``zone_system``, ``tvpb`` and ``setting()``
    chunk_size : passed through to ``run_tour_mode_choice_simulate``
    trace_hh_id : if truthy, the annotated tours are passed to
        ``tracing.trace_df``

    Returns
    -------
    None
        The result is published through ``pipeline.replace_table("tours", ...)``.
    """
    trace_label = 'tour_mode_choice'
    model_settings_file_name = 'tour_mode_choice.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)

    logsum_column_name = model_settings.get('MODE_CHOICE_LOGSUM_COLUMN_NAME')
    mode_column_name = 'tour_mode'  # FIXME - should be passed in?

    primary_tours = tours.to_frame()
    # atwork subtours are not expected in the table handed to this step
    assert not (primary_tours.tour_category == 'atwork').any()

    logger.info("Running %s with %d tours", trace_label, primary_tours.shape[0])

    tracing.print_summary('tour_types', primary_tours.tour_type,
                          value_counts=True)

    persons_merged = persons_merged.to_frame()
    primary_tours_merged = pd.merge(primary_tours, persons_merged,
                                    left_on='person_id', right_index=True,
                                    how='left', suffixes=('', '_r'))

    constants = {}
    # model_constants can appear in expressions
    constants.update(config.get_model_constants(model_settings))

    skim_dict = network_los.get_default_skim_dict()

    # setup skim keys
    orig_col_name = 'home_zone_id'
    dest_col_name = 'destination'
    out_time_col_name = 'start'
    in_time_col_name = 'end'

    odt_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=orig_col_name,
                                               dest_key=dest_col_name,
                                               dim3_key='out_period')
    dot_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=dest_col_name,
                                               dest_key=orig_col_name,
                                               dim3_key='in_period')
    odr_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=orig_col_name,
                                               dest_key=dest_col_name,
                                               dim3_key='in_period')
    dor_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=dest_col_name,
                                               dest_key=orig_col_name,
                                               dim3_key='out_period')
    od_skim_stack_wrapper = skim_dict.wrap(orig_col_name, dest_col_name)

    skims = {
        "odt_skims": odt_skim_stack_wrapper,
        "dot_skims": dot_skim_stack_wrapper,
        # orig->dest keyed by in_period, e.g. for TNC bridge return fare
        "odr_skims": odr_skim_stack_wrapper,
        # dest->orig keyed by out_period, e.g. for TNC bridge return fare
        "dor_skims": dor_skim_stack_wrapper,
        "od_skims": od_skim_stack_wrapper,
        'orig_col_name': orig_col_name,
        'dest_col_name': dest_col_name,
        'out_time_col_name': out_time_col_name,
        'in_time_col_name': in_time_col_name
    }

    is_three_zone = network_los.zone_system == los.THREE_ZONE

    if is_three_zone:
        # fixme - is this a lightweight object?
        tvpb = network_los.tvpb

        tvpb_logsum_odt = tvpb.wrap_logsum(orig_key=orig_col_name,
                                           dest_key=dest_col_name,
                                           tod_key='out_period',
                                           segment_key='demographic_segment',
                                           cache_choices=True,
                                           trace_label=trace_label,
                                           tag='tvpb_logsum_odt')
        tvpb_logsum_dot = tvpb.wrap_logsum(orig_key=dest_col_name,
                                           dest_key=orig_col_name,
                                           tod_key='in_period',
                                           segment_key='demographic_segment',
                                           cache_choices=True,
                                           trace_label=trace_label,
                                           tag='tvpb_logsum_dot')

        skims.update({
            'tvpb_logsum_odt': tvpb_logsum_odt,
            'tvpb_logsum_dot': tvpb_logsum_dot
        })

        # TVPB constants can appear in expressions
        constants.update(
            network_los.setting('TVPB_SETTINGS.tour_mode_choice.CONSTANTS'))

    estimator = estimation.manager.begin_estimation('tour_mode_choice')
    if estimator:
        estimator.write_coefficients(
            simulate.read_model_coefficients(model_settings))
        estimator.write_coefficients_template(
            simulate.read_model_coefficient_template(model_settings))
        estimator.write_spec(model_settings)
        estimator.write_model_settings(model_settings,
                                       model_settings_file_name)
        # (run_tour_mode_choice_simulate writes choosers post-annotation)

    # FIXME should normalize handling of tour_type and tour_purpose
    # mtctm1 school tour_type includes univ, which has different coefficients
    # from elementary and HS
    # we should either add this column when tours created or add univ to
    # tour_types
    not_university = (primary_tours_merged.tour_type != 'school') \
        | ~primary_tours_merged.is_university
    # school tours made by university students get purpose 'univ';
    # every other tour keeps its tour_type as its purpose
    primary_tours_merged['tour_purpose'] = \
        primary_tours_merged.tour_type.where(not_university, 'univ')

    choices_list = []
    for tour_purpose, tours_segment in primary_tours_merged.groupby(
            'tour_purpose'):

        logger.info("tour_mode_choice_simulate tour_type '%s' (%s tours)",
                    tour_purpose, len(tours_segment.index))

        if is_three_zone:
            tvpb_logsum_odt.extend_trace_label(tour_purpose)
            tvpb_logsum_dot.extend_trace_label(tour_purpose)

        # name index so tracing knows how to slice
        assert tours_segment.index.name == 'tour_id'

        choices_df = run_tour_mode_choice_simulate(
            tours_segment,
            tour_purpose,
            model_settings,
            mode_column_name=mode_column_name,
            logsum_column_name=logsum_column_name,
            network_los=network_los,
            skims=skims,
            constants=constants,
            estimator=estimator,
            chunk_size=chunk_size,
            trace_label=tracing.extend_trace_label(trace_label, tour_purpose),
            trace_choice_name='tour_mode_choice')

        tracing.print_summary(
            'tour_mode_choice_simulate %s choices_df' % tour_purpose,
            choices_df.tour_mode,
            value_counts=True)

        choices_list.append(choices_df)

        # FIXME - force garbage collection
        force_garbage_collect()

    choices_df = pd.concat(choices_list)

    # add cached tvpb_logsum tap choices for modes specified in
    # tvpb_mode_path_types
    if is_three_zone:
        # the setting is read with .get(), so tolerate its absence instead of
        # failing on None.items() after all the simulation work is done
        tvpb_mode_path_types = model_settings.get('tvpb_mode_path_types') or {}
        for mode, path_types in tvpb_mode_path_types.items():
            for direction, skim in zip(['od', 'do'],
                                       [tvpb_logsum_odt, tvpb_logsum_dot]):

                path_type = path_types[direction]
                skim_cache = skim.cache[path_type]

                logger.debug("mode %s direction %s path_type %s",
                             mode, direction, path_type)

                for c in skim_cache:
                    dest_col = f'{direction}_{c}'

                    if dest_col not in choices_df:
                        # placeholder for tours whose mode never fills the col
                        choices_df[dest_col] = \
                            0 if pd.api.types.is_numeric_dtype(skim_cache[c]) \
                            else ''

                    # rows whose tour_mode equals `mode` take the cached value,
                    # all other rows keep what they already had.
                    # Assign the result back explicitly: an inplace where() on
                    # the column selection is a chained update that does not
                    # reach choices_df under pandas Copy-on-Write.
                    choices_df[dest_col] = choices_df[dest_col].where(
                        choices_df.tour_mode != mode, skim_cache[c])

    if estimator:
        estimator.write_choices(choices_df.tour_mode)
        choices_df['tour_mode'] = estimator.get_survey_values(
            choices_df.tour_mode, 'tours', 'tour_mode')
        estimator.write_override_choices(choices_df.tour_mode)
        estimator.end_estimation()

    tracing.print_summary('tour_mode_choice_simulate all tour type choices',
                          choices_df.tour_mode, value_counts=True)

    # so we can trace with annotations
    assign_in_place(primary_tours, choices_df)

    # update tours table with mode choice (and optionally logsums)
    all_tours = tours.to_frame()
    assign_in_place(all_tours, choices_df)

    pipeline.replace_table("tours", all_tours)

    if trace_hh_id:
        tracing.trace_df(primary_tours,
                         label=tracing.extend_trace_label(trace_label,
                                                          mode_column_name),
                         slicer='tour_id',
                         index_label='tour_id',
                         warn_if_empty=True)