def annotate_preprocessors(
        tours_df, locals_dict, skims, model_settings,
        trace_label):

    locals_d = {}
    locals_d.update(locals_dict)
    locals_d.update(skims)

    preprocessor_settings = model_settings.get('preprocessor', [])
    if not isinstance(preprocessor_settings, list):
        assert isinstance(preprocessor_settings, dict)
        preprocessor_settings = [preprocessor_settings]

    simulate.set_skim_wrapper_targets(tours_df, skims)

    # run each preprocessor in turn, assigning its results onto tours_df
    for settings in preprocessor_settings:

        results = compute_columns(
            df=tours_df,
            model_settings=settings,
            locals_dict=locals_d,
            trace_label=trace_label)

        assign_in_place(tours_df, results)
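# Every snippet in this collection funnels results back through assign_in_place.
# A minimal sketch of its presumed semantics, for orientation only: align df2 on
# the target's index, overwrite shared columns, and join new ones. This is an
# assumption for illustration, not the actual activitysim.core.util implementation.

import pandas as pd


def assign_in_place_sketch(df, df2):
    # shared columns are updated row-wise (assumes df2's index is a subset of df's)
    for c in df2.columns.intersection(df.columns):
        df.loc[df2.index, c] = df2[c]

    # columns new to df are joined on the index (NaN where df2 has no row)
    for c in df2.columns.difference(df.columns):
        df[c] = df2[c]


# usage: choices computed for a subset of tours flow back onto the full table
tours = pd.DataFrame({'destination': [-1, -1]}, index=[1, 2])
choices = pd.DataFrame({'destination': [17, 23]}, index=[1, 2])
assign_in_place_sketch(tours, choices)
assert (tours.destination == [17, 23]).all()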
def atwork_subtour_destination(
        tours, persons_merged, skim_dict, skim_stack,
        land_use, size_terms,
        chunk_size, trace_hh_id):

    persons_merged = persons_merged.to_frame()

    tours = tours.to_frame()
    subtours = tours[tours.tour_category == 'atwork']

    # - if no atwork subtours
    if subtours.shape[0] == 0:
        tracing.no_results('atwork_subtour_destination')
        return

    # interaction_sample_simulate insists choosers appear in same order as alts
    subtours = subtours.sort_index()

    destination_size_terms = tour_destination_size_terms(land_use, size_terms, 'atwork')

    destination_sample = atwork_subtour_destination_sample(
        subtours,
        persons_merged,
        skim_dict,
        destination_size_terms,
        chunk_size, trace_hh_id)

    destination_sample = atwork_subtour_destination_logsums(
        persons_merged,
        destination_sample,
        skim_dict, skim_stack,
        chunk_size, trace_hh_id)

    choices = atwork_subtour_destination_simulate(
        subtours,
        persons_merged,
        destination_sample,
        skim_dict,
        destination_size_terms,
        chunk_size, trace_hh_id)

    subtours['destination'] = choices

    assign_in_place(tours, subtours[['destination']])
    pipeline.replace_table("tours", tours)

    tracing.print_summary('subtour destination', subtours.destination, describe=True)

    if trace_hh_id:
        tracing.trace_df(tours,
                         label='atwork_subtour_destination',
                         columns=['destination'])
def add_result_columns(base_dfname, from_df, prefix=''):

    dest_df = inject.get_table(base_dfname).to_frame()

    if prefix:
        from_df = from_df.copy()
        from_df.columns = [prefix + c for c in from_df.columns.values]

    assign_in_place(dest_df, from_df)

    pipeline.replace_table(base_dfname, dest_df)
def mandatory_tour_scheduling(tours,
                              persons_merged,
                              tdd_alts,
                              chunk_size,
                              trace_hh_id):
    """
    This model predicts the departure time and duration of each activity for mandatory tours
    """

    model_name = 'mandatory_tour_scheduling'
    trace_label = model_name

    persons_merged = persons_merged.to_frame()

    tours = tours.to_frame()
    mandatory_tours = tours[tours.tour_category == 'mandatory']

    # - if no mandatory_tours
    if mandatory_tours.shape[0] == 0:
        tracing.no_results(model_name)
        return

    # - add tour segmentation column
    # mtctm1 segments the mandatory_scheduling spec by tour_type
    # (i.e. there are different specs for work and school tour_types)
    # mtctm1 logsum coefficients are segmented by primary_purpose
    # (i.e. there are different logsum coefficients for work, school, and univ primary_purposes)
    # for simplicity in managing these different segmentation schemes,
    # we conflate them by segmenting tour processing to align with primary_purpose
    tour_segment_col = 'mandatory_tour_seg'
    assert tour_segment_col not in mandatory_tours

    is_university_tour = \
        (mandatory_tours.tour_type == 'school') & \
        reindex(persons_merged.is_university, mandatory_tours.person_id)

    mandatory_tours[tour_segment_col] = \
        mandatory_tours.tour_type.where(~is_university_tour, 'univ')

    choices = run_tour_scheduling(model_name,
                                  mandatory_tours,
                                  persons_merged,
                                  tdd_alts,
                                  tour_segment_col,
                                  chunk_size,
                                  trace_hh_id)

    assign_in_place(tours, choices)
    pipeline.replace_table("tours", tours)

    # updated df for tracing
    mandatory_tours = tours[tours.tour_category == 'mandatory']

    tracing.dump_df(DUMP,
                    tt.tour_map(persons_merged, mandatory_tours, tdd_alts),
                    trace_label, 'tour_map')

    if trace_hh_id:
        tracing.trace_df(mandatory_tours,
                         label=trace_label,
                         slicer='person_id',
                         index_label='tour',
                         columns=None,
                         warn_if_empty=True)
def assign_columns(df, model_settings, configs_dir=None, trace_label=None):
    """
    Evaluate expressions in the context of df and assign resulting target columns to df

    Can add new or modify existing columns (if target same as existing df column name)

    Parameters - same as for compute_columns except df must not be None
    Returns - nothing since we modify df in place
    """

    assert df is not None
    results = compute_columns(df, model_settings, configs_dir, trace_label)
    assign_in_place(df, results)
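# The preprocessor mechanics above reduce to evaluating expressions against df
# and assigning the resulting target columns. A toy equivalent: the
# (target, expression) spec format here is hypothetical, standing in for the
# CSV expression files that compute_columns actually reads.

import pandas as pd


def assign_columns_sketch(df, spec, locals_dict=None):
    # expose df and any constants as names visible to each expression,
    # roughly mirroring how activitysim expression files reference 'df'
    env = dict(locals_dict or {})
    env['df'] = df
    for target, expression in spec:
        df[target] = eval(expression, env)


# usage: constants from locals_dict mix freely with df columns
tours = pd.DataFrame({'distance': [1.5, 12.0]})
assign_columns_sketch(tours, [('is_long', 'df.distance > LONG')], {'LONG': 10})
assert tours.is_long.tolist() == [False, True]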
def non_mandatory_tour_destination(
        tours, persons_merged,
        skim_dict, skim_stack,
        chunk_size,
        trace_hh_id):
    """
    Given the tour generation from the above, each tour needs to have a destination,
    so in this case tours are the choosers (with the associated person that's making the tour)
    """

    trace_label = 'non_mandatory_tour_destination'
    model_settings = config.read_model_settings('non_mandatory_tour_destination.yaml')

    tours = tours.to_frame()
    persons_merged = persons_merged.to_frame()

    # choosers are tours - in a sense tours are choosing their destination
    non_mandatory_tours = tours[tours.tour_category == 'non_mandatory']

    # - if no non_mandatory_tours
    if non_mandatory_tours.shape[0] == 0:
        tracing.no_results(trace_label)
        return

    choices = tour_destination.run_tour_destination(
        tours,
        persons_merged,
        model_settings,
        skim_dict,
        skim_stack,
        chunk_size, trace_hh_id, trace_label)

    non_mandatory_tours['destination'] = choices

    assign_in_place(tours, non_mandatory_tours[['destination']])

    pipeline.replace_table("tours", tours)

    if trace_hh_id:
        tracing.trace_df(tours[tours.tour_category == 'non_mandatory'],
                         label="non_mandatory_tour_destination",
                         slicer='person_id',
                         index_label='tour',
                         columns=None,
                         warn_if_empty=True)
def annotate_table(configs_dir):

    # model_settings name should have been provided as a step argument
    model_name = inject.get_step_arg('model_name')

    model_settings = config.read_model_settings(configs_dir, '%s.yaml' % model_name)

    df_name = model_settings['DF']
    df = inject.get_table(df_name).to_frame()

    results = expressions.compute_columns(
        df,
        model_settings=model_settings,
        configs_dir=configs_dir,
        trace_label=None)

    assign_in_place(df, results)

    pipeline.replace_table(df_name, df)
def non_mandatory_tour_scheduling(tours,
                                  persons_merged,
                                  tdd_alts,
                                  chunk_size,
                                  trace_hh_id):
    """
    This model predicts the departure time and duration of each activity for non-mandatory tours
    """

    model_name = 'non_mandatory_tour_scheduling'
    trace_label = model_name

    persons_merged = persons_merged.to_frame()

    tours = tours.to_frame()
    non_mandatory_tours = tours[tours.tour_category == 'non_mandatory']

    # - if no non_mandatory_tours
    if non_mandatory_tours.shape[0] == 0:
        tracing.no_results(model_name)
        return

    tour_segment_col = None

    choices = run_tour_scheduling(model_name,
                                  non_mandatory_tours,
                                  persons_merged,
                                  tdd_alts,
                                  tour_segment_col,
                                  chunk_size,
                                  trace_hh_id)

    assign_in_place(tours, choices)
    pipeline.replace_table("tours", tours)

    # updated df for tracing
    non_mandatory_tours = tours[tours.tour_category == 'non_mandatory']

    tracing.dump_df(DUMP,
                    tt.tour_map(persons_merged, non_mandatory_tours, tdd_alts),
                    trace_label, 'tour_map')

    if trace_hh_id:
        tracing.trace_df(non_mandatory_tours,
                         label=trace_label,
                         slicer='person_id',
                         index_label='tour_id',
                         columns=None,
                         warn_if_empty=True)
def cleanup_failed_trips(trips):
    """
    Drop failed trips and clean up fields in leg_mates:

    trip_num        assign new ordinal trip num after failed trips are dropped
    trip_count      assign new count of trips in leg, sans failed trips
    first           update first flag as we may have dropped first trip (last trip can't fail)
    next_trip_id    assign id of next trip in leg after failed trips are dropped
    """

    if trips.failed.any():
        logger.warning("cleanup_failed_trips dropping %s failed trips" % trips.failed.sum())

        trips['patch'] = False
        flag_failed_trip_leg_mates(trips, 'patch')

        # drop the original failures
        trips = trips[~trips.failed]

        # increasing trip_id order
        patch_trips = trips[trips.patch].sort_index()

        # recompute fields dependent on trip_num sequence
        grouped = patch_trips.groupby(['tour_id', 'outbound'])
        patch_trips['trip_num'] = grouped.cumcount() + 1
        # FIXME - 'clever' hack to avoid regroup - implementation dependent optimization that could change
        patch_trips['trip_count'] = patch_trips['trip_num'] + grouped.cumcount(ascending=False)

        assign_in_place(trips, patch_trips[['trip_num', 'trip_count']])

        del trips['patch']

    del trips['failed']

    return trips
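# A quick check of the cumcount arithmetic above: within each (tour_id, outbound)
# leg, cumcount() numbers the surviving trips 0..n-1 ascending while
# cumcount(ascending=False) numbers them n-1..0, so their sum plus one equals the
# leg's surviving trip count on every row - no second groupby pass required.

import pandas as pd

trips = pd.DataFrame({'tour_id': [1, 1, 1, 1],
                      'outbound': [True, True, True, False]},
                     index=[10, 11, 12, 13])
grouped = trips.groupby(['tour_id', 'outbound'])
trips['trip_num'] = grouped.cumcount() + 1                                    # 1, 2, 3, 1
trips['trip_count'] = trips['trip_num'] + grouped.cumcount(ascending=False)  # 3, 3, 3, 1
assert trips.trip_count.tolist() == [3, 3, 3, 1]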
def tour_mode_choice_simulate(tours, persons_merged,
                              skim_dict, skim_stack,
                              chunk_size,
                              trace_hh_id):
    """
    Tour mode choice simulate
    """

    trace_label = 'tour_mode_choice'
    model_settings = config.read_model_settings('tour_mode_choice.yaml')

    spec = tour_mode_choice_spec(model_settings)

    primary_tours = tours.to_frame()

    assert not (primary_tours.tour_category == 'atwork').any()

    persons_merged = persons_merged.to_frame()

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    logger.info("Running %s with %d tours" % (trace_label, primary_tours.shape[0]))

    tracing.print_summary('tour_types',
                          primary_tours.tour_type, value_counts=True)

    primary_tours_merged = pd.merge(primary_tours, persons_merged, left_on='person_id',
                                    right_index=True, how='left', suffixes=('', '_r'))

    # setup skim keys
    orig_col_name = 'TAZ'
    dest_col_name = 'destination'
    out_time_col_name = 'start'
    in_time_col_name = 'end'
    odt_skim_stack_wrapper = skim_stack.wrap(left_key=orig_col_name, right_key=dest_col_name,
                                             skim_key='out_period')
    dot_skim_stack_wrapper = skim_stack.wrap(left_key=dest_col_name, right_key=orig_col_name,
                                             skim_key='in_period')
    od_skim_stack_wrapper = skim_dict.wrap(orig_col_name, dest_col_name)

    skims = {
        "odt_skims": odt_skim_stack_wrapper,
        "dot_skims": dot_skim_stack_wrapper,
        "od_skims": od_skim_stack_wrapper,
        'orig_col_name': orig_col_name,
        'dest_col_name': dest_col_name,
        'out_time_col_name': out_time_col_name,
        'in_time_col_name': in_time_col_name
    }

    choices_list = []
    for tour_type, segment in primary_tours_merged.groupby('tour_type'):

        logger.info("tour_mode_choice_simulate tour_type '%s' (%s tours)" %
                    (tour_type, len(segment.index), ))

        # name index so tracing knows how to slice
        assert segment.index.name == 'tour_id'

        choices = run_tour_mode_choice_simulate(
            segment,
            spec, tour_type, model_settings,
            skims=skims,
            constants=constants,
            nest_spec=nest_spec,
            chunk_size=chunk_size,
            trace_label=tracing.extend_trace_label(trace_label, tour_type),
            trace_choice_name='tour_mode_choice')

        tracing.print_summary('tour_mode_choice_simulate %s choices' % tour_type,
                              choices, value_counts=True)

        choices_list.append(choices)

        # FIXME - force garbage collection
        force_garbage_collect()

    choices = pd.concat(choices_list)

    tracing.print_summary('tour_mode_choice_simulate all tour type choices',
                          choices, value_counts=True)

    # so we can trace with annotations
    primary_tours['tour_mode'] = choices

    # but only keep mode choice col
    all_tours = tours.to_frame()
    # uncomment to save annotations to table
    # assign_in_place(all_tours, annotations)
    assign_in_place(all_tours, choices.to_frame('tour_mode'))

    pipeline.replace_table("tours", all_tours)

    if trace_hh_id:
        tracing.trace_df(primary_tours,
                         label=tracing.extend_trace_label(trace_label, 'tour_mode'),
                         slicer='tour_id',
                         index_label='tour_id',
                         warn_if_empty=True)
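# The per-segment pattern above recurs throughout these snippets: simulate each
# tour_type segment separately, concatenate the per-segment choice Series, and
# write the combined column back onto the tours table. A stripped-down version
# with a stand-in chooser function (run_segment here is hypothetical):

import pandas as pd


def run_segment(segment):
    # stand-in for run_tour_mode_choice_simulate - one choice per chooser row
    return pd.Series('WALK', index=segment.index)


tours = pd.DataFrame({'tour_type': ['work', 'school', 'work']},
                     index=pd.Index([1, 2, 3], name='tour_id'))

choices_list = [run_segment(segment) for _, segment in tours.groupby('tour_type')]
choices = pd.concat(choices_list)

# concat preserves each segment's tour_id index, so the combined Series can be
# assigned back regardless of row order
tours['tour_mode'] = choices
assert tours.tour_mode.notnull().all()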
def non_mandatory_tour_scheduling(tours,
                                  persons_merged,
                                  tdd_alts,
                                  chunk_size,
                                  trace_hh_id):
    """
    This model predicts the departure time and duration of each activity for non-mandatory tours
    """

    trace_label = 'non_mandatory_tour_scheduling'
    model_settings = config.read_model_settings('non_mandatory_tour_scheduling.yaml')
    model_spec = simulate.read_model_spec(file_name='tour_scheduling_nonmandatory.csv')

    segment_col = None  # no segmentation of model_spec

    tours = tours.to_frame()
    non_mandatory_tours = tours[tours.tour_category == 'non_mandatory']

    logger.info("Running non_mandatory_tour_scheduling with %d tours", len(tours))

    persons_merged = persons_merged.to_frame()

    if 'SIMULATE_CHOOSER_COLUMNS' in model_settings:
        persons_merged = \
            expressions.filter_chooser_columns(persons_merged,
                                               model_settings['SIMULATE_CHOOSER_COLUMNS'])

    constants = config.get_model_constants(model_settings)

    # - run preprocessor to annotate choosers
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_d = {}
        if constants is not None:
            locals_d.update(constants)

        expressions.assign_columns(
            df=non_mandatory_tours,
            model_settings=preprocessor_settings,
            locals_dict=locals_d,
            trace_label=trace_label)

    tdd_choices, timetable = vectorize_tour_scheduling(
        non_mandatory_tours, persons_merged,
        tdd_alts, model_spec, segment_col,
        model_settings=model_settings,
        chunk_size=chunk_size,
        trace_label=trace_label)

    timetable.replace_table()

    assign_in_place(tours, tdd_choices)
    pipeline.replace_table("tours", tours)

    # updated df for tracing
    non_mandatory_tours = tours[tours.tour_category == 'non_mandatory']

    tracing.dump_df(DUMP,
                    tt.tour_map(persons_merged, non_mandatory_tours, tdd_alts),
                    trace_label, 'tour_map')

    if trace_hh_id:
        tracing.trace_df(non_mandatory_tours,
                         label="non_mandatory_tour_scheduling",
                         slicer='person_id',
                         index_label='tour_id',
                         columns=None,
                         warn_if_empty=True)
def joint_tour_destination(tours,
                           persons_merged,
                           households_merged,
                           skim_dict,
                           skim_stack,
                           chunk_size,
                           trace_hh_id):
    """
    Given the tour generation from the above, each tour needs to have a destination,
    so in this case tours are the choosers (with the associated person that's making the tour)
    """

    trace_label = 'joint_tour_destination'
    model_settings_file_name = 'non_mandatory_tour_destination.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)

    logsum_column_name = model_settings.get('DEST_CHOICE_LOGSUM_COLUMN_NAME')
    want_logsums = logsum_column_name is not None

    sample_table_name = model_settings.get('DEST_CHOICE_SAMPLE_TABLE_NAME')
    want_sample_table = config.setting('want_dest_choice_sample_tables') \
        and sample_table_name is not None

    # choosers are tours - in a sense tours are choosing their destination
    tours = tours.to_frame()
    joint_tours = tours[tours.tour_category == 'joint']

    persons_merged = persons_merged.to_frame()
    households_merged = households_merged.to_frame()

    # - if no joint tours
    if joint_tours.shape[0] == 0:
        tracing.no_results('joint_tour_destination')
        return

    estimator = estimation.manager.begin_estimation('joint_tour_destination')
    if estimator:
        estimator.write_coefficients(simulate.read_model_coefficients(model_settings))
        # estimator.write_spec(model_settings, tag='SAMPLE_SPEC')
        estimator.write_spec(model_settings, tag='SPEC')
        estimator.set_alt_id(model_settings["ALT_DEST_COL_NAME"])
        estimator.write_table(inject.get_injectable('size_terms'), 'size_terms', append=False)
        estimator.write_table(inject.get_table('land_use').to_frame(), 'landuse', append=False)
        estimator.write_model_settings(model_settings, model_settings_file_name)

    # run_destination_simulate writes choosers because tours are merged just-in-time with persons
    # to reduce memory overhead (the full tours_merged table is only created for one segment at a time)
    choices_df, save_sample_df = run_joint_tour_destination(
        tours,
        persons_merged,
        households_merged,
        want_logsums,
        want_sample_table,
        model_settings,
        skim_dict,
        skim_stack,
        estimator,
        chunk_size, trace_hh_id, trace_label)

    if estimator:
        estimator.write_choices(choices_df.choice)
        choices_df.choice = estimator.get_survey_values(choices_df.choice, 'tours', 'destination')
        estimator.write_override_choices(choices_df.choice)
        estimator.end_estimation()

    # add column as we want joint_tours table for tracing.
    joint_tours['destination'] = choices_df.choice
    assign_in_place(tours, joint_tours[['destination']])
    pipeline.replace_table("tours", tours)

    if want_logsums:
        joint_tours[logsum_column_name] = choices_df['logsum']
        assign_in_place(tours, joint_tours[[logsum_column_name]])

    tracing.print_summary('destination', joint_tours.destination, describe=True)

    if trace_hh_id:
        tracing.trace_df(joint_tours,
                         label="joint_tour_destination.joint_tours")
def non_mandatory_tour_destination(
        tours,
        persons_merged,
        skim_dict, skim_stack,
        chunk_size,
        trace_hh_id):
    """
    Given the tour generation from the above, each tour needs to have a destination,
    so in this case tours are the choosers (with the associated person that's making the tour)
    """

    trace_label = 'non_mandatory_tour_destination'
    model_settings_file_name = 'non_mandatory_tour_destination.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)

    logsum_column_name = model_settings.get('DEST_CHOICE_LOGSUM_COLUMN_NAME')
    want_logsums = logsum_column_name is not None

    sample_table_name = model_settings.get('DEST_CHOICE_SAMPLE_TABLE_NAME')
    want_sample_table = config.setting('want_dest_choice_sample_tables') \
        and sample_table_name is not None

    tours = tours.to_frame()

    persons_merged = persons_merged.to_frame()

    # choosers are tours - in a sense tours are choosing their destination
    non_mandatory_tours = tours[tours.tour_category == 'non_mandatory']

    # - if no non_mandatory_tours
    if non_mandatory_tours.shape[0] == 0:
        tracing.no_results(trace_label)
        return

    estimator = estimation.manager.begin_estimation('non_mandatory_tour_destination')
    if estimator:
        estimator.write_coefficients(simulate.read_model_coefficients(model_settings))
        # estimator.write_spec(model_settings, tag='SAMPLE_SPEC')
        estimator.write_spec(model_settings, tag='SPEC')
        estimator.set_alt_id(model_settings["ALT_DEST_COL_NAME"])
        estimator.write_table(inject.get_injectable('size_terms'), 'size_terms', append=False)
        estimator.write_table(inject.get_table('land_use').to_frame(), 'landuse', append=False)
        estimator.write_model_settings(model_settings, model_settings_file_name)

    choices_df, save_sample_df = tour_destination.run_tour_destination(
        non_mandatory_tours,
        persons_merged,
        want_logsums,
        want_sample_table,
        model_settings,
        skim_dict,
        skim_stack,
        estimator,
        chunk_size, trace_hh_id, trace_label)

    if estimator:
        estimator.write_choices(choices_df.choice)
        choices_df.choice = estimator.get_survey_values(choices_df.choice, 'tours', 'destination')
        estimator.write_override_choices(choices_df.choice)
        estimator.end_estimation()

    non_mandatory_tours['destination'] = choices_df.choice

    assign_in_place(tours, non_mandatory_tours[['destination']])

    if want_logsums:
        non_mandatory_tours[logsum_column_name] = choices_df['logsum']
        assign_in_place(tours, non_mandatory_tours[[logsum_column_name]])

    pipeline.replace_table("tours", tours)

    if want_sample_table:
        assert len(save_sample_df.index.get_level_values(0).unique()) == len(choices_df)
        pipeline.extend_table(sample_table_name, save_sample_df)

    if trace_hh_id:
        tracing.trace_df(tours[tours.tour_category == 'non_mandatory'],
                         label="non_mandatory_tour_destination",
                         slicer='person_id',
                         index_label='tour',
                         columns=None,
                         warn_if_empty=True)
def atwork_subtour_destination(
        tours,
        persons_merged,
        network_los,
        chunk_size, trace_hh_id):

    trace_label = 'atwork_subtour_destination'
    model_settings_file_name = 'atwork_subtour_destination.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)

    future_settings = {
        'SIZE_TERM_SELECTOR': 'atwork',
        'SEGMENTS': ['atwork'],
        'ORIG_ZONE_ID': 'workplace_zone_id'
    }
    model_settings = config.future_model_settings(model_settings_file_name,
                                                  model_settings,
                                                  future_settings)

    destination_column_name = 'destination'
    logsum_column_name = model_settings.get('DEST_CHOICE_LOGSUM_COLUMN_NAME')
    want_logsums = logsum_column_name is not None

    sample_table_name = model_settings.get('DEST_CHOICE_SAMPLE_TABLE_NAME')
    want_sample_table = config.setting('want_dest_choice_sample_tables') \
        and sample_table_name is not None

    persons_merged = persons_merged.to_frame()

    tours = tours.to_frame()
    subtours = tours[tours.tour_category == 'atwork']

    # - if no atwork subtours
    if subtours.shape[0] == 0:
        tracing.no_results('atwork_subtour_destination')
        return

    estimator = estimation.manager.begin_estimation('atwork_subtour_destination')
    if estimator:
        estimator.write_coefficients(model_settings=model_settings)
        # estimator.write_spec(model_settings, tag='SAMPLE_SPEC')
        estimator.write_spec(model_settings, tag='SPEC')
        estimator.set_alt_id(model_settings["ALT_DEST_COL_NAME"])
        estimator.write_table(inject.get_injectable('size_terms'), 'size_terms', append=False)
        estimator.write_table(inject.get_table('land_use').to_frame(), 'landuse', append=False)
        estimator.write_model_settings(model_settings, model_settings_file_name)

    choices_df, save_sample_df = tour_destination.run_tour_destination(
        subtours,
        persons_merged,
        want_logsums,
        want_sample_table,
        model_settings,
        network_los,
        estimator,
        chunk_size, trace_hh_id, trace_label)

    if estimator:
        estimator.write_choices(choices_df['choice'])
        choices_df['choice'] = estimator.get_survey_values(choices_df['choice'], 'tours', 'destination')
        estimator.write_override_choices(choices_df['choice'])
        estimator.end_estimation()

    subtours[destination_column_name] = choices_df['choice']
    assign_in_place(tours, subtours[[destination_column_name]])

    if want_logsums:
        subtours[logsum_column_name] = choices_df['logsum']
        assign_in_place(tours, subtours[[logsum_column_name]])

    pipeline.replace_table("tours", tours)

    tracing.print_summary(destination_column_name,
                          subtours[destination_column_name],
                          describe=True)

    if want_sample_table:
        assert len(save_sample_df.index.get_level_values(0).unique()) == len(choices_df)
        # save_sample_df.set_index(model_settings['ALT_DEST_COL_NAME'], append=True, inplace=True)
        pipeline.extend_table(sample_table_name, save_sample_df)

    if trace_hh_id:
        tracing.trace_df(tours,
                         label='atwork_subtour_destination',
                         columns=['destination'])
def run_trip_destination(
        trips,
        tours_merged,
        estimator,
        chunk_size, trace_hh_id,
        trace_label,
        fail_some_trips_for_testing=False):
    """
    trip destination - main functionality separated from model step so it can be called iteratively

    Run the trip_destination model, assigning destinations for each (intermediate) trip
    (last trips already have a destination - either the tour primary destination or Home)

    Set trip destination and origin columns, and a boolean failed flag for any failed trips
    (destination for flagged failed trips will be set to -1)

    Parameters
    ----------
    trips
    tours_merged
    estimator
    chunk_size
    trace_hh_id
    trace_label
    fail_some_trips_for_testing

    Returns
    -------
    """

    model_settings_file_name = 'trip_destination.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)
    preprocessor_settings = model_settings.get('preprocessor', None)
    logsum_settings = config.read_model_settings(model_settings['LOGSUM_SETTINGS'])

    logsum_column_name = model_settings.get('DEST_CHOICE_LOGSUM_COLUMN_NAME')
    want_logsums = logsum_column_name is not None

    sample_table_name = model_settings.get('DEST_CHOICE_SAMPLE_TABLE_NAME')
    want_sample_table = config.setting('want_dest_choice_sample_tables') \
        and sample_table_name is not None

    land_use = inject.get_table('land_use')
    size_terms = inject.get_injectable('size_terms')
    network_los = inject.get_injectable('network_los')

    trips = trips.sort_index()
    trips['next_trip_id'] = np.roll(trips.index, -1)
    trips.next_trip_id = trips.next_trip_id.where(trips.trip_num < trips.trip_count, 0)

    # - initialize trip origin and destination to those of half-tour
    # (we will sequentially adjust intermediate trips origin and destination as we choose them)
    tour_destination = reindex(tours_merged.destination, trips.tour_id).astype(np.int64)
    tour_origin = reindex(tours_merged.origin, trips.tour_id).astype(np.int64)
    trips['destination'] = np.where(trips.outbound, tour_destination, tour_origin)
    trips['origin'] = np.where(trips.outbound, tour_origin, tour_destination)
    trips['failed'] = False

    if estimator:
        # need to check or override non-intermediate trip destination
        # should check consistency of survey trips origin, destination with parent tour and subsequent/prior trip?
        # FIXME if not consistent, do we fail or override? (seems weird to override them to bad values?)

        # expect all the same trips
        survey_trips = estimator.get_survey_table('trips').sort_index()
        assert survey_trips.index.equals(trips.index)

        first = (survey_trips.trip_num == 1)
        last = (survey_trips.trip_num == trips.trip_count)

        # expect survey's outbound first trip origin to be same as half tour origin
        assert (survey_trips.origin[survey_trips.outbound & first]
                == tour_origin[survey_trips.outbound & first]).all()
        # expect outbound last trip destination to be same as half tour destination
        assert (survey_trips.destination[survey_trips.outbound & last]
                == tour_destination[survey_trips.outbound & last]).all()

        # expect inbound first trip origin to be same as half tour destination
        assert (survey_trips.origin[~survey_trips.outbound & first]
                == tour_destination[~survey_trips.outbound & first]).all()
        # expect inbound last trip destination to be same as half tour origin
        assert (survey_trips.destination[~survey_trips.outbound & last]
                == tour_origin[~survey_trips.outbound & last]).all()

    # - filter tours_merged (AFTER copying destination and origin columns to trips)
    # tours_merged is used for logsums, we filter it here upfront to save space and time
    tours_merged_cols = logsum_settings['TOURS_MERGED_CHOOSER_COLUMNS']
    redundant_cols = model_settings.get('REDUNDANT_TOURS_MERGED_CHOOSER_COLUMNS', [])
    if redundant_cols:
        tours_merged_cols = [c for c in tours_merged_cols if c not in redundant_cols]
    tours_merged = tours_merged[tours_merged_cols]

    # - skims
    skim_hotel = SkimHotel(model_settings, network_los, trace_label)

    # - size_terms and alternatives
    alternatives = tour_destination_size_terms(land_use, size_terms, 'trip')

    # DataFrameMatrix allows us to treat the dataframe as a virtual 2-D array, indexed by zone_id, purpose
    # e.g. size_terms.get(df.dest_zone_id, df.purpose)
    # returns a series of size_terms for each chooser's dest_zone_id and purpose with chooser index
    size_term_matrix = DataFrameMatrix(alternatives)

    # don't need size terms in alternatives, just zone_id index
    alternatives = alternatives.drop(alternatives.columns, axis=1)
    alternatives.index.name = model_settings['ALT_DEST_COL_NAME']

    sample_list = []

    # - process intermediate trips in ascending trip_num order
    intermediate = trips.trip_num < trips.trip_count
    if intermediate.any():

        first_trip_num = trips[intermediate].trip_num.min()
        last_trip_num = trips[intermediate].trip_num.max()

        # iterate over trips in ascending trip_num order
        for trip_num in range(first_trip_num, last_trip_num + 1):

            nth_trips = trips[intermediate & (trips.trip_num == trip_num)]
            nth_trace_label = tracing.extend_trace_label(trace_label, 'trip_num_%s' % trip_num)

            locals_dict = {'network_los': network_los}
            locals_dict.update(config.get_model_constants(model_settings))

            # - annotate nth_trips
            if preprocessor_settings:
                expressions.assign_columns(
                    df=nth_trips,
                    model_settings=preprocessor_settings,
                    locals_dict=locals_dict,
                    trace_label=nth_trace_label)

            logger.info("Running %s with %d trips", nth_trace_label, nth_trips.shape[0])

            # - choose destination for nth_trips, segmented by primary_purpose
            choices_list = []
            for primary_purpose, trips_segment in nth_trips.groupby('primary_purpose'):
                choices, destination_sample = choose_trip_destination(
                    primary_purpose,
                    trips_segment,
                    alternatives,
                    tours_merged,
                    model_settings,
                    want_logsums,
                    want_sample_table,
                    size_term_matrix, skim_hotel,
                    estimator,
                    chunk_size, trace_hh_id,
                    trace_label=tracing.extend_trace_label(nth_trace_label, primary_purpose))

                choices_list.append(choices)
                if want_sample_table:
                    assert destination_sample is not None
                    sample_list.append(destination_sample)

            destinations_df = pd.concat(choices_list)

            if fail_some_trips_for_testing:
                if len(destinations_df) > 0:
                    destinations_df = destinations_df.drop(destinations_df.index[0])

            failed_trip_ids = nth_trips.index.difference(destinations_df.index)
            if failed_trip_ids.any():
                logger.warning("%s sidelining %s trips without viable destination alternatives" %
                               (nth_trace_label, failed_trip_ids.shape[0]))
                next_trip_ids = nth_trips.next_trip_id.reindex(failed_trip_ids)
                trips.loc[failed_trip_ids, 'failed'] = True
                trips.loc[failed_trip_ids, 'destination'] = -1
                trips.loc[next_trip_ids, 'origin'] = trips.loc[failed_trip_ids].origin.values

            if len(destinations_df) == 0:
                assert failed_trip_ids.all()
                logger.warning(f"all {len(nth_trips)} {primary_purpose} trip_num {trip_num} trips failed")

            if len(destinations_df) > 0:
                # - assign choices to this trip's destinations
                # if estimator, then the choices will already have been overridden by trip_destination_simulate
                # because we need to overwrite choices before any failed choices are suppressed
                assign_in_place(trips, destinations_df.choice.to_frame('destination'))
                if want_logsums:
                    assert 'logsum' in destinations_df.columns
                    assign_in_place(trips, destinations_df.logsum.to_frame(logsum_column_name))

                # - assign choice to next trip's origin
                destinations_df.index = nth_trips.next_trip_id.reindex(destinations_df.index)
                assign_in_place(trips, destinations_df.choice.to_frame('origin'))

    del trips['next_trip_id']

    if len(sample_list) > 0:
        save_sample_df = pd.concat(sample_list)
    else:
        # this could happen if no intermediate trips, or if no saved sample desired
        save_sample_df = None

    return trips, save_sample_df
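# The next_trip_id bookkeeping above relies on trips being sorted by trip_id so
# that np.roll(index, -1) pairs each trip with its successor; the where() then
# zeroes the link for each leg's final trip, which has no successor to patch
# when a destination choice fails. In miniature:

import numpy as np
import pandas as pd

trips = pd.DataFrame({'trip_num': [1, 2, 1], 'trip_count': [2, 2, 1]},
                     index=pd.Index([100, 101, 102], name='trip_id')).sort_index()
trips['next_trip_id'] = np.roll(trips.index, -1)                          # 101, 102, 100
trips.next_trip_id = trips.next_trip_id.where(trips.trip_num < trips.trip_count, 0)
assert trips.next_trip_id.tolist() == [101, 0, 0]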
def joint_tour_destination(
        tours,
        persons_merged,
        households_merged,
        network_los,
        chunk_size,
        trace_hh_id):
    """
    Given the tour generation from the above, each tour needs to have a destination,
    so in this case tours are the choosers (with the associated person that's making the tour)
    """

    trace_label = 'joint_tour_destination'
    model_settings_file_name = 'joint_tour_destination.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)

    logsum_column_name = model_settings.get('DEST_CHOICE_LOGSUM_COLUMN_NAME')
    want_logsums = logsum_column_name is not None

    sample_table_name = model_settings.get('DEST_CHOICE_SAMPLE_TABLE_NAME')
    want_sample_table = config.setting('want_dest_choice_sample_tables') \
        and sample_table_name is not None

    # choosers are tours - in a sense tours are choosing their destination
    tours = tours.to_frame()
    joint_tours = tours[tours.tour_category == 'joint']

    persons_merged = persons_merged.to_frame()

    # - if no joint tours
    if joint_tours.shape[0] == 0:
        tracing.no_results('joint_tour_destination')
        return

    estimator = estimation.manager.begin_estimation('joint_tour_destination')
    if estimator:
        estimator.write_coefficients(model_settings=model_settings)
        # estimator.write_spec(model_settings, tag='SAMPLE_SPEC')
        estimator.write_spec(model_settings, tag='SPEC')
        estimator.set_alt_id(model_settings["ALT_DEST_COL_NAME"])
        estimator.write_table(inject.get_injectable('size_terms'), 'size_terms', append=False)
        estimator.write_table(inject.get_table('land_use').to_frame(), 'landuse', append=False)
        estimator.write_model_settings(model_settings, model_settings_file_name)

    choices_df, save_sample_df = tour_destination.run_tour_destination(
        tours,
        persons_merged,
        want_logsums,
        want_sample_table,
        model_settings,
        network_los,
        estimator,
        chunk_size, trace_hh_id, trace_label)

    if estimator:
        estimator.write_choices(choices_df.choice)
        choices_df.choice = estimator.get_survey_values(choices_df.choice, 'tours', 'destination')
        estimator.write_override_choices(choices_df.choice)
        estimator.end_estimation()

    # add column as we want joint_tours table for tracing.
    joint_tours['destination'] = choices_df.choice
    assign_in_place(tours, joint_tours[['destination']])
    pipeline.replace_table("tours", tours)

    if want_logsums:
        joint_tours[logsum_column_name] = choices_df['logsum']
        assign_in_place(tours, joint_tours[[logsum_column_name]])

    tracing.print_summary('destination',
                          joint_tours.destination,
                          describe=True)

    if want_sample_table:
        assert len(save_sample_df.index.get_level_values(0).unique()) == len(choices_df)
        # save_sample_df.set_index(model_settings['ALT_DEST_COL_NAME'], append=True, inplace=True)
        pipeline.extend_table(sample_table_name, save_sample_df)

    if trace_hh_id:
        tracing.trace_df(joint_tours,
                         label="joint_tour_destination.joint_tours")
def trip_mode_choice(
        trips,
        tours_merged,
        network_los,
        chunk_size, trace_hh_id):
    """
    Trip mode choice - compute trip_mode (same values as for tour_mode) for each trip.

    Modes for each primary tour purpose are calculated separately
    because they have different coefficient values
    (stored in trip_mode_choice_coeffs.csv coefficient file.)

    Adds trip_mode column to trip table
    """
    trace_label = 'trip_mode_choice'
    model_settings = config.read_model_settings('trip_mode_choice.yaml')

    logsum_column_name = model_settings.get('MODE_CHOICE_LOGSUM_COLUMN_NAME')
    mode_column_name = 'trip_mode'

    model_spec = \
        simulate.read_model_spec(file_name=model_settings['SPEC'])
    omnibus_coefficients = \
        assign.read_constant_spec(config.config_file_path(model_settings['COEFFICIENTS']))

    trips_df = trips.to_frame()
    logger.info("Running %s with %d trips", trace_label, trips_df.shape[0])

    tours_merged = tours_merged.to_frame()
    tours_merged = tours_merged[model_settings['TOURS_MERGED_CHOOSER_COLUMNS']]

    nest_spec = config.get_logit_model_settings(model_settings)

    tracing.print_summary('primary_purpose',
                          trips_df.primary_purpose, value_counts=True)

    # - trips_merged - merge trips and tours_merged
    trips_merged = pd.merge(
        trips_df,
        tours_merged,
        left_on='tour_id',
        right_index=True,
        how="left")
    assert trips_merged.index.equals(trips.index)

    # setup skim keys
    assert ('trip_period' not in trips_merged)
    trips_merged['trip_period'] = network_los.skim_time_period_label(trips_merged.depart)

    orig_col = 'origin'
    dest_col = 'destination'

    constants = {}
    constants.update(config.get_model_constants(model_settings))
    constants.update({
        'ORIGIN': orig_col,
        'DESTINATION': dest_col
    })

    skim_dict = network_los.get_default_skim_dict()

    odt_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=orig_col, dest_key=dest_col,
                                               dim3_key='trip_period')
    dot_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=dest_col, dest_key=orig_col,
                                               dim3_key='trip_period')
    od_skim_wrapper = skim_dict.wrap('origin', 'destination')

    skims = {
        "odt_skims": odt_skim_stack_wrapper,
        "dot_skims": dot_skim_stack_wrapper,
        "od_skims": od_skim_wrapper,
    }

    if network_los.zone_system == los.THREE_ZONE:
        # fixme - is this a lightweight object?
        tvpb = network_los.tvpb

        tvpb_logsum_odt = tvpb.wrap_logsum(orig_key=orig_col, dest_key=dest_col,
                                           tod_key='trip_period', segment_key='demographic_segment',
                                           cache_choices=True,
                                           trace_label=trace_label, tag='tvpb_logsum_odt')
        skims.update({
            'tvpb_logsum_odt': tvpb_logsum_odt,
            # 'tvpb_logsum_dot': tvpb_logsum_dot
        })

        # TVPB constants can appear in expressions
        constants.update(network_los.setting('TVPB_SETTINGS.tour_mode_choice.CONSTANTS'))

    choices_list = []
    for primary_purpose, trips_segment in trips_merged.groupby('primary_purpose'):

        segment_trace_label = tracing.extend_trace_label(trace_label, primary_purpose)

        logger.info("trip_mode_choice tour_type '%s' (%s trips)" %
                    (primary_purpose, len(trips_segment.index), ))

        # name index so tracing knows how to slice
        assert trips_segment.index.name == 'trip_id'

        if network_los.zone_system == los.THREE_ZONE:
            tvpb_logsum_odt.extend_trace_label(primary_purpose)
            # tvpb_logsum_dot.extend_trace_label(primary_purpose)

        locals_dict = assign.evaluate_constants(omnibus_coefficients[primary_purpose],
                                                constants=constants)
        locals_dict.update(constants)

        expressions.annotate_preprocessors(
            trips_segment, locals_dict, skims,
            model_settings, segment_trace_label)

        locals_dict.update(skims)

        choices = mode_choice_simulate(
            choosers=trips_segment,
            spec=model_spec,
            nest_spec=nest_spec,
            skims=skims,
            locals_d=locals_dict,
            chunk_size=chunk_size,
            mode_column_name=mode_column_name,
            logsum_column_name=logsum_column_name,
            trace_label=trace_label,
            trace_choice_name='trip_mode_choice')

        if trace_hh_id:
            # trace the coefficients
            tracing.trace_df(pd.Series(locals_dict),
                             label=tracing.extend_trace_label(segment_trace_label, 'constants'),
                             transpose=False,
                             slicer='NONE')

            # so we can trace with annotations
            assign_in_place(trips_segment, choices)

            tracing.trace_df(trips_segment,
                             label=tracing.extend_trace_label(segment_trace_label, 'trip_mode'),
                             slicer='tour_id',
                             index_label='tour_id',
                             warn_if_empty=True)

        choices_list.append(choices)

        # FIXME - force garbage collection
        force_garbage_collect()

    choices_df = pd.concat(choices_list)

    # add cached tvpb_logsum tap choices for modes specified in tvpb_mode_path_types
    if network_los.zone_system == los.THREE_ZONE:

        tvpb_mode_path_types = model_settings.get('tvpb_mode_path_types')
        for mode, path_type in tvpb_mode_path_types.items():

            skim_cache = tvpb_logsum_odt.cache[path_type]

            print(f"mode {mode} path_type {path_type}")

            for c in skim_cache:
                dest_col = c
                if dest_col not in choices_df:
                    choices_df[dest_col] = np.nan
                choices_df[dest_col].where(choices_df[mode_column_name] != mode,
                                           skim_cache[c], inplace=True)

    # update trips table with choices (and optionally logsums)
    trips_df = trips.to_frame()
    assign_in_place(trips_df, choices_df)

    tracing.print_summary('tour_modes',
                          trips_merged.tour_mode, value_counts=True)

    tracing.print_summary('trip_mode_choice choices',
                          trips_df[mode_column_name], value_counts=True)

    assert not trips_df[mode_column_name].isnull().any()

    pipeline.replace_table("trips", trips_df)

    if trace_hh_id:
        tracing.trace_df(trips_df,
                         label=tracing.extend_trace_label(trace_label, 'trip_mode'),
                         slicer='trip_id',
                         index_label='trip_id',
                         warn_if_empty=True)
def run_trip_destination(
        trips,
        tours_merged,
        chunk_size, trace_hh_id,
        trace_label):
    """
    trip destination - main functionality separated from model step so it can be called iteratively

    Run the trip_destination model, assigning destinations for each (intermediate) trip
    (last trips already have a destination - either the tour primary destination or Home)

    Set trip destination and origin columns, and a boolean failed flag for any failed trips
    (destination for flagged failed trips will be set to -1)

    Parameters
    ----------
    trips
    tours_merged
    chunk_size
    trace_hh_id
    trace_label

    Returns
    -------
    """

    model_settings = config.read_model_settings('trip_destination.yaml')
    preprocessor_settings = model_settings.get('preprocessor', None)
    logsum_settings = config.read_model_settings(model_settings['LOGSUM_SETTINGS'])

    land_use = inject.get_table('land_use')
    size_terms = inject.get_injectable('size_terms')

    # - initialize trip origin and destination to those of half-tour
    # (we will sequentially adjust intermediate trips origin and destination as we choose them)
    tour_destination = reindex(tours_merged.destination, trips.tour_id).astype(int)
    tour_origin = reindex(tours_merged.origin, trips.tour_id).astype(int)
    trips['destination'] = np.where(trips.outbound, tour_destination, tour_origin)
    trips['origin'] = np.where(trips.outbound, tour_origin, tour_destination)
    trips['failed'] = False

    trips = trips.sort_index()
    trips['next_trip_id'] = np.roll(trips.index, -1)
    trips.next_trip_id = trips.next_trip_id.where(trips.trip_num < trips.trip_count, 0)

    # - filter tours_merged (AFTER copying destination and origin columns to trips)
    # tours_merged is used for logsums, we filter it here upfront to save space and time
    tours_merged_cols = logsum_settings['TOURS_MERGED_CHOOSER_COLUMNS']
    if 'REDUNDANT_TOURS_MERGED_CHOOSER_COLUMNS' in model_settings:
        redundant_cols = model_settings['REDUNDANT_TOURS_MERGED_CHOOSER_COLUMNS']
        tours_merged_cols = [c for c in tours_merged_cols if c not in redundant_cols]
    tours_merged = tours_merged[tours_merged_cols]

    # - skims
    skims = wrap_skims(model_settings)

    # - size_terms and alternatives
    alternatives = tour_destination_size_terms(land_use, size_terms, 'trip')

    # DataFrameMatrix allows us to treat the dataframe as a virtual 2-D array, indexed by TAZ, purpose
    # e.g. size_terms.get(df.dest_taz, df.purpose)
    # returns a series of size_terms for each chooser's dest_taz and purpose with chooser index
    size_term_matrix = DataFrameMatrix(alternatives)

    # don't need size terms in alternatives, just TAZ index
    alternatives = alternatives.drop(alternatives.columns, axis=1)
    alternatives.index.name = model_settings['ALT_DEST']

    # - process intermediate trips in ascending trip_num order
    intermediate = trips.trip_num < trips.trip_count
    if intermediate.any():

        first_trip_num = trips[intermediate].trip_num.min()
        last_trip_num = trips[intermediate].trip_num.max()

        # iterate over trips in ascending trip_num order
        for trip_num in range(first_trip_num, last_trip_num + 1):

            nth_trips = trips[intermediate & (trips.trip_num == trip_num)]
            nth_trace_label = tracing.extend_trace_label(trace_label, 'trip_num_%s' % trip_num)

            # - annotate nth_trips
            if preprocessor_settings:
                expressions.assign_columns(
                    df=nth_trips,
                    model_settings=preprocessor_settings,
                    locals_dict=config.get_model_constants(model_settings),
                    trace_label=nth_trace_label)

            logger.info("Running %s with %d trips", nth_trace_label, nth_trips.shape[0])

            # - choose destination for nth_trips, segmented by primary_purpose
            choices_list = []
            for primary_purpose, trips_segment in nth_trips.groupby('primary_purpose'):
                choices = choose_trip_destination(
                    primary_purpose,
                    trips_segment,
                    alternatives,
                    tours_merged,
                    model_settings,
                    size_term_matrix, skims,
                    chunk_size, trace_hh_id,
                    trace_label=tracing.extend_trace_label(nth_trace_label, primary_purpose))

                choices_list.append(choices)

            destinations = pd.concat(choices_list)

            failed_trip_ids = nth_trips.index.difference(destinations.index)
            if failed_trip_ids.any():
                logger.warning("%s sidelining %s trips without viable destination alternatives" %
                               (nth_trace_label, failed_trip_ids.shape[0]))
                next_trip_ids = nth_trips.next_trip_id.reindex(failed_trip_ids)
                trips.loc[failed_trip_ids, 'failed'] = True
                trips.loc[failed_trip_ids, 'destination'] = -1
                trips.loc[next_trip_ids, 'origin'] = trips.loc[failed_trip_ids].origin.values

            # - assign choices to these trips destinations and to next trips origin
            assign_in_place(trips, destinations.to_frame('destination'))
            destinations.index = nth_trips.next_trip_id.reindex(destinations.index)
            assign_in_place(trips, destinations.to_frame('origin'))

    del trips['next_trip_id']

    return trips
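# The DataFrameMatrix trick referenced above: a (zone x purpose) dataframe can be
# read as a 2-D array with numpy integer indexing, returning one size term per
# (dest, purpose) chooser row. A minimal re-implementation of the idea - the real
# class lives in activitysim.core; this sketch only assumes its documented get()
# behavior:

import numpy as np
import pandas as pd


class DataFrameMatrixSketch:

    def __init__(self, df):
        self.data = df.values
        # map zone labels and purpose labels to positional indexes
        self.row_ix = pd.Series(np.arange(len(df.index)), index=df.index)
        self.col_ix = pd.Series(np.arange(len(df.columns)), index=df.columns)

    def get(self, rows, cols):
        # rows: zone ids, cols: purpose labels - both per-chooser Series
        values = self.data[self.row_ix.loc[rows].values, self.col_ix.loc[cols].values]
        return pd.Series(values, index=rows.index)


size_terms = pd.DataFrame({'work': [10.0, 20.0], 'shop': [1.0, 2.0]}, index=[5, 6])
m = DataFrameMatrixSketch(size_terms)
choosers = pd.DataFrame({'dest_taz': [6, 5], 'purpose': ['shop', 'work']})
assert m.get(choosers.dest_taz, choosers.purpose).tolist() == [2.0, 10.0]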
def joint_tour_scheduling(
        tours,
        persons_merged,
        tdd_alts,
        chunk_size,
        trace_hh_id):
    """
    This model predicts the departure time and duration of each joint tour
    """
    trace_label = 'joint_tour_scheduling'
    model_settings = config.read_model_settings('joint_tour_scheduling.yaml')
    model_spec = simulate.read_model_spec(file_name='tour_scheduling_joint.csv')

    tours = tours.to_frame()
    joint_tours = tours[tours.tour_category == 'joint']

    # - if no joint tours
    if joint_tours.shape[0] == 0:
        tracing.no_results(trace_label)
        return

    # use inject.get_table as this won't exist if there are no joint_tours
    joint_tour_participants = inject.get_table('joint_tour_participants').to_frame()

    persons_merged = persons_merged.to_frame()

    logger.info("Running %s with %d joint tours", trace_label, joint_tours.shape[0])

    # it may seem peculiar that we are concerned with persons rather than households
    # but every joint tour is (somewhat arbitrarily) assigned a "primary person"
    # some of whose characteristics are used in the spec
    # and we get household attributes along with person attributes in persons_merged
    persons_merged = persons_merged[persons_merged.num_hh_joint_tours > 0]

    # since a household's joint tours each potentially have different participants
    # they may also have different joint tour masks (free time of all participants)
    # so we have to either chunk processing by joint_tour_num and build timetable by household
    # or build timetables by unique joint_tour
    constants = config.get_model_constants(model_settings)

    # - run preprocessor to annotate choosers
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_d = {}
        if constants is not None:
            locals_d.update(constants)

        expressions.assign_columns(
            df=joint_tours,
            model_settings=preprocessor_settings,
            locals_dict=locals_d,
            trace_label=trace_label)

    tdd_choices, timetable = vectorize_joint_tour_scheduling(
        joint_tours, joint_tour_participants,
        persons_merged,
        tdd_alts,
        spec=model_spec,
        model_settings=model_settings,
        chunk_size=chunk_size,
        trace_label=trace_label)

    timetable.replace_table()

    assign_in_place(tours, tdd_choices)
    pipeline.replace_table("tours", tours)

    # updated df for tracing
    joint_tours = tours[tours.tour_category == 'joint']

    if trace_hh_id:
        tracing.trace_df(joint_tours,
                         label="joint_tour_scheduling",
                         slicer='household_id')
def joint_tour_scheduling(
        tours,
        persons_merged,
        tdd_alts,
        chunk_size,
        trace_hh_id):
    """
    This model predicts the departure time and duration of each joint tour
    """
    trace_label = 'joint_tour_scheduling'
    model_settings_file_name = 'joint_tour_scheduling.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)

    tours = tours.to_frame()
    joint_tours = tours[tours.tour_category == 'joint']

    # - if no joint tours
    if joint_tours.shape[0] == 0:
        tracing.no_results(trace_label)
        return

    # use inject.get_table as this won't exist if there are no joint_tours
    joint_tour_participants = inject.get_table('joint_tour_participants').to_frame()

    persons_merged = persons_merged.to_frame()

    logger.info("Running %s with %d joint tours", trace_label, joint_tours.shape[0])

    # it may seem peculiar that we are concerned with persons rather than households
    # but every joint tour is (somewhat arbitrarily) assigned a "primary person"
    # some of whose characteristics are used in the spec
    # and we get household attributes along with person attributes in persons_merged
    persons_merged = persons_merged[persons_merged.num_hh_joint_tours > 0]

    # since a household's joint tours each potentially have different participants
    # they may also have different joint tour masks (free time of all participants)
    # so we have to either chunk processing by joint_tour_num and build timetable by household
    # or build timetables by unique joint_tour
    constants = config.get_model_constants(model_settings)

    # - run preprocessor to annotate choosers
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_d = {}
        if constants is not None:
            locals_d.update(constants)

        expressions.assign_columns(
            df=joint_tours,
            model_settings=preprocessor_settings,
            locals_dict=locals_d,
            trace_label=trace_label)

    timetable = inject.get_injectable("timetable")

    estimator = estimation.manager.begin_estimation('joint_tour_scheduling')

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients_df = simulate.read_model_coefficients(model_settings)
    model_spec = simulate.eval_coefficients(model_spec, coefficients_df, estimator)

    if estimator:
        estimator.write_model_settings(model_settings, model_settings_file_name)
        estimator.write_spec(model_settings)
        estimator.write_coefficients(coefficients_df)
        timetable.begin_transaction(estimator)

    choices = vectorize_joint_tour_scheduling(
        joint_tours, joint_tour_participants,
        persons_merged,
        tdd_alts, timetable,
        spec=model_spec,
        model_settings=model_settings,
        estimator=estimator,
        chunk_size=chunk_size,
        trace_label=trace_label)

    if estimator:
        estimator.write_choices(choices)
        choices = estimator.get_survey_values(choices, 'tours', 'tdd')
        estimator.write_override_choices(choices)
        estimator.end_estimation()

        # update timetable to reflect the override choices (assign tours in tour_num order)
        timetable.rollback()
        for tour_num, nth_tours in joint_tours.groupby('tour_num', sort=True):
            nth_participants = \
                joint_tour_participants[joint_tour_participants.tour_id.isin(nth_tours.index)]

            estimator.log("assign timetable for %s participants in %s tours with tour_num %s" %
                          (len(nth_participants), len(nth_tours), tour_num))

            # - update timetables of all joint tour participants
            timetable.assign(nth_participants.person_id,
                             reindex(choices, nth_participants.tour_id))

    timetable.replace_table()

    # choices are tdd alternative ids
    # we want to add start, end, and duration columns to tours, which we have in tdd_alts table
    choices = pd.merge(choices.to_frame('tdd'), tdd_alts,
                       left_on=['tdd'], right_index=True, how='left')

    assign_in_place(tours, choices)
    pipeline.replace_table("tours", tours)

    # updated df for tracing
    joint_tours = tours[tours.tour_category == 'joint']

    if trace_hh_id:
        tracing.trace_df(joint_tours,
                         label="joint_tour_scheduling",
                         slicer='household_id')
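# The closing merge above turns tdd alternative ids into scheduling columns: a
# left merge of the chosen ids against the tdd_alts lookup yields start, end,
# and duration per tour, with the tour_id index preserved for assign_in_place.
# In miniature:

import pandas as pd

tdd_alts = pd.DataFrame({'start': [5, 5], 'end': [9, 17], 'duration': [4, 12]},
                        index=pd.Index([0, 1], name='tdd'))
choices = pd.Series([1, 0], index=pd.Index([11, 12], name='tour_id'))

tours_update = pd.merge(choices.to_frame('tdd'), tdd_alts,
                        left_on=['tdd'], right_index=True, how='left')
assert tours_update.loc[11, 'duration'] == 12 and tours_update.loc[12, 'end'] == 9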
def stop_frequency(
        tours, tours_merged,
        stop_frequency_alts,
        network_los,
        chunk_size,
        trace_hh_id):
    """
    stop frequency model

    For each tour, choose a number of intermediate inbound stops and outbound stops.
    Create a trip table with inbound and outbound trips.

    Thus, a tour with stop_frequency '2out_0in' will have two outbound and zero inbound stops,
    and four corresponding trips: three outbound, and one inbound.

    Adds stop_frequency str column to tours.

    Creates trips table with columns:

    ::

        - person_id
        - household_id
        - tour_id
        - primary_purpose
        - atwork
        - trip_num
        - outbound
        - trip_count
    """

    trace_label = 'stop_frequency'
    model_settings_file_name = 'stop_frequency.yaml'

    model_settings = config.read_model_settings(model_settings_file_name)

    tours = tours.to_frame()
    tours_merged = tours_merged.to_frame()

    assert not tours_merged.household_id.isnull().any()
    assert not (tours_merged.origin == -1).any()
    assert not (tours_merged.destination == -1).any()

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    # - run preprocessor to annotate tours_merged
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        # hack: preprocessor adds origin column in place if it does not exist already
        assert 'origin' in tours_merged
        assert 'destination' in tours_merged
        od_skim_stack_wrapper = network_los.get_default_skim_dict().wrap('origin', 'destination')
        skims = [od_skim_stack_wrapper]

        locals_dict = {
            "od_skims": od_skim_stack_wrapper,
            'network_los': network_los
        }
        locals_dict.update(constants)

        simulate.set_skim_wrapper_targets(tours_merged, skims)

        # this should be pre-slice as some expressions may count tours by type
        annotations = expressions.compute_columns(
            df=tours_merged,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)

        assign_in_place(tours_merged, annotations)

    tracing.print_summary('stop_frequency segments',
                          tours_merged.primary_purpose, value_counts=True)

    spec_segments = model_settings.get('SPEC_SEGMENTS')
    assert spec_segments is not None, \
        f"SPEC_SEGMENTS setting not found in model settings: {model_settings_file_name}"
    segment_col = model_settings.get('SEGMENT_COL')
    assert segment_col is not None, \
        f"SEGMENT_COL setting not found in model settings: {model_settings_file_name}"

    nest_spec = config.get_logit_model_settings(model_settings)

    choices_list = []
    for segment_settings in spec_segments:

        segment_name = segment_settings[segment_col]
        segment_value = segment_settings[segment_col]

        chooser_segment = tours_merged[tours_merged[segment_col] == segment_value]

        if len(chooser_segment) == 0:
            logging.info(f"{trace_label} skipping empty segment {segment_name}")
            continue

        logging.info(f"{trace_label} running segment {segment_name} "
                     f"with {chooser_segment.shape[0]} chooser rows")

        estimator = estimation.manager.begin_estimation(
            model_name=segment_name, bundle_name='stop_frequency')

        segment_spec = simulate.read_model_spec(file_name=segment_settings['SPEC'])
        assert segment_spec is not None, "spec for segment_type %s not found" % segment_name

        coefficients_file_name = segment_settings['COEFFICIENTS']
        coefficients_df = simulate.read_model_coefficients(file_name=coefficients_file_name)
        segment_spec = simulate.eval_coefficients(segment_spec, coefficients_df, estimator)

        if estimator:
            estimator.write_spec(segment_settings, bundle_directory=False)
            estimator.write_model_settings(model_settings, model_settings_file_name,
                                           bundle_directory=True)
            estimator.write_coefficients(coefficients_df, segment_settings)
            estimator.write_choosers(chooser_segment)

            estimator.set_chooser_id(chooser_segment.index.name)

        choices = simulate.simple_simulate(
            choosers=chooser_segment,
            spec=segment_spec,
            nest_spec=nest_spec,
            locals_d=constants,
            chunk_size=chunk_size,
            trace_label=tracing.extend_trace_label(trace_label, segment_name),
            trace_choice_name='stops',
            estimator=estimator)

        # convert indexes to alternative names
        choices = pd.Series(segment_spec.columns[choices.values], index=choices.index)

        if estimator:
            estimator.write_choices(choices)
            choices = estimator.get_survey_values(choices, 'tours', 'stop_frequency')  # override choices
            estimator.write_override_choices(choices)
            estimator.end_estimation()

        choices_list.append(choices)

    choices = pd.concat(choices_list)

    tracing.print_summary('stop_frequency', choices, value_counts=True)

    # add stop_frequency choices to tours table
    assign_in_place(tours, choices.to_frame('stop_frequency'))

    # FIXME should have added this when tours created?
    assert 'primary_purpose' not in tours
    if 'primary_purpose' not in tours.columns:
        # if not already there, then it will have been added by annotate tours preprocessor
        assign_in_place(tours, tours_merged[['primary_purpose']])

    pipeline.replace_table("tours", tours)

    # create trips table
    trips = process_trips(tours, stop_frequency_alts)
    trips = pipeline.extend_table("trips", trips)
    tracing.register_traceable_table('trips', trips)
    pipeline.get_rn_generator().add_channel('trips', trips)

    if estimator:
        # make sure they created trips with the expected tour_ids
        columns = ['person_id', 'household_id', 'tour_id', 'outbound']

        survey_trips = estimation.manager.get_survey_table(table_name='trips')
        different = False
        survey_trips_not_in_trips = survey_trips[~survey_trips.index.isin(trips.index)]
        if len(survey_trips_not_in_trips) > 0:
            print(f"survey_trips_not_in_trips\n{survey_trips_not_in_trips}")
            different = True
        trips_not_in_survey_trips = trips[~trips.index.isin(survey_trips.index)]
        if len(trips_not_in_survey_trips) > 0:
            print(f"trips_not_in_survey_trips\n{trips_not_in_survey_trips}")
            different = True
        assert not different

        survey_trips = \
            estimation.manager.get_survey_values(trips,
                                                 table_name='trips',
                                                 column_names=columns)

        trips_differ = (trips[columns] != survey_trips[columns]).any(axis=1)

        if trips_differ.any():
            print("trips_differ\n%s" % trips_differ)
            print("%s of %s tours differ" % (trips_differ.sum(), len(trips_differ)))
            print("differing survey_trips\n%s" % survey_trips[trips_differ])
            print("differing modeled_trips\n%s" % trips[columns][trips_differ])

        assert (not trips_differ.any())

    if trace_hh_id:
        tracing.trace_df(tours,
                         label="stop_frequency.tours",
                         slicer='person_id',
                         columns=None)

        tracing.trace_df(trips,
                         label="stop_frequency.trips",
                         slicer='person_id',
                         columns=None)

        tracing.trace_df(annotations,
                         label="stop_frequency.annotations",
                         columns=None)

        tracing.trace_df(tours_merged,
                         label="stop_frequency.tours_merged",
                         slicer='person_id',
                         columns=None)
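# simple_simulate returns positional alternative indexes; the conversion above
# maps them back to the spec's column labels. The same move in isolation:

import pandas as pd

segment_spec_columns = pd.Index(['0out_0in', '1out_0in', '2out_0in'])
choices = pd.Series([2, 0], index=pd.Index([7, 8], name='tour_id'))

choices = pd.Series(segment_spec_columns[choices.values], index=choices.index)
assert choices.tolist() == ['2out_0in', '0out_0in']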
def atwork_subtour_mode_choice(tours, persons_merged, network_los, chunk_size, trace_hh_id): """ At-work subtour mode choice simulate """ trace_label = 'atwork_subtour_mode_choice' model_settings_file_name = 'tour_mode_choice.yaml' model_settings = config.read_model_settings(model_settings_file_name) logsum_column_name = model_settings.get('MODE_CHOICE_LOGSUM_COLUMN_NAME') mode_column_name = 'tour_mode' tours = tours.to_frame() subtours = tours[tours.tour_category == 'atwork'] # - if no atwork subtours if subtours.shape[0] == 0: tracing.no_results(trace_label) return subtours_merged = \ pd.merge(subtours, persons_merged.to_frame(), left_on='person_id', right_index=True, how='left') logger.info("Running %s with %d subtours" % (trace_label, subtours_merged.shape[0])) tracing.print_summary('%s tour_type' % trace_label, subtours_merged.tour_type, value_counts=True) constants = {} constants.update(config.get_model_constants(model_settings)) skim_dict = network_los.get_default_skim_dict() # setup skim keys orig_col_name = 'workplace_zone_id' dest_col_name = 'destination' out_time_col_name = 'start' in_time_col_name = 'end' odt_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=orig_col_name, dest_key=dest_col_name, dim3_key='out_period') dot_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=dest_col_name, dest_key=orig_col_name, dim3_key='in_period') odr_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=orig_col_name, dest_key=dest_col_name, dim3_key='in_period') dor_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=dest_col_name, dest_key=orig_col_name, dim3_key='out_period') od_skim_stack_wrapper = skim_dict.wrap(orig_col_name, dest_col_name) skims = { "odt_skims": odt_skim_stack_wrapper, "dot_skims": dot_skim_stack_wrapper, "odr_skims": odr_skim_stack_wrapper, "dor_skims": dor_skim_stack_wrapper, "od_skims": od_skim_stack_wrapper, 'orig_col_name': orig_col_name, 'dest_col_name': dest_col_name, 'out_time_col_name': out_time_col_name, 'in_time_col_name': in_time_col_name } if network_los.zone_system == los.THREE_ZONE: # fixme - is this a lightweight object? 
tvpb = network_los.tvpb tvpb_logsum_odt = tvpb.wrap_logsum(orig_key=orig_col_name, dest_key=dest_col_name, tod_key='out_period', segment_key='demographic_segment', cache_choices=True, trace_label=trace_label, tag='tvpb_logsum_odt') tvpb_logsum_dot = tvpb.wrap_logsum(orig_key=dest_col_name, dest_key=orig_col_name, tod_key='in_period', segment_key='demographic_segment', cache_choices=True, trace_label=trace_label, tag='tvpb_logsum_dot') skims.update({ 'tvpb_logsum_odt': tvpb_logsum_odt, 'tvpb_logsum_dot': tvpb_logsum_dot }) # TVPB constants can appear in expressions constants.update( network_los.setting('TVPB_SETTINGS.tour_mode_choice.CONSTANTS')) estimator = estimation.manager.begin_estimation( 'atwork_subtour_mode_choice') if estimator: estimator.write_coefficients( simulate.read_model_coefficients(model_settings)) estimator.write_coefficients_template( simulate.read_model_coefficient_template(model_settings)) estimator.write_spec(model_settings) estimator.write_model_settings(model_settings, model_settings_file_name) # FIXME run_tour_mode_choice_simulate writes choosers post-annotation choices_df = run_tour_mode_choice_simulate( subtours_merged, tour_purpose='atwork', model_settings=model_settings, mode_column_name=mode_column_name, logsum_column_name=logsum_column_name, network_los=network_los, skims=skims, constants=constants, estimator=estimator, chunk_size=chunk_size, trace_label=trace_label, trace_choice_name='tour_mode_choice') # add cached tvpb_logsum tap choices for modes specified in tvpb_mode_path_types if network_los.zone_system == los.THREE_ZONE: tvpb_mode_path_types = model_settings.get('tvpb_mode_path_types') for mode, path_types in tvpb_mode_path_types.items(): for direction, skim in zip(['od', 'do'], [tvpb_logsum_odt, tvpb_logsum_dot]): path_type = path_types[direction] skim_cache = skim.cache[path_type] print( f"mode {mode} direction {direction} path_type {path_type}") for c in skim_cache: dest_col = f'{direction}_{c}' if dest_col not in choices_df: choices_df[ dest_col] = 0 if pd.api.types.is_numeric_dtype( skim_cache[c]) else '' choices_df[dest_col].where(choices_df.tour_mode != mode, skim_cache[c], inplace=True) if estimator: estimator.write_choices(choices_df[mode_column_name]) choices_df[mode_column_name] = \ estimator.get_survey_values(choices_df[mode_column_name], 'tours', mode_column_name) estimator.write_override_choices(choices_df[mode_column_name]) estimator.end_estimation() tracing.print_summary('%s choices' % trace_label, choices_df[mode_column_name], value_counts=True) assign_in_place(tours, choices_df) pipeline.replace_table("tours", tours) if trace_hh_id: tracing.trace_df(tours[tours.tour_category == 'atwork'], label=tracing.extend_trace_label( trace_label, mode_column_name), slicer='tour_id', index_label='tour_id') force_garbage_collect()
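# ---------------------------------------------------------------------
# Illustration (not part of the model): the cache-copy loop above relies on
# Series.where, which KEEPS the existing value where the condition is True and
# substitutes the cached skim value where it is False - i.e. only rows whose
# tour_mode matches the tvpb mode get tap choices. Toy sketch; the column and
# mode names here are made up for illustration.
import pandas as pd

choices_df = pd.DataFrame({'tour_mode': ['WALK_TRANSIT', 'DRIVE', 'WALK_TRANSIT']})
cached_btap = pd.Series([101, 102, 103], index=choices_df.index)  # e.g. boarding TAPs

choices_df['od_btap'] = 0
choices_df['od_btap'] = choices_df['od_btap'].where(
    choices_df.tour_mode != 'WALK_TRANSIT', cached_btap)
print(choices_df)  # od_btap is 101/0/103 - untouched for the DRIVE row
# ---------------------------------------------------------------------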
def atwork_subtour_mode_choice_simulate(tours, persons_merged,
                                        tour_mode_choice_spec,
                                        tour_mode_choice_settings,
                                        skim_dict, skim_stack, trace_hh_id):
    """
    At-work subtour mode choice simulate
    """

    trace_label = 'atwork_subtour_mode_choice'

    tours = tours.to_frame()
    subtours = tours[tours.tour_category == 'subtour']

    # merge persons into tours
    choosers = pd.merge(subtours,
                        persons_merged.to_frame(),
                        left_on='person_id',
                        right_index=True)

    nest_spec = config.get_logit_model_settings(tour_mode_choice_settings)
    constants = config.get_model_constants(tour_mode_choice_settings)

    logger.info("Running %s with %d subtours" % (trace_label, len(subtours.index)))

    tracing.print_summary('%s tour_type' % trace_label,
                          subtours.tour_type,
                          value_counts=True)

    if trace_hh_id:
        tracing.trace_df(tour_mode_choice_spec,
                         tracing.extend_trace_label(trace_label, 'spec'),
                         slicer='NONE',
                         transpose=False)

    # setup skim keys
    odt_skim_stack_wrapper = skim_stack.wrap(left_key='workplace_taz',
                                             right_key='destination',
                                             skim_key="out_period")
    dot_skim_stack_wrapper = skim_stack.wrap(left_key='destination',
                                             right_key='workplace_taz',
                                             skim_key="in_period")
    od_skims = skim_dict.wrap('workplace_taz', 'destination')

    spec = get_segment_and_unstack(tour_mode_choice_spec, segment='workbased')

    if trace_hh_id:
        tracing.trace_df(spec,
                         tracing.extend_trace_label(trace_label, 'spec'),
                         slicer='NONE',
                         transpose=False)

    choices = _mode_choice_simulate(
        choosers,
        odt_skim_stack_wrapper=odt_skim_stack_wrapper,
        dot_skim_stack_wrapper=dot_skim_stack_wrapper,
        od_skim_stack_wrapper=od_skims,
        spec=spec,
        constants=constants,
        nest_spec=nest_spec,
        trace_label=trace_label,
        trace_choice_name='tour_mode_choice')

    tracing.print_summary('%s choices' % trace_label, choices, value_counts=True)

    # choices are modes, not destinations - assign to the 'mode' column
    # (which the trace_columns below expect)
    subtours['mode'] = choices
    assign_in_place(tours, subtours[['mode']])

    if trace_hh_id:
        trace_columns = ['mode', 'person_id', 'tour_type', 'tour_num', 'parent_tour_id']
        tracing.trace_df(subtours,
                         label=tracing.extend_trace_label(trace_label, 'mode'),
                         slicer='tour_id',
                         index_label='tour_id',
                         columns=trace_columns,
                         warn_if_empty=True)

    # FIXME - this forces garbage collection
    memory_info()
def stop_frequency(tours, tours_merged, stop_frequency_alts, skim_dict,
                   chunk_size, trace_hh_id):
    """
    stop frequency model

    For each tour, choose a number of intermediate inbound stops and outbound stops.
    Create a trip table with inbound and outbound trips.

    Thus, a tour with stop_frequency '2out_0in' will have two outbound and zero inbound stops,
    and four corresponding trips: three outbound, and one inbound.

    Adds stop_frequency str column to tours and creates a trips table with columns:

    ::

        - person_id
        - household_id
        - tour_id
        - primary_purpose
        - atwork
        - trip_num
        - outbound
        - trip_count
    """

    trace_label = 'stop_frequency'
    model_settings = config.read_model_settings('stop_frequency.yaml')

    tours = tours.to_frame()
    tours_merged = tours_merged.to_frame()

    assert not tours_merged.household_id.isnull().any()
    assert not (tours_merged.origin == -1).any()
    assert not (tours_merged.destination == -1).any()

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    # - run preprocessor to annotate tours_merged
    annotations = None
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        # hack: preprocessor adds origin column in place if it does not exist already
        od_skim_stack_wrapper = skim_dict.wrap('origin', 'destination')
        skims = [od_skim_stack_wrapper]

        locals_dict = {
            "od_skims": od_skim_stack_wrapper
        }
        if constants is not None:
            locals_dict.update(constants)

        simulate.set_skim_wrapper_targets(tours_merged, skims)

        # this should be pre-slice as some expressions may count tours by type
        annotations = expressions.compute_columns(
            df=tours_merged,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)

        assign_in_place(tours_merged, annotations)

    tracing.print_summary('stop_frequency segments',
                          tours_merged.primary_purpose,
                          value_counts=True)

    choices_list = []
    for segment_type, choosers in tours_merged.groupby('primary_purpose'):

        logger.info("%s running segment %s with %s chooser rows" %
                    (trace_label, segment_type, choosers.shape[0]))

        spec = simulate.read_model_spec(file_name='stop_frequency_%s.csv' % segment_type)
        assert spec is not None, "spec for segment_type %s not found" % segment_type

        choices = simulate.simple_simulate(
            choosers=choosers,
            spec=spec,
            nest_spec=nest_spec,
            locals_d=constants,
            chunk_size=chunk_size,
            trace_label=tracing.extend_trace_label(trace_label, segment_type),
            trace_choice_name='stops')

        # convert indexes to alternative names
        choices = pd.Series(spec.columns[choices.values], index=choices.index)

        choices_list.append(choices)

    choices = pd.concat(choices_list)

    tracing.print_summary('stop_frequency', choices, value_counts=True)

    # add stop_frequency choices to tours table
    assign_in_place(tours, choices.to_frame('stop_frequency'))

    if 'primary_purpose' not in tours.columns:
        assign_in_place(tours, tours_merged[['primary_purpose']])

    pipeline.replace_table("tours", tours)

    # create trips table
    trips = process_trips(tours, stop_frequency_alts)
    trips = pipeline.extend_table("trips", trips)
    tracing.register_traceable_table('trips', trips)
    pipeline.get_rn_generator().add_channel('trips', trips)

    if trace_hh_id:
        tracing.trace_df(tours,
                         label="stop_frequency.tours",
                         slicer='person_id',
                         columns=None)

        tracing.trace_df(trips,
                         label="stop_frequency.trips",
                         slicer='person_id',
                         columns=None)

        if annotations is not None:
            tracing.trace_df(annotations,
                             label="stop_frequency.annotations",
                             columns=None)

        tracing.trace_df(tours_merged,
                         label="stop_frequency.tours_merged",
                         slicer='person_id',
                         columns=None)
def atwork_subtour_scheduling(tours, persons_merged, tdd_alts, skim_dict,
                              chunk_size, trace_hh_id):
    """
    This model predicts the departure time and duration of each activity for
    at-work subtours
    """

    trace_label = 'atwork_subtour_scheduling'
    model_settings = config.read_model_settings('tour_scheduling_atwork.yaml')
    model_spec = simulate.read_model_spec(file_name='tour_scheduling_atwork.csv')

    persons_merged = persons_merged.to_frame()

    tours = tours.to_frame()
    subtours = tours[tours.tour_category == 'atwork']

    # - if no atwork subtours
    if subtours.shape[0] == 0:
        tracing.no_results(trace_label)
        return

    logger.info("Running %s with %d tours", trace_label, len(subtours))

    # preprocessor
    constants = config.get_model_constants(model_settings)
    od_skim_wrapper = skim_dict.wrap('origin', 'destination')
    do_skim_wrapper = skim_dict.wrap('destination', 'origin')
    skims = {
        "od_skims": od_skim_wrapper,
        "do_skims": do_skim_wrapper,
    }
    annotate_preprocessors(subtours, constants, skims, model_settings, trace_label)

    # parent_tours table with columns ['tour_id', 'tdd'] index = tour_id
    parent_tour_ids = subtours.parent_tour_id.astype(int).unique()
    parent_tours = pd.DataFrame({'tour_id': parent_tour_ids}, index=parent_tour_ids)
    parent_tours = parent_tours.merge(tours[['tdd']], left_index=True, right_index=True)

    tdd_choices = vectorize_subtour_scheduling(
        parent_tours,
        subtours,
        persons_merged,
        tdd_alts,
        model_spec,
        model_settings,
        chunk_size=chunk_size,
        trace_label=trace_label)

    assign_in_place(tours, tdd_choices)
    pipeline.replace_table("tours", tours)

    if trace_hh_id:
        tracing.trace_df(tours[tours.tour_category == 'atwork'],
                         label="atwork_subtour_scheduling",
                         slicer='person_id',
                         index_label='tour_id',
                         columns=None)

    if DUMP:
        subtours = tours[tours.tour_category == 'atwork']
        parent_tours = tours[tours.index.isin(subtours.parent_tour_id)]

        tracing.dump_df(DUMP, subtours, trace_label, 'sub_tours')
        tracing.dump_df(DUMP, parent_tours, trace_label, 'parent_tours')

        parent_tours['parent_tour_id'] = parent_tours.index
        subtours = pd.concat([parent_tours, subtours])
        tracing.dump_df(DUMP,
                        tt.tour_map(parent_tours, subtours, tdd_alts,
                                    persons_id_col='parent_tour_id'),
                        trace_label, 'tour_map')
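# ---------------------------------------------------------------------
# Illustration (not part of the model): the parent_tours construction above
# builds a frame indexed by the (unique) parent tour ids so the scheduler can
# see each parent work tour's chosen time window (tdd). Toy sketch with made-up ids.
import pandas as pd

tours = pd.DataFrame({'tdd': [12, 30, 7]}, index=[100, 101, 102])  # index = tour_id
subtours = pd.DataFrame({'parent_tour_id': [100.0, 100.0, 102.0]})

parent_tour_ids = subtours.parent_tour_id.astype(int).unique()
parent_tours = pd.DataFrame({'tour_id': parent_tour_ids}, index=parent_tour_ids)
parent_tours = parent_tours.merge(tours[['tdd']], left_index=True, right_index=True)
print(parent_tours)  # rows for tours 100 and 102 only, each with its tdd
# ---------------------------------------------------------------------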
def joint_tour_participation(tours, persons_merged, chunk_size, trace_hh_id):
    """
    Predicts whether each eligible person participates in each joint tour.
    """
    trace_label = 'joint_tour_participation'
    model_settings = config.read_model_settings('joint_tour_participation.yaml')
    model_spec = simulate.read_model_spec(file_name='joint_tour_participation.csv')

    tours = tours.to_frame()
    joint_tours = tours[tours.tour_category == 'joint']

    # - if no joint tours
    if joint_tours.shape[0] == 0:
        add_null_results(model_settings, trace_label)
        return

    persons_merged = persons_merged.to_frame()

    # - create joint_tour_participation_candidates table
    candidates = joint_tour_participation_candidates(joint_tours, persons_merged)
    tracing.register_traceable_table('joint_tour_participants', candidates)
    pipeline.get_rn_generator().add_channel('joint_tour_participants', candidates)

    logger.info("Running joint_tours_participation with %d potential participants (candidates)" %
                candidates.shape[0])

    # - preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_dict = {
            'person_time_window_overlap': person_time_window_overlap,
            'persons': persons_merged
        }

        expressions.assign_columns(
            df=candidates,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)

    # - simple_simulate
    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    choices = simulate.simple_simulate(
        choosers=candidates,
        spec=model_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='participation',
        custom_chooser=participants_chooser)

    # choice is boolean (participate or not)
    choice_col = model_settings.get('participation_choice', 'participate')
    assert choice_col in model_spec.columns, \
        "couldn't find participation choice column '%s' in spec" % choice_col
    PARTICIPATE_CHOICE = model_spec.columns.get_loc(choice_col)

    participate = (choices == PARTICIPATE_CHOICE)

    # satisfaction indexed by tour_id
    tour_satisfaction = get_tour_satisfaction(candidates, participate)

    assert tour_satisfaction.all()

    candidates['satisfied'] = reindex(tour_satisfaction, candidates.tour_id)

    PARTICIPANT_COLS = ['tour_id', 'household_id', 'person_id']
    participants = candidates[participate][PARTICIPANT_COLS].copy()

    # assign participant_num
    # FIXME do we want something smarter than the participant with the lowest person_id?
    participants['participant_num'] = \
        participants.sort_values(by=['tour_id', 'person_id']).\
        groupby('tour_id').cumcount() + 1

    pipeline.replace_table("joint_tour_participants", participants)

    # drop channel as we aren't using it any more (and it has candidates that weren't chosen)
    pipeline.get_rn_generator().drop_channel('joint_tour_participants')

    # - assign joint tour 'point person' (participant_num == 1)
    point_persons = participants[participants.participant_num == 1]
    joint_tours['person_id'] = point_persons.set_index('tour_id').person_id

    # update number_of_participants which was initialized to 1
    joint_tours['number_of_participants'] = participants.groupby('tour_id').size()

    assign_in_place(tours, joint_tours[['person_id', 'number_of_participants']])

    pipeline.replace_table("tours", tours)

    # - run annotations
    annotate_jtp(model_settings, trace_label)

    if trace_hh_id:
        tracing.trace_df(participants,
                         label="joint_tour_participation.participants")
        tracing.trace_df(joint_tours,
                         label="joint_tour_participation.joint_tours")
def trip_purpose_and_destination( trips, tours_merged, chunk_size, trace_hh_id): trace_label = "trip_purpose_and_destination" model_settings = config.read_model_settings('trip_purpose_and_destination.yaml') MAX_ITERATIONS = model_settings.get('MAX_ITERATIONS', 5) trips_df = trips.to_frame() tours_merged_df = tours_merged.to_frame() if trips_df.empty: logger.info("%s - no trips. Nothing to do." % trace_label) return # FIXME could allow MAX_ITERATIONS=0 to allow for cleanup-only run # in which case, we would need to drop bad trips, WITHOUT failing bad_trip leg_mates assert (MAX_ITERATIONS > 0) # if trip_destination has been run before, keep only failed trips (and leg_mates) to retry if 'destination' in trips_df: if trips_df.failed.any(): logger.info('trip_destination has already been run. Rerunning failed trips') flag_failed_trip_leg_mates(trips_df, 'failed') trips_df = trips_df[trips_df.failed] tours_merged_df = tours_merged_df[tours_merged_df.index.isin(trips_df.tour_id)] logger.info('Rerunning %s failed trips and leg-mates' % trips_df.shape[0]) else: # no failed trips from prior run of trip_destination logger.info("%s - no failed trips from prior model run." % trace_label) del trips_df['failed'] pipeline.replace_table("trips", trips_df) return results = [] i = 0 RESULT_COLUMNS = ['purpose', 'destination', 'origin', 'failed'] while True: i += 1 for c in RESULT_COLUMNS: if c in trips_df: del trips_df[c] trips_df = run_trip_purpose_and_destination( trips_df, tours_merged_df, chunk_size, trace_hh_id, trace_label=tracing.extend_trace_label(trace_label, "i%s" % i)) num_failed_trips = trips_df.failed.sum() # if there were no failed trips, we are done if num_failed_trips == 0: results.append(trips_df[RESULT_COLUMNS]) break logger.warning("%s %s failed trips in iteration %s" % (trace_label, num_failed_trips, i)) file_name = "%s_i%s_failed_trips" % (trace_label, i) logger.info("writing failed trips to %s" % file_name) tracing.write_csv(trips_df[trips_df.failed], file_name=file_name, transpose=False) # if max iterations reached, add remaining trips to results and give up # note that we do this BEFORE failing leg_mates so resulting trip legs are complete if i >= MAX_ITERATIONS: logger.warning("%s too many iterations %s" % (trace_label, i)) results.append(trips_df[RESULT_COLUMNS]) break # otherwise, if any trips failed, then their leg-mates trips must also fail flag_failed_trip_leg_mates(trips_df, 'failed') # add the good trips to results results.append(trips_df[~trips_df.failed][RESULT_COLUMNS]) # and keep the failed ones to retry trips_df = trips_df[trips_df.failed] tours_merged_df = tours_merged_df[tours_merged_df.index.isin(trips_df.tour_id)] # - assign result columns to trips results = pd.concat(results) logger.info("%s %s failed trips after %s iterations" % (trace_label, results.failed.sum(), i)) trips_df = trips.to_frame() assign_in_place(trips_df, results) trips_df = cleanup_failed_trips(trips_df) pipeline.replace_table("trips", trips_df) if trace_hh_id: tracing.trace_df(trips_df, label=trace_label, slicer='trip_id', index_label='trip_id', warn_if_empty=True)
def joint_tour_participation(tours, persons_merged, chunk_size, trace_hh_id):
    """
    Predicts whether each eligible person participates in each joint tour.
    """
    trace_label = 'joint_tour_participation'
    model_settings_file_name = 'joint_tour_participation.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)

    tours = tours.to_frame()
    joint_tours = tours[tours.tour_category == 'joint']

    # - if no joint tours
    if joint_tours.shape[0] == 0:
        add_null_results(model_settings, trace_label)
        return

    persons_merged = persons_merged.to_frame()

    # - create joint_tour_participation_candidates table
    candidates = joint_tour_participation_candidates(joint_tours, persons_merged)
    tracing.register_traceable_table('joint_tour_participants', candidates)
    pipeline.get_rn_generator().add_channel('joint_tour_participants', candidates)

    logger.info("Running joint_tours_participation with %d potential participants (candidates)" %
                candidates.shape[0])

    # - preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_dict = {
            'person_time_window_overlap': person_time_window_overlap,
            'persons': persons_merged
        }

        expressions.assign_columns(
            df=candidates,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)

    # - simple_simulate
    estimator = estimation.manager.begin_estimation('joint_tour_participation')

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients_df = simulate.read_model_coefficients(model_settings)
    model_spec = simulate.eval_coefficients(model_spec, coefficients_df, estimator)

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    if estimator:
        estimator.write_model_settings(model_settings, model_settings_file_name)
        estimator.write_spec(model_settings)
        estimator.write_coefficients(coefficients_df, model_settings)
        estimator.write_choosers(candidates)

    # add household-based chunk_id so we can chunk all of a household's candidates together
    assert 'chunk_id' not in candidates.columns
    unique_household_ids = candidates.household_id.unique()
    household_chunk_ids = pd.Series(range(len(unique_household_ids)),
                                    index=unique_household_ids)
    candidates['chunk_id'] = reindex(household_chunk_ids, candidates.household_id)

    choices = simulate.simple_simulate_by_chunk_id(
        choosers=candidates,
        spec=model_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='participation',
        custom_chooser=participants_chooser,
        estimator=estimator)

    # choice is boolean (participate or not)
    choice_col = model_settings.get('participation_choice', 'participate')
    assert choice_col in model_spec.columns, \
        "couldn't find participation choice column '%s' in spec" % choice_col
    PARTICIPATE_CHOICE = model_spec.columns.get_loc(choice_col)

    participate = (choices == PARTICIPATE_CHOICE)

    if estimator:
        estimator.write_choices(choices)

        # we override the 'participate' boolean series, instead of the raw alternative
        # index in the 'choices' series - its value depends on whether the candidate's
        # 'participant_id' is in the joint_tour_participant index
        survey_participants_df = estimator.get_survey_table('joint_tour_participants')
        participate = pd.Series(choices.index.isin(survey_participants_df.index.values),
                                index=choices.index)

        # but estimation software wants to know the choices value (alternative index)
        choices = participate.replace({True: PARTICIPATE_CHOICE,
                                       False: 1 - PARTICIPATE_CHOICE})

        # estimator.write_override_choices(participate)  # write choices as boolean participate
        estimator.write_override_choices(choices)  # write choices as int alt indexes

        estimator.end_estimation()

    # satisfaction indexed by tour_id
    tour_satisfaction = get_tour_satisfaction(candidates, participate)

    assert tour_satisfaction.all()

    candidates['satisfied'] = reindex(tour_satisfaction, candidates.tour_id)

    PARTICIPANT_COLS = ['tour_id', 'household_id', 'person_id']
    participants = candidates[participate][PARTICIPANT_COLS].copy()

    # assign participant_num
    # FIXME do we want something smarter than the participant with the lowest person_id?
    participants['participant_num'] = \
        participants.sort_values(by=['tour_id', 'person_id']).\
        groupby('tour_id').cumcount() + 1

    pipeline.replace_table("joint_tour_participants", participants)

    # drop channel as we aren't using it any more (and it has candidates that weren't chosen)
    pipeline.get_rn_generator().drop_channel('joint_tour_participants')

    # - assign joint tour 'point person' (participant_num == 1)
    point_persons = participants[participants.participant_num == 1]
    joint_tours['person_id'] = point_persons.set_index('tour_id').person_id

    # update number_of_participants which was initialized to 1
    joint_tours['number_of_participants'] = participants.groupby('tour_id').size()

    assign_in_place(tours, joint_tours[['person_id', 'number_of_participants']])

    pipeline.replace_table("tours", tours)

    # - run annotations
    annotate_jtp(model_settings, trace_label)

    if trace_hh_id:
        tracing.trace_df(participants,
                         label="joint_tour_participation.participants")
        tracing.trace_df(joint_tours,
                         label="joint_tour_participation.joint_tours")
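# ---------------------------------------------------------------------
# Illustration (not part of the model): participant_num above numbers each joint
# tour's participants in person_id order via sort_values + groupby().cumcount();
# the assignment realigns the ranks to the original row order by index. Toy sketch.
import pandas as pd

participants = pd.DataFrame({'tour_id':   [5, 5, 5, 9],
                             'person_id': [32, 30, 31, 40]})
participants['participant_num'] = (participants
                                   .sort_values(by=['tour_id', 'person_id'])
                                   .groupby('tour_id').cumcount() + 1)
print(participants)  # person 30 gets participant_num 1, 31 gets 2, 32 gets 3
# ---------------------------------------------------------------------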
def non_mandatory_tour_scheduling(tours, persons_merged, tdd_alts, chunk_size, trace_hh_id): """ This model predicts the departure time and duration of each activity for non-mandatory tours """ trace_label = 'non_mandatory_tour_scheduling' model_settings_file_name = 'non_mandatory_tour_scheduling.yaml' model_settings = config.read_model_settings(model_settings_file_name) tours = tours.to_frame() non_mandatory_tours = tours[tours.tour_category == 'non_mandatory'] logger.info("Running non_mandatory_tour_scheduling with %d tours", len(tours)) persons_merged = persons_merged.to_frame() if 'SIMULATE_CHOOSER_COLUMNS' in model_settings: persons_merged =\ expressions.filter_chooser_columns(persons_merged, model_settings['SIMULATE_CHOOSER_COLUMNS']) constants = config.get_model_constants(model_settings) # - run preprocessor to annotate choosers preprocessor_settings = model_settings.get('preprocessor', None) if preprocessor_settings: locals_d = {} if constants is not None: locals_d.update(constants) expressions.assign_columns(df=non_mandatory_tours, model_settings=preprocessor_settings, locals_dict=locals_d, trace_label=trace_label) timetable = inject.get_injectable("timetable") estimator = estimation.manager.begin_estimation( 'non_mandatory_tour_scheduling') model_spec = simulate.read_model_spec(file_name=model_settings['SPEC']) coefficients_df = simulate.read_model_coefficients(model_settings) model_spec = simulate.eval_coefficients(model_spec, coefficients_df, estimator) if estimator: estimator.write_model_settings(model_settings, model_settings_file_name) estimator.write_spec(model_settings) estimator.write_coefficients(coefficients_df) timetable.begin_transaction(estimator) # - non_mandatory tour scheduling is not segmented by tour type spec_info = {'spec': model_spec, 'estimator': estimator} choices = vectorize_tour_scheduling(non_mandatory_tours, persons_merged, tdd_alts, timetable, tour_segments=spec_info, tour_segment_col=None, model_settings=model_settings, chunk_size=chunk_size, trace_label=trace_label) if estimator: estimator.write_choices(choices) choices = estimator.get_survey_values(choices, 'tours', 'tdd') estimator.write_override_choices(choices) estimator.end_estimation() # update timetable to reflect the override choices (assign tours in tour_num order) timetable.rollback() for tour_num, nth_tours in non_mandatory_tours.groupby('tour_num', sort=True): timetable.assign(window_row_ids=nth_tours['person_id'], tdds=choices.reindex(nth_tours.index)) timetable.replace_table() # choices are tdd alternative ids # we want to add start, end, and duration columns to tours, which we have in tdd_alts table choices = pd.merge(choices.to_frame('tdd'), tdd_alts, left_on=['tdd'], right_index=True, how='left') assign_in_place(tours, choices) pipeline.replace_table("tours", tours) # updated df for tracing non_mandatory_tours = tours[tours.tour_category == 'non_mandatory'] tracing.dump_df(DUMP, tt.tour_map(persons_merged, non_mandatory_tours, tdd_alts), trace_label, 'tour_map') if trace_hh_id: tracing.trace_df(non_mandatory_tours, label="non_mandatory_tour_scheduling", slicer='person_id', index_label='tour_id', columns=None, warn_if_empty=True)
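# ---------------------------------------------------------------------
# Illustration (not part of the model): the merge above turns tdd alternative
# ids into start/end/duration columns by joining against the tdd_alts lookup
# (choices indexed by tour_id, tdd_alts indexed by alternative id). Toy sketch.
import pandas as pd

tdd_alts = pd.DataFrame({'start': [5, 5, 6], 'end': [10, 11, 12],
                         'duration': [5, 6, 6]})           # index = tdd alt id
choices = pd.Series([2, 0], index=[900, 901], name='tdd')  # tour_id -> chosen tdd

tours_tdd = pd.merge(choices.to_frame('tdd'), tdd_alts,
                     left_on=['tdd'], right_index=True, how='left')
print(tours_tdd)  # tour 900 -> start 6 / end 12, tour 901 -> start 5 / end 10
# ---------------------------------------------------------------------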
def tour_mode_choice_simulate(tours, persons_merged, network_los, chunk_size, trace_hh_id): """ Tour mode choice simulate """ trace_label = 'tour_mode_choice' model_settings_file_name = 'tour_mode_choice.yaml' model_settings = config.read_model_settings(model_settings_file_name) logsum_column_name = model_settings.get('MODE_CHOICE_LOGSUM_COLUMN_NAME') mode_column_name = 'tour_mode' primary_tours = tours.to_frame() assert not (primary_tours.tour_category == 'atwork').any() logger.info("Running %s with %d tours" % (trace_label, primary_tours.shape[0])) tracing.print_summary('tour_types', primary_tours.tour_type, value_counts=True) persons_merged = persons_merged.to_frame() primary_tours_merged = pd.merge(primary_tours, persons_merged, left_on='person_id', right_index=True, how='left', suffixes=('', '_r')) constants = {} # model_constants can appear in expressions constants.update(config.get_model_constants(model_settings)) skim_dict = network_los.get_default_skim_dict() # setup skim keys orig_col_name = 'home_zone_id' dest_col_name = 'destination' out_time_col_name = 'start' in_time_col_name = 'end' odt_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=orig_col_name, dest_key=dest_col_name, dim3_key='out_period') dot_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=dest_col_name, dest_key=orig_col_name, dim3_key='in_period') odr_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=orig_col_name, dest_key=dest_col_name, dim3_key='in_period') dor_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=dest_col_name, dest_key=orig_col_name, dim3_key='out_period') od_skim_stack_wrapper = skim_dict.wrap(orig_col_name, dest_col_name) skims = { "odt_skims": odt_skim_stack_wrapper, "dot_skims": dot_skim_stack_wrapper, "odr_skims": odr_skim_stack_wrapper, # dot return skims for e.g. TNC bridge return fare "dor_skims": dor_skim_stack_wrapper, # odt return skims for e.g. TNC bridge return fare "od_skims": od_skim_stack_wrapper, 'orig_col_name': orig_col_name, 'dest_col_name': dest_col_name, 'out_time_col_name': out_time_col_name, 'in_time_col_name': in_time_col_name } if network_los.zone_system == los.THREE_ZONE: # fixme - is this a lightweight object? 
tvpb = network_los.tvpb tvpb_logsum_odt = tvpb.wrap_logsum(orig_key=orig_col_name, dest_key=dest_col_name, tod_key='out_period', segment_key='demographic_segment', cache_choices=True, trace_label=trace_label, tag='tvpb_logsum_odt') tvpb_logsum_dot = tvpb.wrap_logsum(orig_key=dest_col_name, dest_key=orig_col_name, tod_key='in_period', segment_key='demographic_segment', cache_choices=True, trace_label=trace_label, tag='tvpb_logsum_dot') skims.update({ 'tvpb_logsum_odt': tvpb_logsum_odt, 'tvpb_logsum_dot': tvpb_logsum_dot }) # TVPB constants can appear in expressions constants.update( network_los.setting('TVPB_SETTINGS.tour_mode_choice.CONSTANTS')) estimator = estimation.manager.begin_estimation('tour_mode_choice') if estimator: estimator.write_coefficients(model_settings=model_settings) estimator.write_coefficients_template(model_settings=model_settings) estimator.write_spec(model_settings) estimator.write_model_settings(model_settings, model_settings_file_name) # (run_tour_mode_choice_simulate writes choosers post-annotation) # FIXME should normalize handling of tour_type and tour_purpose # mtctm1 school tour_type includes univ, which has different coefficients from elementary and HS # we should either add this column when tours created or add univ to tour_types not_university = (primary_tours_merged.tour_type != 'school') | ~primary_tours_merged.is_university primary_tours_merged['tour_purpose'] = \ primary_tours_merged.tour_type.where(not_university, 'univ') choices_list = [] for tour_purpose, tours_segment in primary_tours_merged.groupby( 'tour_purpose'): logger.info("tour_mode_choice_simulate tour_type '%s' (%s tours)" % ( tour_purpose, len(tours_segment.index), )) if network_los.zone_system == los.THREE_ZONE: tvpb_logsum_odt.extend_trace_label(tour_purpose) tvpb_logsum_dot.extend_trace_label(tour_purpose) # name index so tracing knows how to slice assert tours_segment.index.name == 'tour_id' choices_df = run_tour_mode_choice_simulate( tours_segment, tour_purpose, model_settings, mode_column_name=mode_column_name, logsum_column_name=logsum_column_name, network_los=network_los, skims=skims, constants=constants, estimator=estimator, chunk_size=chunk_size, trace_label=tracing.extend_trace_label(trace_label, tour_purpose), trace_choice_name='tour_mode_choice') tracing.print_summary('tour_mode_choice_simulate %s choices_df' % tour_purpose, choices_df.tour_mode, value_counts=True) choices_list.append(choices_df) choices_df = pd.concat(choices_list) # add cached tvpb_logsum tap choices for modes specified in tvpb_mode_path_types if network_los.zone_system == los.THREE_ZONE: tvpb_mode_path_types = model_settings.get('tvpb_mode_path_types') for mode, path_types in tvpb_mode_path_types.items(): for direction, skim in zip(['od', 'do'], [tvpb_logsum_odt, tvpb_logsum_dot]): path_type = path_types[direction] skim_cache = skim.cache[path_type] print( f"mode {mode} direction {direction} path_type {path_type}") for c in skim_cache: dest_col = f'{direction}_{c}' if dest_col not in choices_df: choices_df[ dest_col] = np.nan if pd.api.types.is_numeric_dtype( skim_cache[c]) else '' choices_df[dest_col].where(choices_df.tour_mode != mode, skim_cache[c], inplace=True) if estimator: estimator.write_choices(choices_df.tour_mode) choices_df.tour_mode = estimator.get_survey_values( choices_df.tour_mode, 'tours', 'tour_mode') estimator.write_override_choices(choices_df.tour_mode) estimator.end_estimation() tracing.print_summary('tour_mode_choice_simulate all tour type choices', choices_df.tour_mode, 
value_counts=True) # so we can trace with annotations assign_in_place(primary_tours, choices_df) # update tours table with mode choice (and optionally logsums) all_tours = tours.to_frame() assign_in_place(all_tours, choices_df) pipeline.replace_table("tours", all_tours) if trace_hh_id: tracing.trace_df(primary_tours, label=tracing.extend_trace_label( trace_label, mode_column_name), slicer='tour_id', index_label='tour_id', warn_if_empty=True)
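# ---------------------------------------------------------------------
# Illustration (not part of the model): the 'univ' relabeling above uses
# Series.where - keep tour_type wherever the row is NOT a university student's
# school tour, substitute 'univ' otherwise. Toy sketch.
import pandas as pd

df = pd.DataFrame({'tour_type':     ['school', 'school', 'work'],
                   'is_university': [True,     False,    False]})
not_university = (df.tour_type != 'school') | ~df.is_university
df['tour_purpose'] = df.tour_type.where(not_university, 'univ')
print(df.tour_purpose.tolist())  # ['univ', 'school', 'work']
# ---------------------------------------------------------------------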
def demographics_processor(persons, persons_merged, demographics_spec,
                           demographics_settings, chunk_size, trace_hh_id):

    # the choice model will be applied to each row of the choosers table (a pandas.DataFrame)
    persons_df = persons_merged.to_frame()

    logger.info("Running demographics_processor with %d persons (chunk size = %s)" %
                (len(persons_df), chunk_size))

    # locals whose values will be accessible to the execution context
    # when the expressions in spec are applied to choosers
    locals_dict = config.get_model_constants(demographics_settings)
    locals_dict.update(config.setting('globals'))

    trace_rows = trace_hh_id and persons_df['household_id'] == trace_hh_id

    # assign_variables evaluates each of the expressions in spec
    # in the context of each row of the choosers dataframe
    results, trace_results, trace_assigned_locals \
        = assign.assign_variables(demographics_spec,
                                  persons_df,
                                  locals_dict,
                                  df_alias='persons',
                                  trace_rows=trace_rows)

    # add assigned columns to persons as they are needed by downstream processors
    persons = persons.to_frame()
    assign_in_place(persons, results)
    pipeline.replace_table("persons", persons)

    # coc groups with counts
    # TODO - should we allow specifying which assigned columns are coc (e.g. in settings?)
    # for now, assume all assigned columns are coc, but this could cramp modelers' style
    # if they want to create additional demographic columns for downstream use that aren't coc
    coc_columns = list(results.columns)

    inject.add_injectable("coc_column_names", coc_columns)

    # - create table with coc columns as indexes and a single column 'persons' with counts
    # index                        persons
    # coc_poverty coc_age
    # False       False                 20
    #             True                   3
    # True        False                  4
    coc_grouped = results.groupby(coc_columns)
    coc_grouped = coc_grouped[coc_columns[0]].count().to_frame(name='persons')

    pipeline.replace_table("coc_results", coc_grouped)

    add_summary_results(coc_grouped)

    if trace_hh_id:

        if trace_results is not None:
            tracing.write_csv(trace_results,
                              file_name="demographics",
                              index_label='person_idx',
                              column_labels=['label', 'person'])

        if trace_assigned_locals:
            tracing.write_csv(trace_assigned_locals, file_name="demographics_locals")
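# ---------------------------------------------------------------------
# Illustration (not part of the model): coc_grouped above counts persons per
# combination of the assigned community-of-concern flags, matching the example
# layout in the comment. Toy sketch with two boolean coc columns.
import pandas as pd

results = pd.DataFrame({'coc_poverty': [False, False, True, False],
                        'coc_age':     [False, True,  False, False]})
coc_columns = list(results.columns)
coc_grouped = results.groupby(coc_columns)[coc_columns[0]].count().to_frame(name='persons')
print(coc_grouped)  # persons: (False, False) -> 2, (False, True) -> 1, (True, False) -> 1
# ---------------------------------------------------------------------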
def mandatory_tour_scheduling(tours, persons_merged, tdd_alts, chunk_size,
                              trace_hh_id):
    """
    This model predicts the departure time and duration of each activity for
    mandatory tours
    """
    trace_label = 'mandatory_tour_scheduling'
    model_settings_file_name = 'mandatory_tour_scheduling.yaml'

    model_settings = config.read_model_settings(model_settings_file_name)
    logsum_settings = config.read_model_settings(model_settings['LOGSUM_SETTINGS'])

    tours = tours.to_frame()
    mandatory_tours = tours[tours.tour_category == 'mandatory']

    # - if no mandatory_tours
    if mandatory_tours.shape[0] == 0:
        tracing.no_results(trace_label)
        return

    persons_merged = persons_merged.to_frame()

    # - filter chooser columns for both logsums and simulate
    logsum_columns = logsum_settings.get('LOGSUM_CHOOSER_COLUMNS', [])
    model_columns = model_settings.get('SIMULATE_CHOOSER_COLUMNS', [])
    chooser_columns = logsum_columns + [c for c in model_columns
                                        if c not in logsum_columns]
    persons_merged = expressions.filter_chooser_columns(persons_merged, chooser_columns)

    # - add tour segmentation column
    # mtctm1 segments mandatory_scheduling spec by tour_type
    # (i.e. there are different specs for work and school tour_types)
    # mtctm1 logsum coefficients are segmented by primary_purpose
    # (i.e. there are different logsum coefficients for work, school, univ primary_purposes)
    # for simplicity managing these different segmentation schemes,
    # we conflate them by segmenting the skims to align with primary_purpose
    tour_segment_col = 'mandatory_tour_seg'
    assert tour_segment_col not in mandatory_tours
    is_university_tour = \
        (mandatory_tours.tour_type == 'school') & \
        reindex(persons_merged.is_university, mandatory_tours.person_id)
    mandatory_tours[tour_segment_col] = \
        mandatory_tours.tour_type.where(~is_university_tour, 'univ')

    # load specs
    spec_segment_settings = model_settings.get('SPEC_SEGMENTS', {})
    specs = {}
    estimators = {}
    for spec_segment_name, spec_settings in spec_segment_settings.items():

        # estimator for this tour_segment
        estimator = estimation.manager.begin_estimation(
            model_name='mandatory_tour_scheduling_%s' % spec_segment_name,
            bundle_name='mandatory_tour_scheduling')

        spec_file_name = spec_settings['SPEC']
        model_spec = simulate.read_model_spec(file_name=spec_file_name)
        coefficients_df = simulate.read_model_coefficients(
            spec_segment_settings[spec_segment_name])
        specs[spec_segment_name] = simulate.eval_coefficients(
            model_spec, coefficients_df, estimator)

        if estimator:
            estimators[spec_segment_name] = estimator  # add to local list
            estimator.write_model_settings(model_settings, model_settings_file_name)
            estimator.write_spec(spec_settings)
            estimator.write_coefficients(coefficients_df)

    # - spec dict segmented by primary_purpose
    tour_segment_settings = model_settings.get('TOUR_SPEC_SEGMENTS', {})
    tour_segments = {}
    for tour_segment_name, spec_segment_name in tour_segment_settings.items():
        tour_segments[tour_segment_name] = {}
        tour_segments[tour_segment_name]['spec_segment_name'] = spec_segment_name
        tour_segments[tour_segment_name]['spec'] = specs[spec_segment_name]
        tour_segments[tour_segment_name]['estimator'] = estimators.get(spec_segment_name)

    timetable = inject.get_injectable("timetable")

    if estimators:
        timetable.begin_transaction(list(estimators.values()))

    logger.info("Running mandatory_tour_scheduling with %d tours", len(tours))
    choices = vts.vectorize_tour_scheduling(
        mandatory_tours, persons_merged,
        tdd_alts, timetable,
        tour_segments=tour_segments,
        tour_segment_col=tour_segment_col,
        model_settings=model_settings,
        chunk_size=chunk_size,
        trace_label=trace_label)

    if estimators:
        # override choices for all estimators
        choices_list = []
        for spec_segment_name, estimator in estimators.items():
            model_choices = choices[(mandatory_tours.tour_type == spec_segment_name)]

            # FIXME vectorize_tour_scheduling calls used to write_choices but perhaps shouldn't
            estimator.write_choices(model_choices)
            override_choices = estimator.get_survey_values(model_choices, 'tours', 'tdd')
            estimator.write_override_choices(override_choices)

            choices_list.append(override_choices)
            estimator.end_estimation()
        choices = pd.concat(choices_list)

        # update timetable to reflect the override choices (assign tours in tour_num order)
        timetable.rollback()
        for tour_num, nth_tours in tours.groupby('tour_num', sort=True):
            timetable.assign(window_row_ids=nth_tours['person_id'],
                             tdds=choices.reindex(nth_tours.index))

    # choices are tdd alternative ids
    # we want to add start, end, and duration columns to tours, which we have in tdd_alts table
    choices = pd.merge(choices.to_frame('tdd'), tdd_alts,
                       left_on=['tdd'], right_index=True, how='left')

    assign_in_place(tours, choices)
    pipeline.replace_table("tours", tours)

    timetable.replace_table()

    # updated df for tracing
    mandatory_tours = tours[tours.tour_category == 'mandatory']

    tracing.dump_df(DUMP,
                    tt.tour_map(persons_merged, mandatory_tours, tdd_alts),
                    trace_label, 'tour_map')

    if trace_hh_id:
        tracing.trace_df(mandatory_tours,
                         label="mandatory_tour_scheduling",
                         slicer='person_id',
                         index_label='tour',
                         columns=None,
                         warn_if_empty=True)
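# ---------------------------------------------------------------------
# Illustration (not part of the model): reindex(series, index_series) above
# broadcasts a person-level attribute onto tours keyed by person_id. A sketch of
# what an ActivitySim-style reindex helper does (the real helper lives in
# activitysim.core.util; this local version is for illustration only).
import pandas as pd

def reindex_sketch(series1, series2):
    # look up series1 at series2's values, returning a result aligned to series2's index
    return pd.Series(series1.loc[series2.values].values, index=series2.index)

is_university = pd.Series([True, False], index=[1, 2])         # index = person_id
tours_person_id = pd.Series([1, 1, 2], index=[700, 701, 702])  # tour_id -> person_id
print(reindex_sketch(is_university, tours_person_id))  # True, True, False for tours 700-702
# ---------------------------------------------------------------------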
def atwork_subtour_mode_choice(tours, persons_merged, skim_dict, skim_stack, chunk_size, trace_hh_id): """ At-work subtour mode choice simulate """ trace_label = 'atwork_subtour_mode_choice' model_settings = config.read_model_settings('tour_mode_choice.yaml') spec = tour_mode_choice_spec(model_settings) tours = tours.to_frame() subtours = tours[tours.tour_category == 'atwork'] # - if no atwork subtours if subtours.shape[0] == 0: tracing.no_results(trace_label) return subtours_merged = \ pd.merge(subtours, persons_merged.to_frame(), left_on='person_id', right_index=True, how='left') nest_spec = config.get_logit_model_settings(model_settings) constants = config.get_model_constants(model_settings) logger.info("Running %s with %d subtours" % (trace_label, subtours_merged.shape[0])) tracing.print_summary('%s tour_type' % trace_label, subtours_merged.tour_type, value_counts=True) # setup skim keys orig_col_name = 'workplace_taz' dest_col_name = 'destination' out_time_col_name = 'start' in_time_col_name = 'end' odt_skim_stack_wrapper = skim_stack.wrap(left_key=orig_col_name, right_key=dest_col_name, skim_key='out_period') dot_skim_stack_wrapper = skim_stack.wrap(left_key=dest_col_name, right_key=orig_col_name, skim_key='in_period') od_skim_stack_wrapper = skim_dict.wrap(orig_col_name, dest_col_name) skims = { "odt_skims": odt_skim_stack_wrapper, "dot_skims": dot_skim_stack_wrapper, "od_skims": od_skim_stack_wrapper, 'orig_col_name': orig_col_name, 'dest_col_name': dest_col_name, 'out_time_col_name': out_time_col_name, 'in_time_col_name': in_time_col_name } choices = run_tour_mode_choice_simulate( subtours_merged, spec, tour_purpose='atwork', model_settings=model_settings, skims=skims, constants=constants, nest_spec=nest_spec, chunk_size=chunk_size, trace_label=trace_label, trace_choice_name='tour_mode_choice') tracing.print_summary('%s choices' % trace_label, choices, value_counts=True) assign_in_place(tours, choices.to_frame('tour_mode')) pipeline.replace_table("tours", tours) if trace_hh_id: tracing.trace_df(tours[tours.tour_category == 'atwork'], label=tracing.extend_trace_label( trace_label, 'tour_mode'), slicer='tour_id', index_label='tour_id') force_garbage_collect()
def trip_purpose_and_destination(trips, tours_merged, chunk_size, trace_hh_id):

    trace_label = "trip_purpose_and_destination"
    model_settings = config.read_model_settings('trip_purpose_and_destination.yaml')

    # for consistency, read sample_table_name setting from trip_destination settings file
    trip_destination_model_settings = config.read_model_settings('trip_destination.yaml')
    sample_table_name = trip_destination_model_settings.get('DEST_CHOICE_SAMPLE_TABLE_NAME')
    want_sample_table = config.setting('want_dest_choice_sample_tables') \
        and sample_table_name is not None

    MAX_ITERATIONS = model_settings.get('MAX_ITERATIONS', 5)

    trips_df = trips.to_frame()
    tours_merged_df = tours_merged.to_frame()

    if trips_df.empty:
        logger.info("%s - no trips. Nothing to do." % trace_label)
        return

    # FIXME could allow MAX_ITERATIONS=0 to allow for cleanup-only run
    # in which case, we would need to drop bad trips, WITHOUT failing bad_trip leg_mates
    assert (MAX_ITERATIONS > 0)

    # if trip_destination has been run before, keep only failed trips (and leg_mates) to retry
    if 'destination' in trips_df:

        if 'failed' not in trips_df.columns:
            # trip_destination model cleaned up any failed trips
            logger.info("%s - no failed column from prior model run." % trace_label)
            return

        elif not trips_df.failed.any():
            # 'failed' column but no failed trips from prior run of trip_destination
            logger.info("%s - no failed trips from prior model run." % trace_label)
            trips_df.drop(columns='failed', inplace=True)
            pipeline.replace_table("trips", trips_df)
            return

        else:
            logger.info("trip_destination has already been run. Rerunning failed trips")
            flag_failed_trip_leg_mates(trips_df, 'failed')
            trips_df = trips_df[trips_df.failed]
            tours_merged_df = tours_merged_df[tours_merged_df.index.isin(trips_df.tour_id)]
            logger.info("Rerunning %s failed trips and leg-mates" % trips_df.shape[0])

            # drop any previously saved samples of failed trips
            if want_sample_table and pipeline.is_table(sample_table_name):
                logger.info("Dropping any previously saved samples of failed trips")
                save_sample_df = pipeline.get_table(sample_table_name)
                save_sample_df.drop(trips_df.index, level='trip_id', inplace=True)
                pipeline.replace_table(sample_table_name, save_sample_df)
                del save_sample_df

    # if we estimated trip_destination, there should have been no failed trips
    # if we didn't, but it is enabled, it is probably a configuration error
    # if we just estimated trip_purpose, it isn't clear what they are trying to do, nor how to handle it
    assert not (estimation.manager.begin_estimation('trip_purpose')
                or estimation.manager.begin_estimation('trip_destination'))

    processed_trips = []
    save_samples = []
    i = 0
    TRIP_RESULT_COLUMNS = ['purpose', 'destination', 'origin', 'failed']
    while True:

        i += 1

        for c in TRIP_RESULT_COLUMNS:
            if c in trips_df:
                del trips_df[c]

        trips_df, save_sample_df = run_trip_purpose_and_destination(
            trips_df,
            tours_merged_df,
            chunk_size=chunk_size,
            trace_hh_id=trace_hh_id,
            trace_label=tracing.extend_trace_label(trace_label, "i%s" % i))

        # if testing, make sure at least one trip fails
        if config.setting('testing_fail_trip_destination', False) \
                and (i == 1) and not trips_df.failed.any():
            fail_o = trips_df[trips_df.trip_num < trips_df.trip_count].origin.max()
            trips_df.failed = (trips_df.origin == fail_o) & \
                              (trips_df.trip_num < trips_df.trip_count)

        num_failed_trips = trips_df.failed.sum()

        # if there were no failed trips, we are done
        if num_failed_trips == 0:
            processed_trips.append(trips_df[TRIP_RESULT_COLUMNS])
            if save_sample_df is not None:
                save_samples.append(save_sample_df)
            break

        logger.warning("%s %s failed trips in iteration %s" %
                       (trace_label, num_failed_trips, i))
        file_name = "%s_i%s_failed_trips" % (trace_label, i)
        logger.info("writing failed trips to %s" % file_name)
        tracing.write_csv(trips_df[trips_df.failed], file_name=file_name, transpose=False)

        # if max iterations reached, add remaining trips to processed_trips and give up
        # note that we do this BEFORE failing leg_mates so resulting trip legs are complete
        if i >= MAX_ITERATIONS:
            logger.warning("%s too many iterations %s" % (trace_label, i))
            processed_trips.append(trips_df[TRIP_RESULT_COLUMNS])
            if save_sample_df is not None:
                save_sample_df.drop(trips_df[trips_df.failed].index,
                                    level='trip_id', inplace=True)
                save_samples.append(save_sample_df)
            break

        # otherwise, if any trips failed, then their leg-mates trips must also fail
        flag_failed_trip_leg_mates(trips_df, 'failed')

        # add the good trips to processed_trips
        processed_trips.append(trips_df[~trips_df.failed][TRIP_RESULT_COLUMNS])

        # and keep the failed ones to retry
        trips_df = trips_df[trips_df.failed]
        tours_merged_df = tours_merged_df[tours_merged_df.index.isin(trips_df.tour_id)]

        # add trip samples of the good trips to save_samples
        if save_sample_df is not None:
            # drop failed trip samples
            save_sample_df.drop(trips_df.index, level='trip_id', inplace=True)
            save_samples.append(save_sample_df)

    # - assign result columns to trips
    processed_trips = pd.concat(processed_trips)

    if len(save_samples) > 0:
        save_sample_df = pd.concat(save_samples)
        logger.info("adding %s samples to %s" % (len(save_sample_df), sample_table_name))
        pipeline.extend_table(sample_table_name, save_sample_df)

    logger.info("%s %s failed trips after %s iterations" %
                (trace_label, processed_trips.failed.sum(), i))

    trips_df = trips.to_frame()
    assign_in_place(trips_df, processed_trips)

    trips_df = cleanup_failed_trips(trips_df)

    pipeline.replace_table("trips", trips_df)

    # check to make sure we wrote sample file if requested
    if want_sample_table and len(trips_df) > 0:
        assert pipeline.is_table(sample_table_name)
        # since we have saved samples for all successful trips
        # once we discard failed trips, we should have samples for all trips
        save_sample_df = pipeline.get_table(sample_table_name)
        # expect samples only for intermediate trip destinations
        assert \
            len(save_sample_df.index.get_level_values(0).unique()) == \
            len(trips_df[trips_df.trip_num < trips_df.trip_count])
        del save_sample_df

    if trace_hh_id:
        tracing.trace_df(trips_df,
                         label=trace_label,
                         slicer='trip_id',
                         index_label='trip_id',
                         warn_if_empty=True)
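# ---------------------------------------------------------------------
# Illustration (not part of the model): the save_sample_df bookkeeping above
# drops failed trips by the 'trip_id' level of a MultiIndex, which removes every
# sampled alternative row for those trips at once. Toy sketch; the second index
# level name ('alt_dest') is an assumption for illustration.
import pandas as pd

idx = pd.MultiIndex.from_tuples([(1, 10), (1, 11), (2, 10)],
                                names=['trip_id', 'alt_dest'])
save_sample_df = pd.DataFrame({'prob': [0.4, 0.6, 1.0]}, index=idx)
save_sample_df.drop([1], level='trip_id', inplace=True)
print(save_sample_df)  # only trip 2's sample row remains
# ---------------------------------------------------------------------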