def trip_scheduling_choice(trips, tours, skim_dict, chunk_size, trace_hh_id):

    trace_label = 'trip_scheduling_choice'
    model_settings = config.read_model_settings('trip_scheduling_choice.yaml')
    spec = get_spec_for_segment(model_settings, 'SPECIFICATION', 'stage_one')

    trips_df = trips.to_frame()
    tours_df = tours.to_frame()

    outbound_trips = trips_df[trips_df[OUTBOUND_FLAG]]
    inbound_trips = trips_df[~trips_df[OUTBOUND_FLAG]]

    last_outbound_trip = trips_df.loc[outbound_trips.groupby('tour_id')['trip_num'].idxmax()]
    first_inbound_trip = trips_df.loc[inbound_trips.groupby('tour_id')['trip_num'].idxmin()]

    tours_df[NUM_OB_STOPS] = outbound_trips.groupby('tour_id').size().reindex(tours.index) - 1
    tours_df[NUM_IB_STOPS] = inbound_trips.groupby('tour_id').size().reindex(tours.index) - 1
    tours_df[LAST_OB_STOP] = \
        last_outbound_trip[['tour_id', 'origin']].set_index('tour_id').reindex(tours.index)
    tours_df[FIRST_IB_STOP] = \
        first_inbound_trip[['tour_id', 'destination']].set_index('tour_id').reindex(tours.index)

    # default to empty skims/locals so run_trip_scheduling_choice can be called
    # even when no preprocessor is configured
    skims = []
    locals_dict = {}

    preprocessor_settings = model_settings.get('PREPROCESSOR', None)

    if preprocessor_settings:
        # hack: preprocessor adds origin column in place if it does not exist already
        od_skim_stack_wrapper = skim_dict.wrap('origin', 'destination')
        do_skim_stack_wrapper = skim_dict.wrap('destination', 'origin')
        obib_skim_stack_wrapper = skim_dict.wrap(LAST_OB_STOP, FIRST_IB_STOP)

        skims = [od_skim_stack_wrapper, do_skim_stack_wrapper, obib_skim_stack_wrapper]

        locals_dict = {
            "od_skims": od_skim_stack_wrapper,
            "do_skims": do_skim_stack_wrapper,
            "obib_skims": obib_skim_stack_wrapper
        }

        simulate.set_skim_wrapper_targets(tours_df, skims)

        expressions.assign_columns(
            df=tours_df,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)

    tours_df = run_trip_scheduling_choice(spec, tours_df, skims, locals_dict,
                                          chunk_size, trace_hh_id, trace_label)

    pipeline.replace_table("tours", tours_df)
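# A minimal standalone sketch of the groupby/idxmax/reindex pattern used above to
# summarize trips onto tours. The toy trips below are hypothetical; only the pandas
# semantics are illustrated, not ActivitySim's actual tables.
import pandas as pd

trips_demo = pd.DataFrame({
    'tour_id': [1, 1, 1, 2, 2],
    'trip_num': [1, 2, 3, 1, 2],
    'origin': [10, 20, 30, 40, 50],
}, index=pd.Index([101, 102, 103, 201, 202], name='trip_id'))

tours_index = pd.Index([1, 2, 3], name='tour_id')  # tour 3 has no outbound trips

# last outbound trip per tour: idxmax returns the trip_id of the max trip_num
last_trip = trips_demo.loc[trips_demo.groupby('tour_id')['trip_num'].idxmax()]

# number of intermediate stops = trips on the half-tour minus one;
# reindex aligns to the full tours index, leaving NaN for tours with no trips
num_ob_stops = trips_demo.groupby('tour_id').size().reindex(tours_index) - 1

# origin of the last outbound trip, aligned to tours
last_ob_stop = last_trip[['tour_id', 'origin']].set_index('tour_id').reindex(tours_index)

print(num_ob_stops)   # tour 1 -> 2, tour 2 -> 1, tour 3 -> NaN
print(last_ob_stop)   # tour 1 -> 30, tour 2 -> 50, tour 3 -> NaN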
def run_alts_preprocessor(model_settings, alts, segment, locals_dict, trace_label):
    """
    run preprocessor on alts, as specified by ALTS_PREPROCESSOR in model_settings

    we are agnostic on whether alts are merged or not

    Parameters
    ----------
    model_settings: dict
        yaml model settings file as dict
    alts: pandas.DataFrame
        tdd_alts or tdd_alts merged with choosers (we are agnostic)
    segment: string
        segment selector as understood by caller (e.g. logsum_tour_purpose)
    locals_dict: dict
        we let the caller worry about what needs to be in it (depends on the modeler's needs)
    trace_label: string

    Returns
    -------
    alts: pandas.DataFrame
        annotated copy of alts
    """

    preprocessor_settings = model_settings.get('ALTS_PREPROCESSOR', {})

    if segment in preprocessor_settings:
        # segmented by logsum_tour_purpose
        preprocessor_settings = preprocessor_settings.get(segment)
        logger.debug(f"running ALTS_PREPROCESSOR with spec for {segment}: "
                     f"{preprocessor_settings.get('SPEC')}")
    elif 'SPEC' in preprocessor_settings:
        # unsegmented (either because no segmentation, or fallback if settings has generic preprocessor)
        logger.debug(f"running ALTS_PREPROCESSOR with unsegmented spec "
                     f"{preprocessor_settings.get('SPEC')}")
    else:
        logger.debug(f"skipping alts preprocessor because no ALTS_PREPROCESSOR segment for {segment}")
        preprocessor_settings = None

    if preprocessor_settings:

        logger.debug(f"run_alts_preprocessor calling assign_columns for {segment} preprocessor_settings")
        alts = alts.copy()

        expressions.assign_columns(
            df=alts,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)

    return alts
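# A small sketch of the settings-lookup fallback implemented above: a segment key
# wins over a generic 'SPEC', and anything else is a no-op. The settings contents
# here are hypothetical; only the dict-dispatch logic is shown.
def resolve_alts_preprocessor(model_settings, segment):
    preprocessor_settings = model_settings.get('ALTS_PREPROCESSOR', {})
    if segment in preprocessor_settings:
        return preprocessor_settings[segment]      # segmented spec
    if 'SPEC' in preprocessor_settings:
        return preprocessor_settings               # generic, unsegmented spec
    return None                                    # nothing to run

settings_demo = {'ALTS_PREPROCESSOR': {'work': {'SPEC': 'alts_work.csv'},
                                       'SPEC': 'alts_generic.csv'}}
assert resolve_alts_preprocessor(settings_demo, 'work') == {'SPEC': 'alts_work.csv'}
assert resolve_alts_preprocessor(settings_demo, 'school') == settings_demo['ALTS_PREPROCESSOR']
assert resolve_alts_preprocessor({}, 'work') is None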
def annotate_jtp(model_settings, trace_label):

    # - annotate persons
    persons = inject.get_table('persons').to_frame()
    expressions.assign_columns(
        df=persons,
        model_settings=model_settings.get('annotate_persons'),
        trace_label=tracing.extend_trace_label(trace_label, 'annotate_persons'))

    pipeline.replace_table("persons", persons)
def annotate_tables(model_settings, trace_label):

    trace_label = tracing.extend_trace_label(trace_label, 'annotate_tables')

    chunk.log_rss(trace_label)

    annotate_tables = model_settings.get('annotate_tables', [])

    if not annotate_tables:
        logger.warning(f"{trace_label} - annotate_tables setting is empty - nothing to do!")

    assert isinstance(annotate_tables, list), \
        f"annotate_tables settings should be a list but is {type(annotate_tables)}"

    t0 = tracing.print_elapsed_time()

    for table_info in annotate_tables:

        tablename = table_info['tablename']

        chunk.log_rss(f"{trace_label}.pre-get_table.{tablename}")

        df = inject.get_table(tablename).to_frame()
        chunk.log_df(trace_label, tablename, df)

        # - rename columns
        column_map = table_info.get('column_map', None)
        if column_map:

            warnings.warn(
                "Setting 'column_map' has been changed to 'rename_columns'. "
                "Support for 'column_map' in annotate_tables will be removed in future versions.",
                FutureWarning)

            logger.info(f"{trace_label} - renaming {tablename} columns {column_map}")
            df.rename(columns=column_map, inplace=True)

        # - annotate
        annotate = table_info.get('annotate', None)
        if annotate:
            logger.info(f"{trace_label} - annotating {tablename} SPEC {annotate['SPEC']}")
            expressions.assign_columns(
                df=df,
                model_settings=annotate,
                trace_label=trace_label)

        chunk.log_df(trace_label, tablename, df)

        # - write table to pipeline
        pipeline.replace_table(tablename, df)

        del df
        chunk.log_df(trace_label, tablename, None)
def trip_departure_choice(trips, trips_merged, skim_dict, chunk_size, trace_hh_id):

    trace_label = 'trip_departure_choice'
    model_settings = config.read_model_settings('trip_departure_choice.yaml')

    spec = simulate.read_model_spec(file_name=model_settings['SPECIFICATION'])

    trips_merged_df = trips_merged.to_frame()

    # add tour-based chunk_id so we can chunk all trips in tour together
    tour_ids = trips_merged[TOUR_ID].unique()
    trips_merged_df['chunk_id'] = \
        reindex(pd.Series(list(range(len(tour_ids))), tour_ids), trips_merged_df.tour_id)

    max_tour_id = trips_merged[TOUR_ID].max()

    trip_departure_choice.MAX_TOUR_ID = int(np.power(10, np.ceil(np.log10(max_tour_id))))

    locals_d = config.get_model_constants(model_settings).copy()

    preprocessor_settings = model_settings.get('PREPROCESSOR', None)
    tour_legs = get_tour_legs(trips_merged_df)
    pipeline.get_rn_generator().add_channel('tour_legs', tour_legs)

    if preprocessor_settings:
        od_skim = skim_dict.wrap('origin', 'destination')
        do_skim = skim_dict.wrap('destination', 'origin')

        skims = [od_skim, do_skim]

        simulate.set_skim_wrapper_targets(trips_merged_df, skims)

        locals_d.update({
            "od_skims": od_skim,
            "do_skims": do_skim,
        })

        expressions.assign_columns(
            df=trips_merged_df,
            model_settings=preprocessor_settings,
            locals_dict=locals_d,
            trace_label=trace_label)

    choices = apply_stage_two_model(spec, trips_merged_df, chunk_size, trace_label)

    trips_df = trips.to_frame()
    trip_length = len(trips_df)
    trips_df = pd.concat([trips_df, choices], axis=1)
    assert len(trips_df) == trip_length
    assert trips_df[trips_df['depart'].isnull()].empty

    pipeline.replace_table("trips", trips_df)
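# A runnable sketch of two idioms above: (1) mapping each tour_id to a dense
# 0..n-1 chunk_id via reindex, and (2) rounding the max tour id up to the next
# power of ten, so a tour id and a leg ordinal can later be packed into a single
# integer key. Data below are toy values.
import numpy as np
import pandas as pd

trips_demo = pd.DataFrame({'tour_id': [7, 7, 42, 42, 42]})

tour_ids = trips_demo['tour_id'].unique()
# Series mapping tour_id -> 0..n-1, then looked up once per trip
chunk_ids = pd.Series(range(len(tour_ids)), index=tour_ids)
trips_demo['chunk_id'] = chunk_ids.reindex(trips_demo['tour_id']).values

max_tour_id = trips_demo['tour_id'].max()
MAX_TOUR_ID = int(np.power(10, np.ceil(np.log10(max_tour_id))))
assert MAX_TOUR_ID == 100  # smallest power of ten above 42
print(trips_demo)          # trips on tour 7 get chunk_id 0, tour 42 gets 1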
def initialize_tours(network_los, households, persons, trace_hh_id):

    trace_label = 'initialize_tours'

    tours = read_input_table("tours")

    # FIXME can't use households_sliced injectable as flag like persons table does in case of resume_after.
    # FIXME could just always slice...
    slice_happened = inject.get_injectable('households_sample_size', 0) > 0

    if slice_happened:
        logger.info("slicing tours %s" % (tours.shape,))

        # keep all persons in the sampled households
        tours = tours[tours.person_id.isin(persons.index)]

    # annotate before patching tour_id to allow addition of REQUIRED_TOUR_COLUMNS defined above
    model_settings = config.read_model_settings('initialize_tours.yaml', mandatory=True)
    expressions.assign_columns(
        df=tours,
        model_settings=model_settings.get('annotate_tours'),
        trace_label=tracing.extend_trace_label(trace_label, 'annotate_tours'))

    skip_patch_tour_ids = model_settings.get('skip_patch_tour_ids', False)
    if not skip_patch_tour_ids:
        tours = patch_tour_ids(tours)
    assert tours.index.name == 'tour_id'

    # replace table function with dataframe
    inject.add_table('tours', tours)

    pipeline.get_rn_generator().add_channel('tours', tours)

    tracing.register_traceable_table('tours', tours)

    logger.debug(f"{len(tours.household_id.unique())} unique household_ids in tours")
    logger.debug(f"{len(households.index.unique())} unique household_ids in households")
    assert not tours.index.duplicated().any()

    tours_without_persons = ~tours.person_id.isin(persons.index)
    if tours_without_persons.any():
        logger.error(
            f"{tours_without_persons.sum()} tours out of {len(tours)} without persons\n"
            f"{tours.person_id[tours_without_persons]}")
        raise RuntimeError(f"{tours_without_persons.sum()} tours with bad person_id")

    if trace_hh_id:
        tracing.trace_df(tours, label='initialize_tours', warn_if_empty=True)
def compute_utilities(network_los, model_settings, choosers, model_constants,
                      trace_label, trace=False, trace_column_names=None):
    """
    Compute utilities
    """
    with chunk.chunk_log('tvpb compute_utilities'):
        trace_label = tracing.extend_trace_label(trace_label, 'compute_utils')

        logger.debug(f"{trace_label} Running compute_utilities with {choosers.shape[0]} choosers")

        locals_dict = {'np': np, 'los': network_los}
        locals_dict.update(model_constants)

        # we don't grok coefficients, but allow them to use constants in spec alt columns
        spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
        for c in spec.columns:
            if c != simulate.SPEC_LABEL_NAME:
                spec[c] = spec[c].map(lambda s: model_constants.get(s, s)).astype(float)

        with chunk.chunk_log('compute_utilities'):

            # - run preprocessor to annotate choosers
            preprocessor_settings = model_settings.get('PREPROCESSOR')
            if preprocessor_settings:

                # don't want to alter caller's dataframe
                choosers = choosers.copy()

                expressions.assign_columns(
                    df=choosers,
                    model_settings=preprocessor_settings,
                    locals_dict=locals_dict,
                    trace_label=trace_label)

            utilities = simulate.eval_utilities(
                spec,
                choosers,
                locals_d=locals_dict,
                trace_all_rows=trace,
                trace_label=trace_label,
                trace_column_names=trace_column_names)

    return utilities
def annotate_tables(model_settings, trace_label):

    annotate_tables = model_settings.get('annotate_tables', [])

    if not annotate_tables:
        logger.warning(f"{trace_label} - annotate_tables setting is empty - nothing to do!")

    assert isinstance(annotate_tables, list), \
        f"annotate_tables settings should be a list but is {type(annotate_tables)}"

    t0 = tracing.print_elapsed_time()

    for table_info in annotate_tables:

        tablename = table_info['tablename']
        df = inject.get_table(tablename).to_frame()

        # - rename columns
        column_map = table_info.get('column_map', None)
        if column_map:

            warnings.warn(
                f"{trace_label} - annotate_tables option 'column_map' renamed 'rename_columns' "
                f"and moved to global settings file. Support for 'column_map' in annotate_tables "
                f"will be removed in future versions.",
                FutureWarning)

            logger.info(f"{trace_label} - renaming {tablename} columns {column_map}")
            df.rename(columns=column_map, inplace=True)

        # - annotate
        annotate = table_info.get('annotate', None)
        if annotate:
            logger.info(f"{trace_label} - annotating {tablename} SPEC {annotate['SPEC']}")
            expressions.assign_columns(
                df=df,
                model_settings=annotate,
                trace_label=trace_label)

        # fixme - narrow?

        # - write table to pipeline
        pipeline.replace_table(tablename, df)
def add_null_results(trace_label, mandatory_tour_frequency_settings):
    logger.info("Skipping %s: add_null_results", trace_label)

    persons = inject.get_table('persons').to_frame()
    persons['mandatory_tour_frequency'] = ''

    tours = pd.DataFrame()
    tours['tour_category'] = None
    tours['tour_type'] = None
    tours['person_id'] = None
    tours.index.name = 'tour_id'
    pipeline.replace_table("tours", tours)

    expressions.assign_columns(
        df=persons,
        model_settings=mandatory_tour_frequency_settings.get('annotate_persons'),
        trace_label=tracing.extend_trace_label(trace_label, 'annotate_persons'))

    pipeline.replace_table("persons", persons)
def joint_tour_frequency(households, persons, chunk_size, trace_hh_id):
    """
    This model predicts the frequency of making fully joint trips (see the
    alternatives above).
    """
    trace_label = 'joint_tour_frequency'
    model_settings_file_name = 'joint_tour_frequency.yaml'

    estimator = estimation.manager.begin_estimation('joint_tour_frequency')

    model_settings = config.read_model_settings(model_settings_file_name)

    alternatives = simulate.read_model_alts('joint_tour_frequency_alternatives.csv', set_index='alt')

    # - only interested in households with more than one cdap travel_active person and
    # - at least one non-preschooler
    households = households.to_frame()
    multi_person_households = households[households.participates_in_jtf_model].copy()

    # - only interested in persons in multi_person_households
    # FIXME - gratuitous pathological efficiency move, just let yaml specify persons?
    persons = persons.to_frame()
    persons = persons[persons.household_id.isin(multi_person_households.index)]

    logger.info("Running joint_tour_frequency with %d multi-person households" %
                multi_person_households.shape[0])

    # - preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_dict = {
            'persons': persons,
            'hh_time_window_overlap': hh_time_window_overlap
        }

        expressions.assign_columns(
            df=multi_person_households,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients_df = simulate.read_model_coefficients(model_settings)
    model_spec = simulate.eval_coefficients(model_spec, coefficients_df, estimator)

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    if estimator:
        estimator.write_spec(model_settings)
        estimator.write_model_settings(model_settings, model_settings_file_name)
        estimator.write_coefficients(coefficients_df, model_settings)
        estimator.write_choosers(multi_person_households)

    choices = simulate.simple_simulate(
        choosers=multi_person_households,
        spec=model_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='joint_tour_frequency',
        estimator=estimator)

    # convert indexes to alternative names
    choices = pd.Series(model_spec.columns[choices.values], index=choices.index)

    if estimator:
        estimator.write_choices(choices)
        choices = estimator.get_survey_values(choices, 'households', 'joint_tour_frequency')
        estimator.write_override_choices(choices)
        estimator.end_estimation()

    # - create joint_tours based on joint_tour_frequency choices

    # - we need a person_id in order to generate the tour index (and for register_traceable_table)
    # - but we don't know the tour participants yet
    # - so we arbitrarily choose the first person in the household
    # - to be point person for the purpose of generating an index and setting origin
    temp_point_persons = persons.loc[persons.PNUM == 1]
    temp_point_persons['person_id'] = temp_point_persons.index
    temp_point_persons = temp_point_persons.set_index('household_id')
    temp_point_persons = temp_point_persons[['person_id', 'home_zone_id']]

    joint_tours = process_joint_tours(choices, alternatives, temp_point_persons)

    tours = pipeline.extend_table("tours", joint_tours)

    tracing.register_traceable_table('tours', joint_tours)
    pipeline.get_rn_generator().add_channel('tours', joint_tours)

    # - annotate households

    # we expect there to be an alt with no tours - which we can use to backfill non-travelers
    # (select the alternative whose tour counts sum to zero, not simply the first alternative)
    no_tours_alt = alternatives.index[alternatives.sum(axis=1) == 0][0]
    households['joint_tour_frequency'] = \
        choices.reindex(households.index).fillna(no_tours_alt).astype(str)

    households['num_hh_joint_tours'] = joint_tours.groupby('household_id').size().\
        reindex(households.index).fillna(0).astype(np.int8)

    pipeline.replace_table("households", households)

    tracing.print_summary('joint_tour_frequency', households.joint_tour_frequency, value_counts=True)

    if trace_hh_id:
        tracing.trace_df(households, label="joint_tour_frequency.households")
        tracing.trace_df(joint_tours, label="joint_tour_frequency.joint_tours", slicer='household_id')

    if estimator:
        survey_tours = estimation.manager.get_survey_table('tours')
        survey_tours = survey_tours[survey_tours.tour_category == 'joint']

        print(f"len(survey_tours) {len(survey_tours)}")
        print(f"len(joint_tours) {len(joint_tours)}")

        different = False
        survey_tours_not_in_tours = survey_tours[~survey_tours.index.isin(joint_tours.index)]
        if len(survey_tours_not_in_tours) > 0:
            print(f"survey_tours_not_in_tours\n{survey_tours_not_in_tours}")
            different = True

        tours_not_in_survey_tours = joint_tours[~joint_tours.index.isin(survey_tours.index)]
        if len(tours_not_in_survey_tours) > 0:
            print(f"tours_not_in_survey_tours\n{tours_not_in_survey_tours}")
            different = True

        assert not different
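# A toy illustration of the backfill at the end of joint_tour_frequency: pick the
# alternative whose tour counts sum to zero and use it as the fillna value for
# households that were never choosers. Alternative names here are hypothetical.
import pandas as pd

alts_demo = pd.DataFrame({'shopping': [0, 1, 0], 'eatout': [0, 0, 1]},
                         index=pd.Index(['0_tours', '1_shop', '1_eat'], name='alt'))

# boolean mask, then select matching index labels (plain .index[0] would ignore
# the mask and return the first alternative regardless of its tour counts)
no_tours_alt = alts_demo.index[alts_demo.sum(axis=1) == 0][0]
assert no_tours_alt == '0_tours'

choices_demo = pd.Series({1: '1_shop'})                   # household 1 chose a tour
households_index = pd.Index([1, 2], name='household_id')  # household 2 never chose
jtf = choices_demo.reindex(households_index).fillna(no_tours_alt).astype(str)
print(jtf)  # 1 -> '1_shop', 2 -> '0_tours'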
def run_trip_purpose(trips_df, estimator, chunk_size, trace_hh_id, trace_label):
    """
    trip purpose - main functionality separated from model step so it can be called iteratively

    For each intermediate stop on a tour (i.e. trip other than the last trip outbound or inbound),
    each trip is assigned a purpose based on an observed frequency distribution.

    The distribution should always be segmented by tour purpose and tour direction. By default it is
    also segmented by person type. The join columns can be overwritten using the "probs_join_cols"
    parameter in the model settings. The model will attempt to segment by trip depart time as well
    if necessary and depart time ranges are specified in the probability lookup table.

    Returns
    -------
    purpose: pandas.Series of purpose (str) indexed by trip_id
    """

    # uniform across trip_purpose
    chunk_tag = 'trip_purpose'

    model_settings_file_name = 'trip_purpose.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)

    probs_join_cols = model_settings.get('probs_join_cols', PROBS_JOIN_COLUMNS)

    spec_file_name = model_settings.get('PROBS_SPEC', 'trip_purpose_probs.csv')
    probs_spec = pd.read_csv(config.config_file_path(spec_file_name), comment='#')
    # FIXME for now, not really doing estimation for probabilistic model - just overwriting choices
    # besides, it isn't clear that named coefficients would be helpful if we had some form of estimation
    # coefficients_df = simulate.read_model_coefficients(model_settings)
    # probs_spec = map_coefficients(probs_spec, coefficients_df)

    if estimator:
        estimator.write_spec(model_settings, tag='PROBS_SPEC')
        estimator.write_model_settings(model_settings, model_settings_file_name)
        # estimator.write_coefficients(coefficients_df, model_settings)

    result_list = []

    # - last trip of outbound tour gets primary_purpose
    last_trip = (trips_df.trip_num == trips_df.trip_count)
    purpose = trips_df.primary_purpose[last_trip & trips_df.outbound]
    result_list.append(purpose)
    logger.info("assign purpose to %s last outbound trips", purpose.shape[0])

    # - last trip of inbound tour gets home (or work for atwork subtours)
    purpose = trips_df.primary_purpose[last_trip & ~trips_df.outbound]
    purpose = pd.Series(np.where(purpose == 'atwork', 'work', 'home'), index=purpose.index)
    result_list.append(purpose)
    logger.info("assign purpose to %s last inbound trips", purpose.shape[0])

    # - intermediate stops (non-last trips) purpose assigned by probability table
    trips_df = trips_df[~last_trip]
    logger.info("assign purpose to %s intermediate trips", trips_df.shape[0])

    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:
        locals_dict = config.get_model_constants(model_settings)
        expressions.assign_columns(
            df=trips_df,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)

    use_depart_time = model_settings.get('use_depart_time', True)

    for i, trips_chunk, chunk_trace_label in \
            chunk.adaptive_chunked_choosers(trips_df, chunk_size, chunk_tag, trace_label):

        choices = choose_intermediate_trip_purpose(
            trips_chunk,
            probs_spec,
            estimator,
            probs_join_cols=probs_join_cols,
            use_depart_time=use_depart_time,
            trace_hh_id=trace_hh_id,
            trace_label=chunk_trace_label)

        result_list.append(choices)

        chunk.log_df(trace_label, 'result_list', result_list)

    if len(result_list) > 1:
        choices = pd.concat(result_list)

    return choices
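# A compact sketch of the trip partitioning above: the last trip on each half-tour
# is identified by trip_num == trip_count, outbound last trips keep the tour's
# primary purpose, and inbound last trips go 'home' ('work' for atwork subtours).
# Toy data; column names mirror the trips table used above.
import numpy as np
import pandas as pd

trips_demo = pd.DataFrame({
    'trip_num': [1, 2, 1, 1],
    'trip_count': [2, 2, 1, 1],
    'outbound': [True, True, False, False],
    'primary_purpose': ['work', 'work', 'work', 'atwork'],
}, index=pd.Index([1, 2, 3, 4], name='trip_id'))

last_trip = trips_demo.trip_num == trips_demo.trip_count

outbound_last = trips_demo.primary_purpose[last_trip & trips_demo.outbound]
inbound_last = trips_demo.primary_purpose[last_trip & ~trips_demo.outbound]
inbound_last = pd.Series(np.where(inbound_last == 'atwork', 'work', 'home'),
                         index=inbound_last.index)

intermediate = trips_demo[~last_trip]  # these get purposes from the probability table
print(outbound_last)      # trip 2 -> 'work'
print(inbound_last)       # trip 3 -> 'home', trip 4 -> 'work'
print(len(intermediate))  # 1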
def non_mandatory_tour_scheduling(tours, persons_merged, tdd_alts, chunk_size, trace_hh_id):
    """
    This model predicts the departure time and duration of each activity for non-mandatory tours
    """

    trace_label = 'non_mandatory_tour_scheduling'
    model_settings_file_name = 'non_mandatory_tour_scheduling.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)

    tours = tours.to_frame()
    non_mandatory_tours = tours[tours.tour_category == 'non_mandatory']

    logger.info("Running non_mandatory_tour_scheduling with %d tours", len(non_mandatory_tours))

    persons_merged = persons_merged.to_frame()

    if 'SIMULATE_CHOOSER_COLUMNS' in model_settings:
        persons_merged = \
            expressions.filter_chooser_columns(persons_merged,
                                               model_settings['SIMULATE_CHOOSER_COLUMNS'])

    constants = config.get_model_constants(model_settings)

    # - run preprocessor to annotate choosers
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_d = {}
        if constants is not None:
            locals_d.update(constants)

        expressions.assign_columns(
            df=non_mandatory_tours,
            model_settings=preprocessor_settings,
            locals_dict=locals_d,
            trace_label=trace_label)

    timetable = inject.get_injectable("timetable")

    estimator = estimation.manager.begin_estimation('non_mandatory_tour_scheduling')

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients_df = simulate.read_model_coefficients(model_settings)
    model_spec = simulate.eval_coefficients(model_spec, coefficients_df, estimator)

    if estimator:
        estimator.write_model_settings(model_settings, model_settings_file_name)
        estimator.write_spec(model_settings)
        estimator.write_coefficients(coefficients_df)
        timetable.begin_transaction(estimator)

    # - non_mandatory tour scheduling is not segmented by tour type
    spec_info = {'spec': model_spec, 'estimator': estimator}

    choices = vectorize_tour_scheduling(
        non_mandatory_tours, persons_merged,
        tdd_alts, timetable,
        tour_segments=spec_info,
        tour_segment_col=None,
        model_settings=model_settings,
        chunk_size=chunk_size,
        trace_label=trace_label)

    if estimator:
        estimator.write_choices(choices)
        choices = estimator.get_survey_values(choices, 'tours', 'tdd')
        estimator.write_override_choices(choices)
        estimator.end_estimation()

        # update timetable to reflect the override choices (assign tours in tour_num order)
        timetable.rollback()
        for tour_num, nth_tours in non_mandatory_tours.groupby('tour_num', sort=True):
            timetable.assign(window_row_ids=nth_tours['person_id'],
                             tdds=choices.reindex(nth_tours.index))

    timetable.replace_table()

    # choices are tdd alternative ids
    # we want to add start, end, and duration columns to tours, which we have in tdd_alts table
    choices = pd.merge(choices.to_frame('tdd'), tdd_alts,
                       left_on=['tdd'], right_index=True, how='left')

    assign_in_place(tours, choices)

    pipeline.replace_table("tours", tours)

    # updated df for tracing
    non_mandatory_tours = tours[tours.tour_category == 'non_mandatory']

    tracing.dump_df(DUMP,
                    tt.tour_map(persons_merged, non_mandatory_tours, tdd_alts),
                    trace_label, 'tour_map')

    if trace_hh_id:
        tracing.trace_df(non_mandatory_tours,
                         label="non_mandatory_tour_scheduling",
                         slicer='person_id',
                         index_label='tour_id',
                         columns=None,
                         warn_if_empty=True)
def run_tour_od(tours, persons, want_logsums, want_sample_table, model_settings,
                network_los, estimator, chunk_size, trace_hh_id, trace_label):

    size_term_calculator = SizeTermCalculator(model_settings['SIZE_TERM_SELECTOR'])
    preprocessor_settings = model_settings.get('preprocessor', None)
    origin_col_name = model_settings['ORIG_COL_NAME']

    chooser_segment_column = model_settings['CHOOSER_SEGMENT_COLUMN_NAME']

    # maps segment names to compact (integer) ids
    segments = model_settings['SEGMENTS']

    # interaction_sample_simulate insists choosers appear in same order as alts
    tours = tours.sort_index()

    choices_list = []
    sample_list = []
    for segment_name in segments:

        choosers = tours[tours[chooser_segment_column] == segment_name]

        choosers = pd.merge(
            choosers,
            persons.to_frame(columns=['is_university', 'demographic_segment']),
            left_on='person_id', right_index=True)

        # - annotate choosers
        if preprocessor_settings:
            expressions.assign_columns(
                df=choosers,
                model_settings=preprocessor_settings,
                trace_label=trace_label)

        # size_term segment is segment_name
        segment_destination_size_terms = \
            size_term_calculator.dest_size_terms_df(segment_name, trace_label)

        if choosers.shape[0] == 0:
            logger.info("%s skipping segment %s: no choosers", trace_label, segment_name)
            continue

        # - od_sample
        spec_segment_name = segment_name  # spec_segment_name is segment_name

        od_sample_df = \
            run_od_sample(
                spec_segment_name,
                choosers,
                model_settings,
                network_los,
                segment_destination_size_terms,
                estimator,
                chunk_size=chunk_size,
                trace_label=tracing.extend_trace_label(trace_label, 'sample.%s' % segment_name))

        if model_settings['ORIG_FILTER'] == 'original_MAZ > 0':
            pass
        elif model_settings['ORIG_FILTER'] == 'external_TAZ > 0':
            # sampled alts using internal mazs, so now we
            # have to convert to using the external tazs
            od_sample_df[origin_col_name] = map_maz_to_ext_maz(od_sample_df[origin_col_name])
        else:
            raise ValueError(
                'Not sure how you identified tour origins but you probably need '
                'to choose a different ORIG_FILTER setting')

        # - destination_logsums
        od_sample_df = \
            run_od_logsums(
                spec_segment_name,
                choosers,
                od_sample_df,
                model_settings,
                network_los,
                estimator,
                chunk_size=chunk_size,
                trace_hh_id=trace_hh_id,
                trace_label=tracing.extend_trace_label(trace_label, 'logsums.%s' % segment_name))

        # - od_simulate
        choices = \
            run_od_simulate(
                spec_segment_name,
                choosers,
                od_sample_df,
                want_logsums=want_logsums,
                model_settings=model_settings,
                network_los=network_los,
                destination_size_terms=segment_destination_size_terms,
                estimator=estimator,
                chunk_size=chunk_size,
                trace_label=tracing.extend_trace_label(trace_label, 'simulate.%s' % segment_name))

        choices_list.append(choices)

        if estimator:
            assert estimator.want_unsampled_alternatives

        if want_sample_table:
            # FIXME - sample_table
            od_sample_df.set_index(model_settings['ALT_DEST_COL_NAME'],
                                   append=True, inplace=True)
            sample_list.append(od_sample_df)

        # FIXME - want to do this here?
        del od_sample_df

    if len(choices_list) > 0:
        choices_df = pd.concat(choices_list)

    if len(sample_list) > 0:
        save_sample_df = pd.concat(sample_list)
    else:
        # this could happen either with small samples as above, or if no saved sample desired
        save_sample_df = None

    return choices_df, save_sample_df
def _compute_logsums(alt_tdd, tours_merged, tour_purpose, model_settings, network_los, skims, trace_label):
    """
    compute logsums for tours using skims for alt_tdd out_period and in_period
    """

    trace_label = tracing.extend_trace_label(trace_label, 'logsums')

    logsum_settings = config.read_model_settings(model_settings['LOGSUM_SETTINGS'])

    choosers = alt_tdd.join(tours_merged, how='left', rsuffix='_chooser')
    logger.info(f"{trace_label} compute_logsums for {choosers.shape[0]} choosers {alt_tdd.shape[0]} alts")

    # - locals_dict
    constants = config.get_model_constants(logsum_settings)
    locals_dict = {}
    locals_dict.update(constants)

    if network_los.zone_system == los.THREE_ZONE:
        # TVPB constants can appear in expressions
        locals_dict.update(network_los.setting('TVPB_SETTINGS.tour_mode_choice.CONSTANTS'))

    locals_dict.update(skims)

    # constrained coefficients can appear in expressions
    coefficients = simulate.get_segment_coefficients(logsum_settings, tour_purpose)
    locals_dict.update(coefficients)

    # - run preprocessor to annotate choosers
    # allow specification of alternate preprocessor for nontour choosers
    preprocessor = model_settings.get('LOGSUM_PREPROCESSOR', 'preprocessor')
    preprocessor_settings = logsum_settings[preprocessor]

    if preprocessor_settings:

        simulate.set_skim_wrapper_targets(choosers, skims)

        expressions.assign_columns(
            df=choosers,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)

    # - compute logsums
    logsum_spec = simulate.read_model_spec(file_name=logsum_settings['SPEC'])
    logsum_spec = simulate.eval_coefficients(logsum_spec, coefficients, estimator=None)

    nest_spec = config.get_logit_model_settings(logsum_settings)
    nest_spec = simulate.eval_nest_coefficients(nest_spec, coefficients, trace_label)

    logsums = simulate.simple_simulate_logsums(
        choosers,
        logsum_spec,
        nest_spec,
        skims=skims,
        locals_d=locals_dict,
        chunk_size=0,
        trace_label=trace_label)

    return logsums
def atwork_subtour_frequency(tours, persons_merged, chunk_size, trace_hh_id):
    """
    This model predicts the frequency of making at-work subtours
    (alternatives for this model come from a separate csv file which is
    configured by the user).
    """

    trace_label = 'atwork_subtour_frequency'
    model_settings_file_name = 'atwork_subtour_frequency.yaml'

    tours = tours.to_frame()
    work_tours = tours[tours.tour_type == 'work']

    # - if no work_tours
    if len(work_tours) == 0:
        add_null_results(trace_label, tours)
        return

    model_settings = config.read_model_settings(model_settings_file_name)
    estimator = estimation.manager.begin_estimation('atwork_subtour_frequency')

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients_df = simulate.read_model_coefficients(model_settings)
    model_spec = simulate.eval_coefficients(model_spec, coefficients_df, estimator)

    alternatives = simulate.read_model_alts('atwork_subtour_frequency_alternatives.csv', set_index='alt')

    # merge persons into work_tours
    persons_merged = persons_merged.to_frame()
    work_tours = pd.merge(work_tours, persons_merged,
                          left_on='person_id', right_index=True)

    logger.info("Running atwork_subtour_frequency with %d work tours", len(work_tours))

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    # - preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:
        expressions.assign_columns(
            df=work_tours,
            model_settings=preprocessor_settings,
            trace_label=trace_label)

    if estimator:
        estimator.write_spec(model_settings)
        estimator.write_model_settings(model_settings, model_settings_file_name)
        estimator.write_coefficients(coefficients_df, model_settings)
        estimator.write_choosers(work_tours)

    choices = simulate.simple_simulate(
        choosers=work_tours,
        spec=model_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='atwork_subtour_frequency',
        estimator=estimator)

    # convert indexes to alternative names
    choices = pd.Series(model_spec.columns[choices.values], index=choices.index)

    if estimator:
        estimator.write_choices(choices)
        choices = estimator.get_survey_values(choices, 'tours', 'atwork_subtour_frequency')
        estimator.write_override_choices(choices)
        estimator.end_estimation()

    # add atwork_subtour_frequency column to tours
    # reindex since we are working with a subset of tours
    tours['atwork_subtour_frequency'] = choices.reindex(tours.index)
    pipeline.replace_table("tours", tours)

    # - create atwork_subtours based on atwork_subtour_frequency choice names
    work_tours = tours[tours.tour_type == 'work']
    assert not work_tours.atwork_subtour_frequency.isnull().any()

    subtours = process_atwork_subtours(work_tours, alternatives)

    tours = pipeline.extend_table("tours", subtours)

    tracing.register_traceable_table('tours', subtours)
    pipeline.get_rn_generator().add_channel('tours', subtours)

    tracing.print_summary('atwork_subtour_frequency',
                          tours.atwork_subtour_frequency, value_counts=True)

    if trace_hh_id:
        tracing.trace_df(tours, label='atwork_subtour_frequency.tours')
def telecommute_frequency(persons_merged, persons, chunk_size, trace_hh_id):
    """
    This model predicts the frequency of telecommuting for a person (worker) who
    does not work from home. The alternatives of this model are 'No Telecommute',
    '1 day per week', '2 to 3 days per week' and '4 days per week'. This model
    reflects the choices of people who prefer a combination of working from home
    and at the office during a week.
    """

    trace_label = 'telecommute_frequency'
    model_settings_file_name = 'telecommute_frequency.yaml'

    choosers = persons_merged.to_frame()
    choosers = choosers[choosers.workplace_zone_id > -1]

    logger.info("Running %s with %d persons", trace_label, len(choosers))

    model_settings = config.read_model_settings(model_settings_file_name)
    estimator = estimation.manager.begin_estimation('telecommute_frequency')

    constants = config.get_model_constants(model_settings)

    # - preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_d = {}
        if constants is not None:
            locals_d.update(constants)

        expressions.assign_columns(
            df=choosers,
            model_settings=preprocessor_settings,
            locals_dict=locals_d,
            trace_label=trace_label)

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients_df = simulate.read_model_coefficients(model_settings)
    model_spec = simulate.eval_coefficients(model_spec, coefficients_df, estimator)

    nest_spec = config.get_logit_model_settings(model_settings)

    if estimator:
        estimator.write_model_settings(model_settings, model_settings_file_name)
        estimator.write_spec(model_settings)
        estimator.write_coefficients(coefficients_df)
        estimator.write_choosers(choosers)

    choices = simulate.simple_simulate(
        choosers=choosers,
        spec=model_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='telecommute_frequency',
        estimator=estimator)

    # convert indexes to alternative names
    choices = pd.Series(model_spec.columns[choices.values], index=choices.index)

    if estimator:
        estimator.write_choices(choices)
        choices = estimator.get_survey_values(choices, 'persons', 'telecommute_frequency')
        estimator.write_override_choices(choices)
        estimator.end_estimation()

    persons = persons.to_frame()
    persons['telecommute_frequency'] = choices.reindex(persons.index).fillna('').astype(str)

    pipeline.replace_table("persons", persons)

    tracing.print_summary('telecommute_frequency', persons.telecommute_frequency, value_counts=True)

    if trace_hh_id:
        tracing.trace_df(persons, label=trace_label, warn_if_empty=True)
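# The 'convert indexes to alternative names' step above relies on simple_simulate
# returning positional alternative indexes. A toy version of that mapping, with
# made-up spec columns:
import pandas as pd

spec_columns = pd.Index(['No_Telecommute', '1_day_week', '2_3_days_week', '4_days_week'])
raw_choices = pd.Series([0, 2, 3], index=pd.Index([11, 12, 13], name='person_id'))

named_choices = pd.Series(spec_columns[raw_choices.values], index=raw_choices.index)
print(named_choices)  # 11 -> No_Telecommute, 12 -> 2_3_days_week, 13 -> 4_days_week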
def mandatory_tour_frequency(persons_merged, chunk_size, trace_hh_id):
    """
    This model predicts the frequency of making mandatory trips (see the
    alternatives above) - these trips include work and school in some combination.
    """
    trace_label = 'mandatory_tour_frequency'
    model_settings_file_name = 'mandatory_tour_frequency.yaml'

    model_settings = config.read_model_settings(model_settings_file_name)

    choosers = persons_merged.to_frame()
    # filter based on results of CDAP
    choosers = choosers[choosers.cdap_activity == 'M']
    logger.info("Running mandatory_tour_frequency with %d persons", len(choosers))

    # - if no mandatory tours
    if choosers.shape[0] == 0:
        add_null_results(trace_label, model_settings)
        return

    # - preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_dict = {}

        expressions.assign_columns(
            df=choosers,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)

    estimator = estimation.manager.begin_estimation('mandatory_tour_frequency')

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients_df = simulate.read_model_coefficients(model_settings)
    model_spec = simulate.eval_coefficients(model_spec, coefficients_df, estimator)

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    if estimator:
        estimator.write_spec(model_settings)
        estimator.write_model_settings(model_settings, model_settings_file_name)
        estimator.write_coefficients(coefficients_df, model_settings)
        estimator.write_choosers(choosers)

    choices = simulate.simple_simulate(
        choosers=choosers,
        spec=model_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='mandatory_tour_frequency',
        estimator=estimator)

    # convert indexes to alternative names
    choices = pd.Series(model_spec.columns[choices.values], index=choices.index)

    if estimator:
        estimator.write_choices(choices)
        choices = estimator.get_survey_values(choices, 'persons', 'mandatory_tour_frequency')
        estimator.write_override_choices(choices)
        estimator.end_estimation()

    # - create mandatory tours
    """
    This reprocesses the choice of index of the mandatory tour frequency
    alternatives into an actual dataframe of tours.  Ending format is the same
    as for non_mandatory_tours except trip types are "work" and "school"
    """
    alternatives = simulate.read_model_alts('mandatory_tour_frequency_alternatives.csv', set_index='alt')
    choosers['mandatory_tour_frequency'] = choices.reindex(choosers.index)

    mandatory_tours = process_mandatory_tours(
        persons=choosers,
        mandatory_tour_frequency_alts=alternatives
    )

    tours = pipeline.extend_table("tours", mandatory_tours)
    tracing.register_traceable_table('tours', mandatory_tours)
    pipeline.get_rn_generator().add_channel('tours', mandatory_tours)

    # - annotate persons
    persons = inject.get_table('persons').to_frame()

    # need to reindex as we only handled persons with cdap_activity == 'M'
    persons['mandatory_tour_frequency'] = choices.reindex(persons.index).fillna('').astype(str)

    expressions.assign_columns(
        df=persons,
        model_settings=model_settings.get('annotate_persons'),
        trace_label=tracing.extend_trace_label(trace_label, 'annotate_persons'))

    pipeline.replace_table("persons", persons)

    tracing.print_summary('mandatory_tour_frequency',
                          persons.mandatory_tour_frequency, value_counts=True)

    if trace_hh_id:
        tracing.trace_df(mandatory_tours,
                         label="mandatory_tour_frequency.mandatory_tours",
                         warn_if_empty=True)

        tracing.trace_df(persons,
                         label="mandatory_tour_frequency.persons",
                         warn_if_empty=True)
def non_mandatory_tour_frequency(persons, persons_merged, chunk_size, trace_hh_id):
    """
    This model predicts the frequency of making non-mandatory trips
    (alternatives for this model come from a separate csv file which is
    configured by the user) - these trips include escort, shopping, othmaint,
    othdiscr, eatout, and social trips in various combinations.
    """
    trace_label = 'non_mandatory_tour_frequency'
    model_settings_file_name = 'non_mandatory_tour_frequency.yaml'

    model_settings = config.read_model_settings(model_settings_file_name)

    # FIXME kind of tacky both that we know to add this here and del it below
    # 'tot_tours' is used in model_spec expressions
    alternatives = simulate.read_model_alts('non_mandatory_tour_frequency_alternatives.csv', set_index=None)
    alternatives['tot_tours'] = alternatives.sum(axis=1)

    # filter based on results of CDAP
    choosers = persons_merged.to_frame()
    choosers = choosers[choosers.cdap_activity.isin(['M', 'N'])]

    # - preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_dict = {'person_max_window': person_max_window}

        expressions.assign_columns(
            df=choosers,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)

    logger.info("Running non_mandatory_tour_frequency with %d persons", len(choosers))

    constants = config.get_model_constants(model_settings)

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    spec_segments = model_settings.get('SPEC_SEGMENTS', {})

    # segment by person type and pick the right spec for each person type
    choices_list = []
    for segment_settings in spec_segments:

        segment_name = segment_settings['NAME']
        ptype = segment_settings['PTYPE']

        # pick the spec column for the segment
        segment_spec = model_spec[[segment_name]]

        chooser_segment = choosers[choosers.ptype == ptype]

        logger.info("Running segment '%s' of size %d", segment_name, len(chooser_segment))

        if len(chooser_segment) == 0:
            # skip empty segments
            continue

        estimator = \
            estimation.manager.begin_estimation(model_name=segment_name,
                                                bundle_name='non_mandatory_tour_frequency')

        coefficients_df = simulate.read_model_coefficients(segment_settings)
        segment_spec = simulate.eval_coefficients(segment_spec, coefficients_df, estimator)

        if estimator:
            estimator.write_spec(model_settings, bundle_directory=True)
            estimator.write_model_settings(model_settings, model_settings_file_name,
                                           bundle_directory=True)
            # preserving coefficients file name makes bringing back updated coefficients more straightforward
            estimator.write_coefficients(coefficients_df, segment_settings)
            estimator.write_choosers(chooser_segment)
            estimator.write_alternatives(alternatives, bundle_directory=True)

            # FIXME #interaction_simulate_estimation_requires_chooser_id_in_df_column
            # should we do it here or have interaction_simulate do it?
            # chooser index must be duplicated in column or it will be omitted from interaction_dataset
            # estimation requires that chooser_id is either in index or a column of interaction_dataset
            # so it can be reformatted (melted) and indexed by chooser_id and alt_id
            assert chooser_segment.index.name == 'person_id'
            assert 'person_id' not in chooser_segment.columns
            chooser_segment['person_id'] = chooser_segment.index

            # FIXME set_alt_id - do we need this for interaction_simulate estimation bundle tables?
            estimator.set_alt_id('alt_id')

            estimator.set_chooser_id(chooser_segment.index.name)

        choices = interaction_simulate(
            chooser_segment,
            alternatives,
            spec=segment_spec,
            locals_d=constants,
            chunk_size=chunk_size,
            trace_label='non_mandatory_tour_frequency.%s' % segment_name,
            trace_choice_name='non_mandatory_tour_frequency',
            estimator=estimator)

        if estimator:
            estimator.write_choices(choices)
            choices = estimator.get_survey_values(choices, 'persons', 'non_mandatory_tour_frequency')
            estimator.write_override_choices(choices)
            estimator.end_estimation()

        choices_list.append(choices)

        # FIXME - force garbage collection?
        force_garbage_collect()

    del alternatives['tot_tours']  # del tot_tours column we added above

    # The choice value 'non_mandatory_tour_frequency' assigned by interaction_simulate
    # is the index value of the chosen alternative in the alternatives table.
    choices = pd.concat(choices_list).sort_index()

    # add non_mandatory_tour_frequency column to persons
    persons = persons.to_frame()

    # we expect there to be an alt with no tours - which we can use to backfill non-travelers
    # (select the alternative whose tour counts sum to zero, not simply the first alternative)
    no_tours_alt = alternatives.index[alternatives.sum(axis=1) == 0][0]

    # need to reindex as we only handled persons with cdap_activity in ['M', 'N']
    persons['non_mandatory_tour_frequency'] = \
        choices.reindex(persons.index).fillna(no_tours_alt).astype(np.int8)

    """
    We have now generated non-mandatory tour frequencies, but they are attributes of the person table.
    Now we create a "tours" table which has one row per tour that has been generated
    (and the person id it is associated with).

    But before we do that, we run an additional probabilistic step to extend/increase tour counts
    beyond the strict limits of the tour_frequency alternatives chosen above
    (which are currently limited to at most 2 escort tours and 1 each of shopping, othmaint,
    othdiscr, eatout, and social tours).

    The choice value 'non_mandatory_tour_frequency' assigned by interaction_simulate
    is simply the index value of the chosen alternative in the alternatives table.

    get counts of each of the tour type alternatives (so we can extend)
               escort  shopping  othmaint  othdiscr  eatout  social
    parent_id
    2588676         2         0         0         1       1       0
    2588677         0         1         0         1       0       0
    """

    # counts of each of the tour type alternatives (so we can extend)
    modeled_tour_counts = alternatives.loc[choices]
    modeled_tour_counts.index = choices.index  # assign person ids to the index

    # - extend_tour_counts - probabilistic
    extended_tour_counts = \
        extend_tour_counts(choosers, modeled_tour_counts.copy(), alternatives,
                           trace_hh_id,
                           tracing.extend_trace_label(trace_label, 'extend_tour_counts'))

    num_modeled_tours = modeled_tour_counts.sum().sum()
    num_extended_tours = extended_tour_counts.sum().sum()
    logger.info("extend_tour_counts increased tour count by %s from %s to %s" %
                (num_extended_tours - num_modeled_tours, num_modeled_tours, num_extended_tours))

    if estimator:
        override_tour_counts = \
            estimation.manager.get_survey_values(extended_tour_counts,
                                                 table_name='persons',
                                                 column_names=['_%s' % c for c in extended_tour_counts.columns])
        override_tour_counts = \
            override_tour_counts.rename(columns={('_%s' % c): c for c in extended_tour_counts.columns})
        logger.info("estimation get_survey_values override_tour_counts %s changed cells" %
                    (override_tour_counts != extended_tour_counts).sum().sum())
        extended_tour_counts = override_tour_counts

    """
    create the non_mandatory tours based on extended_tour_counts
    """
    non_mandatory_tours = process_non_mandatory_tours(persons, extended_tour_counts)
    assert len(non_mandatory_tours) == extended_tour_counts.sum().sum()

    if estimator:

        # make sure they created the right tours
        survey_tours = estimation.manager.get_survey_table('tours').sort_index()
        non_mandatory_survey_tours = survey_tours[survey_tours.tour_category == 'non_mandatory']
        assert len(non_mandatory_survey_tours) == len(non_mandatory_tours)
        assert non_mandatory_survey_tours.index.equals(non_mandatory_tours.sort_index().index)

        # make sure they created tours with the expected tour_ids
        columns = ['person_id', 'household_id', 'tour_type', 'tour_category']
        survey_tours = \
            estimation.manager.get_survey_values(non_mandatory_tours,
                                                 table_name='tours',
                                                 column_names=columns)

        tours_differ = (non_mandatory_tours[columns] != survey_tours[columns]).any(axis=1)

        if tours_differ.any():
            print("tours_differ\n%s" % tours_differ)
            print("%s of %s tours differ" % (tours_differ.sum(), len(tours_differ)))
            print("differing survey_tours\n%s" % survey_tours[tours_differ])
            print("differing modeled_tours\n%s" % non_mandatory_tours[columns][tours_differ])

        assert (not tours_differ.any())

    pipeline.extend_table("tours", non_mandatory_tours)

    tracing.register_traceable_table('tours', non_mandatory_tours)
    pipeline.get_rn_generator().add_channel('tours', non_mandatory_tours)

    expressions.assign_columns(
        df=persons,
        model_settings=model_settings.get('annotate_persons'),
        trace_label=trace_label)

    pipeline.replace_table("persons", persons)

    tracing.print_summary('non_mandatory_tour_frequency',
                          persons.non_mandatory_tour_frequency, value_counts=True)

    if trace_hh_id:
        tracing.trace_df(non_mandatory_tours,
                         label="non_mandatory_tour_frequency.non_mandatory_tours",
                         warn_if_empty=True)

        tracing.trace_df(choosers,
                         label="non_mandatory_tour_frequency.choosers",
                         warn_if_empty=True)

        tracing.trace_df(persons,
                         label="non_mandatory_tour_frequency.annotated_persons",
                         warn_if_empty=True)
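# A toy version of the 'modeled_tour_counts' lookup above: choices hold index
# labels into the alternatives table, and .loc broadcasts each chosen row out to
# one row per chooser, after which the index is relabeled with person ids.
# Values below are hypothetical.
import pandas as pd

alts_demo = pd.DataFrame({'escort': [0, 2, 0], 'shopping': [0, 0, 1]})  # alt ids 0..2
choices_demo = pd.Series([1, 1, 2], index=pd.Index([7, 8, 9], name='person_id'))

tour_counts = alts_demo.loc[choices_demo]   # one row per chooser, repeats allowed
tour_counts.index = choices_demo.index      # assign person ids to the index
print(tour_counts)
# person 7 -> escort 2, person 8 -> escort 2, person 9 -> shopping 1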
def run_trip_destination(trips, tours_merged, estimator, chunk_size,
                         trace_hh_id, trace_label, fail_some_trips_for_testing=False):
    """
    trip destination - main functionality separated from model step so it can be called iteratively

    Run the trip_destination model, assigning destinations for each (intermediate) trip
    (last trips already have a destination - either the tour primary destination or Home)

    Set trip destination and origin columns, and a boolean failed flag for any failed trips
    (destination for flagged failed trips will be set to -1)

    Parameters
    ----------
    trips
    tours_merged
    estimator
    chunk_size
    trace_hh_id
    trace_label
    fail_some_trips_for_testing

    Returns
    -------

    """
    model_settings_file_name = 'trip_destination.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)
    preprocessor_settings = model_settings.get('preprocessor', None)
    logsum_settings = config.read_model_settings(model_settings['LOGSUM_SETTINGS'])

    logsum_column_name = model_settings.get('DEST_CHOICE_LOGSUM_COLUMN_NAME')
    want_logsums = logsum_column_name is not None

    sample_table_name = model_settings.get('DEST_CHOICE_SAMPLE_TABLE_NAME')
    want_sample_table = config.setting('want_dest_choice_sample_tables') and sample_table_name is not None

    land_use = inject.get_table('land_use')
    size_terms = inject.get_injectable('size_terms')
    network_los = inject.get_injectable('network_los')

    trips = trips.sort_index()
    trips['next_trip_id'] = np.roll(trips.index, -1)
    trips.next_trip_id = trips.next_trip_id.where(trips.trip_num < trips.trip_count, 0)

    # - initialize trip origin and destination to those of half-tour
    # (we will sequentially adjust intermediate trips origin and destination as we choose them)
    tour_destination = reindex(tours_merged.destination, trips.tour_id).astype(np.int64)
    tour_origin = reindex(tours_merged.origin, trips.tour_id).astype(np.int64)
    trips['destination'] = np.where(trips.outbound, tour_destination, tour_origin)
    trips['origin'] = np.where(trips.outbound, tour_origin, tour_destination)
    trips['failed'] = False

    if estimator:
        # need to check or override non-intermediate trip destination
        # should check consistency of survey trips origin, destination with parent tour and subsequent/prior trip?
        # FIXME if not consistent, do we fail or override? (seems weird to override them to bad values?)

        # expect all the same trips
        survey_trips = estimator.get_survey_table('trips').sort_index()
        assert survey_trips.index.equals(trips.index)

        first = (survey_trips.trip_num == 1)
        last = (survey_trips.trip_num == trips.trip_count)

        # expect survey's outbound first trip origin to be same as half tour origin
        assert (survey_trips.origin[survey_trips.outbound & first]
                == tour_origin[survey_trips.outbound & first]).all()
        # expect outbound last trip destination to be same as half tour destination
        assert (survey_trips.destination[survey_trips.outbound & last]
                == tour_destination[survey_trips.outbound & last]).all()

        # expect inbound first trip origin to be same as half tour destination
        assert (survey_trips.origin[~survey_trips.outbound & first]
                == tour_destination[~survey_trips.outbound & first]).all()
        # expect inbound last trip destination to be same as half tour origin
        assert (survey_trips.destination[~survey_trips.outbound & last]
                == tour_origin[~survey_trips.outbound & last]).all()

    # - filter tours_merged (AFTER copying destination and origin columns to trips)
    # tours_merged is used for logsums, we filter it here upfront to save space and time
    tours_merged_cols = logsum_settings['TOURS_MERGED_CHOOSER_COLUMNS']
    redundant_cols = model_settings.get('REDUNDANT_TOURS_MERGED_CHOOSER_COLUMNS', [])
    if redundant_cols:
        tours_merged_cols = [c for c in tours_merged_cols if c not in redundant_cols]
    tours_merged = tours_merged[tours_merged_cols]

    # - skims
    skim_hotel = SkimHotel(model_settings, network_los, trace_label)

    # - size_terms and alternatives
    alternatives = tour_destination_size_terms(land_use, size_terms, 'trip')

    # DataFrameMatrix allows us to treat dataframe as a virtual 2-D array, indexed by zone_id, purpose
    # e.g. size_terms.get(df.dest_zone_id, df.purpose)
    # returns a series of size_terms for each chooser's dest_zone_id and purpose with chooser index
    size_term_matrix = DataFrameMatrix(alternatives)

    # don't need size terms in alternatives, just zone_id index
    alternatives = alternatives.drop(alternatives.columns, axis=1)
    alternatives.index.name = model_settings['ALT_DEST_COL_NAME']

    sample_list = []

    # - process intermediate trips in ascending trip_num order
    intermediate = trips.trip_num < trips.trip_count
    if intermediate.any():

        first_trip_num = trips[intermediate].trip_num.min()
        last_trip_num = trips[intermediate].trip_num.max()

        # iterate over trips in ascending trip_num order
        for trip_num in range(first_trip_num, last_trip_num + 1):

            nth_trips = trips[intermediate & (trips.trip_num == trip_num)]
            nth_trace_label = tracing.extend_trace_label(trace_label, 'trip_num_%s' % trip_num)

            locals_dict = {'network_los': network_los}
            locals_dict.update(config.get_model_constants(model_settings))

            # - annotate nth_trips
            if preprocessor_settings:
                expressions.assign_columns(
                    df=nth_trips,
                    model_settings=preprocessor_settings,
                    locals_dict=locals_dict,
                    trace_label=nth_trace_label)

            logger.info("Running %s with %d trips", nth_trace_label, nth_trips.shape[0])

            # - choose destination for nth_trips, segmented by primary_purpose
            choices_list = []
            for primary_purpose, trips_segment in nth_trips.groupby('primary_purpose'):
                choices, destination_sample = choose_trip_destination(
                    primary_purpose,
                    trips_segment,
                    alternatives,
                    tours_merged,
                    model_settings,
                    want_logsums,
                    want_sample_table,
                    size_term_matrix, skim_hotel,
                    estimator,
                    chunk_size, trace_hh_id,
                    trace_label=tracing.extend_trace_label(nth_trace_label, primary_purpose))

                choices_list.append(choices)
                if want_sample_table:
                    assert destination_sample is not None
                    sample_list.append(destination_sample)

            destinations_df = pd.concat(choices_list)

            if fail_some_trips_for_testing:
                if len(destinations_df) > 0:
                    destinations_df = destinations_df.drop(destinations_df.index[0])

            failed_trip_ids = nth_trips.index.difference(destinations_df.index)
            if failed_trip_ids.any():
                logger.warning("%s sidelining %s trips without viable destination alternatives" %
                               (nth_trace_label, failed_trip_ids.shape[0]))
                next_trip_ids = nth_trips.next_trip_id.reindex(failed_trip_ids)
                trips.loc[failed_trip_ids, 'failed'] = True
                trips.loc[failed_trip_ids, 'destination'] = -1
                trips.loc[next_trip_ids, 'origin'] = trips.loc[failed_trip_ids].origin.values

            if len(destinations_df) == 0:
                assert failed_trip_ids.all()
                logger.warning(f"all {len(nth_trips)} {primary_purpose} trip_num {trip_num} trips failed")

            if len(destinations_df) > 0:
                # - assign choices to these trips' destinations
                # if estimator, then the choices will already have been overridden by trip_destination_simulate
                # because we need to overwrite choices before any failed choices are suppressed
                assign_in_place(trips, destinations_df.choice.to_frame('destination'))
                if want_logsums:
                    assert 'logsum' in destinations_df.columns
                    assign_in_place(trips, destinations_df.logsum.to_frame(logsum_column_name))

                # - assign choice to next trip's origin
                destinations_df.index = nth_trips.next_trip_id.reindex(destinations_df.index)
                assign_in_place(trips, destinations_df.choice.to_frame('origin'))

    del trips['next_trip_id']

    if len(sample_list) > 0:
        save_sample_df = pd.concat(sample_list)
    else:
        # this could happen if no intermediate trips, or if no saved sample desired
        save_sample_df = None

    return trips, save_sample_df
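# A self-contained sketch of the next_trip_id bookkeeping above: np.roll pairs
# each trip with the following trip's id, and the pairing is blanked (set to 0)
# for the last trip of each leg so a chosen destination is only propagated to a
# genuine successor's origin. Toy trip ids.
import numpy as np
import pandas as pd

trips_demo = pd.DataFrame({
    'trip_num': [1, 2, 3],
    'trip_count': [3, 3, 3],
}, index=pd.Index([100, 101, 102], name='trip_id')).sort_index()

trips_demo['next_trip_id'] = np.roll(trips_demo.index, -1)
trips_demo['next_trip_id'] = trips_demo['next_trip_id'].where(
    trips_demo.trip_num < trips_demo.trip_count, 0)

print(trips_demo['next_trip_id'].tolist())  # [101, 102, 0] - last trip has no successor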
def compute_logsums(choosers, tour_purpose, logsum_settings, model_settings,
                    network_los, chunk_size, chunk_tag, trace_label):
    """

    Parameters
    ----------
    choosers
    tour_purpose
    logsum_settings
    model_settings
    network_los
    chunk_size
    chunk_tag
    trace_label

    Returns
    -------
    logsums: pandas series
        computed logsums with same index as choosers
    """
    trace_label = tracing.extend_trace_label(trace_label, 'compute_logsums')
    logger.debug("Running compute_logsums with %d choosers" % choosers.shape[0])

    # compute_logsums needs to know name of dest column in interaction_sample
    orig_col_name = model_settings['CHOOSER_ORIG_COL_NAME']
    dest_col_name = model_settings['ALT_DEST_COL_NAME']

    # FIXME - are we ok with altering choosers (so caller doesn't have to set these)?
    assert ('in_period' not in choosers) and ('out_period' not in choosers)
    choosers['in_period'] = network_los.skim_time_period_label(model_settings['IN_PERIOD'])
    choosers['out_period'] = network_los.skim_time_period_label(model_settings['OUT_PERIOD'])

    assert ('duration' not in choosers)
    choosers['duration'] = model_settings['IN_PERIOD'] - model_settings['OUT_PERIOD']

    logsum_spec = simulate.read_model_spec(file_name=logsum_settings['SPEC'])
    coefficients = simulate.get_segment_coefficients(logsum_settings, tour_purpose)

    logsum_spec = simulate.eval_coefficients(logsum_spec, coefficients, estimator=None)

    nest_spec = config.get_logit_model_settings(logsum_settings)
    nest_spec = simulate.eval_nest_coefficients(nest_spec, coefficients, trace_label)

    locals_dict = {}
    # model_constants can appear in expressions
    locals_dict.update(config.get_model_constants(logsum_settings))
    # constrained coefficients can appear in expressions
    locals_dict.update(coefficients)

    # setup skim keys
    skim_dict = network_los.get_default_skim_dict()

    odt_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=orig_col_name, dest_key=dest_col_name,
                                               dim3_key='out_period')
    dot_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=dest_col_name, dest_key=orig_col_name,
                                               dim3_key='in_period')
    odr_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=orig_col_name, dest_key=dest_col_name,
                                               dim3_key='in_period')
    dor_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=dest_col_name, dest_key=orig_col_name,
                                               dim3_key='out_period')
    od_skim_stack_wrapper = skim_dict.wrap(orig_col_name, dest_col_name)

    skims = {
        "odt_skims": odt_skim_stack_wrapper,
        "dot_skims": dot_skim_stack_wrapper,
        "odr_skims": odr_skim_stack_wrapper,
        "dor_skims": dor_skim_stack_wrapper,
        "od_skims": od_skim_stack_wrapper,
        'orig_col_name': orig_col_name,
        'dest_col_name': dest_col_name
    }

    if network_los.zone_system == los.THREE_ZONE:
        # fixme - is this a lightweight object?
        tvpb = network_los.tvpb

        tvpb_logsum_odt = tvpb.wrap_logsum(orig_key=orig_col_name, dest_key=dest_col_name,
                                           tod_key='out_period', segment_key='demographic_segment',
                                           trace_label=trace_label, tag='tvpb_logsum_odt')
        tvpb_logsum_dot = tvpb.wrap_logsum(orig_key=dest_col_name, dest_key=orig_col_name,
                                           tod_key='in_period', segment_key='demographic_segment',
                                           trace_label=trace_label, tag='tvpb_logsum_dot')

        skims.update({
            'tvpb_logsum_odt': tvpb_logsum_odt,
            'tvpb_logsum_dot': tvpb_logsum_dot
        })

        # TVPB constants can appear in expressions
        locals_dict.update(network_los.setting('TVPB_SETTINGS.tour_mode_choice.CONSTANTS'))

    locals_dict.update(skims)

    # - run preprocessor to annotate choosers
    # allow specification of alternate preprocessor for nontour choosers
    preprocessor = model_settings.get('LOGSUM_PREPROCESSOR', 'preprocessor')
    preprocessor_settings = logsum_settings[preprocessor]

    if preprocessor_settings:

        simulate.set_skim_wrapper_targets(choosers, skims)

        expressions.assign_columns(
            df=choosers,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)

    logsums = simulate.simple_simulate_logsums(
        choosers,
        logsum_spec,
        nest_spec,
        skims=skims,
        locals_d=locals_dict,
        chunk_size=chunk_size,
        chunk_tag=chunk_tag,
        trace_label=trace_label)

    return logsums
def _schedule_tours(tours, persons_merged, alts,
                    spec, logsum_tour_purpose,
                    model_settings, skims,
                    timetable, window_id_col,
                    previous_tour, tour_owner_id_col,
                    estimator,
                    tour_trace_label):
    """
    previous_tour stores values used to add columns that can be used in the spec
    which have to do with the previous tours per person.  Every column in the
    alternatives table is appended with the suffix "_previous" and made available.
    So if your alternatives table has columns for start and end, then
    start_previous and end_previous will be set to the start and end of the most
    recent tour for a person.  The first time through, start_previous and
    end_previous are undefined, so make sure to protect with a tour_num >= 2 in
    the variable computation.

    Parameters
    ----------
    tours : DataFrame
        chunk of tours to schedule with unique timetable window_id_col
    persons_merged : DataFrame
        DataFrame of persons to be merged with tours containing attributes referenced
        by expressions in spec
    alts : DataFrame
        DataFrame of alternatives which represent all possible time slots.
        tdd_interaction_dataset function will use timetable to filter them to omit
        unavailable alternatives
    spec : DataFrame
        The spec which will be passed to interaction_simulate.
    model_settings : dict
    timetable : TimeTable
        timetable of timewindows for person (or subtour) with rows for tours[window_id_col]
    window_id_col : str
        column name from tours that identifies timetable owner (or None if tours index)
        - person_id for non/mandatory tours
        - parent_tour_id for subtours,
        - None (tours index) for joint_tours since every tour may have different participants)
    previous_tour : Series
        series with value of tdd_alt choice for last previous tour scheduled for
    tour_owner_id_col : str
        column name from tours that identifies 'owner' of this tour
        (person_id for non/mandatory tours, parent_tour_id for subtours,
        household_id for joint_tours)
    estimator
    tour_trace_label

    Returns
    -------
    choices : pandas.Series
        tdd alt ids of the chosen time window alternatives, indexed by tour id
    """
    logger.info("%s schedule_tours running %d tour choices" % (tour_trace_label, len(tours)))

    # merge persons into tours
    # avoid dual suffix for redundant column names (e.g. household_id) that appear in both
    tours = pd.merge(tours, persons_merged, left_on='person_id', right_index=True,
                     suffixes=('', '_y'))
    chunk.log_df(tour_trace_label, "tours", tours)

    # - add explicit window_id_col for timetable owner if it is index
    # if no timetable window_id_col specified, then add index as an explicit column
    # (this is not strictly necessary but its presence makes code simpler in several places)
    if window_id_col is None:
        window_id_col = tours.index.name
        tours[window_id_col] = tours.index

    # timetable can't handle multiple tours per window_id
    assert not tours[window_id_col].duplicated().any()

    # - build interaction dataset filtered to include only available tdd alts
    # dataframe columns start, end, duration, person_id, tdd
    # indexed (not unique) on tour_id
    choice_column = TDD_CHOICE_COLUMN
    alt_tdd = tdd_interaction_dataset(tours, alts, timetable, choice_column, window_id_col,
                                      tour_trace_label)
    logger.debug("%s tours %s alts %s" % (tour_trace_label, tours.shape, alts.shape))
    chunk.log_df(tour_trace_label, "alt_tdd", alt_tdd)

    # - add logsums
    if logsum_tour_purpose:
        logsums = \
            compute_logsums(alt_tdd, tours, logsum_tour_purpose, model_settings, skims,
                            tour_trace_label)
    else:
        logsums = 0
    alt_tdd['mode_choice_logsum'] = logsums

    # - merge in previous tour columns
    # adds start_previous and end_previous, joins on index
    tours = \
        tours.join(get_previous_tour_by_tourid(tours[tour_owner_id_col], previous_tour, alts))
    chunk.log_df(tour_trace_label, "tours", tours)

    # - make choices
    locals_d = {'tt': timetable}
    constants = config.get_model_constants(model_settings)
    if constants is not None:
        locals_d.update(constants)

    preprocessor_settings = model_settings.get('ALTS_PREPROCESSOR', None)
    if preprocessor_settings and preprocessor_settings.get(logsum_tour_purpose):
        expressions.assign_columns(
            df=alt_tdd,
            model_settings=preprocessor_settings.get(logsum_tour_purpose),
            locals_dict=locals_d,
            trace_label=tour_trace_label)

    if estimator:
        # write choosers after annotation
        estimator.write_choosers(tours)
        estimator.set_alt_id(choice_column)
        estimator.write_interaction_sample_alternatives(alt_tdd)

    # chunk_size=0 here because _schedule_tours operates on a pre-chunked set of tours
    choices = interaction_sample_simulate(
        tours,
        alt_tdd,
        spec,
        choice_column=choice_column,
        locals_d=locals_d,
        chunk_size=0,
        trace_label=tour_trace_label,
        estimator=estimator)

    # - update previous_tour and timetable parameters

    # update previous_tour (series with most recent previous tdd choices) with latest values
    previous_tour.loc[tours[tour_owner_id_col]] = choices.values

    # update timetable with chosen tdd footprints
    timetable.assign(tours[window_id_col], choices)

    return choices
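
# Minimal sketch of the "_previous" column mechanism described in the
# _schedule_tours docstring (defined but never called by the model).
# get_previous_tour_by_tourid, defined elsewhere, does essentially this:
# look up each owner's most recent tdd choice in previous_tour and join the
# corresponding alts row, with columns renamed start -> start_previous, etc.
# All ids and times below are hypothetical.
def _example_previous_tour_columns():
    import pandas as pd

    alts = pd.DataFrame({'start': [5, 9], 'end': [10, 18]}, index=[0, 1])  # tdd alt ids
    previous_tour = pd.Series({1: 0, 2: 1})          # person_id -> last chosen tdd alt
    tour_owner_ids = pd.Series([1, 2], index=[101, 102])  # tour_id -> person_id

    prev = alts.reindex(previous_tour.loc[tour_owner_ids].values)
    prev.columns = [c + '_previous' for c in prev.columns]
    prev.index = tour_owner_ids.index
    return prev  # start_previous/end_previous indexed by tour_id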
def parking_location(trips, trips_merged, land_use, network_los, chunk_size, trace_hh_id):
    """
    Given a set of trips, assign a parking location to each trip that is
    eligible for remote parking.
    """

    trace_label = 'parking_location'
    model_settings = config.read_model_settings('parking_location_choice.yaml')
    alt_destination_col_name = model_settings['ALT_DEST_COL_NAME']

    preprocessor_settings = model_settings.get('PREPROCESSOR', None)

    trips_df = trips.to_frame()
    trips_merged_df = trips_merged.to_frame()
    land_use_df = land_use.to_frame()

    locals_dict = {'network_los': network_los}
    locals_dict.update(config.get_model_constants(model_settings))

    if preprocessor_settings:
        expressions.assign_columns(df=trips_merged_df,
                                   model_settings=preprocessor_settings,
                                   locals_dict=locals_dict,
                                   trace_label=trace_label)

    parking_locations, save_sample_df = run_parking_destination(
        model_settings,
        trips_merged_df,
        land_use_df,
        chunk_size=chunk_size,
        trace_hh_id=trace_hh_id,
        trace_label=trace_label,
    )

    assign_in_place(trips_df, parking_locations.to_frame(alt_destination_col_name))

    pipeline.replace_table("trips", trips_df)

    if trace_hh_id:
        tracing.trace_df(trips_df,
                         label=trace_label,
                         slicer='trip_id',
                         index_label='trip_id',
                         warn_if_empty=True)

    if save_sample_df is not None:
        assert len(save_sample_df.index.get_level_values(0).unique()) == \
            len(trips_df[trips_df.trip_num < trips_df.trip_count])

        sample_table_name = model_settings.get('PARKING_LOCATION_SAMPLE_TABLE_NAME')
        assert sample_table_name is not None

        logger.info("adding %s samples to %s" % (len(save_sample_df), sample_table_name))

        # lest they try to put tour samples into the same table
        if pipeline.is_table(sample_table_name):
            raise RuntimeError("sample table %s already exists" % sample_table_name)
        pipeline.extend_table(sample_table_name, save_sample_df)
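
# Rough sketch (defined but never called by the model) of what the
# assign_in_place call above accomplishes for a newly added column: chosen
# parking zones, a Series indexed by trip_id, are written back onto the trips
# table, with ineligible trips left unset. This index-aligned join is only an
# approximation of assign_in_place, which also updates existing columns in
# place and coerces dtypes. Zone ids and column names below are hypothetical.
def _example_assign_parking_locations():
    import pandas as pd

    trips_df = pd.DataFrame({'depart': [7, 9, 17]},
                            index=pd.Index([1, 2, 3], name='trip_id'))
    parking_locations = pd.Series({1: 42, 3: 7})  # hypothetical chosen parking zones

    trips_df = trips_df.join(parking_locations.to_frame('parking_zone'))
    return trips_df  # trip 2 (ineligible) gets NaN parking_zone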
def iterate_location_choice(model_settings,
                            persons_merged, persons, households,
                            network_los,
                            estimator,
                            chunk_size, trace_hh_id, locutor,
                            trace_label):
    """
    iterate run_location_choice updating shadow pricing until convergence criteria satisfied
    or max_iterations reached.

    (If use_shadow_pricing not enabled, then just iterate once)

    Parameters
    ----------
    model_settings : dict
    persons_merged : injected table
    persons : injected table
    households : injected table
    network_los : los.Network_LOS
    estimator : Estimator or None
    chunk_size : int
    trace_hh_id : int
    locutor : bool
        whether this process is the privileged logger of shadow_pricing when multiprocessing
    trace_label : str

    Returns
    -------
    persons_df : pandas.DataFrame
        adds choice column model_settings['DEST_CHOICE_COLUMN_NAME']
        adds logsum column model_settings['DEST_CHOICE_LOGSUM_COLUMN_NAME'] - if provided
        adds annotations to persons table
    """
    chunk_tag = trace_label

    # boolean column used to filter out persons not needing location modeling
    # (e.g. is_worker, is_student)
    chooser_filter_column = model_settings['CHOOSER_FILTER_COLUMN_NAME']

    dest_choice_column_name = model_settings['DEST_CHOICE_COLUMN_NAME']
    logsum_column_name = model_settings.get('DEST_CHOICE_LOGSUM_COLUMN_NAME')

    sample_table_name = model_settings.get('DEST_CHOICE_SAMPLE_TABLE_NAME')
    want_sample_table = config.setting('want_dest_choice_sample_tables') \
        and sample_table_name is not None

    persons_merged_df = persons_merged.to_frame()
    persons_merged_df = persons_merged_df[persons_merged_df[chooser_filter_column]]

    # interaction_sample expects chooser index to be monotonic increasing
    persons_merged_df.sort_index(inplace=True)

    # chooser segmentation allows different sets of coefficients for
    # e.g. different income_segments or tour_types
    chooser_segment_column = model_settings['CHOOSER_SEGMENT_COLUMN_NAME']

    assert chooser_segment_column in persons_merged_df, \
        f"CHOOSER_SEGMENT_COLUMN '{chooser_segment_column}' not in persons_merged table."

    spc = shadow_pricing.load_shadow_price_calculator(model_settings)
    max_iterations = spc.max_iterations
    assert not (spc.use_shadow_pricing and estimator)

    logger.debug("%s max_iterations: %s" % (trace_label, max_iterations))

    for iteration in range(1, max_iterations + 1):

        if spc.use_shadow_pricing and iteration > 1:
            spc.update_shadow_prices()

        choices_df, save_sample_df = run_location_choice(
            persons_merged_df,
            network_los,
            shadow_price_calculator=spc,
            want_logsums=logsum_column_name is not None,
            want_sample_table=want_sample_table,
            estimator=estimator,
            model_settings=model_settings,
            chunk_size=chunk_size,
            chunk_tag=chunk_tag,
            trace_hh_id=trace_hh_id,
            trace_label=tracing.extend_trace_label(trace_label, 'i%s' % iteration))

        # choices_df is a pandas DataFrame with columns 'choice' and (optionally) 'logsum'
        if choices_df is None:
            break

        spc.set_choices(
            choices=choices_df['choice'],
            segment_ids=persons_merged_df[chooser_segment_column].reindex(choices_df.index))

        if locutor:
            spc.write_trace_files(iteration)

        if spc.use_shadow_pricing and spc.check_fit(iteration):
            logger.info("%s converged after iteration %s" % (trace_label, iteration,))
            break

    # - shadow price table
    if locutor:
        if spc.use_shadow_pricing and 'SHADOW_PRICE_TABLE' in model_settings:
            inject.add_table(model_settings['SHADOW_PRICE_TABLE'], spc.shadow_prices)
        if 'MODELED_SIZE_TABLE' in model_settings:
            inject.add_table(model_settings['MODELED_SIZE_TABLE'], spc.modeled_size)

    persons_df = persons.to_frame()

    # add the choice values to the dest_choice_column in persons dataframe
    # we only chose school locations for the subset of persons who go to school
    # so we backfill the empty choices with -1 to code as no school location
    NO_DEST_ZONE = -1
    persons_df[dest_choice_column_name] = \
        choices_df['choice'].reindex(persons_df.index).fillna(NO_DEST_ZONE).astype(int)

    # add the dest_choice_logsum column to persons dataframe
    if logsum_column_name:
        persons_df[logsum_column_name] = \
            choices_df['logsum'].reindex(persons_df.index).astype('float')

    if save_sample_df is not None:
        # might be None for tiny samples even if sample_table_name was specified
        assert len(save_sample_df.index.get_level_values(0).unique()) == len(choices_df)

        # lest they try to put school and workplace samples into the same table
        if pipeline.is_table(sample_table_name):
            raise RuntimeError("dest choice sample table %s already exists" % sample_table_name)
        pipeline.extend_table(sample_table_name, save_sample_df)

    # - annotate persons table
    if 'annotate_persons' in model_settings:
        expressions.assign_columns(
            df=persons_df,
            model_settings=model_settings.get('annotate_persons'),
            trace_label=tracing.extend_trace_label(trace_label, 'annotate_persons'))

        pipeline.replace_table("persons", persons_df)

        if trace_hh_id:
            tracing.trace_df(persons_df, label=trace_label, warn_if_empty=True)

    # - annotate households table
    if 'annotate_households' in model_settings:
        households_df = households.to_frame()
        expressions.assign_columns(
            df=households_df,
            model_settings=model_settings.get('annotate_households'),
            trace_label=tracing.extend_trace_label(trace_label, 'annotate_households'))

        pipeline.replace_table("households", households_df)

        if trace_hh_id:
            tracing.trace_df(households_df, label=trace_label, warn_if_empty=True)

    if logsum_column_name:
        tracing.print_summary(logsum_column_name, choices_df['logsum'], value_counts=True)

    return persons_df
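
# Conceptual sketch (defined but never called by the model) of the
# shadow-pricing feedback loop driven above. This is a hypothetical helper,
# not the shadow_pricing module API: each iteration compares modeled
# destination totals to size targets and nudges a zone-level utility
# adjustment, stopping when the largest relative error falls inside a
# tolerance. Inputs are numpy arrays of per-zone totals.
def _example_shadow_price_update(shadow_prices, modeled_size, target_size,
                                 damping=1.0, tol=0.05):
    import numpy as np

    rel_error = np.abs(modeled_size - target_size) / np.maximum(target_size, 1)
    converged = bool((rel_error <= tol).all())
    if not converged:
        # log-ratio update: oversubscribed zones become less attractive next iteration
        shadow_prices = shadow_prices + damping * np.log(
            np.maximum(target_size, 1) / np.maximum(modeled_size, 1))
    return shadow_prices, converged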
def transit_pass_subsidy(persons_merged, persons, chunk_size, trace_hh_id):
    """
    Transit pass subsidy model.
    """

    trace_label = 'transit_pass_subsidy'
    model_settings_file_name = 'transit_pass_subsidy.yaml'

    choosers = persons_merged.to_frame()
    logger.info("Running %s with %d persons", trace_label, len(choosers))

    model_settings = config.read_model_settings(model_settings_file_name)
    estimator = estimation.manager.begin_estimation('transit_pass_subsidy')

    constants = config.get_model_constants(model_settings)

    # - preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:
        locals_d = {}
        if constants is not None:
            locals_d.update(constants)

        expressions.assign_columns(df=choosers,
                                   model_settings=preprocessor_settings,
                                   locals_dict=locals_d,
                                   trace_label=trace_label)

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients_df = simulate.read_model_coefficients(model_settings)
    model_spec = simulate.eval_coefficients(model_spec, coefficients_df, estimator)

    nest_spec = config.get_logit_model_settings(model_settings)

    if estimator:
        estimator.write_model_settings(model_settings, model_settings_file_name)
        estimator.write_spec(model_settings)
        estimator.write_coefficients(coefficients_df)
        estimator.write_choosers(choosers)

    choices = simulate.simple_simulate(
        choosers=choosers,
        spec=model_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='transit_pass_subsidy',
        estimator=estimator)

    if estimator:
        estimator.write_choices(choices)
        choices = estimator.get_survey_values(choices, 'persons', 'transit_pass_subsidy')
        estimator.write_override_choices(choices)
        estimator.end_estimation()

    persons = persons.to_frame()
    persons['transit_pass_subsidy'] = choices.reindex(persons.index)

    pipeline.replace_table("persons", persons)

    tracing.print_summary('transit_pass_subsidy',
                          persons.transit_pass_subsidy,
                          value_counts=True)

    if trace_hh_id:
        tracing.trace_df(persons, label=trace_label, warn_if_empty=True)
def joint_tour_scheduling(tours, persons_merged, tdd_alts, chunk_size, trace_hh_id):
    """
    This model predicts the departure time and duration of each joint tour
    """
    trace_label = 'joint_tour_scheduling'
    model_settings_file_name = 'joint_tour_scheduling.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)

    tours = tours.to_frame()
    joint_tours = tours[tours.tour_category == 'joint']

    # - if no joint tours
    if joint_tours.shape[0] == 0:
        tracing.no_results(trace_label)
        return

    # use inject.get_table as this won't exist if there are no joint_tours
    joint_tour_participants = inject.get_table('joint_tour_participants').to_frame()

    persons_merged = persons_merged.to_frame()

    logger.info("Running %s with %d joint tours", trace_label, joint_tours.shape[0])

    # it may seem peculiar that we are concerned with persons rather than households
    # but every joint tour is (somewhat arbitrarily) assigned a "primary person"
    # some of whose characteristics are used in the spec
    # and we get household attributes along with person attributes in persons_merged
    persons_merged = persons_merged[persons_merged.num_hh_joint_tours > 0]

    # since a household's joint tours each potentially have different participants
    # they may also have different joint tour masks (free time of all participants)
    # so we have to either chunk processing by joint_tour_num and build timetable by household
    # or build timetables by unique joint_tour
    constants = config.get_model_constants(model_settings)

    # - run preprocessor to annotate choosers
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:
        locals_d = {}
        if constants is not None:
            locals_d.update(constants)

        expressions.assign_columns(df=joint_tours,
                                   model_settings=preprocessor_settings,
                                   locals_dict=locals_d,
                                   trace_label=trace_label)

    timetable = inject.get_injectable("timetable")

    estimator = estimation.manager.begin_estimation('joint_tour_scheduling')

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients_df = simulate.read_model_coefficients(model_settings)
    model_spec = simulate.eval_coefficients(model_spec, coefficients_df, estimator)

    if estimator:
        estimator.write_model_settings(model_settings, model_settings_file_name)
        estimator.write_spec(model_settings)
        estimator.write_coefficients(coefficients_df)
        timetable.begin_transaction(estimator)

    choices = vectorize_joint_tour_scheduling(
        joint_tours, joint_tour_participants,
        persons_merged,
        tdd_alts, timetable,
        spec=model_spec,
        model_settings=model_settings,
        estimator=estimator,
        chunk_size=chunk_size,
        trace_label=trace_label)

    if estimator:
        estimator.write_choices(choices)
        choices = estimator.get_survey_values(choices, 'tours', 'tdd')
        estimator.write_override_choices(choices)
        estimator.end_estimation()

        # update timetable to reflect the override choices (assign tours in tour_num order)
        timetable.rollback()
        for tour_num, nth_tours in joint_tours.groupby('tour_num', sort=True):
            nth_participants = \
                joint_tour_participants[joint_tour_participants.tour_id.isin(nth_tours.index)]

            estimator.log("assign timetable for %s participants in %s tours with tour_num %s" %
                          (len(nth_participants), len(nth_tours), tour_num))

            # - update timetables of all joint tour participants
            timetable.assign(nth_participants.person_id,
                             reindex(choices, nth_participants.tour_id))

    timetable.replace_table()

    # choices are tdd alternative ids
    # we want to add start, end, and duration columns to tours, which we have in tdd_alts table
    choices = pd.merge(choices.to_frame('tdd'), tdd_alts,
                       left_on=['tdd'], right_index=True, how='left')

    assign_in_place(tours, choices)
    pipeline.replace_table("tours", tours)

    # updated df for tracing
    joint_tours = tours[tours.tour_category == 'joint']

    if trace_hh_id:
        tracing.trace_df(joint_tours,
                         label="joint_tour_scheduling",
                         slicer='household_id')
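
# Toy illustration (defined but never called by the model) of the tdd merge
# performed above: choices hold tdd alternative ids, and joining against the
# tdd_alts table attaches the start, end, and duration columns those ids
# stand for. Alt ids and times below are hypothetical.
def _example_tdd_merge():
    import pandas as pd

    tdd_alts = pd.DataFrame({'start': [5, 5], 'end': [10, 14], 'duration': [5, 9]},
                            index=pd.Index([0, 1], name='tdd'))
    choices = pd.Series([1, 0], index=pd.Index([101, 102], name='tour_id'))

    # same merge pattern as above: left_on the chosen tdd id, right on the alts index
    return pd.merge(choices.to_frame('tdd'), tdd_alts,
                    left_on=['tdd'], right_index=True, how='left')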
def work_from_home(persons_merged, persons, chunk_size, trace_hh_id):
    """
    This model predicts whether a person (worker) works from home.
    The output from this model is TRUE (works from home) or FALSE (works away
    from home). The workplace location choice of workers who work from home is
    overridden and set to -1.
    """
    trace_label = 'work_from_home'
    model_settings_file_name = 'work_from_home.yaml'

    choosers = persons_merged.to_frame()
    choosers = choosers[choosers.workplace_zone_id > -1]
    logger.info("Running %s with %d persons", trace_label, len(choosers))

    model_settings = config.read_model_settings(model_settings_file_name)
    estimator = estimation.manager.begin_estimation('work_from_home')

    constants = config.get_model_constants(model_settings)
    work_from_home_alt = model_settings['WORK_FROM_HOME_ALT']

    # - preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:
        locals_d = {}
        if constants is not None:
            locals_d.update(constants)

        expressions.assign_columns(df=choosers,
                                   model_settings=preprocessor_settings,
                                   locals_dict=locals_d,
                                   trace_label=trace_label)

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients_df = simulate.read_model_coefficients(model_settings)
    nest_spec = config.get_logit_model_settings(model_settings)

    if estimator:
        estimator.write_model_settings(model_settings, model_settings_file_name)
        estimator.write_spec(model_settings)
        estimator.write_coefficients(coefficients_df)
        estimator.write_choosers(choosers)

    # - iterative what-if calibration, if specified
    iterations = model_settings.get('WORK_FROM_HOME_ITERATIONS', 1)
    iterations_coefficient_constant = \
        model_settings.get('WORK_FROM_HOME_COEFFICIENT_CONSTANT', None)
    iterations_target_percent = model_settings.get('WORK_FROM_HOME_TARGET_PERCENT', None)
    iterations_target_percent_tolerance = \
        model_settings.get('WORK_FROM_HOME_TARGET_PERCENT_TOLERANCE', None)

    for iteration in range(iterations):

        logger.info("Running %s with %d persons iteration %d",
                    trace_label, len(choosers), iteration)

        # re-read spec to reset coefficient substitution
        model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
        model_spec = simulate.eval_coefficients(model_spec, coefficients_df, estimator)

        choices = simulate.simple_simulate(
            choosers=choosers,
            spec=model_spec,
            nest_spec=nest_spec,
            locals_d=constants,
            chunk_size=chunk_size,
            trace_label=trace_label,
            trace_choice_name='work_from_home',
            estimator=estimator)

        if iterations_target_percent is not None:
            current_percent = (choices == work_from_home_alt).sum() / len(choices)
            logger.info("Running %s iteration %i current percent %f target percent %f",
                        trace_label, iteration, current_percent, iterations_target_percent)

            if abs(current_percent - iterations_target_percent) <= \
                    iterations_target_percent_tolerance:
                logger.info("Running %s iteration %i converged with coefficient %f",
                            trace_label, iteration,
                            coefficients_df.value[iterations_coefficient_constant])
                break
            else:
                new_value = \
                    np.log(iterations_target_percent / np.maximum(current_percent, 0.0001)) + \
                    coefficients_df.value[iterations_coefficient_constant]
                coefficients_df.value[iterations_coefficient_constant] = new_value
                logger.info("Running %s iteration %i new coefficient for next iteration %f",
                            trace_label, iteration, new_value)

    choices = (choices == work_from_home_alt)

    dest_choice_column_name = model_settings['DEST_CHOICE_COLUMN_NAME']

    if estimator:
        estimator.write_choices(choices)
        choices = estimator.get_survey_values(choices, 'persons', 'work_from_home')
        estimator.write_override_choices(choices)
        estimator.end_estimation()

    persons = persons.to_frame()
    persons['work_from_home'] = choices.reindex(persons.index).fillna(0).astype(bool)
    # set the workplace location of home-workers to -1
    persons[dest_choice_column_name] = np.where(persons.work_from_home, -1,
                                                persons[dest_choice_column_name])

    pipeline.replace_table("persons", persons)

    tracing.print_summary('work_from_home',
                          persons.work_from_home,
                          value_counts=True)

    if trace_hh_id:
        tracing.trace_df(persons, label=trace_label, warn_if_empty=True)
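
# Worked toy example (defined but never called by the model) of the
# calibration update used above: if 10% of workers are modeled working from
# home but the target is 20%, the alternative-specific constant is shifted by
# log(0.20 / 0.10) ~= +0.693, raising the modeled share on the next
# iteration. Pure arithmetic; the numbers are hypothetical.
def _example_wfh_coefficient_update(old_coefficient=0.0,
                                    current_percent=0.10,
                                    target_percent=0.20):
    import numpy as np

    # same log-ratio adjustment as the loop above, floored to avoid log(target / 0)
    return np.log(target_percent / np.maximum(current_percent, 0.0001)) + old_coefficient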
def joint_tour_participation(tours, persons_merged, chunk_size, trace_hh_id):
    """
    Predicts whether each eligible person participates in each joint tour.
    """
    trace_label = 'joint_tour_participation'
    model_settings_file_name = 'joint_tour_participation.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)

    tours = tours.to_frame()
    joint_tours = tours[tours.tour_category == 'joint']

    # - if no joint tours
    if joint_tours.shape[0] == 0:
        add_null_results(model_settings, trace_label)
        return

    persons_merged = persons_merged.to_frame()

    # - create joint_tour_participation_candidates table
    candidates = joint_tour_participation_candidates(joint_tours, persons_merged)
    tracing.register_traceable_table('joint_tour_participants', candidates)
    pipeline.get_rn_generator().add_channel('joint_tour_participants', candidates)

    logger.info("Running joint_tour_participation with %d potential participants (candidates)" %
                candidates.shape[0])

    # - preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:
        locals_dict = {
            'person_time_window_overlap': person_time_window_overlap,
            'persons': persons_merged
        }

        expressions.assign_columns(
            df=candidates,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)

    # - simple_simulate
    estimator = estimation.manager.begin_estimation('joint_tour_participation')

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients_df = simulate.read_model_coefficients(model_settings)
    model_spec = simulate.eval_coefficients(model_spec, coefficients_df, estimator)

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    if estimator:
        estimator.write_model_settings(model_settings, model_settings_file_name)
        estimator.write_spec(model_settings)
        estimator.write_coefficients(coefficients_df, model_settings)
        estimator.write_choosers(candidates)

    # add household-based chunk_id so we can chunk all of a household's candidates together
    assert 'chunk_id' not in candidates.columns
    unique_household_ids = candidates.household_id.unique()
    household_chunk_ids = pd.Series(range(len(unique_household_ids)),
                                    index=unique_household_ids)
    candidates['chunk_id'] = reindex(household_chunk_ids, candidates.household_id)

    choices = simulate.simple_simulate_by_chunk_id(
        choosers=candidates,
        spec=model_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='participation',
        custom_chooser=participants_chooser,
        estimator=estimator)

    # choice is boolean (participate or not)
    choice_col = model_settings.get('participation_choice', 'participate')
    assert choice_col in model_spec.columns, \
        "couldn't find participation choice column '%s' in spec" % choice_col
    PARTICIPATE_CHOICE = model_spec.columns.get_loc(choice_col)

    participate = (choices == PARTICIPATE_CHOICE)

    if estimator:
        estimator.write_choices(choices)

        # we override the 'participate' boolean series, rather than the raw alternative
        # index in the 'choices' series; its value depends on whether the candidate's
        # 'participant_id' is in the joint_tour_participants index
        survey_participants_df = estimator.get_survey_table('joint_tour_participants')
        participate = pd.Series(choices.index.isin(survey_participants_df.index.values),
                                index=choices.index)

        # but estimation software wants to know the choices value (alternative index)
        choices = participate.replace({True: PARTICIPATE_CHOICE,
                                       False: 1 - PARTICIPATE_CHOICE})
        # estimator.write_override_choices(participate)  # write choices as boolean participate
        estimator.write_override_choices(choices)  # write choices as int alt indexes

        estimator.end_estimation()

    # satisfaction indexed by tour_id
    tour_satisfaction = get_tour_satisfaction(candidates, participate)
    assert tour_satisfaction.all()

    candidates['satisfied'] = reindex(tour_satisfaction, candidates.tour_id)

    PARTICIPANT_COLS = ['tour_id', 'household_id', 'person_id']
    participants = candidates[participate][PARTICIPANT_COLS].copy()

    # assign participant_num
    # FIXME do we want something smarter than the participant with the lowest person_id?
    participants['participant_num'] = \
        participants.sort_values(by=['tour_id', 'person_id']).\
        groupby('tour_id').cumcount() + 1

    pipeline.replace_table("joint_tour_participants", participants)

    # drop channel as we aren't using it any more (and it has candidates that weren't chosen)
    pipeline.get_rn_generator().drop_channel('joint_tour_participants')

    # - assign joint tour 'point person' (participant_num == 1)
    point_persons = participants[participants.participant_num == 1]
    joint_tours['person_id'] = point_persons.set_index('tour_id').person_id

    # update number_of_participants which was initialized to 1
    joint_tours['number_of_participants'] = participants.groupby('tour_id').size()

    assign_in_place(tours, joint_tours[['person_id', 'number_of_participants']])

    pipeline.replace_table("tours", tours)

    # - run annotations
    annotate_jtp(model_settings, trace_label)

    if trace_hh_id:
        tracing.trace_df(participants, label="joint_tour_participation.participants")
        tracing.trace_df(joint_tours, label="joint_tour_participation.joint_tours")
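
# Toy illustration (defined but never called by the model) of the
# participant_num assignment above: participants are numbered 1..n within each
# tour in ascending person_id order, so the lowest person_id becomes
# participant_num 1 (the tour's "point person"). Ids below are hypothetical.
def _example_participant_num():
    import pandas as pd

    participants = pd.DataFrame({'tour_id': [7, 7, 9], 'person_id': [22, 11, 33]})
    participants['participant_num'] = \
        participants.sort_values(by=['tour_id', 'person_id']).\
        groupby('tour_id').cumcount() + 1
    return participants  # person 11 gets participant_num 1 on tour 7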
def joint_tour_composition(tours, households, persons, chunk_size, trace_hh_id):
    """
    This model predicts the makeup of the travel party (adults, children, or mixed).
    """
    trace_label = 'joint_tour_composition'
    model_settings_file_name = 'joint_tour_composition.yaml'

    tours = tours.to_frame()
    joint_tours = tours[tours.tour_category == 'joint']

    # - if no joint tours
    if joint_tours.shape[0] == 0:
        add_null_results(trace_label, tours)
        return

    model_settings = config.read_model_settings(model_settings_file_name)
    estimator = estimation.manager.begin_estimation('joint_tour_composition')

    # - only interested in households with joint_tours
    households = households.to_frame()
    households = households[households.num_hh_joint_tours > 0]

    persons = persons.to_frame()
    persons = persons[persons.household_id.isin(households.index)]

    logger.info("Running joint_tour_composition with %d joint tours" % joint_tours.shape[0])

    # - run preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:
        locals_dict = {
            'persons': persons,
            'hh_time_window_overlap': hh_time_window_overlap
        }

        expressions.assign_columns(df=households,
                                   model_settings=preprocessor_settings,
                                   locals_dict=locals_dict,
                                   trace_label=trace_label)

    joint_tours_merged = pd.merge(joint_tours, households,
                                  left_on='household_id', right_index=True, how='left')

    # - simple_simulate
    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients_df = simulate.read_model_coefficients(model_settings)
    model_spec = simulate.eval_coefficients(model_spec, coefficients_df, estimator)

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    if estimator:
        estimator.write_spec(model_settings)
        estimator.write_model_settings(model_settings, model_settings_file_name)
        estimator.write_coefficients(coefficients_df, model_settings)
        estimator.write_choosers(joint_tours_merged)

    choices = simulate.simple_simulate(
        choosers=joint_tours_merged,
        spec=model_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='composition',
        estimator=estimator)

    # convert indexes to alternative names
    choices = pd.Series(model_spec.columns[choices.values], index=choices.index)

    if estimator:
        estimator.write_choices(choices)
        choices = estimator.get_survey_values(choices, 'tours', 'composition')
        estimator.write_override_choices(choices)
        estimator.end_estimation()

    # add composition column to tours for tracing
    joint_tours['composition'] = choices

    # reindex since we ran model on a subset of households
    tours['composition'] = choices.reindex(tours.index).fillna('').astype(str)
    pipeline.replace_table("tours", tours)

    tracing.print_summary('joint_tour_composition',
                          joint_tours.composition,
                          value_counts=True)

    if trace_hh_id:
        tracing.trace_df(joint_tours,
                         label="joint_tour_composition.joint_tours",
                         slicer='household_id')
def run_tour_scheduling(model_name,
                        chooser_tours,
                        persons_merged, tdd_alts,
                        tour_segment_col,
                        chunk_size, trace_hh_id):

    trace_label = model_name
    model_settings_file_name = f'{model_name}.yaml'

    model_settings = config.read_model_settings(model_settings_file_name)

    if 'LOGSUM_SETTINGS' in model_settings:
        logsum_settings = config.read_model_settings(model_settings['LOGSUM_SETTINGS'])
        logsum_columns = logsum_settings.get('LOGSUM_CHOOSER_COLUMNS', [])
    else:
        logsum_columns = []

    # - filter chooser columns for both logsums and simulate
    model_columns = model_settings.get('SIMULATE_CHOOSER_COLUMNS', [])
    chooser_columns = logsum_columns + [c for c in model_columns if c not in logsum_columns]
    persons_merged = expressions.filter_chooser_columns(persons_merged, chooser_columns)

    timetable = inject.get_injectable("timetable")

    # - run preprocessor to annotate choosers
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:
        locals_d = {'tt': timetable}
        locals_d.update(config.get_model_constants(model_settings))

        expressions.assign_columns(df=chooser_tours,
                                   model_settings=preprocessor_settings,
                                   locals_dict=locals_d,
                                   trace_label=trace_label)

    estimators = {}
    if 'TOUR_SPEC_SEGMENTS' in model_settings:
        # load segmented specs
        spec_segment_settings = model_settings.get('SPEC_SEGMENTS', {})

        specs = {}
        for spec_segment_name, spec_settings in spec_segment_settings.items():

            bundle_name = f'{model_name}_{spec_segment_name}'

            # estimator for this tour_segment
            estimator = estimation.manager.begin_estimation(model_name=bundle_name,
                                                            bundle_name=bundle_name)

            spec_file_name = spec_settings['SPEC']
            model_spec = simulate.read_model_spec(file_name=spec_file_name)
            coefficients_df = simulate.read_model_coefficients(spec_settings)
            specs[spec_segment_name] = simulate.eval_coefficients(model_spec,
                                                                  coefficients_df,
                                                                  estimator)

            if estimator:
                estimators[spec_segment_name] = estimator  # add to local list
                estimator.write_model_settings(model_settings, model_settings_file_name)
                estimator.write_spec(spec_settings)
                estimator.write_coefficients(coefficients_df, spec_settings)

        # - spec dict segmented by primary_purpose
        tour_segment_settings = model_settings.get('TOUR_SPEC_SEGMENTS', {})
        tour_segments = {}
        for tour_segment_name, spec_segment_name in tour_segment_settings.items():
            tour_segments[tour_segment_name] = {}
            tour_segments[tour_segment_name]['spec_segment_name'] = spec_segment_name
            tour_segments[tour_segment_name]['spec'] = specs[spec_segment_name]
            tour_segments[tour_segment_name]['estimator'] = estimators.get(spec_segment_name)

        # default tour_segment_col to 'tour_type' if segmented spec and
        # tour_segment_col not specified
        if tour_segment_col is None and tour_segments:
            tour_segment_col = 'tour_type'

    else:
        # unsegmented spec
        assert 'SPEC_SEGMENTS' not in model_settings
        assert 'TOUR_SPEC_SEGMENTS' not in model_settings
        assert tour_segment_col is None

        estimator = estimation.manager.begin_estimation(model_name)

        spec_file_name = model_settings['SPEC']
        model_spec = simulate.read_model_spec(file_name=spec_file_name)
        coefficients_df = simulate.read_model_coefficients(model_settings)
        model_spec = simulate.eval_coefficients(model_spec, coefficients_df, estimator)

        if estimator:
            estimators[None] = estimator  # add to local list
            estimator.write_model_settings(model_settings, model_settings_file_name)
            estimator.write_spec(model_settings)
            estimator.write_coefficients(coefficients_df, model_settings)

        # - non_mandatory tour scheduling is not segmented by tour type
        tour_segments = {'spec': model_spec, 'estimator': estimator}

    if estimators:
        timetable.begin_transaction(list(estimators.values()))

    logger.info("Running %s with %d tours", model_name, len(chooser_tours))
    choices = vts.vectorize_tour_scheduling(
        chooser_tours, persons_merged,
        tdd_alts, timetable,
        tour_segments=tour_segments,
        tour_segment_col=tour_segment_col,
        model_settings=model_settings,
        chunk_size=chunk_size,
        trace_label=trace_label)

    if estimators:
        # override choices for all estimators
        choices_list = []
        for spec_segment_name, estimator in estimators.items():
            if spec_segment_name:
                model_choices = choices[(chooser_tours.tour_type == spec_segment_name)]
            else:
                model_choices = choices

            estimator.write_choices(model_choices)
            override_choices = estimator.get_survey_values(model_choices, 'tours', 'tdd')
            estimator.write_override_choices(override_choices)

            choices_list.append(override_choices)
            estimator.end_estimation()
        choices = pd.concat(choices_list)

        # update timetable to reflect the override choices (assign tours in tour_num order)
        timetable.rollback()
        for tour_num, nth_tours in chooser_tours.groupby('tour_num', sort=True):
            timetable.assign(window_row_ids=nth_tours['person_id'],
                             tdds=choices.reindex(nth_tours.index))

    timetable.replace_table()

    # choices are tdd alternative ids
    # we want to add start, end, and duration columns to tours, which we have in tdd_alts table
    choices = pd.merge(choices.to_frame('tdd'), tdd_alts,
                       left_on=['tdd'], right_index=True, how='left')

    return choices
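
# Sketch (defined but never called by the model) of the per-segment override
# split above: when specs are segmented, each estimator sees only the choices
# for tours of its segment's tour_type, and the overridden pieces are
# concatenated back together. Ids, segment names, and tdd values are
# hypothetical.
def _example_segment_override_split():
    import pandas as pd

    chooser_tours = pd.DataFrame({'tour_type': ['work', 'school', 'work']},
                                 index=pd.Index([1, 2, 3], name='tour_id'))
    choices = pd.Series([10, 11, 12], index=chooser_tours.index)

    # boolean mask per segment, index-aligned with choices
    pieces = [choices[chooser_tours.tour_type == seg] for seg in ['work', 'school']]
    return pd.concat(pieces)  # same values, regrouped by segment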
def free_parking(persons_merged, persons, chunk_size, trace_hh_id):
    """
    This model predicts whether a worker has free parking available at their workplace.
    """

    trace_label = 'free_parking'
    model_settings_file_name = 'free_parking.yaml'

    choosers = persons_merged.to_frame()
    choosers = choosers[choosers.workplace_zone_id > -1]
    logger.info("Running %s with %d persons", trace_label, len(choosers))

    model_settings = config.read_model_settings(model_settings_file_name)
    estimator = estimation.manager.begin_estimation('free_parking')

    constants = config.get_model_constants(model_settings)

    # - preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:
        locals_d = {}
        if constants is not None:
            locals_d.update(constants)

        expressions.assign_columns(df=choosers,
                                   model_settings=preprocessor_settings,
                                   locals_dict=locals_d,
                                   trace_label=trace_label)

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients_df = simulate.read_model_coefficients(model_settings)
    model_spec = simulate.eval_coefficients(model_spec, coefficients_df, estimator)

    nest_spec = config.get_logit_model_settings(model_settings)

    if estimator:
        estimator.write_model_settings(model_settings, model_settings_file_name)
        estimator.write_spec(model_settings)
        estimator.write_coefficients(coefficients_df)
        estimator.write_choosers(choosers)

    choices = simulate.simple_simulate(
        choosers=choosers,
        spec=model_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='free_parking_at_work',
        estimator=estimator)

    free_parking_alt = model_settings['FREE_PARKING_ALT']
    choices = (choices == free_parking_alt)

    if estimator:
        estimator.write_choices(choices)
        choices = estimator.get_survey_values(choices, 'persons', 'free_parking_at_work')
        estimator.write_override_choices(choices)
        estimator.end_estimation()

    persons = persons.to_frame()
    persons['free_parking_at_work'] = choices.reindex(persons.index).fillna(0).astype(bool)

    pipeline.replace_table("persons", persons)

    tracing.print_summary('free_parking',
                          persons.free_parking_at_work,
                          value_counts=True)

    if trace_hh_id:
        tracing.trace_df(persons, label=trace_label, warn_if_empty=True)