def get_alts_from_segmented_nested_logit(model_settings, segment_name, trace_label):
    """Infer the elemental mode alternatives from the (nested) logit spec.

    The nest spec is read from model_settings, the segment's coefficients are
    substituted into it, and the names of the leaf nests (the elemental
    alternatives) are collected.

    Parameters
    ----------
    model_settings : dict
    segment_name : str
        name of the coefficient segment whose coefficients are evaluated
        into the nest spec
    trace_label : str

    Returns
    -------
    list
        names of the leaf (elemental alternative) nests
    """
    nest_spec = config.get_logit_model_settings(model_settings)
    coefficients = simulate.get_segment_coefficients(model_settings, segment_name)
    nest_spec = simulate.eval_nest_coefficients(nest_spec, coefficients, trace_label)

    # leaf nests of the evaluated nest tree are the elemental mode alternatives
    tour_mode_alts = [nest.name for nest in logit.each_nest(nest_spec) if nest.is_leaf]

    return tour_mode_alts
def auto_ownership_simulate(households, households_merged, chunk_size, trace_hh_id):
    """
    Auto ownership is a standard model which predicts how many cars a household
    with given characteristics owns

    Parameters
    ----------
    households : pipeline table wrapper (supports .to_frame()) -- TODO confirm
    households_merged : pipeline table wrapper of households merged with related tables
    chunk_size : int
        passed through to simple_simulate chunking
    trace_hh_id : household id to trace, or falsy to disable tracing

    Side effects: replaces the pipeline "households" table with an added
    auto_ownership column; writes estimation data bundle if estimation is active.
    """
    trace_label = 'auto_ownership_simulate'
    model_settings_file_name = 'auto_ownership.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)

    # returns a falsy estimator unless estimation is enabled for this model
    estimator = estimation.manager.begin_estimation('auto_ownership')

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients_df = simulate.read_model_coefficients(model_settings)
    # substitute named coefficients into the spec expressions
    model_spec = simulate.eval_coefficients(model_spec, coefficients_df, estimator)

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    choosers = households_merged.to_frame()

    logger.info("Running %s with %d households", trace_label, len(choosers))

    # write inputs to the estimation data bundle before simulating
    if estimator:
        estimator.write_model_settings(model_settings, model_settings_file_name)
        estimator.write_spec(model_settings)
        estimator.write_coefficients(coefficients_df)
        estimator.write_choosers(choosers)

    choices = simulate.simple_simulate(
        choosers=choosers,
        spec=model_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='auto_ownership',
        estimator=estimator)

    # when estimating, override the simulated choices with survey values
    if estimator:
        estimator.write_choices(choices)
        choices = estimator.get_survey_values(choices, 'households', 'auto_ownership')
        estimator.write_override_choices(choices)
        estimator.end_estimation()

    households = households.to_frame()

    # no need to reindex as we used all households
    households['auto_ownership'] = choices

    pipeline.replace_table("households", households)

    tracing.print_summary('auto_ownership', households.auto_ownership, value_counts=True)

    if trace_hh_id:
        tracing.trace_df(households, label='auto_ownership', warn_if_empty=True)
def auto_ownership_simulate(households_merged, auto_ownership_spec, auto_ownership_settings, trace_hh_id):
    """
    Auto ownership is a standard model which predicts how many cars a household
    with given characteristics owns

    Legacy orca-based variant: spec and settings are injected rather than read
    here, and the result is attached via orca.add_column.
    """
    logger.info("Running auto_ownership_simulate with %d households" % len(households_merged))

    nest_spec = config.get_logit_model_settings(auto_ownership_settings)
    constants = config.get_model_constants(auto_ownership_settings)

    # trace_label is only truthy (and tracing enabled) when trace_hh_id is set
    choices = asim.simple_simulate(choosers=households_merged.to_frame(),
                                   spec=auto_ownership_spec,
                                   nest_spec=nest_spec,
                                   locals_d=constants,
                                   trace_label=trace_hh_id and 'auto_ownership',
                                   trace_choice_name='auto_ownership')

    tracing.print_summary('auto_ownership', choices, value_counts=True)

    orca.add_column('households', 'auto_ownership', choices)

    # recompute columns that depend on auto_ownership
    pipeline.add_dependent_columns('households', 'households_autoown')

    if trace_hh_id:
        # trace the new column plus all dependent columns
        trace_columns = ['auto_ownership'] + orca.get_table('households_autoown').columns
        tracing.trace_df(orca.get_table('households').to_frame(),
                         label='auto_ownership',
                         columns=trace_columns,
                         warn_if_empty=True)
def compute_ood_logsums(choosers, logsum_settings, od_skims, locals_dict, chunk_size, trace_label):
    """
    Compute one of the two out-of-direction logsums for destination alternatives.

    Depending on the skims passed in, this is either trip_origin -> alt_dest
    or alt_dest -> primary_dest.
    """
    # expose the skim wrappers to preprocessor and utility expressions
    locals_dict.update(od_skims)

    expressions.annotate_preprocessors(
        choosers, locals_dict, od_skims,
        logsum_settings, trace_label)

    spec = simulate.read_model_spec(file_name=logsum_settings['SPEC'])
    nesting = config.get_logit_model_settings(logsum_settings)

    logsums = simulate.simple_simulate_logsums(
        choosers,
        spec,
        nesting,
        skims=od_skims,
        locals_d=locals_dict,
        chunk_size=chunk_size,
        trace_label=trace_label)

    # one logsum per chooser row, in chooser order
    assert logsums.index.equals(choosers.index)

    # FIXME not strictly necessary, but would make trace files more legible?
    # logsums = logsums.replace(-np.inf, -999)

    return logsums
def free_parking(
        persons_merged, persons, households,
        skim_dict, skim_stack,
        chunk_size, trace_hh_id, locutor):
    """
    Predict, for each worker, whether they have free parking at their workplace.

    Adds boolean free_parking_at_work column to the persons table (non-workers
    get False). skim_dict, skim_stack and locutor are injected but unused here.
    """
    trace_label = 'free_parking'
    model_settings = config.read_model_settings('free_parking.yaml')

    choosers = persons_merged.to_frame()
    # only persons with a workplace are choosers
    choosers = choosers[choosers.workplace_taz > -1]

    logger.info("Running %s with %d persons", trace_label, len(choosers))

    constants = config.get_model_constants(model_settings)

    # - preprocessor annotates choosers in place
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_d = {}
        if constants is not None:
            locals_d.update(constants)

        expressions.assign_columns(
            df=choosers,
            model_settings=preprocessor_settings,
            locals_dict=locals_d,
            trace_label=trace_label)

    model_spec = simulate.read_model_spec(file_name='free_parking.csv')

    nest_spec = config.get_logit_model_settings(model_settings)

    choices = simulate.simple_simulate(
        choosers=choosers,
        spec=model_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='free_parking_at_work')

    persons = persons.to_frame()

    # no need to reindex as we used all households
    free_parking_alt = model_settings['FREE_PARKING_ALT']
    # convert alternative index to boolean (did they choose the free-parking alt?)
    choices = (choices == free_parking_alt)

    # non-choosers (no workplace) become NaN on reindex, filled with False
    persons['free_parking_at_work'] = choices.reindex(persons.index).fillna(0).astype(bool)

    pipeline.replace_table("persons", persons)

    tracing.print_summary('free_parking', persons.free_parking_at_work, value_counts=True)

    if trace_hh_id:
        tracing.trace_df(persons,
                         label=trace_label,
                         warn_if_empty=True)
def calc_rows_per_chunk(chunk_size, tours, persons_merged, alternatives, model_settings, trace_label=None): num_choosers = len(tours.index) # if not chunking, then return num_choosers # if chunk_size == 0: # return num_choosers, 0 chooser_row_size = tours.shape[1] sample_size = alternatives.shape[0] # persons_merged columns plus 2 previous tour columns extra_chooser_columns = persons_merged.shape[1] + 2 # one column per alternative plus skim and join columns alt_row_size = alternatives.shape[1] + 2 logsum_columns = 0 if 'LOGSUM_SETTINGS' in model_settings: logsum_settings = config.read_model_settings( model_settings['LOGSUM_SETTINGS']) logsum_spec = simulate.read_model_spec( file_name=logsum_settings['SPEC']) logsum_nest_spec = config.get_logit_model_settings(logsum_settings) if logsum_nest_spec is None: # expression_values for each spec row # utilities and probs for each alt logsum_columns = logsum_spec.shape[0] + (2 * logsum_spec.shape[1]) else: # expression_values for each spec row # raw_utilities and base_probabilities) for each alt # nested_exp_utilities, nested_probabilities for each nest # less 1 as nested_probabilities lacks root nest_count = logit.count_nests(logsum_nest_spec) logsum_columns = logsum_spec.shape[0] + ( 2 * logsum_spec.shape[1]) + (2 * nest_count) - 1 row_size = (chooser_row_size + extra_chooser_columns + alt_row_size + logsum_columns) * sample_size logger.debug("%s #chunk_calc choosers %s" % (trace_label, tours.shape)) logger.debug("%s #chunk_calc extra_chooser_columns %s" % (trace_label, extra_chooser_columns)) logger.debug("%s #chunk_calc alternatives %s" % (trace_label, alternatives.shape)) logger.debug("%s #chunk_calc alt_row_size %s" % (trace_label, alt_row_size)) logger.debug("%s #chunk_calc logsum_columns %s" % (trace_label, logsum_columns)) return chunk.rows_per_chunk(chunk_size, row_size, num_choosers, trace_label)
def free_parking(persons_merged, persons, households, skim_dict, skim_stack, chunk_size, trace_hh_id, locutor):
    """
    Predict, for each worker, whether they have free parking at their workplace.

    Adds boolean free_parking_at_work column to the persons table (non-workers
    get False). skim_dict, skim_stack and locutor are injected but unused here.
    """
    trace_label = 'free_parking'
    model_settings = config.read_model_settings('free_parking.yaml')

    choosers = persons_merged.to_frame()
    # only persons with a workplace are choosers
    choosers = choosers[choosers.workplace_taz > -1]

    logger.info("Running %s with %d persons", trace_label, len(choosers))

    constants = config.get_model_constants(model_settings)

    # - preprocessor annotates choosers in place
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_d = {}
        if constants is not None:
            locals_d.update(constants)

        expressions.assign_columns(df=choosers,
                                   model_settings=preprocessor_settings,
                                   locals_dict=locals_d,
                                   trace_label=trace_label)

    model_spec = simulate.read_model_spec(file_name='free_parking.csv')

    nest_spec = config.get_logit_model_settings(model_settings)

    choices = simulate.simple_simulate(
        choosers=choosers,
        spec=model_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='free_parking_at_work')

    persons = persons.to_frame()

    # no need to reindex as we used all households
    free_parking_alt = model_settings['FREE_PARKING_ALT']
    # convert alternative index to boolean (did they choose the free-parking alt?)
    choices = (choices == free_parking_alt)

    # non-choosers (no workplace) become NaN on reindex, filled with False
    persons['free_parking_at_work'] = choices.reindex(
        persons.index).fillna(0).astype(bool)

    pipeline.replace_table("persons", persons)

    tracing.print_summary('free_parking', persons.free_parking_at_work, value_counts=True)

    if trace_hh_id:
        tracing.trace_df(persons,
                         label=trace_label,
                         warn_if_empty=True)
def mandatory_tour_frequency(persons_merged, mandatory_tour_frequency_spec, mandatory_tour_frequency_settings, chunk_size, trace_hh_id):
    """
    This model predicts the frequency of making mandatory trips (see the
    alternatives above) - these trips include work and school in some combination.

    Side effects: adds mandatory_tour_frequency column to persons, creates the
    mandatory tours table, and recomputes tour-count-dependent person columns.
    """
    trace_label = 'mandatory_tour_frequency'

    choosers = persons_merged.to_frame()
    # filter based on results of CDAP: only persons with Mandatory activity pattern
    choosers = choosers[choosers.cdap_activity == 'M']
    logger.info("Running mandatory_tour_frequency with %d persons" % len(choosers))

    nest_spec = config.get_logit_model_settings(mandatory_tour_frequency_settings)
    constants = config.get_model_constants(mandatory_tour_frequency_settings)

    choices = simulate.simple_simulate(
        choosers,
        spec=mandatory_tour_frequency_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='mandatory_tour_frequency')

    # convert indexes to alternative names
    # (reindex to all persons; non-choosers become NaN)
    choices = pd.Series(
        mandatory_tour_frequency_spec.columns[choices.values],
        index=choices.index).reindex(persons_merged.local.index)

    tracing.print_summary('mandatory_tour_frequency', choices,
                          value_counts=True)

    inject.add_column("persons", "mandatory_tour_frequency", choices)

    create_mandatory_tours(trace_hh_id)

    # add mandatory_tour-dependent columns (e.g. tour counts) to persons
    pipeline.add_dependent_columns("persons", "persons_mtf")

    if trace_hh_id:
        # NOTE(review): trace_columns is currently unused since the columns
        # kwarg below is commented out (all columns are traced)
        trace_columns = ['mandatory_tour_frequency']
        tracing.trace_df(inject.get_table('persons').to_frame(),
                         label="mandatory_tour_frequency.persons",
                         # columns=trace_columns,
                         warn_if_empty=True)
def mandatory_tour_frequency(persons_merged, mandatory_tour_frequency_spec, mandatory_tour_frequency_settings, trace_hh_id):
    """
    This model predicts the frequency of making mandatory trips (see the
    alternatives above) - these trips include work and school in some combination.

    Legacy orca-based variant (no chunking); also stashes a random draw per
    person for prng repeatability testing.
    """
    choosers = persons_merged.to_frame()
    # filter based on results of CDAP: only persons with Mandatory activity pattern
    choosers = choosers[choosers.cdap_activity == 'M']
    logger.info("Running mandatory_tour_frequency with %d persons" % len(choosers))

    nest_spec = config.get_logit_model_settings(mandatory_tour_frequency_settings)
    constants = config.get_model_constants(mandatory_tour_frequency_settings)

    # trace_label is only truthy (and tracing enabled) when trace_hh_id is set
    choices = asim.simple_simulate(
        choosers,
        spec=mandatory_tour_frequency_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        trace_label=trace_hh_id and 'mandatory_tour_frequency',
        trace_choice_name='mandatory_tour_frequency')

    # convert indexes to alternative names
    # (reindex to all persons; non-choosers become NaN)
    choices = pd.Series(
        mandatory_tour_frequency_spec.columns[choices.values],
        index=choices.index).reindex(persons_merged.local.index)

    tracing.print_summary('mandatory_tour_frequency', choices,
                          value_counts=True)

    orca.add_column("persons", "mandatory_tour_frequency", choices)

    pipeline.add_dependent_columns("persons", "persons_mtf")

    create_mandatory_tours_table()

    # FIXME - test prng repeatability
    # flatten the per-row draws into a single column
    r = pipeline.get_rn_generator().random_for_df(choices)
    orca.add_column("persons", "mtf_rand", [item for sublist in r for item in sublist])

    if trace_hh_id:
        trace_columns = ['mandatory_tour_frequency']
        tracing.trace_df(orca.get_table('persons_merged').to_frame(),
                         label="mandatory_tour_frequency",
                         columns=trace_columns,
                         warn_if_empty=True)
def auto_ownership_simulate(households, households_merged, chunk_size, trace_hh_id):
    """
    Auto ownership is a standard model which predicts how many cars a household
    with given characteristics owns
    """
    trace_label = 'auto_ownership_simulate'
    model_settings = config.read_model_settings('auto_ownership.yaml')

    logger.info("Running %s with %d households", trace_label, len(households_merged))

    # spec, nesting structure and expression constants for the logit model
    spec = simulate.read_model_spec(file_name='auto_ownership.csv')
    nesting = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    choosers = households_merged.to_frame()

    choices = simulate.simple_simulate(
        choosers=choosers,
        spec=spec,
        nest_spec=nesting,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='auto_ownership')

    households = households.to_frame()

    # every household was a chooser, so choices align without reindexing
    households['auto_ownership'] = choices

    pipeline.replace_table("households", households)

    tracing.print_summary('auto_ownership', households.auto_ownership, value_counts=True)

    if trace_hh_id:
        tracing.trace_df(households, label='auto_ownership', warn_if_empty=True)
def auto_ownership_simulate(households, households_merged, chunk_size, trace_hh_id):
    """
    Auto ownership is a standard model which predicts how many cars a household
    with given characteristics owns

    Side effect: replaces the pipeline "households" table with an added
    auto_ownership column.
    """
    trace_label = 'auto_ownership_simulate'
    model_settings = config.read_model_settings('auto_ownership.yaml')

    logger.info("Running %s with %d households", trace_label, len(households_merged))

    model_spec = simulate.read_model_spec(file_name='auto_ownership.csv')
    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    choices = simulate.simple_simulate(
        choosers=households_merged.to_frame(),
        spec=model_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='auto_ownership')

    households = households.to_frame()

    # no need to reindex as we used all households
    households['auto_ownership'] = choices

    pipeline.replace_table("households", households)

    tracing.print_summary('auto_ownership', households.auto_ownership, value_counts=True)

    if trace_hh_id:
        tracing.trace_df(households,
                         label='auto_ownership',
                         warn_if_empty=True)
def compute_ood_logsums(
        choosers,
        logsum_settings,
        od_skims,
        locals_dict,
        chunk_size,
        trace_label):
    """
    Compute one (of two) out-of-direction logsums for destination alternatives

    Will either be trip_origin -> alt_dest or alt_dest -> primary_dest
    """
    # expose the skim wrappers to preprocessor and utility expressions
    locals_dict.update(od_skims)

    expressions.annotate_preprocessors(
        choosers, locals_dict, od_skims,
        logsum_settings,
        trace_label)

    nest_spec = config.get_logit_model_settings(logsum_settings)
    logsum_spec = simulate.read_model_spec(file_name=logsum_settings['SPEC'])

    logsums = simulate.simple_simulate_logsums(
        choosers,
        logsum_spec,
        nest_spec,
        skims=od_skims,
        locals_d=locals_dict,
        chunk_size=chunk_size,
        trace_label=trace_label)

    # one logsum per chooser row, in chooser order
    assert logsums.index.equals(choosers.index)

    # FIXME not strictly necessary, but would make trace files more legible?
    # logsums = logsums.replace(-np.inf, -999)

    return logsums
def tour_mode_choice_simulate(tours, persons_merged, skim_dict, skim_stack, chunk_size, trace_hh_id):
    """
    Tour mode choice simulate

    Chooses a mode for each non-atwork tour, running simple_simulate separately
    for each tour_type segment. Adds tour_mode column to the tours table.
    """
    trace_label = 'tour_mode_choice'
    model_settings = config.read_model_settings('tour_mode_choice.yaml')

    spec = tour_mode_choice_spec(model_settings)

    primary_tours = tours.to_frame()

    # atwork subtours are handled by a separate model step
    assert not (primary_tours.tour_category == 'atwork').any()

    persons_merged = persons_merged.to_frame()

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    logger.info("Running %s with %d tours" % (trace_label, primary_tours.shape[0]))

    tracing.print_summary('tour_types', primary_tours.tour_type, value_counts=True)

    # merge person attributes onto tours for use in utility expressions
    primary_tours_merged = pd.merge(primary_tours, persons_merged, left_on='person_id',
                                    right_index=True, how='left', suffixes=('', '_r'))

    # setup skim keys
    orig_col_name = 'TAZ'
    dest_col_name = 'destination'
    out_time_col_name = 'start'
    in_time_col_name = 'end'
    odt_skim_stack_wrapper = skim_stack.wrap(left_key=orig_col_name, right_key=dest_col_name,
                                             skim_key='out_period')
    dot_skim_stack_wrapper = skim_stack.wrap(left_key=dest_col_name, right_key=orig_col_name,
                                             skim_key='in_period')
    od_skim_stack_wrapper = skim_dict.wrap(orig_col_name, dest_col_name)

    skims = {
        "odt_skims": odt_skim_stack_wrapper,
        "dot_skims": dot_skim_stack_wrapper,
        "od_skims": od_skim_stack_wrapper,
        'orig_col_name': orig_col_name,
        'dest_col_name': dest_col_name,
        'out_time_col_name': out_time_col_name,
        'in_time_col_name': in_time_col_name
    }

    # run one segment per tour_type, concatenating the per-segment choices
    choices_list = []
    for tour_type, segment in primary_tours_merged.groupby('tour_type'):

        logger.info("tour_mode_choice_simulate tour_type '%s' (%s tours)" %
                    (tour_type, len(segment.index), ))

        # name index so tracing knows how to slice
        assert segment.index.name == 'tour_id'

        choices = run_tour_mode_choice_simulate(
            segment,
            spec, tour_type, model_settings,
            skims=skims,
            constants=constants,
            nest_spec=nest_spec,
            chunk_size=chunk_size,
            trace_label=tracing.extend_trace_label(trace_label, tour_type),
            trace_choice_name='tour_mode_choice')

        tracing.print_summary('tour_mode_choice_simulate %s choices' % tour_type,
                              choices, value_counts=True)

        choices_list.append(choices)

        # FIXME - force garbage collection
        force_garbage_collect()

    choices = pd.concat(choices_list)

    tracing.print_summary('tour_mode_choice_simulate all tour type choices',
                          choices, value_counts=True)

    # so we can trace with annotations
    primary_tours['tour_mode'] = choices

    # but only keep mode choice col
    all_tours = tours.to_frame()
    # uncomment to save annotations to table
    # assign_in_place(all_tours, annotations)
    assign_in_place(all_tours, choices.to_frame('tour_mode'))

    pipeline.replace_table("tours", all_tours)

    if trace_hh_id:
        tracing.trace_df(primary_tours,
                         label=tracing.extend_trace_label(trace_label, 'tour_mode'),
                         slicer='tour_id',
                         index_label='tour_id',
                         warn_if_empty=True)
def free_parking(persons_merged, persons, households, skim_dict, skim_stack, chunk_size, trace_hh_id, locutor):
    """
    Predict, for each worker, whether they have free parking at their workplace.

    Estimation-enabled variant: writes an estimation data bundle and overrides
    simulated choices with survey values when estimation is active. Adds boolean
    free_parking_at_work column to persons (non-workers get False).
    skim_dict, skim_stack and locutor are injected but unused here.
    """
    trace_label = 'free_parking'
    model_settings_file_name = 'free_parking.yaml'

    choosers = persons_merged.to_frame()
    # only persons with a workplace are choosers
    choosers = choosers[choosers.workplace_taz > -1]

    logger.info("Running %s with %d persons", trace_label, len(choosers))

    model_settings = config.read_model_settings(model_settings_file_name)
    # returns a falsy estimator unless estimation is enabled for this model
    estimator = estimation.manager.begin_estimation('free_parking')

    constants = config.get_model_constants(model_settings)

    # - preprocessor annotates choosers in place
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_d = {}
        if constants is not None:
            locals_d.update(constants)

        expressions.assign_columns(df=choosers,
                                   model_settings=preprocessor_settings,
                                   locals_dict=locals_d,
                                   trace_label=trace_label)

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients_df = simulate.read_model_coefficients(model_settings)
    # substitute named coefficients into the spec expressions
    model_spec = simulate.eval_coefficients(model_spec, coefficients_df, estimator)

    nest_spec = config.get_logit_model_settings(model_settings)

    # write inputs to the estimation data bundle before simulating
    if estimator:
        estimator.write_model_settings(model_settings, model_settings_file_name)
        estimator.write_spec(model_settings)
        estimator.write_coefficients(coefficients_df)
        estimator.write_choosers(choosers)

    choices = simulate.simple_simulate(
        choosers=choosers,
        spec=model_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='free_parking_at_work',
        estimator=estimator)

    free_parking_alt = model_settings['FREE_PARKING_ALT']
    # convert alternative index to boolean (did they choose the free-parking alt?)
    choices = (choices == free_parking_alt)

    # when estimating, override the simulated choices with survey values
    if estimator:
        estimator.write_choices(choices)
        choices = estimator.get_survey_values(choices, 'persons', 'free_parking_at_work')
        estimator.write_override_choices(choices)
        estimator.end_estimation()

    persons = persons.to_frame()
    # non-choosers (no workplace) become NaN on reindex, filled with False
    persons['free_parking_at_work'] = choices.reindex(
        persons.index).fillna(0).astype(bool)

    pipeline.replace_table("persons", persons)

    tracing.print_summary('free_parking', persons.free_parking_at_work, value_counts=True)

    if trace_hh_id:
        tracing.trace_df(persons,
                         label=trace_label,
                         warn_if_empty=True)
def atwork_subtour_mode_choice_simulate(tours, persons_merged, tour_mode_choice_spec, tour_mode_choice_settings, skim_dict, skim_stack, trace_hh_id):
    """
    At-work subtour mode choice simulate

    Chooses a mode for each at-work subtour using the 'workbased' segment of
    the tour mode choice spec, and writes the chosen mode back to the tours
    table.

    BUG FIX: the chosen mode was previously stored into subtours['destination']
    and assigned back to the tours 'destination' column, clobbering the subtour
    destinations with mode codes (and leaving the 'mode' column the trace block
    expects unset). Choices are now stored in the 'mode' column.
    """
    trace_label = 'atwork_subtour_mode_choice'

    tours = tours.to_frame()
    subtours = tours[tours.tour_category == 'subtour']

    # merge persons into tours
    choosers = pd.merge(subtours, persons_merged.to_frame(),
                        left_on='person_id', right_index=True)

    nest_spec = config.get_logit_model_settings(tour_mode_choice_settings)
    constants = config.get_model_constants(tour_mode_choice_settings)

    logger.info("Running %s with %d subtours" % (trace_label, len(subtours.index)))

    tracing.print_summary('%s tour_type' % trace_label,
                          subtours.tour_type, value_counts=True)

    if trace_hh_id:
        tracing.trace_df(tour_mode_choice_spec,
                         tracing.extend_trace_label(trace_label, 'spec'),
                         slicer='NONE', transpose=False)

    # setup skim keys: subtours go workplace -> destination and back
    odt_skim_stack_wrapper = skim_stack.wrap(left_key='workplace_taz', right_key='destination',
                                             skim_key="out_period")
    dot_skim_stack_wrapper = skim_stack.wrap(left_key='destination', right_key='workplace_taz',
                                             skim_key="in_period")
    od_skims = skim_dict.wrap('workplace_taz', 'destination')

    # at-work subtours use the 'workbased' segment of the omnibus spec
    spec = get_segment_and_unstack(tour_mode_choice_spec, segment='workbased')

    if trace_hh_id:
        tracing.trace_df(spec,
                         tracing.extend_trace_label(trace_label, 'spec'),
                         slicer='NONE', transpose=False)

    choices = _mode_choice_simulate(
        choosers,
        odt_skim_stack_wrapper=odt_skim_stack_wrapper,
        dot_skim_stack_wrapper=dot_skim_stack_wrapper,
        od_skim_stack_wrapper=od_skims,
        spec=spec,
        constants=constants,
        nest_spec=nest_spec,
        trace_label=trace_label,
        trace_choice_name='tour_mode_choice')

    tracing.print_summary('%s choices' % trace_label, choices, value_counts=True)

    # store the chosen mode (NOT destination) and write it back to tours
    subtours['mode'] = choices
    assign_in_place(tours, subtours[['mode']])

    if trace_hh_id:
        trace_columns = ['mode', 'person_id', 'tour_type', 'tour_num', 'parent_tour_id']
        tracing.trace_df(subtours,
                         label=tracing.extend_trace_label(trace_label, 'mode'),
                         slicer='tour_id',
                         index_label='tour_id',
                         columns=trace_columns,
                         warn_if_empty=True)

    # FIXME - this forces garbage collection
    memory_info()
def stop_frequency(
        tours, tours_merged,
        stop_frequency_alts,
        skim_dict,
        chunk_size,
        trace_hh_id):
    """
    stop frequency model

    For each tour, shoose a number of intermediate inbound stops and outbound stops.
    Create a trip table with inbound and outbound trips.

    Thus, a tour with stop_frequency '2out_0in' will have two outbound and zero inbound stops,
    and four corresponding trips: three outbound, and one inbound.

    Adds stop_frequency str column to trips, with fields

    creates trips table with columns:

    ::

        - person_id
        - household_id
        - tour_id
        - primary_purpose
        - atwork
        - trip_num
        - outbound
        - trip_count

    BUG FIXES:
    - `annotations` was referenced in the trace block even when no preprocessor
      was configured, raising NameError; it is now initialized to None and only
      traced when present.
    - segment logging used the root logger via `logging.info`; it now uses the
      module logger like the rest of the file.
    """
    trace_label = 'stop_frequency'
    model_settings = config.read_model_settings('stop_frequency.yaml')

    tours = tours.to_frame()
    tours_merged = tours_merged.to_frame()

    # sanity checks: every tour must be fully joined and have valid O/D
    assert not tours_merged.household_id.isnull().any()
    assert not (tours_merged.origin == -1).any()
    assert not (tours_merged.destination == -1).any()

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    # - run preprocessor to annotate tours_merged
    annotations = None
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        # hack: preprocessor adds origin column in place if it does not exist already
        od_skim_stack_wrapper = skim_dict.wrap('origin', 'destination')
        skims = [od_skim_stack_wrapper]

        locals_dict = {
            "od_skims": od_skim_stack_wrapper
        }
        if constants is not None:
            locals_dict.update(constants)

        simulate.set_skim_wrapper_targets(tours_merged, skims)

        # this should be pre-slice as some expressions may count tours by type
        annotations = expressions.compute_columns(
            df=tours_merged,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)

        assign_in_place(tours_merged, annotations)

    tracing.print_summary('stop_frequency segments',
                          tours_merged.primary_purpose, value_counts=True)

    # run one segment per primary_purpose, each with its own spec file
    choices_list = []
    for segment_type, choosers in tours_merged.groupby('primary_purpose'):

        logger.info("%s running segment %s with %s chooser rows" %
                    (trace_label, segment_type, choosers.shape[0]))

        spec = simulate.read_model_spec(file_name='stop_frequency_%s.csv' % segment_type)

        assert spec is not None, "spec for segment_type %s not found" % segment_type

        choices = simulate.simple_simulate(
            choosers=choosers,
            spec=spec,
            nest_spec=nest_spec,
            locals_d=constants,
            chunk_size=chunk_size,
            trace_label=tracing.extend_trace_label(trace_label, segment_type),
            trace_choice_name='stops')

        # convert indexes to alternative names
        choices = pd.Series(spec.columns[choices.values], index=choices.index)

        choices_list.append(choices)

    choices = pd.concat(choices_list)

    tracing.print_summary('stop_frequency', choices, value_counts=True)

    # add stop_frequency choices to tours table
    assign_in_place(tours, choices.to_frame('stop_frequency'))

    if 'primary_purpose' not in tours.columns:
        assign_in_place(tours, tours_merged[['primary_purpose']])

    pipeline.replace_table("tours", tours)

    # create trips table
    trips = process_trips(tours, stop_frequency_alts)
    trips = pipeline.extend_table("trips", trips)
    tracing.register_traceable_table('trips', trips)
    pipeline.get_rn_generator().add_channel('trips', trips)

    if trace_hh_id:
        tracing.trace_df(tours,
                         label="stop_frequency.tours",
                         slicer='person_id',
                         columns=None)

        tracing.trace_df(trips,
                         label="stop_frequency.trips",
                         slicer='person_id',
                         columns=None)

        # annotations only exist when a preprocessor was configured
        if annotations is not None:
            tracing.trace_df(annotations,
                             label="stop_frequency.annotations",
                             columns=None)

        tracing.trace_df(tours_merged,
                         label="stop_frequency.tours_merged",
                         slicer='person_id',
                         columns=None)
def trip_mode_choice(
        trips,
        tours_merged,
        skim_dict, skim_stack,
        chunk_size, trace_hh_id):
    """
    Trip mode choice - compute trip_mode (same values as for tour_mode) for each trip.

    Modes for each primary tour putpose are calculated separately because they have different
    coefficient values (stored in trip_mode_choice_coeffs.csv coefficient file.)

    Adds trip_mode column to trip table
    """
    trace_label = 'trip_mode_choice'
    model_settings = config.read_model_settings('trip_mode_choice.yaml')

    model_spec = \
        simulate.read_model_spec(file_name=model_settings['SPEC'])
    # per-purpose coefficient sets, evaluated into locals per segment below
    omnibus_coefficients = \
        assign.read_constant_spec(config.config_file_path(model_settings['COEFFS']))

    trips_df = trips.to_frame()
    logger.info("Running %s with %d trips", trace_label, trips_df.shape[0])

    tours_merged = tours_merged.to_frame()
    # keep only the tour columns the chooser expressions need
    tours_merged = tours_merged[model_settings['TOURS_MERGED_CHOOSER_COLUMNS']]

    nest_spec = config.get_logit_model_settings(model_settings)

    tracing.print_summary('primary_purpose',
                          trips_df.primary_purpose, value_counts=True)

    # - trips_merged - merge trips and tours_merged
    trips_merged = pd.merge(
        trips_df,
        tours_merged,
        left_on='tour_id',
        right_index=True,
        how="left")
    assert trips_merged.index.equals(trips.index)

    # setup skim keys
    assert ('trip_period' not in trips_merged)
    trips_merged['trip_period'] = skim_time_period_label(trips_merged.depart)

    orig_col = 'origin'
    dest_col = 'destination'

    odt_skim_stack_wrapper = skim_stack.wrap(left_key=orig_col, right_key=dest_col,
                                             skim_key='trip_period')
    od_skim_wrapper = skim_dict.wrap('origin', 'destination')

    skims = {
        "odt_skims": odt_skim_stack_wrapper,
        "od_skims": od_skim_wrapper,
    }

    constants = config.get_model_constants(model_settings)
    constants.update({
        'ORIGIN': orig_col,
        'DESTINATION': dest_col
    })

    # run one segment per primary_purpose, each with its own coefficient set
    choices_list = []
    for primary_purpose, trips_segment in trips_merged.groupby('primary_purpose'):

        segment_trace_label = tracing.extend_trace_label(trace_label, primary_purpose)

        logger.info("trip_mode_choice tour_type '%s' (%s trips)" %
                    (primary_purpose, len(trips_segment.index), ))

        # name index so tracing knows how to slice
        assert trips_segment.index.name == 'trip_id'

        # evaluate this purpose's coefficients into the expression locals
        locals_dict = assign.evaluate_constants(omnibus_coefficients[primary_purpose],
                                                constants=constants)
        locals_dict.update(constants)

        annotate_preprocessors(
            trips_segment, locals_dict, skims,
            model_settings, segment_trace_label)

        locals_dict.update(skims)
        choices = simulate.simple_simulate(
            choosers=trips_segment,
            spec=model_spec,
            nest_spec=nest_spec,
            skims=skims,
            locals_d=locals_dict,
            chunk_size=chunk_size,
            trace_label=segment_trace_label,
            trace_choice_name='trip_mode_choice')

        # map choice positions back to alternative (mode) names
        alts = model_spec.columns
        choices = choices.map(dict(list(zip(list(range(len(alts))), alts))))

        # tracing.print_summary('trip_mode_choice %s choices' % primary_purpose,
        #                       choices, value_counts=True)

        if trace_hh_id:
            # trace the coefficients
            tracing.trace_df(pd.Series(locals_dict),
                             label=tracing.extend_trace_label(segment_trace_label, 'constants'),
                             transpose=False,
                             slicer='NONE')

            # so we can trace with annotations
            trips_segment['trip_mode'] = choices
            tracing.trace_df(trips_segment,
                             label=tracing.extend_trace_label(segment_trace_label, 'trip_mode'),
                             slicer='tour_id',
                             index_label='tour_id',
                             warn_if_empty=True)

        choices_list.append(choices)

        # FIXME - force garbage collection
        force_garbage_collect()

    choices = pd.concat(choices_list)

    trips_df = trips.to_frame()
    trips_df['trip_mode'] = choices

    tracing.print_summary('tour_modes',
                          trips_merged.tour_mode, value_counts=True)

    tracing.print_summary('trip_mode_choice choices',
                          choices, value_counts=True)

    assert not trips_df.trip_mode.isnull().any()

    pipeline.replace_table("trips", trips_df)

    if trace_hh_id:
        tracing.trace_df(trips_df,
                         label=tracing.extend_trace_label(trace_label, 'trip_mode'),
                         slicer='trip_id',
                         index_label='trip_id',
                         warn_if_empty=True)
def compute_logsums(primary_purpose, trips, destination_sample, tours_merged, model_settings, skim_hotel, chunk_size, trace_label):
    """
    Calculate mode choice logsums using the same recipe as for trip_mode_choice, but do it twice
    for each alternative since we need out-of-direction logsum
    (i.e . origin to alt_dest, and alt_dest to half-tour destination)

    Returns
    -------
    adds od_logsum and dp_logsum columns to trips (in place)
    """
    trace_label = tracing.extend_trace_label(trace_label, 'compute_logsums')
    logger.info("Running %s with %d samples", trace_label, destination_sample.shape[0])

    # chunk usage is uniform so better to combine
    chunk_tag = 'trip_destination.compute_logsums'

    # FIXME should pass this in?
    network_los = inject.get_injectable('network_los')

    # - trips_merged - merge trips and tours_merged
    trips_merged = pd.merge(
        trips,
        tours_merged,
        left_on='tour_id',
        right_index=True,
        how="left")
    assert trips_merged.index.equals(trips.index)

    # - choosers - merge destination_sample and trips_merged
    # re/set index because pandas merge does not preserve left index if it has duplicate values!
    choosers = pd.merge(destination_sample,
                        trips_merged.reset_index(),
                        left_index=True,
                        right_on='trip_id',
                        how="left",
                        suffixes=('', '_r')).set_index('trip_id')
    assert choosers.index.equals(destination_sample.index)

    logsum_settings = config.read_model_settings(model_settings['LOGSUM_SETTINGS'])
    coefficients = simulate.get_segment_coefficients(logsum_settings, primary_purpose)

    # substitute this segment's coefficients into nest spec and utility spec
    nest_spec = config.get_logit_model_settings(logsum_settings)
    nest_spec = simulate.eval_nest_coefficients(nest_spec, coefficients, trace_label)

    logsum_spec = simulate.read_model_spec(file_name=logsum_settings['SPEC'])
    logsum_spec = simulate.eval_coefficients(logsum_spec, coefficients, estimator=None)

    locals_dict = {}
    locals_dict.update(config.get_model_constants(logsum_settings))

    # coefficients can appear in expressions
    locals_dict.update(coefficients)

    skims = skim_hotel.logsum_skims()
    if network_los.zone_system == los.THREE_ZONE:
        # TVPB constants can appear in expressions
        locals_dict.update(network_los.setting('TVPB_SETTINGS.tour_mode_choice.CONSTANTS'))

    # - od_logsums: trip origin -> sampled alternative destination
    od_skims = {
        'ORIGIN': model_settings['TRIP_ORIGIN'],
        'DESTINATION': model_settings['ALT_DEST_COL_NAME'],
        "odt_skims": skims['odt_skims'],
        "dot_skims": skims['dot_skims'],
        "od_skims": skims['od_skims'],
    }
    if network_los.zone_system == los.THREE_ZONE:
        od_skims.update({
            'tvpb_logsum_odt': skims['tvpb_logsum_odt'],
            'tvpb_logsum_dot': skims['tvpb_logsum_dot']
        })
    destination_sample['od_logsum'] = compute_ood_logsums(
        choosers,
        logsum_settings,
        nest_spec, logsum_spec,
        od_skims,
        locals_dict,
        chunk_size,
        trace_label=tracing.extend_trace_label(trace_label, 'od'),
        chunk_tag=chunk_tag)

    # - dp_logsums: sampled alternative destination -> primary (half-tour) destination
    dp_skims = {
        'ORIGIN': model_settings['ALT_DEST_COL_NAME'],
        'DESTINATION': model_settings['PRIMARY_DEST'],
        "odt_skims": skims['dpt_skims'],
        "dot_skims": skims['pdt_skims'],
        "od_skims": skims['dp_skims'],
    }
    if network_los.zone_system == los.THREE_ZONE:
        dp_skims.update({
            'tvpb_logsum_odt': skims['tvpb_logsum_dpt'],
            'tvpb_logsum_dot': skims['tvpb_logsum_pdt']
        })
    destination_sample['dp_logsum'] = compute_ood_logsums(
        choosers,
        logsum_settings,
        nest_spec, logsum_spec,
        dp_skims,
        locals_dict,
        chunk_size,
        trace_label=tracing.extend_trace_label(trace_label, 'dp'),
        chunk_tag=chunk_tag)

    return destination_sample
def joint_tour_participation(
        tours, persons_merged,
        chunk_size,
        trace_hh_id):
    """
    Predicts for each eligible person to participate or not participate in each joint tour.

    Builds a (tour, person) candidates table, simulates participation with a custom
    chooser (participants_chooser) that re-draws until every joint tour's party
    composition is satisfied, then writes results back to the pipeline.

    Side effects: replaces the 'joint_tour_participants' pipeline table, updates
    'tours' (joint-tour point person_id and number_of_participants), and runs the
    model's annotation step.
    """
    trace_label = 'joint_tour_participation'
    model_settings = config.read_model_settings('joint_tour_participation.yaml')
    model_spec = simulate.read_model_spec(file_name='joint_tour_participation.csv')

    tours = tours.to_frame()
    joint_tours = tours[tours.tour_category == 'joint']

    # - if no joint tours, write null results and bail out early
    if joint_tours.shape[0] == 0:
        add_null_results(model_settings, trace_label)
        return

    persons_merged = persons_merged.to_frame()

    # - create joint_tour_participation_candidates table
    candidates = joint_tour_participation_candidates(joint_tours, persons_merged)
    tracing.register_traceable_table('joint_tour_participants', candidates)
    pipeline.get_rn_generator().add_channel('joint_tour_participants', candidates)

    logger.info("Running joint_tours_participation with %d potential participants (candidates)" %
                candidates.shape[0])

    # - preprocessor (annotates candidates in place)
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_dict = {
            'person_time_window_overlap': person_time_window_overlap,
            'persons': persons_merged
        }

        expressions.assign_columns(
            df=candidates,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)

    # - simple_simulate
    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    choices = simulate.simple_simulate(
        choosers=candidates,
        spec=model_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='participation',
        custom_chooser=participants_chooser)

    # choice is boolean (participate or not)
    choice_col = model_settings.get('participation_choice', 'participate')
    # FIX: original assert message left the '%s' placeholder unfilled
    assert choice_col in model_spec.columns, \
        "couldn't find participation choice column '%s' in spec" % choice_col
    PARTICIPATE_CHOICE = model_spec.columns.get_loc(choice_col)

    participate = (choices == PARTICIPATE_CHOICE)

    # satisfaction indexed by tour_id
    tour_satisfaction = get_tour_satisfaction(candidates, participate)

    # participants_chooser re-simulates until all tours are satisfied, so this must hold
    assert tour_satisfaction.all()

    candidates['satisfied'] = reindex(tour_satisfaction, candidates.tour_id)

    PARTICIPANT_COLS = ['tour_id', 'household_id', 'person_id']
    participants = candidates[participate][PARTICIPANT_COLS].copy()

    # assign participant_num
    # FIXME do we want something smarter than the participant with the lowest person_id?
    participants['participant_num'] = \
        participants.sort_values(by=['tour_id', 'person_id']).\
        groupby('tour_id').cumcount() + 1

    pipeline.replace_table("joint_tour_participants", participants)

    # drop channel as we aren't using any more (and it has candidates that weren't chosen)
    pipeline.get_rn_generator().drop_channel('joint_tour_participants')

    # - assign joint tour 'point person' (participant_num == 1)
    point_persons = participants[participants.participant_num == 1]
    joint_tours['person_id'] = point_persons.set_index('tour_id').person_id

    # update number_of_participants which was initialized to 1
    joint_tours['number_of_participants'] = participants.groupby('tour_id').size()

    assign_in_place(tours, joint_tours[['person_id', 'number_of_participants']])

    pipeline.replace_table("tours", tours)

    # - run annotations
    annotate_jtp(model_settings, trace_label)

    if trace_hh_id:
        tracing.trace_df(participants,
                         label="joint_tour_participation.participants")
        tracing.trace_df(joint_tours,
                         label="joint_tour_participation.joint_tours")
def tour_mode_choice_simulate(tours_merged,
                              tour_mode_choice_spec,
                              tour_mode_choice_settings,
                              skim_dict, skim_stack,
                              omx_file,
                              trace_hh_id):
    """
    Tour mode choice simulate

    Runs mode choice separately for each tour_type segment, then writes the
    combined choices back to the orca 'tours' table as a 'mode' column.

    NOTE(review): trace_label is falsy when trace_hh_id is falsy, yet it is still
    passed to tracing.extend_trace_label inside the loop — presumably that helper
    tolerates a falsy label; confirm before relying on tracing here.
    """
    # trace only when a trace household was requested
    trace_label = trace_hh_id and 'tour_mode_choice'

    tours = tours_merged.to_frame()

    nest_spec = config.get_logit_model_settings(tour_mode_choice_settings)
    constants = config.get_model_constants(tour_mode_choice_settings)

    logger.info("Running tour_mode_choice_simulate with %d tours" % len(tours.index))

    tracing.print_summary('tour_mode_choice_simulate tour_type',
                          tours.tour_type, value_counts=True)

    if trace_hh_id:
        tracing.trace_df(tour_mode_choice_spec,
                         tracing.extend_trace_label(trace_label, 'spec'),
                         slicer='NONE', transpose=False)

    # setup skim keys: outbound (origin->destination at out_period),
    # inbound (destination->origin at in_period), and time-independent od skims
    odt_skim_stack_wrapper = skim_stack.wrap(left_key='TAZ', right_key='destination',
                                             skim_key="out_period")
    dot_skim_stack_wrapper = skim_stack.wrap(left_key='destination', right_key='TAZ',
                                             skim_key="in_period")
    od_skims = skim_dict.wrap('TAZ', 'destination')

    choices_list = []

    # simulate each tour_type segment with its own unstacked slice of the spec
    for tour_type, segment in tours.groupby('tour_type'):

        # if tour_type != 'work':
        #     continue

        logger.info("tour_mode_choice_simulate tour_type '%s' (%s tours)" %
                    (tour_type, len(segment.index), ))

        # name index so tracing knows how to slice
        segment.index.name = 'tour_id'

        spec = get_segment_and_unstack(tour_mode_choice_spec, tour_type)

        if trace_hh_id:
            tracing.trace_df(spec,
                             tracing.extend_trace_label(trace_label, 'spec.%s' % tour_type),
                             slicer='NONE', transpose=False)

        choices = _mode_choice_simulate(
            segment,
            skim_dict=skim_dict,
            skim_stack=skim_stack,
            odt_skim_stack_wrapper=odt_skim_stack_wrapper,
            dot_skim_stack_wrapper=dot_skim_stack_wrapper,
            od_skim_stack_wrapper=od_skims,
            spec=spec,
            constants=constants,
            nest_spec=nest_spec,
            trace_label=tracing.extend_trace_label(trace_label, tour_type),
            trace_choice_name='tour_mode_choice')

        tracing.print_summary('tour_mode_choice_simulate %s choices' % tour_type,
                              choices, value_counts=True)

        choices_list.append(choices)

        # FIXME - force garbage collection
        mem = asim.memory_info()
        logger.debug('memory_info tour_type %s, %s' % (tour_type, mem))

    # combine per-segment choices back into a single series aligned to tours
    choices = pd.concat(choices_list)

    tracing.print_summary('tour_mode_choice_simulate all tour type choices',
                          choices, value_counts=True)

    orca.add_column("tours", "mode", choices)

    if trace_hh_id:
        trace_columns = ['mode', 'person_id', 'tour_type', 'tour_num']
        tracing.trace_df(orca.get_table('tours').to_frame(),
                         label=tracing.extend_trace_label(trace_label, 'mode'),
                         slicer='tour_id',
                         index_label='tour_id',
                         columns=trace_columns,
                         warn_if_empty=True)

    # FIXME - this forces garbage collection
    asim.memory_info()
def atwork_subtour_mode_choice(
        tours,
        persons_merged,
        skim_dict, skim_stack,
        chunk_size,
        trace_hh_id):
    """
    At-work subtour mode choice simulate

    Selects the 'atwork' subtours from the tours table, merges in person
    attributes, runs tour mode choice with workplace_taz as the tour origin,
    and writes the chosen tour_mode back to the pipeline 'tours' table.
    """
    trace_label = 'atwork_subtour_mode_choice'

    model_settings = config.read_model_settings('tour_mode_choice.yaml')

    spec = tour_mode_choice_spec(model_settings)

    tours = tours.to_frame()
    subtours = tours[tours.tour_category == 'atwork']

    # - if no atwork subtours, nothing to do
    if subtours.shape[0] == 0:
        tracing.no_results(trace_label)
        return

    # merge person attributes onto each subtour (left join on person_id)
    subtours_merged = \
        pd.merge(subtours, persons_merged.to_frame(),
                 left_on='person_id', right_index=True, how='left')

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    logger.info("Running %s with %d subtours" % (trace_label, subtours_merged.shape[0]))

    tracing.print_summary('%s tour_type' % trace_label,
                          subtours_merged.tour_type, value_counts=True)

    # setup skim keys: subtours originate at the workplace, not home
    orig_col_name = 'workplace_taz'
    dest_col_name = 'destination'
    out_time_col_name = 'start'
    in_time_col_name = 'end'
    odt_skim_stack_wrapper = skim_stack.wrap(left_key=orig_col_name, right_key=dest_col_name,
                                             skim_key='out_period')
    dot_skim_stack_wrapper = skim_stack.wrap(left_key=dest_col_name, right_key=orig_col_name,
                                             skim_key='in_period')
    od_skim_stack_wrapper = skim_dict.wrap(orig_col_name, dest_col_name)

    skims = {
        "odt_skims": odt_skim_stack_wrapper,
        "dot_skims": dot_skim_stack_wrapper,
        "od_skims": od_skim_stack_wrapper,
        'orig_col_name': orig_col_name,
        'dest_col_name': dest_col_name,
        'out_time_col_name': out_time_col_name,
        'in_time_col_name': in_time_col_name
    }

    choices = run_tour_mode_choice_simulate(
        subtours_merged,
        spec, tour_purpose='atwork', model_settings=model_settings,
        skims=skims,
        constants=constants,
        nest_spec=nest_spec,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='tour_mode_choice')

    tracing.print_summary('%s choices' % trace_label, choices, value_counts=True)

    # write tour_mode for the atwork subtours back into the full tours table
    assign_in_place(tours, choices.to_frame('tour_mode'))
    pipeline.replace_table("tours", tours)

    if trace_hh_id:
        tracing.trace_df(tours[tours.tour_category == 'atwork'],
                         label=tracing.extend_trace_label(trace_label, 'tour_mode'),
                         slicer='tour_id',
                         index_label='tour_id')

    force_garbage_collect()
def stop_frequency(tours, tours_merged, stop_frequency_alts, network_los, chunk_size, trace_hh_id):
    """
    stop frequency model

    For each tour, choose a number of intermediate inbound stops and outbound
    stops. Create a trip table with inbound and outbound trips.

    Thus, a tour with stop_frequency '2out_0in' will have two outbound and zero
    inbound stops, and four corresponding trips: three outbound, and one inbound.

    Adds stop_frequency str column to tours and creates the trips table with columns:

    ::

        - person_id
        - household_id
        - tour_id
        - primary_purpose
        - atwork
        - trip_num
        - outbound
        - trip_count
    """
    trace_label = 'stop_frequency'
    model_settings_file_name = 'stop_frequency.yaml'

    model_settings = config.read_model_settings(model_settings_file_name)

    tours = tours.to_frame()
    tours_merged = tours_merged.to_frame()
    assert not tours_merged.household_id.isnull().any()
    assert not (tours_merged.origin == -1).any()
    assert not (tours_merged.destination == -1).any()

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    # - run preprocessor to annotate tours_merged
    # FIX: initialize so the trace block below cannot hit a NameError when
    # no preprocessor is configured
    annotations = None
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        # hack: preprocessor adds origin column in place if it does not exist already
        assert 'origin' in tours_merged
        assert 'destination' in tours_merged
        od_skim_stack_wrapper = network_los.get_default_skim_dict().wrap(
            'origin', 'destination')
        skims = [od_skim_stack_wrapper]

        locals_dict = {
            "od_skims": od_skim_stack_wrapper,
            'network_los': network_los
        }
        locals_dict.update(constants)

        simulate.set_skim_wrapper_targets(tours_merged, skims)

        # this should be pre-slice as some expressions may count tours by type
        annotations = expressions.compute_columns(
            df=tours_merged,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)

        assign_in_place(tours_merged, annotations)

    tracing.print_summary('stop_frequency segments',
                          tours_merged.primary_purpose, value_counts=True)

    spec_segments = model_settings.get('SPEC_SEGMENTS')
    assert spec_segments is not None, f"SPEC_SEGMENTS setting not found in model settings: {model_settings_file_name}"
    segment_col = model_settings.get('SEGMENT_COL')
    assert segment_col is not None, f"SEGMENT_COL setting not found in model settings: {model_settings_file_name}"

    nest_spec = config.get_logit_model_settings(model_settings)

    # FIX: initialize so the estimator check after the loop cannot NameError
    # when all segments are empty (it then simply skips the survey comparison)
    estimator = None

    choices_list = []
    for segment_settings in spec_segments:

        # segment name doubles as the value matched in the chooser segment column
        segment_name = segment_settings[segment_col]

        chooser_segment = tours_merged[tours_merged[segment_col] == segment_name]

        if len(chooser_segment) == 0:
            # FIX: use the module-level logger, not the root logging module,
            # consistent with the rest of this file
            logger.info(f"{trace_label} skipping empty segment {segment_name}")
            continue

        logger.info(
            f"{trace_label} running segment {segment_name} with {chooser_segment.shape[0]} chooser rows"
        )

        estimator = estimation.manager.begin_estimation(
            model_name=segment_name, bundle_name='stop_frequency')

        segment_spec = simulate.read_model_spec(
            file_name=segment_settings['SPEC'])
        assert segment_spec is not None, "spec for segment_type %s not found" % segment_name

        coefficients_file_name = segment_settings['COEFFICIENTS']
        coefficients_df = simulate.read_model_coefficients(
            file_name=coefficients_file_name)
        segment_spec = simulate.eval_coefficients(segment_spec, coefficients_df, estimator)

        if estimator:
            estimator.write_spec(segment_settings, bundle_directory=False)
            estimator.write_model_settings(model_settings,
                                           model_settings_file_name,
                                           bundle_directory=True)
            estimator.write_coefficients(coefficients_df, segment_settings)
            estimator.write_choosers(chooser_segment)

            estimator.set_chooser_id(chooser_segment.index.name)

        choices = simulate.simple_simulate(
            choosers=chooser_segment,
            spec=segment_spec,
            nest_spec=nest_spec,
            locals_d=constants,
            chunk_size=chunk_size,
            trace_label=tracing.extend_trace_label(trace_label, segment_name),
            trace_choice_name='stops',
            estimator=estimator)

        # convert indexes to alternative names
        choices = pd.Series(segment_spec.columns[choices.values], index=choices.index)

        if estimator:
            estimator.write_choices(choices)
            choices = estimator.get_survey_values(
                choices, 'tours', 'stop_frequency')  # override choices
            estimator.write_override_choices(choices)
            estimator.end_estimation()

        choices_list.append(choices)

    choices = pd.concat(choices_list)

    tracing.print_summary('stop_frequency', choices, value_counts=True)

    # add stop_frequency choices to tours table
    assign_in_place(tours, choices.to_frame('stop_frequency'))

    # FIXME should have added this when tours created?
    # NOTE(review): this assert makes the following 'if' always true — preserved as-is
    assert 'primary_purpose' not in tours
    if 'primary_purpose' not in tours.columns:
        # if not already there, then it will have been added by annotate tours preprocessor
        assign_in_place(tours, tours_merged[['primary_purpose']])

    pipeline.replace_table("tours", tours)

    # create trips table
    trips = process_trips(tours, stop_frequency_alts)
    trips = pipeline.extend_table("trips", trips)
    tracing.register_traceable_table('trips', trips)
    pipeline.get_rn_generator().add_channel('trips', trips)

    if estimator:
        # make sure they created trips with the expected tour_ids
        columns = ['person_id', 'household_id', 'tour_id', 'outbound']

        survey_trips = estimation.manager.get_survey_table(table_name='trips')

        different = False
        survey_trips_not_in_trips = survey_trips[~survey_trips.index.isin(trips.index)]
        if len(survey_trips_not_in_trips) > 0:
            print(f"survey_trips_not_in_trips\n{survey_trips_not_in_trips}")
            different = True
        trips_not_in_survey_trips = trips[~trips.index.isin(survey_trips.index)]
        # FIX: original tested survey_trips_not_in_trips here (copy-paste bug)
        if len(trips_not_in_survey_trips) > 0:
            print(f"trips_not_in_survey_trips\n{trips_not_in_survey_trips}")
            different = True
        assert not different

        survey_trips = \
            estimation.manager.get_survey_values(trips,
                                                 table_name='trips',
                                                 column_names=columns)

        trips_differ = (trips[columns] != survey_trips[columns]).any(axis=1)

        if trips_differ.any():
            print("trips_differ\n%s" % trips_differ)
            print("%s of %s tours differ" % (trips_differ.sum(), len(trips_differ)))
            print("differing survey_trips\n%s" % survey_trips[trips_differ])
            print("differing modeled_trips\n%s" % trips[columns][trips_differ])

        assert (not trips_differ.any())

    if trace_hh_id:
        tracing.trace_df(tours,
                         label="stop_frequency.tours",
                         slicer='person_id',
                         columns=None)

        tracing.trace_df(trips,
                         label="stop_frequency.trips",
                         slicer='person_id',
                         columns=None)

        if annotations is not None:
            tracing.trace_df(annotations,
                             label="stop_frequency.annotations",
                             columns=None)

        tracing.trace_df(tours_merged,
                         label="stop_frequency.tours_merged",
                         slicer='person_id',
                         columns=None)
def joint_tour_composition(tours, households, persons, chunk_size, trace_hh_id):
    """
    This model predicts the makeup of the travel party (adults, children, or mixed).

    Writes a 'composition' column onto the pipeline 'tours' table (empty string
    for non-joint tours) and supports estimation-mode overrides.
    """
    trace_label = 'joint_tour_composition'
    model_settings_file_name = 'joint_tour_composition.yaml'

    tours = tours.to_frame()
    joint_tours = tours[tours.tour_category == 'joint']

    # - if no joint tours, write null results and bail out early
    if joint_tours.shape[0] == 0:
        add_null_results(trace_label, tours)
        return

    model_settings = config.read_model_settings(model_settings_file_name)
    estimator = estimation.manager.begin_estimation('joint_tour_composition')

    # - only interested in households with joint_tours
    households = households.to_frame()
    households = households[households.num_hh_joint_tours > 0]

    persons = persons.to_frame()
    persons = persons[persons.household_id.isin(households.index)]

    logger.info("Running joint_tour_composition with %d joint tours" % joint_tours.shape[0])

    # - run preprocessor (annotates the filtered households in place)
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_dict = {
            'persons': persons,
            'hh_time_window_overlap': hh_time_window_overlap
        }

        expressions.assign_columns(df=households,
                                   model_settings=preprocessor_settings,
                                   locals_dict=locals_dict,
                                   trace_label=trace_label)

    # choosers are joint tours with their household attributes merged in
    joint_tours_merged = pd.merge(joint_tours, households,
                                  left_on='household_id', right_index=True, how='left')

    # - simple_simulate
    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients_df = simulate.read_model_coefficients(model_settings)
    model_spec = simulate.eval_coefficients(model_spec, coefficients_df, estimator)

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    if estimator:
        estimator.write_spec(model_settings)
        estimator.write_model_settings(model_settings, model_settings_file_name)
        estimator.write_coefficients(coefficients_df, model_settings)
        estimator.write_choosers(joint_tours_merged)

    choices = simulate.simple_simulate(choosers=joint_tours_merged,
                                       spec=model_spec,
                                       nest_spec=nest_spec,
                                       locals_d=constants,
                                       chunk_size=chunk_size,
                                       trace_label=trace_label,
                                       trace_choice_name='composition',
                                       estimator=estimator)

    # convert indexes to alternative names
    choices = pd.Series(model_spec.columns[choices.values], index=choices.index)

    if estimator:
        # in estimation mode, replace simulated choices with survey values
        estimator.write_choices(choices)
        choices = estimator.get_survey_values(choices, 'tours', 'composition')
        estimator.write_override_choices(choices)
        estimator.end_estimation()

    # add composition column to tours for tracing
    joint_tours['composition'] = choices

    # reindex since we ran model on a subset of households
    tours['composition'] = choices.reindex(tours.index).fillna('').astype(str)
    pipeline.replace_table("tours", tours)

    tracing.print_summary('joint_tour_composition', joint_tours.composition,
                          value_counts=True)

    if trace_hh_id:
        tracing.trace_df(joint_tours,
                         label="joint_tour_composition.joint_tours",
                         slicer='household_id')
def atwork_subtour_frequency(tours,
                             persons_merged,
                             atwork_subtour_frequency_spec,
                             atwork_subtour_frequency_settings,
                             atwork_subtour_frequency_alternatives,
                             chunk_size,
                             trace_hh_id):
    """
    This model predicts the frequency of making at-work subtour tours
    (alternatives for this model come from a separate csv file which is
    configured by the user).

    Side effects: writes an 'atwork_subtour_frequency' column onto the pipeline
    'tours' table and extends it with the generated atwork subtours.
    """
    trace_label = 'atwork_subtour_frequency'

    tours = tours.to_frame()
    persons_merged = persons_merged.to_frame()

    work_tours = tours[tours.tour_type == 'work']

    # merge persons into work_tours
    work_tours = pd.merge(work_tours, persons_merged,
                          left_on='person_id', right_index=True)

    logger.info("Running atwork_subtour_frequency with %d work tours" % len(work_tours))

    nest_spec = config.get_logit_model_settings(atwork_subtour_frequency_settings)
    constants = config.get_model_constants(atwork_subtour_frequency_settings)

    choices = simulate.simple_simulate(
        choosers=work_tours,
        spec=atwork_subtour_frequency_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        trace_label=trace_label,
        trace_choice_name='atwork_subtour_frequency')

    # convert indexes to alternative names
    choices = pd.Series(atwork_subtour_frequency_spec.columns[choices.values],
                        index=choices.index)

    tracing.print_summary('atwork_subtour_frequency', choices, value_counts=True)

    # reindex since we are working with a subset of tours
    choices = choices.reindex(tours.index)

    # add atwork_subtour_frequency column to tours
    tours['atwork_subtour_frequency'] = choices
    pipeline.replace_table("tours", tours)

    # - create atwork_subtours based on atwork_subtour_frequency choice names
    work_tours = tours[tours.tour_type == 'work']
    assert not work_tours.atwork_subtour_frequency.isnull().any()

    subtours = process_atwork_subtours(work_tours, atwork_subtour_frequency_alternatives)

    pipeline.extend_table("tours", subtours)
    tracing.register_traceable_table('tours', subtours)
    # FIX: channel name first, dataframe second — the arguments were swapped,
    # inconsistent with every other add_channel call in this file
    pipeline.get_rn_generator().add_channel('tours', subtours)

    if trace_hh_id:
        trace_columns = ['atwork_subtour_frequency']
        tracing.trace_df(inject.get_table('tours').to_frame(),
                         label=trace_label,
                         columns=trace_columns,
                         warn_if_empty=True)
def _compute_logsums(alt_tdd, tours_merged, tour_purpose, model_settings, trace_label):
    """
    compute logsums for tours using skims for alt_tdd out_period and in_period

    Joins alt_tdd rows to their tour's attributes, builds directional and
    reverse-direction skim wrappers keyed on the tour-purpose destination
    column, runs any configured logsum preprocessor, and returns the
    simple_simulate_logsums series aligned to the choosers.
    """
    trace_label = tracing.extend_trace_label(trace_label, 'logsums')

    logsum_settings = config.read_model_settings(model_settings['LOGSUM_SETTINGS'])

    choosers = alt_tdd.join(tours_merged, how='left', rsuffix='_chooser')
    logger.info("%s compute_logsums for %d choosers%s alts" %
                (trace_label, choosers.shape[0], alt_tdd.shape[0]))

    # - setup skims
    skim_dict = inject.get_injectable('skim_dict')
    skim_stack = inject.get_injectable('skim_stack')

    orig_col_name = 'TAZ'
    # destination column depends on the tour purpose (e.g. workplace vs school)
    dest_col_name = model_settings.get('DESTINATION_FOR_TOUR_PURPOSE').get(tour_purpose)

    odt_skim_stack_wrapper = skim_stack.wrap(left_key=orig_col_name, right_key=dest_col_name,
                                             skim_key='out_period')
    dot_skim_stack_wrapper = skim_stack.wrap(left_key=dest_col_name, right_key=orig_col_name,
                                             skim_key='in_period')
    # reverse-direction wrappers: od at in_period and do at out_period
    odr_skim_stack_wrapper = skim_stack.wrap(left_key=orig_col_name, right_key=dest_col_name,
                                             skim_key='in_period')
    dor_skim_stack_wrapper = skim_stack.wrap(left_key=dest_col_name, right_key=orig_col_name,
                                             skim_key='out_period')
    od_skim_stack_wrapper = skim_dict.wrap(orig_col_name, dest_col_name)

    skims = {
        "odt_skims": odt_skim_stack_wrapper,
        "dot_skims": dot_skim_stack_wrapper,
        "odr_skims": odr_skim_stack_wrapper,
        "dor_skims": dor_skim_stack_wrapper,
        "od_skims": od_skim_stack_wrapper,
        'orig_col_name': orig_col_name,
        'dest_col_name': dest_col_name,
    }

    # - locals_dict
    constants = config.get_model_constants(logsum_settings)

    locals_dict = {}
    locals_dict.update(constants)
    locals_dict.update(skims)

    # - run preprocessor to annotate choosers
    # allow specification of alternate preprocessor for nontour choosers
    preprocessor = model_settings.get('LOGSUM_PREPROCESSOR', 'preprocessor')
    preprocessor_settings = logsum_settings[preprocessor]

    if preprocessor_settings:

        simulate.set_skim_wrapper_targets(choosers, skims)

        expressions.assign_columns(df=choosers,
                                   model_settings=preprocessor_settings,
                                   locals_dict=locals_dict,
                                   trace_label=trace_label)

    # - compute logsums
    coefficients = simulate.get_segment_coefficients(logsum_settings, tour_purpose)

    logsum_spec = simulate.read_model_spec(file_name=logsum_settings['SPEC'])
    logsum_spec = simulate.eval_coefficients(logsum_spec, coefficients, estimator=None)

    nest_spec = config.get_logit_model_settings(logsum_settings)
    # FIX: pass trace_label, consistent with the other eval_nest_coefficients
    # call sites in this file
    nest_spec = simulate.eval_nest_coefficients(nest_spec, coefficients, trace_label)

    # constrained coefficients can appear in expressions
    locals_dict.update(coefficients)

    logsums = simulate.simple_simulate_logsums(
        choosers,
        logsum_spec,
        nest_spec,
        skims=skims,
        locals_d=locals_dict,
        chunk_size=0,
        trace_label=trace_label)

    return logsums
def joint_tour_composition(
        tours, households, persons,
        chunk_size,
        trace_hh_id):
    """
    This model predicts the makeup of the travel party (adults, children, or mixed).

    A 'composition' column is written onto the pipeline 'tours' table; tours
    that are not joint get an empty string.
    """
    trace_label = 'joint_tour_composition'
    model_settings = config.read_model_settings('joint_tour_composition.yaml')
    spec = simulate.read_model_spec(file_name='joint_tour_composition.csv')

    tours = tours.to_frame()
    joint_tours = tours[tours.tour_category == 'joint']

    # nothing to choose if there are no joint tours
    if len(joint_tours) == 0:
        add_null_results(trace_label, tours)
        return

    # restrict households to those that actually have joint tours
    households = households.to_frame()
    households = households[households.num_hh_joint_tours > 0]

    # and persons to members of those households
    persons = persons.to_frame()
    persons = persons[persons.household_id.isin(households.index)]

    logger.info("Running joint_tour_composition with %d joint tours" % joint_tours.shape[0])

    # annotate households via the optional preprocessor
    preproc = model_settings.get('preprocessor', None)
    if preproc:
        preproc_locals = {
            'persons': persons,
            'hh_time_window_overlap': hh_time_window_overlap
        }
        expressions.assign_columns(
            df=households,
            model_settings=preproc,
            locals_dict=preproc_locals,
            trace_label=trace_label)

    # choosers: each joint tour with its household attributes attached
    choosers = pd.merge(joint_tours, households,
                        left_on='household_id', right_index=True, how='left')

    # run the logit model
    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    choices = simulate.simple_simulate(
        choosers=choosers,
        spec=spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='composition')

    # map chosen column positions to their alternative names
    choices = pd.Series(spec.columns[choices.values], index=choices.index)

    # keep the choice on joint_tours for tracing/summary
    joint_tours['composition'] = choices

    # align back to the full tours table (blank for non-joint tours)
    tours['composition'] = choices.reindex(tours.index).fillna('').astype(str)
    pipeline.replace_table("tours", tours)

    tracing.print_summary('joint_tour_composition', joint_tours.composition,
                          value_counts=True)

    if trace_hh_id:
        tracing.trace_df(joint_tours,
                         label="joint_tour_composition.joint_tours",
                         slicer='household_id')
def joint_tour_frequency(households, persons, chunk_size, trace_hh_id):
    """
    This model predicts the frequency of making fully joint trips (see the
    alternatives above).

    Side effects: extends the pipeline 'tours' table with the generated joint
    tours and writes 'joint_tour_frequency' / 'num_hh_joint_tours' columns onto
    the 'households' table.
    """
    trace_label = 'joint_tour_frequency'
    model_settings_file_name = 'joint_tour_frequency.yaml'

    estimator = estimation.manager.begin_estimation('joint_tour_frequency')

    model_settings = config.read_model_settings(model_settings_file_name)

    alternatives = simulate.read_model_alts(
        'joint_tour_frequency_alternatives.csv', set_index='alt')

    # - only interested in households with more than one cdap travel_active person and
    # - at least one non-preschooler
    households = households.to_frame()
    multi_person_households = households[
        households.participates_in_jtf_model].copy()

    # - only interested in persons in multi_person_households
    # FIXME - gratuitous pathological efficiency move, just let yaml specify persons?
    persons = persons.to_frame()
    persons = persons[persons.household_id.isin(multi_person_households.index)]

    logger.info(
        "Running joint_tour_frequency with %d multi-person households" %
        multi_person_households.shape[0])

    # - preprocessor (annotates multi_person_households in place)
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_dict = {
            'persons': persons,
            'hh_time_window_overlap': hh_time_window_overlap
        }

        expressions.assign_columns(df=multi_person_households,
                                   model_settings=preprocessor_settings,
                                   locals_dict=locals_dict,
                                   trace_label=trace_label)

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients_df = simulate.read_model_coefficients(model_settings)
    model_spec = simulate.eval_coefficients(model_spec, coefficients_df, estimator)

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    if estimator:
        estimator.write_spec(model_settings)
        estimator.write_model_settings(model_settings, model_settings_file_name)
        estimator.write_coefficients(coefficients_df, model_settings)
        estimator.write_choosers(multi_person_households)

    choices = simulate.simple_simulate(
        choosers=multi_person_households,
        spec=model_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='joint_tour_frequency',
        estimator=estimator)

    # convert indexes to alternative names
    choices = pd.Series(model_spec.columns[choices.values], index=choices.index)

    if estimator:
        # in estimation mode, replace simulated choices with survey values
        estimator.write_choices(choices)
        choices = estimator.get_survey_values(choices, 'households', 'joint_tour_frequency')
        estimator.write_override_choices(choices)
        estimator.end_estimation()

    # - create joint_tours based on joint_tour_frequency choices

    # - we need a person_id in order to generate the tour index (and for register_traceable_table)
    # - but we don't know the tour participants yet
    # - so we arbitrarily choose the first person in the household
    # - to be point person for the purpose of generating an index and setting origin
    # FIX: .copy() so the assignments below don't mutate (or warn about) the
    # persons slice
    temp_point_persons = persons.loc[persons.PNUM == 1].copy()
    temp_point_persons['person_id'] = temp_point_persons.index
    temp_point_persons = temp_point_persons.set_index('household_id')
    temp_point_persons = temp_point_persons[['person_id', 'home_zone_id']]

    joint_tours = \
        process_joint_tours(choices, alternatives, temp_point_persons)

    tours = pipeline.extend_table("tours", joint_tours)

    tracing.register_traceable_table('tours', joint_tours)
    pipeline.get_rn_generator().add_channel('tours', joint_tours)

    # - annotate households

    # we expect there to be an alt with no tours - which we can use to backfill non-travelers
    # FIX: original took .index[0] of the boolean mask, which is just the first
    # alternative regardless of the condition; select the zero-tour row instead
    no_tours_alt = alternatives[alternatives.sum(axis=1) == 0].index[0]
    households['joint_tour_frequency'] = choices.reindex(
        households.index).fillna(no_tours_alt).astype(str)

    households['num_hh_joint_tours'] = joint_tours.groupby('household_id').size().\
        reindex(households.index).fillna(0).astype(np.int8)

    pipeline.replace_table("households", households)

    tracing.print_summary('joint_tour_frequency', households.joint_tour_frequency,
                          value_counts=True)

    if trace_hh_id:
        tracing.trace_df(households,
                         label="joint_tour_frequency.households")

        tracing.trace_df(joint_tours,
                         label="joint_tour_frequency.joint_tours",
                         slicer='household_id')

    if estimator:
        # sanity check: tours were created with the expected (survey) tour_ids
        survey_tours = estimation.manager.get_survey_table('tours')
        survey_tours = survey_tours[survey_tours.tour_category == 'joint']

        print(f"len(survey_tours) {len(survey_tours)}")
        print(f"len(joint_tours) {len(joint_tours)}")

        different = False
        survey_tours_not_in_tours = survey_tours[~survey_tours.index.isin(joint_tours.index)]
        if len(survey_tours_not_in_tours) > 0:
            print(f"survey_tours_not_in_tours\n{survey_tours_not_in_tours}")
            different = True
        tours_not_in_survey_tours = joint_tours[~joint_tours.index.isin(survey_tours.index)]
        # FIX: original tested survey_tours_not_in_tours here (copy-paste bug)
        if len(tours_not_in_survey_tours) > 0:
            print(f"tours_not_in_survey_tours\n{tours_not_in_survey_tours}")
            different = True
        assert not different
def atwork_subtour_mode_choice(tours, persons_merged, skim_dict, skim_stack,
                               chunk_size, trace_hh_id):
    """
    At-work subtour mode choice simulate

    Selects 'atwork' subtours, merges person attributes, runs tour mode choice
    with workplace_taz as the subtour origin, and writes the chosen tour_mode
    back to the pipeline 'tours' table.
    """
    trace_label = 'atwork_subtour_mode_choice'

    model_settings = config.read_model_settings('tour_mode_choice.yaml')

    spec = tour_mode_choice_spec(model_settings)

    tours = tours.to_frame()
    subtours = tours[tours.tour_category == 'atwork']

    # - if no atwork subtours, nothing to do
    if subtours.shape[0] == 0:
        tracing.no_results(trace_label)
        return

    # merge person attributes onto each subtour (left join on person_id)
    subtours_merged = \
        pd.merge(subtours, persons_merged.to_frame(),
                 left_on='person_id', right_index=True, how='left')

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    logger.info("Running %s with %d subtours" %
                (trace_label, subtours_merged.shape[0]))

    tracing.print_summary('%s tour_type' % trace_label,
                          subtours_merged.tour_type, value_counts=True)

    # setup skim keys: subtours originate at the workplace, not home
    orig_col_name = 'workplace_taz'
    dest_col_name = 'destination'
    out_time_col_name = 'start'
    in_time_col_name = 'end'
    odt_skim_stack_wrapper = skim_stack.wrap(left_key=orig_col_name,
                                             right_key=dest_col_name,
                                             skim_key='out_period')
    dot_skim_stack_wrapper = skim_stack.wrap(left_key=dest_col_name,
                                             right_key=orig_col_name,
                                             skim_key='in_period')
    od_skim_stack_wrapper = skim_dict.wrap(orig_col_name, dest_col_name)

    skims = {
        "odt_skims": odt_skim_stack_wrapper,
        "dot_skims": dot_skim_stack_wrapper,
        "od_skims": od_skim_stack_wrapper,
        'orig_col_name': orig_col_name,
        'dest_col_name': dest_col_name,
        'out_time_col_name': out_time_col_name,
        'in_time_col_name': in_time_col_name
    }

    choices = run_tour_mode_choice_simulate(
        subtours_merged,
        spec,
        tour_purpose='atwork',
        model_settings=model_settings,
        skims=skims,
        constants=constants,
        nest_spec=nest_spec,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='tour_mode_choice')

    tracing.print_summary('%s choices' % trace_label, choices, value_counts=True)

    # write tour_mode for the atwork subtours back into the full tours table
    assign_in_place(tours, choices.to_frame('tour_mode'))
    pipeline.replace_table("tours", tours)

    if trace_hh_id:
        tracing.trace_df(tours[tours.tour_category == 'atwork'],
                         label=tracing.extend_trace_label(trace_label, 'tour_mode'),
                         slicer='tour_id',
                         index_label='tour_id')

    force_garbage_collect()
def stop_frequency(
        tours, tours_merged,
        stop_frequency_alts,
        skim_dict,
        chunk_size,
        trace_hh_id):
    """
    stop frequency model

    For each tour, choose a number of intermediate inbound stops and outbound
    stops. Create a trip table with inbound and outbound trips.

    Thus, a tour with stop_frequency '2out_0in' will have two outbound and zero
    inbound stops, and four corresponding trips: three outbound, and one inbound.

    Adds stop_frequency str column to trips, with fields

    creates trips table with columns:

    ::

        - person_id
        - household_id
        - tour_id
        - primary_purpose
        - atwork
        - trip_num
        - outbound
        - trip_count
    """
    trace_label = 'stop_frequency'
    model_settings = config.read_model_settings('stop_frequency.yaml')
    tours = tours.to_frame()
    tours_merged = tours_merged.to_frame()
    # sanity checks: upstream models must have populated these
    assert not tours_merged.household_id.isnull().any()
    assert not (tours_merged.origin == -1).any()
    assert not (tours_merged.destination == -1).any()
    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)
    # - run preprocessor to annotate tours_merged
    # initialize so the tracing block below can safely test for it when
    # no preprocessor is configured (previously this raised NameError)
    annotations = None
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:
        # hack: preprocessor adds origin column in place if it does not exist already
        od_skim_stack_wrapper = skim_dict.wrap('origin', 'destination')
        skims = [od_skim_stack_wrapper]
        locals_dict = {"od_skims": od_skim_stack_wrapper}
        if constants is not None:
            locals_dict.update(constants)
        simulate.set_skim_wrapper_targets(tours_merged, skims)
        # this should be pre-slice as some expressions may count tours by type
        annotations = expressions.compute_columns(
            df=tours_merged,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)
        assign_in_place(tours_merged, annotations)
    tracing.print_summary('stop_frequency segments',
                          tours_merged.primary_purpose, value_counts=True)
    choices_list = []
    # run a separate spec for each primary_purpose segment
    for segment_type, choosers in tours_merged.groupby('primary_purpose'):
        # use the module logger (not the root logger) for consistency with
        # the rest of the file
        logger.info("%s running segment %s with %s chooser rows" %
                    (trace_label, segment_type, choosers.shape[0]))
        spec = simulate.read_model_spec(file_name='stop_frequency_%s.csv' % segment_type)
        assert spec is not None, "spec for segment_type %s not found" % segment_type
        choices = simulate.simple_simulate(
            choosers=choosers,
            spec=spec,
            nest_spec=nest_spec,
            locals_d=constants,
            chunk_size=chunk_size,
            trace_label=tracing.extend_trace_label(trace_label, segment_type),
            trace_choice_name='stops')
        # convert indexes to alternative names
        choices = pd.Series(spec.columns[choices.values], index=choices.index)
        choices_list.append(choices)
    choices = pd.concat(choices_list)
    tracing.print_summary('stop_frequency', choices, value_counts=True)
    # add stop_frequency choices to tours table
    assign_in_place(tours, choices.to_frame('stop_frequency'))
    if 'primary_purpose' not in tours.columns:
        assign_in_place(tours, tours_merged[['primary_purpose']])
    pipeline.replace_table("tours", tours)
    # create trips table
    trips = process_trips(tours, stop_frequency_alts)
    trips = pipeline.extend_table("trips", trips)
    tracing.register_traceable_table('trips', trips)
    pipeline.get_rn_generator().add_channel('trips', trips)
    if trace_hh_id:
        tracing.trace_df(tours,
                         label="stop_frequency.tours",
                         slicer='person_id',
                         columns=None)
        tracing.trace_df(trips,
                         label="stop_frequency.trips",
                         slicer='person_id',
                         columns=None)
        # only available when a preprocessor ran
        if annotations is not None:
            tracing.trace_df(annotations,
                             label="stop_frequency.annotations",
                             columns=None)
        tracing.trace_df(tours_merged,
                         label="stop_frequency.tours_merged",
                         slicer='person_id',
                         columns=None)
def _compute_logsums(alt_tdd, tours_merged, tour_purpose, model_settings, network_los, skims, trace_label): """ compute logsums for tours using skims for alt_tdd out_period and in_period """ trace_label = tracing.extend_trace_label(trace_label, 'logsums') logsum_settings = config.read_model_settings( model_settings['LOGSUM_SETTINGS']) choosers = alt_tdd.join(tours_merged, how='left', rsuffix='_chooser') logger.info( f"{trace_label} compute_logsums for {choosers.shape[0]} choosers {alt_tdd.shape[0]} alts" ) # - locals_dict constants = config.get_model_constants(logsum_settings) locals_dict = {} locals_dict.update(constants) if network_los.zone_system == los.THREE_ZONE: # TVPB constants can appear in expressions locals_dict.update( network_los.setting('TVPB_SETTINGS.tour_mode_choice.CONSTANTS')) locals_dict.update(skims) # constrained coefficients can appear in expressions coefficients = simulate.get_segment_coefficients(logsum_settings, tour_purpose) locals_dict.update(coefficients) # - run preprocessor to annotate choosers # allow specification of alternate preprocessor for nontour choosers preprocessor = model_settings.get('LOGSUM_PREPROCESSOR', 'preprocessor') preprocessor_settings = logsum_settings[preprocessor] if preprocessor_settings: simulate.set_skim_wrapper_targets(choosers, skims) expressions.assign_columns(df=choosers, model_settings=preprocessor_settings, locals_dict=locals_dict, trace_label=trace_label) # - compute logsums logsum_spec = simulate.read_model_spec(file_name=logsum_settings['SPEC']) logsum_spec = simulate.eval_coefficients(logsum_spec, coefficients, estimator=None) nest_spec = config.get_logit_model_settings(logsum_settings) nest_spec = simulate.eval_nest_coefficients(nest_spec, coefficients, trace_label) logsums = simulate.simple_simulate_logsums(choosers, logsum_spec, nest_spec, skims=skims, locals_d=locals_dict, chunk_size=0, trace_label=trace_label) return logsums
def mandatory_tour_frequency(persons_merged, chunk_size, trace_hh_id):
    """
    This model predicts the frequency of making mandatory trips (see the
    alternatives above) - these trips include work and school in some combination.

    Only persons with cdap_activity == 'M' are choosers; the chosen
    alternative name is written to persons.mandatory_tour_frequency and the
    corresponding mandatory tours are appended to the pipeline 'tours' table.
    """
    trace_label = 'mandatory_tour_frequency'
    model_settings = config.read_model_settings('mandatory_tour_frequency.yaml')
    model_spec = simulate.read_model_spec(file_name='mandatory_tour_frequency.csv')
    alternatives = simulate.read_model_alts(
        config.config_file_path('mandatory_tour_frequency_alternatives.csv'),
        set_index='alt')
    choosers = persons_merged.to_frame()
    # filter based on results of CDAP
    choosers = choosers[choosers.cdap_activity == 'M']
    logger.info("Running mandatory_tour_frequency with %d persons", len(choosers))
    # - if no mandatory tours
    if choosers.shape[0] == 0:
        add_null_results(trace_label, model_settings)
        return
    # - preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:
        locals_dict = {}
        expressions.assign_columns(
            df=choosers,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)
    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)
    choices = simulate.simple_simulate(
        choosers=choosers,
        spec=model_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='mandatory_tour_frequency')
    # convert indexes to alternative names; reindex to the full persons_merged
    # index so non-choosers get NaN
    choices = pd.Series(
        model_spec.columns[choices.values],
        index=choices.index).reindex(persons_merged.local.index)
    # - create mandatory tours
    """
    This reprocesses the choice of index of the mandatory tour frequency
    alternatives into an actual dataframe of tours.  Ending format is
    the same as got non_mandatory_tours except trip types are "work" and "school"
    """
    choosers['mandatory_tour_frequency'] = choices
    mandatory_tours = process_mandatory_tours(
        persons=choosers,
        mandatory_tour_frequency_alts=alternatives
    )
    tours = pipeline.extend_table("tours", mandatory_tours)
    tracing.register_traceable_table('tours', mandatory_tours)
    pipeline.get_rn_generator().add_channel('tours', mandatory_tours)
    # - annotate persons
    persons = inject.get_table('persons').to_frame()
    # need to reindex as we only handled persons with cdap_activity == 'M'
    persons['mandatory_tour_frequency'] = choices.reindex(persons.index).fillna('').astype(str)
    expressions.assign_columns(
        df=persons,
        model_settings=model_settings.get('annotate_persons'),
        trace_label=tracing.extend_trace_label(trace_label, 'annotate_persons'))
    pipeline.replace_table("persons", persons)
    tracing.print_summary('mandatory_tour_frequency',
                          persons.mandatory_tour_frequency, value_counts=True)
    if trace_hh_id:
        tracing.trace_df(mandatory_tours,
                         label="mandatory_tour_frequency.mandatory_tours",
                         warn_if_empty=True)
        tracing.trace_df(persons,
                         label="mandatory_tour_frequency.persons",
                         warn_if_empty=True)
def tour_scheduling_calc_row_size(tours, persons_merged, alternatives, skims,
                                  spec, model_settings, trace_label):
    """
    Estimate the per-chooser-row memory footprint (in column-equivalents) of
    the tour scheduling interaction-simulate, for adaptive chunking.

    Returns 0 to disable row-size based chunking for THREE_ZONE setups with
    tap skims (TVPB overhead makes the estimate unreliable).
    """
    # this will not be consistent across mandatory tours (highest),
    # non_mandatory tours, and atwork subtours (lowest)
    TIMETABLE_AVAILABILITY_REDUCTION_FACTOR = 1
    # this appears to be more stable
    LOGSUM_DUPLICATE_REDUCTION_FACTOR = 0.5
    sizer = chunk.RowSizeEstimator(trace_label)
    # chooser is tours merged with persons_merged
    chooser_row_size = len(tours.columns) + len(persons_merged.columns)
    # e.g. start, end, duration, <chooser_column>
    alt_row_size = alternatives.shape[1] + 1
    # non-available alternatives will be sliced out so this is a over-estimate
    # for atwork subtours this may be a gross over-estimate,
    # but that is presumably ok since we are adaptive
    sample_size = len(alternatives) * TIMETABLE_AVAILABILITY_REDUCTION_FACTOR
    sizer.add_elements(chooser_row_size, 'tours')  # tours_merged with persons
    # alt_tdd tdd_interaction_dataset is cross join of choosers with alternatives
    sizer.add_elements((chooser_row_size + alt_row_size) * sample_size, 'interaction_df')
    # eval_interaction_utilities is parsimonious and
    # doesn't create a separate column for each partial utility
    sizer.add_elements(sample_size, 'interaction_utilities')  # <- this is probably always the HWM
    sizer.drop_elements('interaction_df')
    sizer.drop_elements('interaction_utilities')
    sizer.add_elements(alt_row_size, 'utilities_df')
    sizer.add_elements(alt_row_size, 'probs')
    if 'LOGSUM_SETTINGS' in model_settings:
        logsum_settings = config.read_model_settings(model_settings['LOGSUM_SETTINGS'])
        logsum_spec = simulate.read_model_spec(file_name=logsum_settings['SPEC'])
        logsum_nest_spec = config.get_logit_model_settings(logsum_settings)
        if logsum_nest_spec is None:
            # expression_values for each spec row
            # utilities and probs for each alt
            logsum_columns = logsum_spec.shape[0] + (2 * logsum_spec.shape[1])
        else:
            # expression_values for each spec row
            # raw_utilities and base_probabilities) for each alt
            # nested_exp_utilities, nested_probabilities for each nest
            # less 1 as nested_probabilities lacks root
            nest_count = logit.count_nests(logsum_nest_spec)
            logsum_columns = logsum_spec.shape[0] + \
                (2 * logsum_spec.shape[1]) + (2 * nest_count) - 1
        if USE_BRUTE_FORCE_TO_COMPUTE_LOGSUMS:
            sizer.add_elements(logsum_columns * sample_size, 'logsum_columns')
        else:
            # if USE_BRUTE_FORCE_TO_COMPUTE_LOGSUMS is false compute_logsums prunes alt_tdd
            # to only compute logsums for unique (tour_id, out_period, in_period, duration)
            # in alt_tdd which cuts the number of alts by roughly 50%
            # (44% for 100 hh mtctm1 test dataset)
            # grep the log for USE_BRUTE_FORCE_TO_COMPUTE_LOGSUMS to check actual % savings
            # (the reduction itself is LOGSUM_DUPLICATE_REDUCTION_FACTOR, declared above;
            # a redundant unused local duplicating it was removed)
            sizer.add_elements(
                logsum_columns * sample_size * LOGSUM_DUPLICATE_REDUCTION_FACTOR,
                'logsum_columns')
    row_size = sizer.get_hwm()
    if simulate.tvpb_skims(skims):
        # DISABLE_TVPB_OVERHEAD
        logger.debug("disable calc_row_size for THREE_ZONE with tap skims")
        return 0
    return row_size
def joint_tour_participation(
        tours, persons_merged,
        chunk_size,
        trace_hh_id):
    """
    Predicts for each eligible person to participate or not participate in each joint tour.

    Writes the 'joint_tour_participants' pipeline table, assigns each joint
    tour's point person and number_of_participants back into 'tours', and
    runs the configured annotations.
    """
    trace_label = 'joint_tour_participation'
    model_settings_file_name = 'joint_tour_participation.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)
    tours = tours.to_frame()
    joint_tours = tours[tours.tour_category == 'joint']
    # - if no joint tours
    if joint_tours.shape[0] == 0:
        add_null_results(model_settings, trace_label)
        return
    persons_merged = persons_merged.to_frame()
    # - create joint_tour_participation_candidates table
    candidates = joint_tour_participation_candidates(joint_tours, persons_merged)
    tracing.register_traceable_table('joint_tour_participants', candidates)
    pipeline.get_rn_generator().add_channel('joint_tour_participants', candidates)
    logger.info("Running joint_tours_participation with %d potential participants (candidates)" %
                candidates.shape[0])
    # - preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:
        locals_dict = {
            'person_time_window_overlap': person_time_window_overlap,
            'persons': persons_merged
        }
        expressions.assign_columns(
            df=candidates,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)
    # - simple_simulate
    estimator = estimation.manager.begin_estimation('joint_tour_participation')
    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients_df = simulate.read_model_coefficients(model_settings)
    model_spec = simulate.eval_coefficients(model_spec, coefficients_df, estimator)
    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)
    if estimator:
        estimator.write_model_settings(model_settings, model_settings_file_name)
        estimator.write_spec(model_settings)
        estimator.write_coefficients(coefficients_df, model_settings)
        estimator.write_choosers(candidates)
    # add tour-based chunk_id so we can chunk all trips in tour together
    assert 'chunk_id' not in candidates.columns
    unique_household_ids = candidates.household_id.unique()
    household_chunk_ids = pd.Series(range(len(unique_household_ids)),
                                    index=unique_household_ids)
    candidates['chunk_id'] = reindex(household_chunk_ids, candidates.household_id)
    choices = simulate.simple_simulate_by_chunk_id(
        choosers=candidates,
        spec=model_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='participation',
        custom_chooser=participants_chooser,
        estimator=estimator)
    # choice is boolean (participate or not)
    choice_col = model_settings.get('participation_choice', 'participate')
    # bugfix: the message previously contained an unformatted '%s' placeholder
    assert choice_col in model_spec.columns, \
        "couldn't find participation choice column '%s' in spec" % choice_col
    PARTICIPATE_CHOICE = model_spec.columns.get_loc(choice_col)
    participate = (choices == PARTICIPATE_CHOICE)
    if estimator:
        estimator.write_choices(choices)
        # we override the 'participate' boolean series, instead of raw alternative
        # index in 'choices' series. its value depends on whether the candidate's
        # 'participant_id' is in the joint_tour_participant index
        survey_participants_df = estimator.get_survey_table('joint_tour_participants')
        participate = pd.Series(choices.index.isin(survey_participants_df.index.values),
                                index=choices.index)
        # but estimation software wants to know the choices value (alternative index)
        choices = participate.replace({True: PARTICIPATE_CHOICE,
                                       False: 1 - PARTICIPATE_CHOICE})
        # estimator.write_override_choices(participate)  # write choices as boolean participate
        estimator.write_override_choices(choices)  # write choices as int alt indexes
        estimator.end_estimation()
    # satisfaction indexed by tour_id
    tour_satisfaction = get_tour_satisfaction(candidates, participate)
    assert tour_satisfaction.all()
    candidates['satisfied'] = reindex(tour_satisfaction, candidates.tour_id)
    PARTICIPANT_COLS = ['tour_id', 'household_id', 'person_id']
    participants = candidates[participate][PARTICIPANT_COLS].copy()
    # assign participant_num
    # FIXME do we want something smarter than the participant with the lowest person_id?
    participants['participant_num'] = \
        participants.sort_values(by=['tour_id', 'person_id']).\
        groupby('tour_id').cumcount() + 1
    pipeline.replace_table("joint_tour_participants", participants)
    # drop channel as we aren't using any more (and it has candidates that weren't chosen)
    pipeline.get_rn_generator().drop_channel('joint_tour_participants')
    # - assign joint tour 'point person' (participant_num == 1)
    point_persons = participants[participants.participant_num == 1]
    joint_tours['person_id'] = point_persons.set_index('tour_id').person_id
    # update number_of_participants which was initialized to 1
    joint_tours['number_of_participants'] = participants.groupby('tour_id').size()
    assign_in_place(tours, joint_tours[['person_id', 'number_of_participants']])
    pipeline.replace_table("tours", tours)
    # - run annotations
    annotate_jtp(model_settings, trace_label)
    if trace_hh_id:
        tracing.trace_df(participants,
                         label="joint_tour_participation.participants")
        tracing.trace_df(joint_tours,
                         label="joint_tour_participation.joint_tours")
def run_tour_mode_choice_simulate(choosers, tour_purpose, model_settings, mode_column_name, logsum_column_name, network_los, skims, constants, estimator, chunk_size, trace_label=None, trace_choice_name=None): """ This is a utility to run a mode choice model for each segment (usually segments are tour/trip purposes). Pass in the tours/trip that need a mode, the Skim object, the spec to evaluate with, and any additional expressions you want to use in the evaluation of variables. """ spec = simulate.read_model_spec(file_name=model_settings['SPEC']) coefficients = simulate.get_segment_coefficients(model_settings, tour_purpose) spec = simulate.eval_coefficients(spec, coefficients, estimator) nest_spec = config.get_logit_model_settings(model_settings) nest_spec = simulate.eval_nest_coefficients(nest_spec, coefficients, trace_label) locals_dict = {} locals_dict.update(constants) locals_dict.update(skims) # coefficients can appear in expressions locals_dict.update(coefficients) assert ('in_period' not in choosers) and ('out_period' not in choosers) in_time = skims['in_time_col_name'] out_time = skims['out_time_col_name'] choosers['in_period'] = network_los.skim_time_period_label( choosers[in_time]) choosers['out_period'] = network_los.skim_time_period_label( choosers[out_time]) expressions.annotate_preprocessors(choosers, locals_dict, skims, model_settings, trace_label) trace_column_names = choosers.index.name assert trace_column_names == 'tour_id' if trace_column_names not in choosers: choosers[trace_column_names] = choosers.index if estimator: # write choosers after annotation estimator.write_choosers(choosers) choices = mode_choice_simulate(choosers=choosers, spec=spec, nest_spec=nest_spec, skims=skims, locals_d=locals_dict, chunk_size=chunk_size, mode_column_name=mode_column_name, logsum_column_name=logsum_column_name, trace_label=trace_label, trace_choice_name=trace_choice_name, trace_column_names=trace_column_names, estimator=estimator) return choices
def mandatory_tour_frequency(persons_merged, chunk_size, trace_hh_id):
    """
    This model predicts the frequency of making mandatory trips (see the
    alternatives above) - these trips include work and school in some combination.

    Choosers are persons with cdap_activity == 'M'; the chosen alternative
    name is stored on persons.mandatory_tour_frequency and the implied
    mandatory tours are appended to the pipeline 'tours' table.
    """
    trace_label = 'mandatory_tour_frequency'
    model_settings = config.read_model_settings('mandatory_tour_frequency.yaml')
    model_spec = simulate.read_model_spec(file_name='mandatory_tour_frequency.csv')
    alternatives = simulate.read_model_alts(
        config.config_file_path('mandatory_tour_frequency_alternatives.csv'),
        set_index='alt')
    choosers = persons_merged.to_frame()
    # filter based on results of CDAP
    choosers = choosers[choosers.cdap_activity == 'M']
    logger.info("Running mandatory_tour_frequency with %d persons",
                len(choosers))
    # - if no mandatory tours
    if choosers.shape[0] == 0:
        add_null_results(trace_label, model_settings)
        return
    # - preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:
        locals_dict = {}
        expressions.assign_columns(df=choosers,
                                   model_settings=preprocessor_settings,
                                   locals_dict=locals_dict,
                                   trace_label=trace_label)
    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)
    choices = simulate.simple_simulate(
        choosers=choosers,
        spec=model_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='mandatory_tour_frequency')
    # convert indexes to alternative names; reindex to the full persons_merged
    # index so that non-choosers appear with NaN
    choices = pd.Series(model_spec.columns[choices.values],
                        index=choices.index).reindex(
        persons_merged.local.index)
    # - create mandatory tours
    """
    This reprocesses the choice of index of the mandatory tour frequency
    alternatives into an actual dataframe of tours.  Ending format is
    the same as got non_mandatory_tours except trip types are "work" and "school"
    """
    choosers['mandatory_tour_frequency'] = choices
    mandatory_tours = process_mandatory_tours(
        persons=choosers,
        mandatory_tour_frequency_alts=alternatives)
    tours = pipeline.extend_table("tours", mandatory_tours)
    tracing.register_traceable_table('tours', mandatory_tours)
    pipeline.get_rn_generator().add_channel('tours', mandatory_tours)
    # - annotate persons
    persons = inject.get_table('persons').to_frame()
    # need to reindex as we only handled persons with cdap_activity == 'M'
    persons['mandatory_tour_frequency'] = choices.reindex(
        persons.index).fillna('').astype(str)
    expressions.assign_columns(
        df=persons,
        model_settings=model_settings.get('annotate_persons'),
        trace_label=tracing.extend_trace_label(trace_label, 'annotate_persons'))
    pipeline.replace_table("persons", persons)
    tracing.print_summary('mandatory_tour_frequency',
                          persons.mandatory_tour_frequency,
                          value_counts=True)
    if trace_hh_id:
        tracing.trace_df(mandatory_tours,
                         label="mandatory_tour_frequency.mandatory_tours",
                         warn_if_empty=True)
        tracing.trace_df(persons,
                         label="mandatory_tour_frequency.persons",
                         warn_if_empty=True)
def atwork_subtour_frequency(tours, persons_merged, chunk_size, trace_hh_id):
    """
    This model predicts the frequency of making at-work subtour tours
    (alternatives for this model come from a separate csv file which is
    configured by the user).

    Adds an 'atwork_subtour_frequency' column to work tours in the pipeline
    'tours' table and appends the implied atwork subtours to it.
    """
    trace_label = 'atwork_subtour_frequency'
    model_settings = config.read_model_settings('atwork_subtour_frequency.yaml')
    model_spec = simulate.read_model_spec(file_name='atwork_subtour_frequency.csv')
    alternatives = simulate.read_model_alts(
        config.config_file_path('atwork_subtour_frequency_alternatives.csv'),
        set_index='alt')
    tours = tours.to_frame()
    persons_merged = persons_merged.to_frame()
    # only work tours can spawn at-work subtours
    work_tours = tours[tours.tour_type == 'work']
    # - if no work_tours
    if len(work_tours) == 0:
        add_null_results(trace_label, tours)
        return
    # merge persons into work_tours
    work_tours = pd.merge(work_tours, persons_merged,
                          left_on='person_id', right_index=True)
    logger.info("Running atwork_subtour_frequency with %d work tours",
                len(work_tours))
    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)
    # - preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:
        assign_columns(df=work_tours,
                       model_settings=preprocessor_settings,
                       trace_label=trace_label)
    choices = simulate.simple_simulate(
        choosers=work_tours,
        spec=model_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='atwork_subtour_frequency')
    # convert indexes to alternative names
    choices = pd.Series(model_spec.columns[choices.values], index=choices.index)
    tracing.print_summary('atwork_subtour_frequency', choices, value_counts=True)
    # add atwork_subtour_frequency column to tours
    # reindex since we are working with a subset of tours
    tours['atwork_subtour_frequency'] = choices.reindex(tours.index)
    pipeline.replace_table("tours", tours)
    # - create atwork_subtours based on atwork_subtour_frequency choice names
    work_tours = tours[tours.tour_type == 'work']
    assert not work_tours.atwork_subtour_frequency.isnull().any()
    subtours = process_atwork_subtours(work_tours, alternatives)
    tours = pipeline.extend_table("tours", subtours)
    tracing.register_traceable_table('tours', subtours)
    pipeline.get_rn_generator().add_channel('tours', subtours)
    if trace_hh_id:
        tracing.trace_df(tours,
                         label='atwork_subtour_frequency.tours')
def _compute_logsums(alt_tdd, tours_merged, tour_purpose, model_settings, trace_label):
    """
    compute logsums for tours using skims for alt_tdd out_period and in_period

    Legacy (skim_stack-based) variant: builds its own skim wrappers from
    injected 'skim_dict'/'skim_stack' rather than receiving them.

    Parameters
    ----------
    alt_tdd : pd.DataFrame
        tour departure/duration alternatives
    tours_merged : pd.DataFrame
        tour chooser attributes, joined onto alt_tdd by index
    tour_purpose : str
        key into DESTINATION_FOR_TOUR_PURPOSE and the coefficient spec
    model_settings : dict
    trace_label : str

    Returns
    -------
    logsums : pd.Series indexed like choosers
    """
    trace_label = tracing.extend_trace_label(trace_label, 'logsums')
    logsum_settings = config.read_model_settings(model_settings['LOGSUM_SETTINGS'])
    choosers = alt_tdd.join(tours_merged, how='left', rsuffix='_chooser')
    logger.info("%s compute_logsums for %d choosers%s alts" %
                (trace_label, choosers.shape[0], alt_tdd.shape[0]))
    # - setup skims
    skim_dict = inject.get_injectable('skim_dict')
    skim_stack = inject.get_injectable('skim_stack')
    orig_col_name = 'TAZ'
    # destination column depends on the tour purpose (e.g. workplace vs school)
    dest_col_name = model_settings.get('DESTINATION_FOR_TOUR_PURPOSE').get(tour_purpose)
    odt_skim_stack_wrapper = skim_stack.wrap(left_key=orig_col_name,
                                             right_key=dest_col_name,
                                             skim_key='out_period')
    dot_skim_stack_wrapper = skim_stack.wrap(left_key=dest_col_name,
                                             right_key=orig_col_name,
                                             skim_key='in_period')
    od_skim_stack_wrapper = skim_dict.wrap(orig_col_name, dest_col_name)
    skims = {
        "odt_skims": odt_skim_stack_wrapper,
        "dot_skims": dot_skim_stack_wrapper,
        "od_skims": od_skim_stack_wrapper,
        'orig_col_name': orig_col_name,
        'dest_col_name': dest_col_name,
    }
    # - locals_dict
    constants = config.get_model_constants(logsum_settings)
    omnibus_coefficient_spec = get_coeffecients_spec(logsum_settings)
    coefficient_spec = omnibus_coefficient_spec[tour_purpose]
    coefficients = assign.evaluate_constants(coefficient_spec, constants=constants)
    locals_dict = {}
    locals_dict.update(coefficients)
    locals_dict.update(constants)
    locals_dict.update(skims)
    # - run preprocessor to annotate choosers
    # allow specification of alternate preprocessor for nontour choosers
    preprocessor = model_settings.get('LOGSUM_PREPROCESSOR', 'preprocessor')
    preprocessor_settings = logsum_settings[preprocessor]
    if preprocessor_settings:
        simulate.set_skim_wrapper_targets(choosers, skims)
        expressions.assign_columns(
            df=choosers,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)
    # - compute logsums
    logsum_spec = get_logsum_spec(logsum_settings)
    nest_spec = config.get_logit_model_settings(logsum_settings)
    logsums = simulate.simple_simulate_logsums(
        choosers,
        logsum_spec,
        nest_spec,
        skims=skims,
        locals_d=locals_dict,
        chunk_size=0,
        trace_label=trace_label)
    return logsums
def trip_mode_choice(trips, network_los, chunk_size, trace_hh_id):
    """
    Trip mode choice - compute trip_mode (same values as for tour_mode) for each trip.

    Modes for each primary tour putpose are calculated separately because they have
    different coefficient values (stored in trip_mode_choice_coefficients.csv
    coefficient file.)

    Adds trip_mode column to trip table
    """
    trace_label = 'trip_mode_choice'
    model_settings_file_name = 'trip_mode_choice.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)
    logsum_column_name = model_settings.get('MODE_CHOICE_LOGSUM_COLUMN_NAME')
    mode_column_name = 'trip_mode'
    trips_df = trips.to_frame()
    logger.info("Running %s with %d trips", trace_label, trips_df.shape[0])
    # give trip mode choice the option to run without calling tours_merged. Useful for xborder
    # model where tour_od_choice needs trip mode choice logsums before some of the join keys
    # needed by tour_merged (e.g. home_zone_id) exist
    tours_cols = [
        col for col in model_settings['TOURS_MERGED_CHOOSER_COLUMNS']
        if col not in trips_df.columns
    ]
    if len(tours_cols) > 0:
        tours_merged = inject.get_table('tours_merged').to_frame(columns=tours_cols)
    else:
        tours_merged = pd.DataFrame()
    # - trips_merged - merge trips and tours_merged
    trips_merged = pd.merge(trips_df,
                            tours_merged,
                            left_on='tour_id',
                            right_index=True,
                            how="left")
    assert trips_merged.index.equals(trips.index)
    tracing.print_summary('primary_purpose',
                          trips_df.primary_purpose,
                          value_counts=True)
    # setup skim keys
    assert ('trip_period' not in trips_merged)
    trips_merged['trip_period'] = network_los.skim_time_period_label(trips_merged.depart)
    orig_col = 'origin'
    dest_col = 'destination'
    min_per_period = network_los.skim_time_periods['period_minutes']
    periods_per_hour = 60 / min_per_period
    constants = {}
    constants.update(config.get_model_constants(model_settings))
    constants.update({
        'ORIGIN': orig_col,
        'DESTINATION': dest_col,
        'MIN_PER_PERIOD': min_per_period,
        'PERIODS_PER_HOUR': periods_per_hour
    })
    skim_dict = network_los.get_default_skim_dict()
    odt_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=orig_col,
                                               dest_key=dest_col,
                                               dim3_key='trip_period')
    dot_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=dest_col,
                                               dest_key=orig_col,
                                               dim3_key='trip_period')
    od_skim_wrapper = skim_dict.wrap('origin', 'destination')
    skims = {
        "odt_skims": odt_skim_stack_wrapper,
        "dot_skims": dot_skim_stack_wrapper,
        "od_skims": od_skim_wrapper,
    }
    if network_los.zone_system == los.THREE_ZONE:
        # fixme - is this a lightweight object?
        tvpb = network_los.tvpb
        tvpb_recipe = model_settings.get('TVPB_recipe', 'tour_mode_choice')
        tvpb_logsum_odt = tvpb.wrap_logsum(orig_key=orig_col,
                                           dest_key=dest_col,
                                           tod_key='trip_period',
                                           segment_key='demographic_segment',
                                           recipe=tvpb_recipe,
                                           cache_choices=True,
                                           trace_label=trace_label,
                                           tag='tvpb_logsum_odt')
        skims.update({
            'tvpb_logsum_odt': tvpb_logsum_odt,
        })
        # This if-clause gives the user the option of NOT inheriting constants
        # from the tvpb settings. previously, these constants were inherited
        # automatically, which had the undesirable effect of overwriting any
        # trip mode choice model constants/coefficients that shared the same
        # name. The default behavior is still the same (True), but the user
        # can now avoid any chance of squashing these local variables by
        # adding `use_TVPB_constants: False` to the trip_mode_choice.yaml file.
        # the tvpb will still use the constants as defined in the recipe
        # specified above in `tvpb.wrap_logsum()` but they will not be used
        # in the trip mode choice expressions.
        if model_settings.get('use_TVPB_constants', True):
            constants.update(
                network_los.setting('TVPB_SETTINGS.tour_mode_choice.CONSTANTS'))
    # don't create estimation data bundle if trip mode choice is being called
    # from another model step (e.g. tour mode choice logsum creation)
    if pipeline._PIPELINE.rng().step_name != 'trip_mode_choice':
        estimator = None
    else:
        estimator = estimation.manager.begin_estimation('trip_mode_choice')
    if estimator:
        estimator.write_coefficients(model_settings=model_settings)
        estimator.write_coefficients_template(model_settings=model_settings)
        estimator.write_spec(model_settings)
        estimator.write_model_settings(model_settings, model_settings_file_name)
    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    nest_spec = config.get_logit_model_settings(model_settings)
    choices_list = []
    # run mode choice separately for each primary_purpose segment, since
    # coefficients differ by purpose
    for primary_purpose, trips_segment in trips_merged.groupby('primary_purpose'):
        segment_trace_label = tracing.extend_trace_label(trace_label, primary_purpose)
        logger.info("trip_mode_choice tour_type '%s' (%s trips)" % (
            primary_purpose,
            len(trips_segment.index),
        ))
        # name index so tracing knows how to slice
        assert trips_segment.index.name == 'trip_id'
        if network_los.zone_system == los.THREE_ZONE:
            tvpb_logsum_odt.extend_trace_label(primary_purpose)
            # tvpb_logsum_dot.extend_trace_label(primary_purpose)
        coefficients = simulate.get_segment_coefficients(model_settings, primary_purpose)
        locals_dict = {}
        locals_dict.update(constants)
        constants_keys = constants.keys()
        if any([coeff in constants_keys for coeff in coefficients.keys()]):
            logger.warning("coefficients are obscuring constants in locals_dict")
        locals_dict.update(coefficients)
        # have to initialize chunker for preprocessing in order to access
        # tvpb logsum terms in preprocessor expressions.
        with chunk.chunk_log(tracing.extend_trace_label(trace_label, 'preprocessing'),
                             base=True):
            expressions.annotate_preprocessors(trips_segment, locals_dict, skims,
                                               model_settings, segment_trace_label)
        if estimator:
            # write choosers after annotation
            estimator.write_choosers(trips_segment)
        locals_dict.update(skims)
        choices = mode_choice_simulate(
            choosers=trips_segment,
            spec=simulate.eval_coefficients(model_spec, coefficients, estimator),
            nest_spec=simulate.eval_nest_coefficients(nest_spec, coefficients,
                                                      segment_trace_label),
            skims=skims,
            locals_d=locals_dict,
            chunk_size=chunk_size,
            mode_column_name=mode_column_name,
            logsum_column_name=logsum_column_name,
            trace_label=segment_trace_label,
            trace_choice_name='trip_mode_choice',
            estimator=estimator)
        if trace_hh_id:
            # trace the coefficients
            tracing.trace_df(pd.Series(locals_dict),
                             label=tracing.extend_trace_label(segment_trace_label,
                                                              'constants'),
                             transpose=False,
                             slicer='NONE')
            # so we can trace with annotations
            assign_in_place(trips_segment, choices)
            tracing.trace_df(trips_segment,
                             label=tracing.extend_trace_label(segment_trace_label,
                                                              'trip_mode'),
                             slicer='tour_id',
                             index_label='tour_id',
                             warn_if_empty=True)
        choices_list.append(choices)
    choices_df = pd.concat(choices_list)
    # add cached tvpb_logsum tap choices for modes specified in tvpb_mode_path_types
    if network_los.zone_system == los.THREE_ZONE:
        tvpb_mode_path_types = model_settings.get('tvpb_mode_path_types')
        for mode, path_type in tvpb_mode_path_types.items():
            skim_cache = tvpb_logsum_odt.cache[path_type]
            for c in skim_cache:
                # NOTE(review): this rebinds the outer `dest_col` ('destination')
                # to the cache column name; harmless here since `dest_col` is not
                # used afterwards, but a distinct local name would be safer
                dest_col = c
                if dest_col not in choices_df:
                    choices_df[dest_col] = np.nan if pd.api.types.is_numeric_dtype(
                        skim_cache[c]) else ''
                # keep existing value where the chosen mode differs; otherwise
                # take the cached tap choice
                choices_df[dest_col].where(choices_df[mode_column_name] != mode,
                                           skim_cache[c],
                                           inplace=True)
    if estimator:
        estimator.write_choices(choices_df.trip_mode)
        choices_df.trip_mode = estimator.get_survey_values(choices_df.trip_mode,
                                                           'trips', 'trip_mode')
        estimator.write_override_choices(choices_df.trip_mode)
        estimator.end_estimation()
    # re-fetch trips so annotation columns added above don't leak into the table
    trips_df = trips.to_frame()
    assign_in_place(trips_df, choices_df)
    tracing.print_summary('trip_modes',
                          trips_merged.tour_mode,
                          value_counts=True)
    tracing.print_summary('trip_mode_choice choices',
                          trips_df[mode_column_name],
                          value_counts=True)
    assert not trips_df[mode_column_name].isnull().any()
    pipeline.replace_table("trips", trips_df)
    if trace_hh_id:
        tracing.trace_df(trips_df,
                         label=tracing.extend_trace_label(trace_label, 'trip_mode'),
                         slicer='trip_id',
                         index_label='trip_id',
                         warn_if_empty=True)
def trip_mode_choice_simulate(trips_merged, trip_mode_choice_spec, trip_mode_choice_settings, skim_dict, skim_stack, omx_file, trace_hh_id):
    """
    Compute a trip_mode for every trip, one tour_type segment at a time.

    Legacy (orca-era) trip mode choice step: segments trips by tour_type,
    runs _mode_choice_simulate per segment with a per-tour-type spec slice,
    then writes the concatenated choices back as the 'trip_mode' column of
    the orca "trips" table.

    Parameters
    ----------
    trips_merged : orca table wrapper
        trips merged with tour/household attributes; converted via to_frame()
    trip_mode_choice_spec : pandas.DataFrame
        omnibus spec; sliced per tour_type by get_segment_and_unstack
    trip_mode_choice_settings : dict
        model settings; supplies logit nest spec and constants
    skim_dict, skim_stack : skim access objects
        wrapped below into OTAZ/DTAZ (and start_period) skim wrappers
    omx_file :
        unused in this function body — presumably kept for injection
        signature compatibility; TODO confirm
    trace_hh_id : int or None
        when falsy, per-segment trace_label is falsy and tracing is skipped

    Returns
    -------
    None
        side effects only: adds 'trip_mode' column to the orca "trips" table
        and emits tracing/logging output
    """

    trips = trips_merged.to_frame()

    nest_spec = config.get_logit_model_settings(trip_mode_choice_settings)
    constants = config.get_model_constants(trip_mode_choice_settings)

    logger.info("Running trip_mode_choice_simulate with %d trips" % len(trips))

    # 3d wrapper keyed on origin/destination TAZ plus the trip's start period
    odt_skim_stack_wrapper = skim_stack.wrap(left_key='OTAZ', right_key='DTAZ', skim_key="start_period")

    # plain (time-independent) origin-destination skim wrapper
    od_skims = skim_dict.wrap('OTAZ', 'DTAZ')

    choices_list = []

    # loop by tour_type in order to easily query the expression coefficient file
    for tour_type, segment in trips.groupby('tour_type'):

        logger.info("running %s tour_type '%s'" % (len(segment.index), tour_type, ))

        # name index so tracing knows how to slice
        segment.index.name = 'trip_id'

        # FIXME - check that destination is not null

        # falsy (no tracing) when trace_hh_id is not set
        trace_label = trace_hh_id and ('trip_mode_choice_%s' % tour_type)

        choices = _mode_choice_simulate(
            segment,
            skim_dict=skim_dict,
            skim_stack=skim_stack,
            odt_skim_stack_wrapper=odt_skim_stack_wrapper,
            dot_skim_stack_wrapper=None,
            od_skim_stack_wrapper=od_skims,
            # per-tour-type slice of the omnibus spec
            spec=get_segment_and_unstack(trip_mode_choice_spec, tour_type),
            constants=constants,
            nest_spec=nest_spec,
            trace_label=trace_label,
            trace_choice_name='trip_mode_choice')

        # FIXME - no point in printing verbose value_counts now that we have tracing?
        tracing.print_summary('trip_mode_choice_simulate %s choices' % tour_type, choices, value_counts=True)

        choices_list.append(choices)

        # FIXME - force garbage collection
        mem = asim.memory_info()
        logger.debug('memory_info tour_type %s, %s' % (tour_type, mem))

    # recombine the per-segment choices into one Series indexed by trip_id
    choices = pd.concat(choices_list)

    tracing.print_summary('trip_mode_choice_simulate all tour type choices', choices, value_counts=True)

    # FIXME - is this a NOP if trips table doesn't exist
    orca.add_column("trips", "trip_mode", choices)

    if trace_hh_id:
        tracing.trace_df(orca.get_table('trips').to_frame(),
                         label="trip_mode",
                         slicer='trip_id',
                         index_label='trip_id',
                         warn_if_empty=True)

    # FIXME - this forces garbage collection
    asim.memory_info()
def trip_mode_choice(
        trips,
        tours_merged,
        network_los,
        chunk_size,
        trace_hh_id):
    """
    Trip mode choice - compute trip_mode (same values as for tour_mode) for each trip.

    Modes for each primary tour purpose are calculated separately because they have
    different coefficient values (stored in trip_mode_choice_coeffs.csv coefficient file.)

    Adds trip_mode column to trip table

    Parameters
    ----------
    trips : orca table wrapper
        trips table; the trip_mode (and optional logsum) columns are written back
        via pipeline.replace_table
    tours_merged : orca table wrapper
        tours merged with household/person attributes; only the columns listed in
        TOURS_MERGED_CHOOSER_COLUMNS are joined onto trips
    network_los : los object
        supplies skims, time period labels, and (in THREE_ZONE systems) the
        transit virtual path builder (tvpb)
    chunk_size : int
        passed through to mode_choice_simulate
    trace_hh_id : int or None
        when set, per-segment constants and choices are traced

    Returns
    -------
    None
        side effect: replaces the pipeline "trips" table with trip_mode added
    """

    trace_label = 'trip_mode_choice'
    model_settings = config.read_model_settings('trip_mode_choice.yaml')

    logsum_column_name = model_settings.get('MODE_CHOICE_LOGSUM_COLUMN_NAME')
    mode_column_name = 'trip_mode'

    model_spec = \
        simulate.read_model_spec(file_name=model_settings['SPEC'])
    omnibus_coefficients = \
        assign.read_constant_spec(config.config_file_path(model_settings['COEFFICIENTS']))

    trips_df = trips.to_frame()
    logger.info("Running %s with %d trips", trace_label, trips_df.shape[0])

    # restrict merged tours to the chooser columns the spec actually needs
    tours_merged = tours_merged.to_frame()
    tours_merged = tours_merged[model_settings['TOURS_MERGED_CHOOSER_COLUMNS']]

    nest_spec = config.get_logit_model_settings(model_settings)

    tracing.print_summary('primary_purpose',
                          trips_df.primary_purpose, value_counts=True)

    # - trips_merged - merge trips and tours_merged
    trips_merged = pd.merge(
        trips_df,
        tours_merged,
        left_on='tour_id',
        right_index=True,
        how="left")
    assert trips_merged.index.equals(trips.index)

    # setup skim keys
    assert ('trip_period' not in trips_merged)
    trips_merged['trip_period'] = network_los.skim_time_period_label(trips_merged.depart)

    orig_col = 'origin'
    dest_col = 'destination'

    constants = {}
    constants.update(config.get_model_constants(model_settings))
    constants.update({
        'ORIGIN': orig_col,
        'DESTINATION': dest_col
    })

    skim_dict = network_los.get_default_skim_dict()

    odt_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=orig_col, dest_key=dest_col,
                                               dim3_key='trip_period')
    dot_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=dest_col, dest_key=orig_col,
                                               dim3_key='trip_period')
    od_skim_wrapper = skim_dict.wrap('origin', 'destination')

    skims = {
        "odt_skims": odt_skim_stack_wrapper,
        "dot_skims": dot_skim_stack_wrapper,
        "od_skims": od_skim_wrapper,
    }

    if network_los.zone_system == los.THREE_ZONE:
        # fixme - is this a lightweight object?
        tvpb = network_los.tvpb

        tvpb_logsum_odt = tvpb.wrap_logsum(orig_key=orig_col, dest_key=dest_col,
                                           tod_key='trip_period', segment_key='demographic_segment',
                                           cache_choices=True,
                                           trace_label=trace_label, tag='tvpb_logsum_odt')
        skims.update({
            'tvpb_logsum_odt': tvpb_logsum_odt,
            # 'tvpb_logsum_dot': tvpb_logsum_dot
        })

        # TVPB constants can appear in expressions
        constants.update(network_los.setting('TVPB_SETTINGS.tour_mode_choice.CONSTANTS'))

    choices_list = []
    for primary_purpose, trips_segment in trips_merged.groupby('primary_purpose'):

        segment_trace_label = tracing.extend_trace_label(trace_label, primary_purpose)

        logger.info("trip_mode_choice tour_type '%s' (%s trips)" %
                    (primary_purpose, len(trips_segment.index), ))

        # name index so tracing knows how to slice
        assert trips_segment.index.name == 'trip_id'

        if network_los.zone_system == los.THREE_ZONE:
            tvpb_logsum_odt.extend_trace_label(primary_purpose)
            # tvpb_logsum_dot.extend_trace_label(primary_purpose)

        # evaluate the per-purpose coefficient expressions against the constants
        locals_dict = assign.evaluate_constants(omnibus_coefficients[primary_purpose],
                                                constants=constants)
        locals_dict.update(constants)

        expressions.annotate_preprocessors(
            trips_segment, locals_dict, skims,
            model_settings, segment_trace_label)

        locals_dict.update(skims)

        choices = mode_choice_simulate(
            choosers=trips_segment,
            spec=model_spec,
            nest_spec=nest_spec,
            skims=skims,
            locals_d=locals_dict,
            chunk_size=chunk_size,
            mode_column_name=mode_column_name,
            logsum_column_name=logsum_column_name,
            # BUG FIX: was trace_label, which made all purpose segments
            # trace under the same label, clobbering each other's output
            trace_label=segment_trace_label,
            trace_choice_name='trip_mode_choice')

        if trace_hh_id:
            # trace the coefficients
            tracing.trace_df(pd.Series(locals_dict),
                             label=tracing.extend_trace_label(segment_trace_label, 'constants'),
                             transpose=False,
                             slicer='NONE')

            # so we can trace with annotations
            assign_in_place(trips_segment, choices)

            tracing.trace_df(trips_segment,
                             label=tracing.extend_trace_label(segment_trace_label, 'trip_mode'),
                             slicer='tour_id',
                             index_label='tour_id',
                             warn_if_empty=True)

        choices_list.append(choices)

        # FIXME - force garbage collection
        force_garbage_collect()

    choices_df = pd.concat(choices_list)

    # add cached tvpb_logsum tap choices for modes specified in tvpb_mode_path_types
    if network_los.zone_system == los.THREE_ZONE:
        tvpb_mode_path_types = model_settings.get('tvpb_mode_path_types')
        for mode, path_type in tvpb_mode_path_types.items():
            skim_cache = tvpb_logsum_odt.cache[path_type]

            # BUG FIX: was a leftover debug print()
            logger.debug(f"copying cached tap choices for mode {mode} path_type {path_type}")

            for c in skim_cache:
                # BUG FIX: initialize non-numeric columns with '' rather than NaN
                # (matches the tour mode choice implementation of this merge);
                # also no longer shadows the dest_col local defined above
                if c not in choices_df:
                    choices_df[c] = \
                        np.nan if pd.api.types.is_numeric_dtype(skim_cache[c]) else ''
                # overwrite only the rows where this mode was chosen
                choices_df[c].where(choices_df[mode_column_name] != mode,
                                    skim_cache[c], inplace=True)

    # update trips table with choices (and optionally logsums)
    trips_df = trips.to_frame()
    assign_in_place(trips_df, choices_df)

    tracing.print_summary('tour_modes',
                          trips_merged.tour_mode, value_counts=True)

    tracing.print_summary('trip_mode_choice choices',
                          trips_df[mode_column_name], value_counts=True)

    assert not trips_df[mode_column_name].isnull().any()

    pipeline.replace_table("trips", trips_df)

    if trace_hh_id:
        tracing.trace_df(trips_df,
                         label=tracing.extend_trace_label(trace_label, 'trip_mode'),
                         slicer='trip_id',
                         index_label='trip_id',
                         warn_if_empty=True)