def create_mandatory_tours(trace_hh_id):
    """
    Create mandatory tours and append them to the pipeline "tours" table.

    Persons with a non-null mandatory_tour_frequency are handed to
    process_mandatory_tours, the resulting tours are annotated using the
    'annotate_tours_with_dest' settings, and the new rows are appended to
    the "tours" table.

    Parameters
    ----------
    trace_hh_id : int or None
        when truthy, the newly created tours are written to the trace output
    """

    # FIXME - move this to body?

    persons_table = inject.get_table('persons')
    configs_dir = inject.get_injectable('configs_dir')

    needed_columns = ["mandatory_tour_frequency", "is_worker", "school_taz", "workplace_taz"]
    persons_df = persons_table.to_frame(columns=needed_columns)

    # only persons with a mandatory_tour_frequency value take part
    has_frequency = persons_df.mandatory_tour_frequency.notnull()
    persons_df = persons_df[has_frequency]

    alternatives = inject.get_injectable('mandatory_tour_frequency_alternatives')
    new_tours = process_mandatory_tours(persons_df, alternatives)

    expressions.assign_columns(
        df=new_tours,
        model_settings='annotate_tours_with_dest',
        configs_dir=configs_dir,
        trace_label='create_mandatory_tours')

    # append the new tours to any tours already in the pipeline
    all_tours = pipeline.extend_table("tours", new_tours)
    tracing.register_traceable_table('tours', all_tours)
    pipeline.get_rn_generator().add_channel(new_tours, 'tours')

    if trace_hh_id:
        tracing.trace_df(new_tours,
                         label="mandatory_tour_frequency.mandatory_tours",
                         warn_if_empty=True)
def auto_ownership_simulate(households,
                            households_merged,
                            chunk_size,
                            trace_hh_id):
    """
    Auto ownership is a standard model which predicts how many cars a household
    with given characteristics owns

    Settings are read from 'auto_ownership.yaml'. A choice is simulated for
    every row of households_merged and stored in the 'auto_ownership' column
    of the pipeline "households" table. When an estimator is active, the model
    inputs and choices are written to it and the choices are replaced by the
    values returned from estimator.get_survey_values.

    Parameters
    ----------
    households : table wrapper (has to_frame)
        table that receives the auto_ownership column
    households_merged : table wrapper (has to_frame)
        choosers
    chunk_size : passed through to simulate.simple_simulate
    trace_hh_id : truthy to write the updated households table to trace output
    """
    step_label = 'auto_ownership_simulate'
    settings_file = 'auto_ownership.yaml'
    settings = config.read_model_settings(settings_file)

    estimator = estimation.manager.begin_estimation('auto_ownership')

    raw_spec = simulate.read_model_spec(file_name=settings['SPEC'])
    coefficients = simulate.read_model_coefficients(settings)
    spec = simulate.eval_coefficients(raw_spec, coefficients, estimator)

    nests = config.get_logit_model_settings(settings)
    model_constants = config.get_model_constants(settings)

    chooser_df = households_merged.to_frame()

    logger.info("Running %s with %d households", step_label, len(chooser_df))

    if estimator:
        estimator.write_model_settings(settings, settings_file)
        estimator.write_spec(settings)
        estimator.write_coefficients(coefficients)
        estimator.write_choosers(chooser_df)

    choices = simulate.simple_simulate(
        choosers=chooser_df,
        spec=spec,
        nest_spec=nests,
        locals_d=model_constants,
        chunk_size=chunk_size,
        trace_label=step_label,
        trace_choice_name='auto_ownership',
        estimator=estimator)

    if estimator:
        estimator.write_choices(choices)
        choices = estimator.get_survey_values(choices, 'households', 'auto_ownership')
        estimator.write_override_choices(choices)
        estimator.end_estimation()

    households_df = households.to_frame()

    # no need to reindex as we used all households
    households_df['auto_ownership'] = choices

    pipeline.replace_table("households", households_df)

    tracing.print_summary('auto_ownership', households_df.auto_ownership,
                          value_counts=True)

    if trace_hh_id:
        tracing.trace_df(households_df,
                         label='auto_ownership',
                         warn_if_empty=True)
def trip_purpose(trips, chunk_size, trace_hh_id):
    """
    trip purpose model step - calls run_trip_purpose to run the actual model

    adds purpose column to trips and replaces the pipeline "trips" table

    Parameters
    ----------
    trips : table wrapper (has to_frame)
    chunk_size : passed through to run_trip_purpose
    trace_hh_id : truthy to write the updated trips table to trace output
    """
    step_label = "trip_purpose"

    trips_frame = trips.to_frame()

    trips_frame['purpose'] = run_trip_purpose(
        trips_frame,
        chunk_size=chunk_size,
        trace_hh_id=trace_hh_id,
        trace_label=step_label)

    # we should have assigned a purpose to all trips
    assert trips_frame.purpose.notnull().all()

    pipeline.replace_table("trips", trips_frame)

    if trace_hh_id:
        tracing.trace_df(trips_frame,
                         label=step_label,
                         slicer='trip_id',
                         index_label='trip_id',
                         warn_if_empty=True)
def trace_df(self, df, trace_label, extension):
    """
    Write df to the trace output under trace_label extended by extension.

    df must not be empty. The frame is written with slicer='NONE' and
    transpose=False.
    """
    assert len(df) > 0

    full_label = tracing.extend_trace_label(trace_label, extension)
    tracing.trace_df(df, label=full_label, slicer='NONE', transpose=False)
def individual_utilities(persons,
                         cdap_indiv_spec,
                         locals_d,
                         trace_hh_id=None,
                         trace_label=None):
    """
    Calculate CDAP utilities for all individuals.

    Parameters
    ----------
    persons : pandas.DataFrame
        DataFrame of individual persons data.
    cdap_indiv_spec : pandas.DataFrame
        CDAP spec applied to individuals.
    locals_d : dict or None
        passed through to simulate.eval_utilities
    trace_hh_id : int or None
        when truthy, the utilities are written to the trace output
    trace_label : str or None
        label passed to simulate.eval_utilities and used as the prefix of
        the trace output name

    Returns
    -------
    utilities : pandas.DataFrame
        Will have index of `persons` and columns for each of the alternatives.
        plus some 'useful columns' [_hh_id_, _ptype_, 'cdap_rank', _hh_size_]
    """
    # single-person utilities, one row per person
    utilities = simulate.eval_utilities(cdap_indiv_spec, persons, locals_d,
                                        trace_label=trace_label)

    # carry along the person attributes needed to build household interactions
    carried_columns = [_hh_id_, _ptype_, 'cdap_rank', _hh_size_]
    utilities[carried_columns] = persons[carried_columns]

    if trace_hh_id:
        output_name = '%s.indiv_utils' % trace_label
        tracing.trace_df(utilities, output_name,
                         column_labels=['activity', 'person'])

    return utilities
def individual_utilities(persons,
                         cdap_indiv_spec,
                         locals_d,
                         trace_hh_id=None,
                         trace_label=None):
    """
    Calculate CDAP utilities for all individuals.

    Parameters
    ----------
    persons : pandas.DataFrame
        DataFrame of individual persons data.
    cdap_indiv_spec : pandas.DataFrame
        CDAP spec applied to individuals.
    locals_d : dict or None
        passed through to simulate.eval_utilities
    trace_hh_id : int or None
        when truthy, the utilities are written to the trace output
    trace_label : str or None
        passed (positionally) to simulate.eval_utilities and used as the
        prefix of the trace output name

    Returns
    -------
    utilities : pandas.DataFrame
        Will have index of `persons` and columns for each of the alternatives.
        plus some 'useful columns' [_hh_id_, _ptype_, 'cdap_rank', _hh_size_]
    """
    # single-person utilities, one row per person
    utilities = simulate.eval_utilities(cdap_indiv_spec, persons, locals_d, trace_label)

    # carry along the person attributes needed to build household interactions
    carried_columns = [_hh_id_, _ptype_, 'cdap_rank', _hh_size_]
    utilities[carried_columns] = persons[carried_columns]

    if trace_hh_id:
        output_name = '%s.indiv_utils' % trace_label
        tracing.trace_df(utilities, output_name,
                         column_labels=['activity', 'person'])

    return utilities
def auto_ownership_simulate(households_merged,
                            auto_ownership_spec,
                            auto_ownership_settings,
                            trace_hh_id):
    """
    Auto ownership is a standard model which predicts how many cars a household
    with given characteristics owns

    The simulated choices are added to the orca "households" table as the
    'auto_ownership' column, then the 'households_autoown' dependent columns
    are added.

    Parameters
    ----------
    households_merged : table wrapper (has to_frame)
        choosers
    auto_ownership_spec : model spec passed to asim.simple_simulate
    auto_ownership_settings : settings from which nest spec and constants are read
    trace_hh_id : truthy to enable tracing of the simulation and of the result
    """
    logger.info("Running auto_ownership_simulate with %d households" % len(households_merged))

    nests = config.get_logit_model_settings(auto_ownership_settings)
    model_constants = config.get_model_constants(auto_ownership_settings)

    # the trace label is only 'auto_ownership' when a trace household is set;
    # otherwise the falsy trace_hh_id itself is passed along
    label = trace_hh_id and 'auto_ownership'

    chooser_df = households_merged.to_frame()
    choices = asim.simple_simulate(
        choosers=chooser_df,
        spec=auto_ownership_spec,
        nest_spec=nests,
        locals_d=model_constants,
        trace_label=label,
        trace_choice_name='auto_ownership')

    tracing.print_summary('auto_ownership', choices, value_counts=True)

    orca.add_column('households', 'auto_ownership', choices)
    pipeline.add_dependent_columns('households', 'households_autoown')

    if trace_hh_id:
        dependent_columns = orca.get_table('households_autoown').columns
        columns_to_trace = ['auto_ownership'] + dependent_columns
        households_df = orca.get_table('households').to_frame()
        tracing.trace_df(households_df,
                         label='auto_ownership',
                         columns=columns_to_trace,
                         warn_if_empty=True)
def free_parking(
        persons_merged, persons, households,
        skim_dict, skim_stack,
        chunk_size, trace_hh_id, locutor):
    """
    Simulate free parking at work and store the result on the persons table.

    Choosers are the rows of persons_merged with workplace_taz > -1. The
    boolean column 'free_parking_at_work' is True where the simulated choice
    equals the FREE_PARKING_ALT setting; persons who were not choosers get
    False.

    Parameters
    ----------
    persons_merged : table wrapper (has to_frame)
        source of choosers
    persons : table wrapper (has to_frame)
        table that receives the free_parking_at_work column
    households, skim_dict, skim_stack, locutor : not used in this function body
    chunk_size : passed through to simulate.simple_simulate
    trace_hh_id : truthy to write the updated persons table to trace output
    """
    step_label = 'free_parking'
    settings = config.read_model_settings('free_parking.yaml')

    merged_df = persons_merged.to_frame()
    choosers = merged_df[merged_df.workplace_taz > -1]
    logger.info("Running %s with %d persons", step_label, len(choosers))

    model_constants = config.get_model_constants(settings)

    # - preprocessor
    preprocessor = settings.get('preprocessor')
    if preprocessor:
        # expression locals start out as a copy of the model constants, if any
        expression_locals = {}
        if model_constants is not None:
            expression_locals.update(model_constants)

        expressions.assign_columns(
            df=choosers,
            model_settings=preprocessor,
            locals_dict=expression_locals,
            trace_label=step_label)

    spec = simulate.read_model_spec(file_name='free_parking.csv')
    nests = config.get_logit_model_settings(settings)

    choices = simulate.simple_simulate(
        choosers=choosers,
        spec=spec,
        nest_spec=nests,
        locals_d=model_constants,
        chunk_size=chunk_size,
        trace_label=step_label,
        trace_choice_name='free_parking_at_work')

    persons_df = persons.to_frame()

    # choosers are a subset of persons, so reindex to the full persons index;
    # persons without a choice end up False
    has_free_parking = (choices == settings['FREE_PARKING_ALT'])
    has_free_parking = has_free_parking.reindex(persons_df.index).fillna(0).astype(bool)
    persons_df['free_parking_at_work'] = has_free_parking

    pipeline.replace_table("persons", persons_df)

    tracing.print_summary('free_parking', persons_df.free_parking_at_work, value_counts=True)

    if trace_hh_id:
        tracing.trace_df(persons_df,
                         label=step_label,
                         warn_if_empty=True)
def create_simple_trips(tours, households, persons, trace_hh_id):
    """
    Create a simple trip table

    Every tour produces two trips: an outbound trip (trip_num 1) from the
    household TAZ to the tour destination, and an inbound trip (trip_num 2)
    back again. The resulting table is indexed by a trip_id derived from
    tour_id and trip_num, and is registered as the orca "trips" table.

    Parameters
    ----------
    tours : table wrapper (has to_frame and index)
        must provide person_id, destination, start and end columns
    households : table with a TAZ column, indexed by household_id
    persons : table with a household_id column, indexed by person_id
    trace_hh_id : truthy to write the trips table to trace output
    """

    logger.info("Running simple trips table creation with %d tours" % len(tours.index))

    tours_df = tours.to_frame()

    # move the index into a regular column (the code below reads it as tour_id)
    tours_df.reset_index(inplace=True)

    tours_df['household_id'] = reindex(persons.household_id, tours_df.person_id)
    tours_df['TAZ'] = reindex(households.TAZ, tours_df.household_id)

    # create inbound and outbound records
    num_tours = len(tours_df.index)
    trips = pd.concat([tours_df, tours_df], ignore_index=True)

    # first half are outbound, second half are inbound
    # np.repeat needs an integer count: use the tour count rather than
    # len(trips) / 2, which is a float under true division
    trips['INBOUND'] = np.repeat([False, True], num_tours)

    # TRIPID for outbound trips = 1, inbound_trips = 2
    trips['trip_num'] = np.repeat([1, 2], num_tours)

    # set key fields from tour fields: 'TAZ','destination','start','end'
    # Series.where keeps the value where the trip is outbound and takes the
    # swapped field where it is inbound (avoids chained assignment, which may
    # silently write to a temporary copy)
    outbound = ~trips['INBOUND']
    trips['OTAZ'] = trips['TAZ'].where(outbound, trips['destination'])
    trips['DTAZ'] = trips['destination'].where(outbound, trips['TAZ'])
    trips['start_trip'] = trips['start'].where(outbound, trips['end'])
    trips['end_trip'] = trips['end'].where(outbound, trips['start'])

    # create a stable (predictable) index based on tour_id and trip_num
    possible_trips_count = 2
    trips['trip_id'] = (trips.tour_id * possible_trips_count) + (trips.trip_num - 1)
    trips.set_index('trip_id', inplace=True, verify_integrity=True)

    trip_columns = ['tour_id', 'INBOUND', 'trip_num', 'OTAZ', 'DTAZ', 'start_trip', 'end_trip']
    trips = trips[trip_columns]

    orca.add_table("trips", trips)

    tracing.register_traceable_table('trips', trips)
    pipeline.get_rn_generator().add_channel(trips, 'trips')

    if trace_hh_id:
        tracing.trace_df(trips,
                         label="trips",
                         warn_if_empty=True)
def free_parking(persons_merged, persons, households, skim_dict, skim_stack,
                 chunk_size, trace_hh_id, locutor):
    """
    Simulate free parking at work and store the result on the persons table.

    Choosers are the rows of persons_merged with workplace_taz > -1. The
    boolean column 'free_parking_at_work' is True where the simulated choice
    equals the FREE_PARKING_ALT setting; persons who were not choosers get
    False.

    Parameters
    ----------
    persons_merged : table wrapper (has to_frame)
        source of choosers
    persons : table wrapper (has to_frame)
        table that receives the free_parking_at_work column
    households, skim_dict, skim_stack, locutor : not used in this function body
    chunk_size : passed through to simulate.simple_simulate
    trace_hh_id : truthy to write the updated persons table to trace output
    """
    label = 'free_parking'
    settings = config.read_model_settings('free_parking.yaml')

    candidates = persons_merged.to_frame()
    workers = candidates[candidates.workplace_taz > -1]
    logger.info("Running %s with %d persons", label, len(workers))

    constants = config.get_model_constants(settings)

    # - preprocessor
    preprocessor_settings = settings.get('preprocessor')
    if preprocessor_settings:
        # expression locals start out as a copy of the model constants, if any
        preprocessor_locals = {}
        if constants is not None:
            preprocessor_locals.update(constants)

        expressions.assign_columns(df=workers,
                                   model_settings=preprocessor_settings,
                                   locals_dict=preprocessor_locals,
                                   trace_label=label)

    parking_spec = simulate.read_model_spec(file_name='free_parking.csv')
    nesting = config.get_logit_model_settings(settings)

    simulated = simulate.simple_simulate(
        choosers=workers,
        spec=parking_spec,
        nest_spec=nesting,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=label,
        trace_choice_name='free_parking_at_work')

    persons_frame = persons.to_frame()

    # choosers are a subset of persons, so reindex to the full persons index;
    # persons without a choice end up False
    free_parking_alt = settings['FREE_PARKING_ALT']
    chose_free_parking = (simulated == free_parking_alt)
    persons_frame['free_parking_at_work'] = \
        chose_free_parking.reindex(persons_frame.index).fillna(0).astype(bool)

    pipeline.replace_table("persons", persons_frame)

    tracing.print_summary('free_parking',
                          persons_frame.free_parking_at_work,
                          value_counts=True)

    if trace_hh_id:
        tracing.trace_df(persons_frame, label=label, warn_if_empty=True)
def atwork_subtour_destination(
        tours,
        persons_merged,
        skim_dict, skim_stack,
        land_use, size_terms,
        chunk_size, trace_hh_id):
    """
    Choose a destination for every at-work subtour.

    Runs the sample, logsums and simulate stages in turn and writes the chosen
    destination into the 'destination' column of the pipeline "tours" table
    for tours whose tour_category is 'atwork'. Returns early (after reporting
    no results) when there are no such tours.

    Parameters
    ----------
    tours : table wrapper (has to_frame)
    persons_merged : table wrapper (has to_frame)
    skim_dict, skim_stack : passed through to the sample/logsums/simulate stages
    land_use, size_terms : inputs to tour_destination_size_terms
    chunk_size : passed through to each stage
    trace_hh_id : truthy to write the destination column to trace output
    """
    persons_df = persons_merged.to_frame()
    tours_df = tours.to_frame()

    is_atwork = tours_df.tour_category == 'atwork'
    subtours = tours_df[is_atwork]

    # - if no atwork subtours
    if len(subtours.index) == 0:
        tracing.no_results('atwork_subtour_destination')
        return

    # interaction_sample_simulate insists choosers appear in same order as alts
    subtours = subtours.sort_index()

    size_term_table = tour_destination_size_terms(land_use, size_terms, 'atwork')

    sample = atwork_subtour_destination_sample(
        subtours, persons_df, skim_dict, size_term_table,
        chunk_size, trace_hh_id)

    sample = atwork_subtour_destination_logsums(
        persons_df, sample, skim_dict, skim_stack,
        chunk_size, trace_hh_id)

    subtours['destination'] = atwork_subtour_destination_simulate(
        subtours, persons_df, sample, skim_dict, size_term_table,
        chunk_size, trace_hh_id)

    # copy the chosen destinations back onto the full tours table
    assign_in_place(tours_df, subtours[['destination']])

    pipeline.replace_table("tours", tours_df)

    tracing.print_summary('subtour destination', subtours.destination, describe=True)

    if trace_hh_id:
        tracing.trace_df(tours_df,
                         label='atwork_subtour_destination',
                         columns=['destination'])
def mandatory_scheduling(mandatory_tours_merged,
                         tdd_alts,
                         tdd_school_spec,
                         tdd_work_spec,
                         mandatory_scheduling_settings,
                         chunk_size,
                         trace_hh_id):
    """
    This model predicts the departure time and duration of each activity for
    mandatory tours

    School tours and work tours are scheduled separately, each with its own
    spec. The combined choices are added to the orca "mandatory_tours" table
    as the 'tour_departure_and_duration' column.

    Parameters
    ----------
    mandatory_tours_merged, tdd_alts, tdd_school_spec, tdd_work_spec :
        table wrappers (each is converted with to_frame)
    mandatory_scheduling_settings : settings from which constants are read
    chunk_size : passed through to vectorize_tour_scheduling
    trace_hh_id : truthy to write the mandatory_tours table to trace output
    """
    all_tours = mandatory_tours_merged.to_frame()
    alternatives = tdd_alts.to_frame()
    model_constants = config.get_model_constants(mandatory_scheduling_settings)

    # - school tours
    spec_for_school = tdd_school_spec.to_frame()
    tours_to_school = all_tours[all_tours.tour_type == "school"]

    logger.info("Running mandatory_scheduling school_tours with %d tours" % len(tours_to_school))

    school_choices = vectorize_tour_scheduling(
        tours_to_school, alternatives, spec_for_school,
        constants=model_constants,
        chunk_size=chunk_size,
        trace_label='mandatory_scheduling.school')

    # - work tours
    spec_for_work = tdd_work_spec.to_frame()
    tours_to_work = all_tours[all_tours.tour_type == "work"]

    logger.info("Running %d work tour scheduling choices" % len(tours_to_work))

    work_choices = vectorize_tour_scheduling(
        tours_to_work, alternatives, spec_for_work,
        constants=model_constants,
        chunk_size=chunk_size,
        trace_label='mandatory_scheduling.work')

    combined = pd.concat([school_choices, work_choices])

    tracing.print_summary('mandatory_scheduling tour_departure_and_duration',
                          combined, describe=True)

    orca.add_column("mandatory_tours", "tour_departure_and_duration", combined)

    if trace_hh_id:
        traced = orca.get_table('mandatory_tours').to_frame()
        tracing.trace_df(traced,
                         label="mandatory_tours",
                         slicer='person_id',
                         index_label='tour',
                         columns=None,
                         warn_if_empty=True)
def mandatory_tour_scheduling(tours,
                              persons_merged,
                              tdd_alts,
                              chunk_size,
                              trace_hh_id):
    """
    This model predicts the departure time and duration of each activity for
    mandatory tours

    Tours with tour_category 'mandatory' are scheduled by run_tour_scheduling
    and the resulting columns are written back to the pipeline "tours" table.
    Returns early (after reporting no results) when there are no such tours.

    Parameters
    ----------
    tours : table wrapper (has to_frame)
    persons_merged : table wrapper (has to_frame)
        must provide an is_university column
    tdd_alts : passed through to run_tour_scheduling and tt.tour_map
    chunk_size : passed through to run_tour_scheduling
    trace_hh_id : truthy to write the mandatory tours to trace output
    """
    model_name = 'mandatory_tour_scheduling'
    trace_label = model_name

    persons_df = persons_merged.to_frame()
    tours_df = tours.to_frame()

    scheduled = tours_df[tours_df.tour_category == 'mandatory']

    # - if no mandatory_tours
    if len(scheduled.index) == 0:
        tracing.no_results(model_name)
        return

    # - add tour segmentation column
    # mtctm1 segments mandatory_scheduling spec by tour_type
    # (i.e. there are different specs for work and school tour_types)
    # mtctm1 logsum coefficients are segmented by primary_purpose
    # (i.e. there are different logsum coefficients for work, school, univ primary_purposes)
    # for simplicity managing these different segmentation schemes,
    # we conflate them by segmenting tour processing to align with primary_purpose
    segment_column = 'mandatory_tour_seg'
    assert segment_column not in scheduled

    # school tours made by university students are segmented as 'univ'
    person_is_university = reindex(persons_df.is_university, scheduled.person_id)
    is_university_tour = (scheduled.tour_type == 'school') & person_is_university

    scheduled[segment_column] = scheduled.tour_type.where(~is_university_tour, 'univ')

    choices = run_tour_scheduling(model_name, scheduled, persons_df, tdd_alts,
                                  segment_column, chunk_size, trace_hh_id)

    assign_in_place(tours_df, choices)
    pipeline.replace_table("tours", tours_df)

    # updated df for tracing
    scheduled = tours_df[tours_df.tour_category == 'mandatory']

    tour_map = tt.tour_map(persons_df, scheduled, tdd_alts)
    tracing.dump_df(DUMP, tour_map, trace_label, 'tour_map')

    if trace_hh_id:
        tracing.trace_df(scheduled,
                         label=trace_label,
                         slicer='person_id',
                         index_label='tour',
                         columns=None,
                         warn_if_empty=True)
def initialize_tours(network_los, households, persons, trace_hh_id):
    """
    Load the input "tours" table, annotate it, and register it in the pipeline.

    When households were sampled, only tours whose person_id is in persons are
    kept. Tours are annotated using the 'annotate_tours' entry of
    'initialize_tours.yaml' and, unless the 'skip_patch_tour_ids' setting is
    truthy, passed through patch_tour_ids.

    Parameters
    ----------
    network_los : not used in this function body
    households : table with the household index (used for debug logging)
    persons : table whose index holds the valid person ids
    trace_hh_id : truthy to write the tours table to trace output

    Raises
    ------
    RuntimeError
        if any tour refers to a person_id that is not in persons
    """
    trace_label = 'initialize_tours'

    tours = read_input_table("tours")

    # FIXME can't use households_sliced injectable as flag like persons table does in case of resume_after.
    # FIXME could just always slice...
    # NOTE(review): the original tested households_sample_size twice, joined
    # by `or`; a second, different flag may have been intended - confirm
    slice_happened = inject.get_injectable('households_sample_size', 0) > 0
    if slice_happened:
        logger.info("slicing tours %s" % (tours.shape,))
        # keep all persons in the sampled households
        tours = tours[tours.person_id.isin(persons.index)]

    # annotate before patching tour_id to allow addition of REQUIRED_TOUR_COLUMNS defined above
    model_settings = config.read_model_settings('initialize_tours.yaml', mandatory=True)
    expressions.assign_columns(
        df=tours,
        model_settings=model_settings.get('annotate_tours'),
        trace_label=tracing.extend_trace_label(trace_label, 'annotate_tours'))

    if not model_settings.get('skip_patch_tour_ids', False):
        tours = patch_tour_ids(tours)
    assert tours.index.name == 'tour_id'

    # replace table function with dataframe
    inject.add_table('tours', tours)

    pipeline.get_rn_generator().add_channel('tours', tours)

    tracing.register_traceable_table('tours', tours)

    logger.debug(f"{len(tours.household_id.unique())} unique household_ids in tours")
    logger.debug(f"{len(households.index.unique())} unique household_ids in households")
    assert not tours.index.duplicated().any()

    tours_without_persons = ~tours.person_id.isin(persons.index)
    if tours_without_persons.any():
        # report the total number of tours (not persons) and list only the
        # offending rows: tour_id index with the unmatched person_id
        logger.error(f"{tours_without_persons.sum()} tours out of {len(tours)} without persons\n"
                     f"{tours.person_id[tours_without_persons]}")
        raise RuntimeError(f"{tours_without_persons.sum()} tours with bad person_id")

    if trace_hh_id:
        tracing.trace_df(tours,
                         label='initialize_tours',
                         warn_if_empty=True)
def cdap_simulate(persons_merged,
                  cdap_settings,
                  cdap_indiv_spec,
                  cdap_interaction_coefficients,
                  cdap_fixed_relative_proportions,
                  chunk_size, trace_hh_id):
    """
    CDAP stands for Coordinated Daily Activity Pattern, which is a choice of
    high-level activity pattern for each person, in a coordinated way with other
    members of a person's household.

    Because Python requires vectorization of computation, there are some specialized
    routines in the cdap directory of activitysim for this purpose.  This module
    simply applies those utilities using the simulation framework.

    The chosen cdap_activity and cdap_rank are added as columns of the orca
    "persons" table, after which the 'persons_cdap' and 'households_cdap'
    dependent columns are added.

    Parameters
    ----------
    persons_merged : table wrapper (has to_frame and index)
        choosers
    cdap_settings : settings from which constants are read
    cdap_indiv_spec, cdap_interaction_coefficients, cdap_fixed_relative_proportions :
        passed through to run_cdap
    chunk_size : passed through to run_cdap
    trace_hh_id : truthy to write the persons_merged table to trace output
    """

    persons_df = persons_merged.to_frame()

    constants = config.get_model_constants(cdap_settings)

    logger.info("Running cdap_simulate with %d persons" % len(persons_df.index))

    choices = run_cdap(persons=persons_df,
                       cdap_indiv_spec=cdap_indiv_spec,
                       cdap_interaction_coefficients=cdap_interaction_coefficients,
                       cdap_fixed_relative_proportions=cdap_fixed_relative_proportions,
                       locals_d=constants,
                       chunk_size=chunk_size,
                       trace_hh_id=trace_hh_id,
                       trace_label='cdap')

    tracing.print_summary('cdap_activity', choices.cdap_activity, value_counts=True)

    # print() with a single parenthesised argument is valid on Python 2 and 3;
    # the bare print statement used before is a SyntaxError on Python 3
    print(pd.crosstab(persons_df.ptype, choices.cdap_activity, margins=True))

    # align the choices with the full persons_merged index before adding columns
    choices = choices.reindex(persons_merged.index)
    orca.add_column("persons", "cdap_activity", choices.cdap_activity)
    orca.add_column("persons", "cdap_rank", choices.cdap_rank)

    pipeline.add_dependent_columns("persons", "persons_cdap")
    pipeline.add_dependent_columns("households", "households_cdap")

    if trace_hh_id:
        tracing.trace_df(orca.get_table('persons_merged').to_frame(),
                         label="cdap",
                         columns=['ptype', 'cdap_rank', 'cdap_activity'],
                         warn_if_empty=True)
def non_mandatory_tour_destination(
        tours,
        persons_merged,
        skim_dict, skim_stack,
        chunk_size,
        trace_hh_id):
    """
    Given the tour generation from the above, each tour needs to have a
    destination, so in this case tours are the choosers (with the associated
    person that's making the tour)

    Only tours whose tour_category is 'non_mandatory' are choosers. The chosen
    destination is written to the 'destination' column of the pipeline "tours"
    table for those tours. Returns early (after reporting no results) when
    there are no non-mandatory tours.

    Parameters
    ----------
    tours : table wrapper (has to_frame)
    persons_merged : table wrapper (has to_frame)
    skim_dict, skim_stack : passed through to tour_destination.run_tour_destination
    chunk_size : passed through to tour_destination.run_tour_destination
    trace_hh_id : truthy to write the non-mandatory tours to trace output
    """
    trace_label = 'non_mandatory_tour_destination'
    model_settings = config.read_model_settings('non_mandatory_tour_destination.yaml')

    tours = tours.to_frame()

    persons_merged = persons_merged.to_frame()

    # choosers are tours - in a sense tours are choosing their destination
    non_mandatory_tours = tours[tours.tour_category == 'non_mandatory']

    # - if no non_mandatory_tours
    if non_mandatory_tours.shape[0] == 0:
        tracing.no_results(trace_label)
        return

    # only the non-mandatory tours are choosers; passing the whole tours table
    # would also run the model for tours of other categories whose results are
    # never used
    choices = tour_destination.run_tour_destination(
        non_mandatory_tours,
        persons_merged,
        model_settings,
        skim_dict,
        skim_stack,
        chunk_size, trace_hh_id, trace_label)

    non_mandatory_tours['destination'] = choices

    # copy the chosen destinations back onto the full tours table
    assign_in_place(tours, non_mandatory_tours[['destination']])

    pipeline.replace_table("tours", tours)

    if trace_hh_id:
        tracing.trace_df(tours[tours.tour_category == 'non_mandatory'],
                         label="non_mandatory_tour_destination",
                         slicer='person_id',
                         index_label='tour',
                         columns=None,
                         warn_if_empty=True)
def trip_destination(
        trips,
        tours_merged,
        chunk_size, trace_hh_id):
    """
    Choose a destination for all 'intermediate' trips based on trip purpose.

    Final trips already have a destination (the primary tour destination for outbound trips,
    and home for inbound trips.)

    Trips flagged as failed by run_trip_destination are logged and written to
    a '<trace_label>_failed_trips' csv. When the CLEANUP setting is truthy
    (the default) the trips are passed through cleanup_failed_trips; otherwise
    failed trips are kept and a warning is logged. The result replaces the
    pipeline "trips" table.

    Parameters
    ----------
    trips : table wrapper (has to_frame)
    tours_merged : table wrapper (has to_frame)
    chunk_size : passed through to run_trip_destination
    trace_hh_id : truthy to write the trips table to trace output
    """
    trace_label = 'trip_destination'
    model_settings = config.read_model_settings('trip_destination.yaml')
    CLEANUP = model_settings.get('CLEANUP', True)

    trips_df = trips.to_frame()
    tours_merged_df = tours_merged.to_frame()

    logger.info("Running %s with %d trips", trace_label, trips_df.shape[0])

    trips_df = run_trip_destination(
        trips_df,
        tours_merged_df,
        chunk_size, trace_hh_id, trace_label)

    if trips_df.failed.any():
        logger.warning("%s %s failed trips", trace_label, trips_df.failed.sum())
        file_name = "%s_failed_trips" % trace_label
        logger.info("writing failed trips to %s", file_name)
        tracing.write_csv(trips_df[trips_df.failed], file_name=file_name, transpose=False)

    if CLEANUP:
        trips_df = cleanup_failed_trips(trips_df)
    elif trips_df.failed.any():
        logger.warning("%s keeping %s sidelined failed trips",
                       trace_label, trips_df.failed.sum())

    pipeline.replace_table("trips", trips_df)

    # report through the logger like the rest of the step, not a bare print
    logger.debug("%s trips_df shape %s", trace_label, trips_df.shape)

    if trace_hh_id:
        tracing.trace_df(trips_df,
                         label=trace_label,
                         slicer='trip_id',
                         index_label='trip_id',
                         warn_if_empty=True)
def mandatory_tour_frequency(persons_merged,
                             mandatory_tour_frequency_spec,
                             mandatory_tour_frequency_settings,
                             chunk_size,
                             trace_hh_id):
    """
    This model predicts the frequency of making mandatory trips (see the
    alternatives above) - these trips include work and school in some combination.

    Choosers are persons whose cdap_activity is 'M'. The chosen alternative
    name is added to the "persons" table as 'mandatory_tour_frequency',
    mandatory tours are created, and the 'persons_mtf' dependent columns
    are added.

    Parameters
    ----------
    persons_merged : table wrapper (has to_frame and local)
    mandatory_tour_frequency_spec : pandas.DataFrame
        model spec; its columns are the alternative names
    mandatory_tour_frequency_settings : settings from which nest spec and constants are read
    chunk_size : passed through to simulate.simple_simulate
    trace_hh_id : truthy to write the persons table to trace output
    """
    step_label = 'mandatory_tour_frequency'

    merged_df = persons_merged.to_frame()

    # filter based on results of CDAP
    has_mandatory_pattern = merged_df.cdap_activity == 'M'
    choosers = merged_df[has_mandatory_pattern]
    logger.info("Running mandatory_tour_frequency with %d persons" % len(choosers))

    nests = config.get_logit_model_settings(mandatory_tour_frequency_settings)
    model_constants = config.get_model_constants(mandatory_tour_frequency_settings)

    choice_positions = simulate.simple_simulate(
        choosers,
        spec=mandatory_tour_frequency_spec,
        nest_spec=nests,
        locals_d=model_constants,
        chunk_size=chunk_size,
        trace_label=step_label,
        trace_choice_name='mandatory_tour_frequency')

    # convert indexes to alternative names
    alternative_names = mandatory_tour_frequency_spec.columns[choice_positions.values]
    named_choices = pd.Series(alternative_names, index=choice_positions.index)
    # persons who were not choosers get a null frequency
    named_choices = named_choices.reindex(persons_merged.local.index)

    tracing.print_summary('mandatory_tour_frequency', named_choices, value_counts=True)

    inject.add_column("persons", "mandatory_tour_frequency", named_choices)

    create_mandatory_tours(trace_hh_id)

    # add mandatory_tour-dependent columns (e.g. tour counts) to persons
    pipeline.add_dependent_columns("persons", "persons_mtf")

    if trace_hh_id:
        persons_df = inject.get_table('persons').to_frame()
        tracing.trace_df(persons_df,
                         label="mandatory_tour_frequency.persons",
                         warn_if_empty=True)
def non_mandatory_tour_scheduling(tours,
                                  persons_merged,
                                  tdd_alts,
                                  tdd_non_mandatory_spec,
                                  non_mandatory_tour_scheduling_settings,
                                  chunk_size,
                                  trace_hh_id):
    """
    This model predicts the departure time and duration of each activity for
    non-mandatory tours

    Tours flagged by the boolean non_mandatory column are scheduled and the
    resulting columns are written back to the pipeline "tours" table.

    Parameters
    ----------
    tours : table wrapper (has to_frame)
    persons_merged : table wrapper (has to_frame)
    tdd_alts, tdd_non_mandatory_spec : passed through to vectorize_tour_scheduling
    non_mandatory_tour_scheduling_settings : settings from which constants are read
    chunk_size : passed through to vectorize_tour_scheduling
    trace_hh_id : truthy to write the non-mandatory tours to trace output
    """
    step_label = 'non_mandatory_tour_scheduling'

    tours_df = tours.to_frame()
    persons_df = persons_merged.to_frame()

    choosers = tours_df[tours_df.non_mandatory]

    # NOTE(review): this logs the size of the whole tours table, not only the
    # non-mandatory tours being scheduled - confirm that is intended
    logger.info("Running non_mandatory_tour_scheduling with %d tours" % len(tours_df))

    model_constants = config.get_model_constants(non_mandatory_tour_scheduling_settings)

    schedule = vectorize_tour_scheduling(
        choosers, persons_df,
        tdd_alts, tdd_non_mandatory_spec,
        constants=model_constants,
        chunk_size=chunk_size,
        trace_label=step_label)

    # add tdd_choices columns to tours
    chosen_rows = schedule.index
    for column_name in schedule.columns:
        tours_df.loc[chosen_rows, column_name] = schedule[column_name]

    pipeline.replace_table("tours", tours_df)

    # re-select so the traced frame includes the newly assigned columns
    choosers = tours_df[tours_df.non_mandatory]

    tour_map = tt.tour_map(persons_df, choosers, tdd_alts)
    tracing.dump_df(DUMP, tour_map, step_label, 'tour_map')

    if trace_hh_id:
        tracing.trace_df(choosers,
                         label="non_mandatory_tour_scheduling",
                         slicer='person_id',
                         index_label='tour_id',
                         columns=None,
                         warn_if_empty=True)
def persons(households, trace_hh_id):
    """
    Load the raw persons table and register it with the pipeline.

    Parameters
    ----------
    households : passed through to read_raw_persons
    trace_hh_id : truthy to register persons as a traceable table and write
        it to the trace output

    Returns
    -------
    pandas.DataFrame
        the frame returned by read_raw_persons
    """
    persons_df = read_raw_persons(households)

    logger.info("loaded persons %s" % (persons_df.shape, ))

    # replace table function with dataframe
    inject.add_table('persons', persons_df)

    rn_generator = pipeline.get_rn_generator()
    rn_generator.add_channel('persons', persons_df)

    if trace_hh_id:
        tracing.register_traceable_table('persons', persons_df)
        tracing.trace_df(persons_df, "raw.persons", warn_if_empty=True)

    return persons_df
def atwork_subtour_destination(tours, persons_merged, skim_dict, skim_stack,
                               land_use, size_terms, chunk_size, trace_hh_id):
    """
    Choose a destination for every at-work subtour.

    Runs the sample, logsums and simulate stages in turn and writes the chosen
    destination into the 'destination' column of the pipeline "tours" table
    for tours whose tour_category is 'atwork'. Returns early (after reporting
    no results) when there are no such tours.

    Parameters
    ----------
    tours : table wrapper (has to_frame)
    persons_merged : table wrapper (has to_frame)
    skim_dict, skim_stack : passed through to the sample/logsums/simulate stages
    land_use, size_terms : inputs to tour_destination_size_terms
    chunk_size : passed through to each stage
    trace_hh_id : truthy to write the destination column to trace output
    """
    merged_persons = persons_merged.to_frame()
    all_tours = tours.to_frame()

    atwork_tours = all_tours[all_tours.tour_category == 'atwork']

    # - if no atwork subtours
    if len(atwork_tours.index) == 0:
        tracing.no_results('atwork_subtour_destination')
        return

    # interaction_sample_simulate insists choosers appear in same order as alts
    atwork_tours = atwork_tours.sort_index()

    dest_size_terms = tour_destination_size_terms(land_use, size_terms, 'atwork')

    # stage 1: sample candidate destinations
    dest_sample = atwork_subtour_destination_sample(
        atwork_tours, merged_persons, skim_dict, dest_size_terms,
        chunk_size, trace_hh_id)

    # stage 2: logsums for the sampled destinations
    dest_sample = atwork_subtour_destination_logsums(
        merged_persons, dest_sample, skim_dict, skim_stack,
        chunk_size, trace_hh_id)

    # stage 3: choose among the sampled destinations
    chosen = atwork_subtour_destination_simulate(
        atwork_tours, merged_persons, dest_sample, skim_dict, dest_size_terms,
        chunk_size, trace_hh_id)

    atwork_tours['destination'] = chosen

    # copy the chosen destinations back onto the full tours table
    assign_in_place(all_tours, atwork_tours[['destination']])

    pipeline.replace_table("tours", all_tours)

    tracing.print_summary('subtour destination',
                          atwork_tours.destination,
                          describe=True)

    if trace_hh_id:
        tracing.trace_df(all_tours,
                         label='atwork_subtour_destination',
                         columns=['destination'])
def mandatory_tour_frequency(persons_merged,
                             mandatory_tour_frequency_spec,
                             mandatory_tour_frequency_settings,
                             trace_hh_id):
    """
    This model predicts the frequency of making mandatory trips (see the
    alternatives above) - these trips include work and school in some combination.

    Choosers are persons whose cdap_activity is 'M'. The chosen alternative
    name is added to the orca "persons" table as 'mandatory_tour_frequency',
    the 'persons_mtf' dependent columns are added, and the mandatory tours
    table is created.

    Parameters
    ----------
    persons_merged : table wrapper (has to_frame and local)
    mandatory_tour_frequency_spec : pandas.DataFrame
        model spec; its columns are the alternative names
    mandatory_tour_frequency_settings : settings from which nest spec and constants are read
    trace_hh_id : truthy to enable tracing of the simulation and of the result
    """
    merged_df = persons_merged.to_frame()

    # filter based on results of CDAP
    choosers = merged_df[merged_df.cdap_activity == 'M']
    logger.info("Running mandatory_tour_frequency with %d persons" % len(choosers))

    nests = config.get_logit_model_settings(mandatory_tour_frequency_settings)
    model_constants = config.get_model_constants(mandatory_tour_frequency_settings)

    # the trace label is only set when a trace household is given;
    # otherwise the falsy trace_hh_id itself is passed along
    label = trace_hh_id and 'mandatory_tour_frequency'

    choice_positions = asim.simple_simulate(
        choosers,
        spec=mandatory_tour_frequency_spec,
        nest_spec=nests,
        locals_d=model_constants,
        trace_label=label,
        trace_choice_name='mandatory_tour_frequency')

    # convert indexes to alternative names
    alternative_names = mandatory_tour_frequency_spec.columns[choice_positions.values]
    choices = pd.Series(alternative_names, index=choice_positions.index)
    choices = choices.reindex(persons_merged.local.index)

    tracing.print_summary('mandatory_tour_frequency', choices, value_counts=True)

    orca.add_column("persons", "mandatory_tour_frequency", choices)
    pipeline.add_dependent_columns("persons", "persons_mtf")

    create_mandatory_tours_table()

    # FIXME - test prng repeatability
    random_rows = pipeline.get_rn_generator().random_for_df(choices)
    flattened = []
    for row in random_rows:
        flattened.extend(row)
    orca.add_column("persons", "mtf_rand", flattened)

    if trace_hh_id:
        columns_to_trace = ['mandatory_tour_frequency']
        traced = orca.get_table('persons_merged').to_frame()
        tracing.trace_df(traced,
                         label="mandatory_tour_frequency",
                         columns=columns_to_trace,
                         warn_if_empty=True)
def write_trace_data(trace_results, trace_zones, zones, trace_assigned_locals, trace_zone_rows):
    """
    Write the trace results for the traced zones to the trace output.

    Parameters
    ----------
    trace_results : pandas.DataFrame or None
        results to left-join onto the traced zone rows; when None a warning
        is logged and nothing is written
    trace_zones : only used in the warning message
    zones : pandas.DataFrame
    trace_zone_rows : indexer applied to zones to select the traced rows
    trace_assigned_locals : truthy to also write the merged frame with
        tracing.write_locals
    """
    if trace_results is None:
        # logger.warn is a deprecated alias; lazy %-args also format correctly
        # when trace_zones is a tuple
        logger.warning('trace_zones not found in zones = %s', trace_zones)
        return

    # select the traced rows of zones and attach their results
    df = zones.loc[zones[trace_zone_rows].index]
    df = df.merge(trace_results, how='left', left_index=True, right_index=True)

    # NOTE(review): index_label is the string 'None', not the None object
    tracing.trace_df(df,
                     label='buffered_zones',
                     index_label='None',
                     slicer='NONE',
                     warn_if_empty=True)

    if trace_assigned_locals:
        tracing.write_locals(df, file_name='netbuffer_locals')
def trip_purpose(trips, chunk_size, trace_hh_id):
    """
    trip purpose model step - calls run_trip_purpose to run the actual model

    adds purpose column to trips and replaces the pipeline "trips" table

    When an estimator is active, the chooser columns and choices are written
    to it and the choices are replaced by the values returned from
    estimator.get_survey_values.

    Parameters
    ----------
    trips : table wrapper (has to_frame)
    chunk_size : passed through to run_trip_purpose
    trace_hh_id : truthy to write the updated trips table to trace output
    """
    step_label = "trip_purpose"

    trips_frame = trips.to_frame()

    estimator = estimation.manager.begin_estimation('trip_purpose')
    if estimator:
        estimation_columns = ['person_id', 'household_id', 'tour_id', 'trip_num']
        estimator.write_choosers(trips_frame[estimation_columns])

    purposes = run_trip_purpose(
        trips_frame,
        estimator,
        chunk_size=chunk_size,
        trace_hh_id=trace_hh_id,
        trace_label=step_label)

    if estimator:
        estimator.write_choices(purposes)
        # override choices
        purposes = estimator.get_survey_values(purposes, 'trips', 'purpose')
        estimator.write_override_choices(purposes)
        estimator.end_estimation()

    trips_frame['purpose'] = purposes

    # we should have assigned a purpose to all trips
    assert trips_frame.purpose.notnull().all()

    pipeline.replace_table("trips", trips_frame)

    if trace_hh_id:
        tracing.trace_df(trips_frame,
                         label=step_label,
                         slicer='trip_id',
                         index_label='trip_id',
                         warn_if_empty=True)
def choose_intermediate_trip_purpose(trips, probs_spec, trace_hh_id, trace_label):
    """
    choose purpose for intermediate trips based on probs_spec
    which assigns relative weights (summing to 1) to the possible purpose choices

    Note that the purpose columns of probs_spec are normalized in place, so
    the caller's frame is modified.

    Parameters
    ----------
    trips : pandas.DataFrame
        must provide primary_purpose, outbound, person_type and start, and
        yield a trip_id column when its index is reset
    probs_spec : pandas.DataFrame
        join columns, depart_range_start/depart_range_end, and one column of
        weights per purpose
    trace_hh_id : truthy to enable tracing when trips has trace targets
    trace_label : str

    Returns
    -------
    purpose: pandas.Series of purpose (str) indexed by trip_id
    """
    join_cols = ['primary_purpose', 'outbound', 'person_type']
    depart_range_cols = ['depart_range_start', 'depart_range_end']
    not_a_purpose = join_cols + depart_range_cols

    # every remaining column of the spec is a purpose alternative
    purpose_cols = [col for col in probs_spec.columns if col not in not_a_purpose]

    trip_count = len(trips.index)
    have_trace_targets = trace_hh_id and tracing.has_trace_targets(trips)

    # probs should sum to 1 across rows
    row_totals = probs_spec[purpose_cols].sum(axis=1)
    probs_spec.loc[:, purpose_cols] = probs_spec.loc[:, purpose_cols].div(row_totals, axis=0)

    # left join trips to probs (there may be multiple rows per trip for multiple depart ranges)
    joined = pd.merge(trips.reset_index(), probs_spec, on=join_cols, how='left')
    choosers = joined.set_index('trip_id')

    chunk.log_df(trace_label, 'choosers', choosers)

    # select the matching depart range (this should result in exactly one chooser row per trip)
    starts_in_range = \
        (choosers.start >= choosers['depart_range_start']) & \
        (choosers.start <= choosers['depart_range_end'])
    choosers = choosers[starts_in_range]

    # choosers should now match trips row for row
    assert choosers.index.is_unique
    assert len(choosers.index) == trip_count

    choices, rands = logit.make_choices(
        choosers[purpose_cols],
        trace_label=trace_label, trace_choosers=choosers)

    if have_trace_targets:
        tracing.trace_df(choices, '%s.choices' % trace_label, columns=[None, 'trip_purpose'])
        tracing.trace_df(rands, '%s.rands' % trace_label, columns=[None, 'rand'])

    # map each choice to the purpose column name at that position
    position_to_purpose = pd.Series(purpose_cols)
    return choices.map(position_to_purpose)
def persons(households, trace_hh_id):
    """
    Load the raw persons table, name its index 'person_id', and register it
    with the pipeline.

    Parameters
    ----------
    households : passed through to read_raw_persons
    trace_hh_id : truthy to register persons as a traceable table and write
        it to the trace output

    Returns
    -------
    pandas.DataFrame
        the frame returned by read_raw_persons, with its index renamed
    """
    persons_df = read_raw_persons(households)

    logger.info("loaded persons %s" % (persons_df.shape,))

    persons_df.index.name = 'person_id'

    # replace table function with dataframe
    inject.add_table('persons', persons_df)

    rn_generator = pipeline.get_rn_generator()
    rn_generator.add_channel('persons', persons_df)

    if trace_hh_id:
        tracing.register_traceable_table('persons', persons_df)
        tracing.trace_df(persons_df, "raw.persons", warn_if_empty=True)

    return persons_df
def individual_utilities(persons,
                         cdap_indiv_spec,
                         locals_d,
                         trace_hh_id=None,
                         trace_label=None):
    """
    Calculate CDAP utilities for all individuals.

    Parameters
    ----------
    persons : pandas.DataFrame
        DataFrame of individual persons data.
    cdap_indiv_spec : pandas.DataFrame
        CDAP spec applied to individuals.
    locals_d : dict or None
        passed through to eval_variables
    trace_hh_id : int or None
        when truthy, the evaluated variables and the utilities are written
        to the trace output
    trace_label : str or None
        prefix of the trace output names

    Returns
    -------
    utilities : pandas.DataFrame
        Will have index of `persons` and columns for each of the alternatives.
        plus some 'useful columns' [_hh_id_, _ptype_, 'cdap_rank', _hh_size_]
    """
    # evaluate the spec expressions (the spec index) for every person, then
    # weight them by the spec to get single-person utilities
    expression_values = eval_variables(cdap_indiv_spec.index, persons, locals_d)
    utilities = expression_values.dot(cdap_indiv_spec)

    # carry along the person attributes needed to build household interactions
    carried_columns = [_hh_id_, _ptype_, 'cdap_rank', _hh_size_]
    utilities[carried_columns] = persons[carried_columns]

    if trace_hh_id:
        tracing.trace_df(expression_values,
                         '%s.individual_vars' % trace_label,
                         column_labels=['expression', 'person'])
        tracing.trace_df(utilities,
                         '%s.indiv_utils' % trace_label,
                         column_labels=['activity', 'person'])

    return utilities
def persons(households, trace_hh_id):
    """
    Load the raw persons table, register it, and check it against households.

    The dataframe is added as the 'persons' table (replacing the table
    function), given a random number channel and registered as traceable.
    Persons and households must then reference each other consistently.

    Returns
    -------
    pandas.DataFrame
        the persons dataframe

    Raises
    ------
    RuntimeError
        if any person references a household_id missing from households,
        or if any household has no persons
    """

    df = read_raw_persons(households)

    logger.info("loaded persons %s" % (df.shape, ))

    # replace table function with dataframe
    inject.add_table('persons', df)

    pipeline.get_rn_generator().add_channel('persons', df)

    tracing.register_traceable_table('persons', df)
    if trace_hh_id:
        tracing.trace_df(df, "raw.persons", warn_if_empty=True)

    logger.debug(
        f"{len(df.household_id.unique())} unique household_ids in persons")
    logger.debug(
        f"{len(households.index.unique())} unique household_ids in households")
    assert not households.index.duplicated().any()
    assert not df.index.duplicated().any()

    # boolean mask over persons: True where household_id is unknown
    persons_without_households = ~df.household_id.isin(households.index)
    if persons_without_households.any():
        # report only the offending person ids; `persons` is this function,
        # so the row count has to come from df
        orphan_person_ids = \
            persons_without_households.index[persons_without_households].values
        logger.error(
            f"{persons_without_households.sum()} persons out of {len(df)} without households\n"
            f"{pd.Series({'person_id': orphan_person_ids})}"
        )
        raise RuntimeError(
            f"{persons_without_households.sum()} persons with bad household_id"
        )

    # boolean mask over households: True where no person row references it
    households_without_persons = df.groupby('household_id').size().reindex(
        households.index).isnull()
    if households_without_persons.any():
        empty_household_ids = \
            households_without_persons.index[households_without_persons].values
        logger.error(
            f"{households_without_persons.sum()} households out of {len(households.index)} without persons\n"
            f"{pd.Series({'household_id': empty_household_ids})}"
        )
        raise RuntimeError(
            f"{households_without_persons.sum()} households with no persons")

    return df
def non_mandatory_tour_destination(
        tours,
        persons_merged,
        skim_dict, skim_stack,
        chunk_size,
        trace_hh_id):
    """
    Choose a destination for every non-mandatory tour.

    Tours are the choosers (together with the person making the tour).
    The chosen destinations are written to the 'destination' column of the
    non-mandatory rows and the "tours" pipeline table is replaced.
    Returns None; when there are no non-mandatory tours the step only
    reports no results.
    """

    trace_label = 'non_mandatory_tour_destination'
    model_settings = config.read_model_settings('non_mandatory_tour_destination.yaml')

    tours = tours.to_frame()

    persons_merged = persons_merged.to_frame()

    # choosers are tours - in a sense tours are choosing their destination
    # explicit copy: a column is assigned below, and assigning onto a
    # boolean-filtered selection of `tours` is chained assignment
    non_mandatory_tours = tours[tours.tour_category == 'non_mandatory'].copy()

    # - if no non_mandatory tours
    if non_mandatory_tours.shape[0] == 0:
        tracing.no_results(trace_label)
        return

    # NOTE(review): the full tours table (not just non_mandatory_tours) is handed
    # to run_tour_destination; choices for other rows are dropped by the index
    # aligned assignment below - confirm whether this is intended
    choices = tour_destination.run_tour_destination(
        tours,
        persons_merged,
        model_settings,
        skim_dict,
        skim_stack,
        chunk_size, trace_hh_id, trace_label)

    # assignment aligns on the tour index
    non_mandatory_tours['destination'] = choices

    assign_in_place(tours, non_mandatory_tours[['destination']])

    pipeline.replace_table("tours", tours)

    if trace_hh_id:
        tracing.trace_df(tours[tours.tour_category == 'non_mandatory'],
                         label="non_mandatory_tour_destination",
                         slicer='person_id',
                         index_label='tour',
                         columns=None,
                         warn_if_empty=True)
def non_mandatory_tour_scheduling(tours,
                                  persons_merged,
                                  tdd_alts,
                                  chunk_size,
                                  trace_hh_id):
    """
    Schedule the non-mandatory tours (departure time and duration).

    The scheduling choices are assigned in place onto the tours table, which
    is then written back to the pipeline. Returns None.
    """

    model_name = 'non_mandatory_tour_scheduling'
    trace_label = model_name

    persons_merged = persons_merged.to_frame()
    tours = tours.to_frame()

    is_non_mandatory = tours.tour_category == 'non_mandatory'
    non_mandatory_tours = tours[is_non_mandatory]

    # - nothing to schedule if there are no non_mandatory tours
    if non_mandatory_tours.shape[0] == 0:
        tracing.no_results(model_name)
        return

    # no segmentation column for this model
    tour_segment_col = None

    choices = run_tour_scheduling(
        model_name, non_mandatory_tours, persons_merged, tdd_alts,
        tour_segment_col, chunk_size, trace_hh_id)

    assign_in_place(tours, choices)
    pipeline.replace_table("tours", tours)

    # re-select from the updated tours so tracing sees the new columns
    non_mandatory_tours = tours[tours.tour_category == 'non_mandatory']

    tour_map = tt.tour_map(persons_merged, non_mandatory_tours, tdd_alts)
    tracing.dump_df(DUMP, tour_map, trace_label, 'tour_map')

    if trace_hh_id:
        tracing.trace_df(non_mandatory_tours,
                         label=trace_label,
                         slicer='person_id',
                         index_label='tour_id',
                         columns=None,
                         warn_if_empty=True)
def persons(store, households_sample_size, households, trace_hh_id):
    """
    Read the persons table from the store and register it.

    When households are being sampled (households_sample_size > 0) only the
    persons belonging to the given households are kept.

    Returns
    -------
    pandas.DataFrame
        the (possibly filtered) persons dataframe
    """

    persons_df = store["persons"]

    if households_sample_size > 0:
        # keep all persons in the sampled households
        in_sampled_household = persons_df.household_id.isin(households.index)
        persons_df = persons_df[in_sampled_household]

    logger.info("loaded persons %s" % (persons_df.shape, ))

    # replace table function with dataframe
    orca.add_table('persons', persons_df)

    rn_generator = pipeline.get_rn_generator()
    rn_generator.add_channel(persons_df, 'persons')

    if trace_hh_id:
        tracing.register_traceable_table('persons', persons_df)
        tracing.trace_df(persons_df, "persons", warn_if_empty=True)

    return persons_df
def households(store, households_sample_size, trace_hh_id):
    """
    Read the households table from the store, optionally sample it, and register it.

    - tracing with a sample size of 1: only the traced household is kept
    - sample size smaller than the store: a random sample is drawn, and the
      traced household (if it exists in the store but was not drawn) replaces
      the first sampled household
    - otherwise the full table is used

    Returns
    -------
    pandas.DataFrame
        the selected households
    """

    all_households = store["households"]

    if trace_hh_id and households_sample_size == 1:
        # tracing exclusively: only trace_hh (or empty if not in full store)
        selected = tracing.slice_ids(all_households, trace_hh_id)

    elif households_sample_size > 0 and all_households.shape[0] > households_sample_size:
        # a subset of the full store is wanted

        logger.info("sampling %s of %s households" %
                    (households_sample_size, all_households.shape[0]))

        # take the requested random sample
        selected = asim.random_rows(all_households, households_sample_size)

        # the sample missed trace_hh although it is in the full store
        if trace_hh_id and trace_hh_id not in selected.index \
                and trace_hh_id in all_households.index:
            # swap the first sampled hh for trace_hh
            logger.debug("replacing household %s with %s in household sample" %
                         (selected.index[0], trace_hh_id))
            traced_household = tracing.slice_ids(all_households, trace_hh_id)
            selected = pd.concat([traced_household, selected[1:]])

    else:
        selected = all_households

    logger.info("loaded households %s" % (selected.shape,))

    # replace table function with dataframe
    inject.add_table('households', selected)

    rn_generator = pipeline.get_rn_generator()
    rn_generator.add_channel(selected, 'households')

    if trace_hh_id:
        tracing.register_traceable_table('households', selected)
        tracing.trace_df(selected, "households", warn_if_empty=True)

    return selected
def auto_ownership_simulate(households,
                            households_merged,
                            chunk_size,
                            trace_hh_id):
    """
    Predict how many cars each household owns.

    The choices are stored in the 'auto_ownership' column of the households
    table, which is written back to the pipeline. Returns None.
    """
    trace_label = 'auto_ownership_simulate'
    model_settings = config.read_model_settings('auto_ownership.yaml')

    logger.info("Running %s with %d households", trace_label, len(households_merged))

    model_spec = simulate.read_model_spec(file_name='auto_ownership.csv')
    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    choosers = households_merged.to_frame()
    choices = simulate.simple_simulate(
        choosers=choosers,
        spec=model_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='auto_ownership')

    households_df = households.to_frame()

    # every household was a chooser, so choices need no reindexing
    households_df['auto_ownership'] = choices

    pipeline.replace_table("households", households_df)

    tracing.print_summary('auto_ownership', households_df.auto_ownership, value_counts=True)

    if trace_hh_id:
        tracing.trace_df(households_df,
                         label='auto_ownership',
                         warn_if_empty=True)
def auto_ownership_simulate(households,
                            households_merged,
                            chunk_size,
                            trace_hh_id):
    """
    Auto ownership model: choose the number of cars owned by each household.

    Writes the result to the 'auto_ownership' column of the households table
    and replaces that table in the pipeline. Returns None.
    """
    trace_label = 'auto_ownership_simulate'
    model_settings = config.read_model_settings('auto_ownership.yaml')

    logger.info("Running %s with %d households", trace_label, len(households_merged))

    # gather everything simple_simulate needs
    simulate_kwargs = dict(
        spec=simulate.read_model_spec(file_name='auto_ownership.csv'),
        nest_spec=config.get_logit_model_settings(model_settings),
        locals_d=config.get_model_constants(model_settings),
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='auto_ownership',
    )

    choices = simulate.simple_simulate(
        choosers=households_merged.to_frame(),
        **simulate_kwargs)

    households_df = households.to_frame()

    # no need to reindex as all households were choosers
    households_df['auto_ownership'] = choices

    pipeline.replace_table("households", households_df)

    tracing.print_summary('auto_ownership', households_df.auto_ownership, value_counts=True)

    if not trace_hh_id:
        return

    tracing.trace_df(households_df,
                     label='auto_ownership',
                     warn_if_empty=True)
def create_non_mandatory_tours(trace_hh_id):
    """
    Turn the per-person non-mandatory tour frequency into rows of the "tours" table.

    The non_mandatory_tour_frequency attribute of the persons table (nulls
    dropped) is expanded into one row per tour, which is appended to the
    "tours" pipeline table. Returns None.
    """

    persons = inject.get_table('persons')
    alts = inject.get_injectable('non_mandatory_tour_frequency_alts')

    tour_frequency = persons.non_mandatory_tour_frequency.dropna()
    non_mandatory_tours = process_non_mandatory_tours(tour_frequency, alts)

    tours = pipeline.extend_table("tours", non_mandatory_tours)
    tracing.register_traceable_table('tours', tours)

    rn_generator = pipeline.get_rn_generator()
    rn_generator.add_channel(non_mandatory_tours, 'tours')

    if trace_hh_id:
        tracing.trace_df(non_mandatory_tours,
                         label="non_mandatory_tour_frequency.non_mandatory_tours",
                         warn_if_empty=True)
def extend_tour_counts(persons, tour_counts, alternatives, trace_hh_id, trace_label):
    """
    Extend tour counts by a randomly chosen amount drawn from a probability table.

    A person's counts are only candidates for extension when their total
    number of tours is between 1 and 4, and a given tour type is only extended
    when its count already equals the largest frequency offered for that type
    in alternatives. tour_counts is modified in place and also returned.

    Parameters
    ----------
    persons: pandas dataframe
        (need this for join columns)
    tour_counts: pandas dataframe
        one row per person, one column per tour_type
    alternatives
        only used to find the max possible frequency of each tour type
    trace_hh_id
    trace_label

    Returns
    -------
    extended tour_counts
    """

    assert tour_counts.index.name == persons.index.name

    PROBABILITY_COLUMNS = ['0_tours', '1_tours', '2_tours']
    JOIN_COLUMNS = ['ptype', 'has_mandatory_tour', 'has_joint_tour']
    TOUR_TYPE_COL = 'nonmandatory_tour_type'

    probs_spec = extension_probs()
    persons = persons[JOIN_COLUMNS]

    # only persons starting with 1 - 4 non_mandatory tours can be extended
    is_extendable = tour_counts.sum(axis=1).between(1, 4)
    if not is_extendable.any():
        return tour_counts

    have_trace_targets = trace_hh_id and tracing.has_trace_targets(is_extendable)

    # probs_spec nonmandatory_tour_type column is 1-based, hence start=1
    for i_tour_type, tour_type in enumerate(alternatives.columns, start=1):

        tour_type_trace_label = tracing.extend_trace_label(trace_label, tour_type)

        # - only extend when the count is already the max frequency for this tour type
        max_frequency = alternatives[tour_type].max()
        tour_type_is_maxed = is_extendable & (tour_counts[tour_type] == max_frequency)
        maxed_tour_count_idx = tour_counts.index[tour_type_is_maxed]

        if len(maxed_tour_count_idx) == 0:
            continue

        # - look up the extension probs for this tour_type
        tour_type_probs = probs_spec[probs_spec[TOUR_TYPE_COL] == i_tour_type]
        choosers = pd.merge(
            persons.loc[maxed_tour_count_idx],
            tour_type_probs,
            on=JOIN_COLUMNS,
            how='left')
        choosers = choosers.set_index(maxed_tour_count_idx)
        assert choosers.index.name == tour_counts.index.name

        # - random choice of extension magnitude based on relative probs
        choices, rands = logit.make_choices(
            choosers[PROBABILITY_COLUMNS],
            trace_label=tour_type_trace_label,
            trace_choosers=choosers)

        # - the 0-based chosen alternative is the number of tours to add
        if choices.any():
            tour_counts.loc[choices.index, tour_type] += choices

        if have_trace_targets:
            choices_label = tracing.extend_trace_label(tour_type_trace_label, 'choices')
            rands_label = tracing.extend_trace_label(tour_type_trace_label, 'rands')
            tracing.trace_df(choices, choices_label, columns=[None, 'choice'])
            tracing.trace_df(rands, rands_label, columns=[None, 'rand'])

    return tour_counts
def iterate_location_choice(
        model_settings,
        persons_merged, persons, households,
        skim_dict, skim_stack,
        chunk_size, trace_hh_id, locutor,
        trace_label):
    """
    Run run_location_choice repeatedly, updating shadow prices between iterations,
    until the shadow price calculator reports a fit or its max_iterations is reached.

    Afterwards the chosen destination is stored on the persons table under
    model_settings['DEST_CHOICE_COLUMN_NAME'] (-1 for persons without a choice)
    and the optional persons / households annotations are applied.

    Parameters
    ----------
    model_settings : dict
    persons_merged : injected table
    persons : injected table
    households : injected table
    skim_dict : skim.SkimDict
    skim_stack : skim.SkimStack
    chunk_size : int
    trace_hh_id : int
    locutor : bool
        whether this process is the privileged logger of shadow_pricing when multiprocessing
    trace_label : str

    Returns
    -------
    pandas.DataFrame
        the updated persons dataframe
    """

    # column containing segment id
    segment_col = model_settings['CHOOSER_SEGMENT_COLUMN_NAME']

    # boolean column selecting the persons that need location modeling
    filter_col = model_settings['CHOOSER_FILTER_COLUMN_NAME']

    choosers_df = persons_merged.to_frame()
    choosers_df = choosers_df[persons_merged[filter_col]]

    spc = shadow_pricing.load_shadow_price_calculator(model_settings)
    max_iterations = spc.max_iterations

    logging.debug("%s max_iterations: %s" % (trace_label, max_iterations))

    choices = None
    for iteration in range(1, max_iterations + 1):

        # prices are only updated from the second iteration on
        if spc.use_shadow_pricing and iteration > 1:
            spc.update_shadow_prices()

        iteration_trace_label = tracing.extend_trace_label(trace_label, 'i%s' % iteration)
        choices = run_location_choice(
            choosers_df,
            skim_dict, skim_stack,
            spc,
            model_settings,
            chunk_size, trace_hh_id,
            trace_label=iteration_trace_label)

        # hand the choices (with their segment) to the shadow price calculator
        choices_df = choices.to_frame('dest_choice')
        segment_ids = choosers_df[segment_col].reindex(choices_df.index)
        choices_df['segment_id'] = segment_ids

        spc.set_choices(choices_df)

        if locutor:
            spc.write_trace_files(iteration)

        if spc.use_shadow_pricing and spc.check_fit(iteration):
            logging.info("%s converged after iteration %s" % (trace_label, iteration,))
            break

    # - shadow price table
    if locutor:
        if spc.use_shadow_pricing and 'SHADOW_PRICE_TABLE' in model_settings:
            inject.add_table(model_settings['SHADOW_PRICE_TABLE'], spc.shadow_prices)
        if 'MODELED_SIZE_TABLE' in model_settings:
            inject.add_table(model_settings['MODELED_SIZE_TABLE'], spc.modeled_size)

    dest_choice_column_name = model_settings['DEST_CHOICE_COLUMN_NAME']
    tracing.print_summary(dest_choice_column_name, choices, value_counts=True)

    persons_df = persons.to_frame()

    # only the filtered subset of persons made a choice,
    # so everyone else is backfilled with -1 to code "no location"
    NO_DEST_TAZ = -1
    all_person_choices = choices.reindex(persons_df.index)
    persons_df[dest_choice_column_name] = all_person_choices.fillna(NO_DEST_TAZ).astype(int)

    # - annotate persons table
    if 'annotate_persons' in model_settings:
        expressions.assign_columns(
            df=persons_df,
            model_settings=model_settings.get('annotate_persons'),
            trace_label=tracing.extend_trace_label(trace_label, 'annotate_persons'))

        pipeline.replace_table("persons", persons_df)

        if trace_hh_id:
            tracing.trace_df(persons_df,
                             label=trace_label,
                             warn_if_empty=True)

    # - annotate households table
    if 'annotate_households' in model_settings:

        households_df = households.to_frame()
        expressions.assign_columns(
            df=households_df,
            model_settings=model_settings.get('annotate_households'),
            trace_label=tracing.extend_trace_label(trace_label, 'annotate_households'))
        pipeline.replace_table("households", households_df)

        if trace_hh_id:
            tracing.trace_df(households_df,
                             label=trace_label,
                             warn_if_empty=True)

    return persons_df
def extra_hh_member_choices(persons, cdap_fixed_relative_proportions, locals_d,
                            trace_hh_id, trace_label):
    """
    Choose activities for the 'extra' household members (cdap_rank > MAX_HHSIZE)
    that were not handled by cdap.

    The cdap_fixed_relative_proportions spec is evaluated like a logit utility
    spec, EXCEPT that the computed values are relative proportions: they are
    normalized directly to probabilities, without being exponentiated.

    Parameters
    ----------
    persons : pandas.DataFrame
        Table of persons data; must contain a 'cdap_rank' column
    cdap_fixed_relative_proportions
        spec giving the relative proportions of each activity
    locals_d : Dict
        local variables made available to eval_variables
    trace_hh_id
    trace_label

    Returns
    -------
    choices : pandas.Series
        chosen alternative name for each extra member, indexed like the choosers
        (an empty Series when there are no extra members)
    """

    trace_label = tracing.extend_trace_label(trace_label, 'extra_hh_member_choices')

    # extra household members are those ranked beyond MAX_HHSIZE
    is_extra_member = persons['cdap_rank'] > MAX_HHSIZE
    choosers = persons[is_extra_member]

    if len(choosers.index) == 0:
        return pd.Series()

    # evaluate the spec expressions
    values = simulate.eval_variables(cdap_fixed_relative_proportions.index, choosers, locals_d)

    # the spec yields relative proportions, not utilities
    proportions = values.dot(cdap_fixed_relative_proportions)

    # normalize each row of proportions into probabilities
    row_totals = proportions.sum(axis=1)
    probs = proportions.div(row_totals, axis=0)

    # idx_choices holds, per person, the 0-based position of the chosen probs column
    idx_choices, rands = logit.make_choices(probs, trace_label=trace_label)

    # translate the column position into the activity name
    chosen_names = probs.columns[idx_choices].values
    choices = pd.Series(chosen_names, index=probs.index)

    if trace_hh_id:
        expression_labels = ['expression', 'person']

        tracing.trace_df(proportions, '%s.extra_hh_member_choices_proportions' % trace_label,
                         column_labels=expression_labels)
        tracing.trace_df(probs, '%s.extra_hh_member_choices_probs' % trace_label,
                         column_labels=expression_labels)
        tracing.trace_df(choices, '%s.extra_hh_member_choices_choices' % trace_label,
                         column_labels=expression_labels)
        tracing.trace_df(rands, '%s.extra_hh_member_choices_rands' % trace_label,
                         columns=[None, 'rand'])

    return choices
def trip_mode_choice(
        trips,
        tours_merged,
        skim_dict, skim_stack,
        chunk_size, trace_hh_id):
    """
    Trip mode choice - choose a trip_mode for every trip.

    Trips are simulated separately for each primary purpose, because each
    purpose has its own set of coefficients in the COEFFS file.

    Adds the trip_mode column to the trips table and replaces it in the pipeline.
    """
    trace_label = 'trip_mode_choice'
    model_settings = config.read_model_settings('trip_mode_choice.yaml')

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients_path = config.config_file_path(model_settings['COEFFS'])
    omnibus_coefficients = assign.read_constant_spec(coefficients_path)

    trips_df = trips.to_frame()
    logger.info("Running %s with %d trips", trace_label, trips_df.shape[0])

    tours_merged = tours_merged.to_frame()
    tours_merged = tours_merged[model_settings['TOURS_MERGED_CHOOSER_COLUMNS']]

    nest_spec = config.get_logit_model_settings(model_settings)

    tracing.print_summary('primary_purpose',
                          trips_df.primary_purpose, value_counts=True)

    # - trips_merged: trips joined to the chooser columns of their tour
    trips_merged = pd.merge(
        trips_df,
        tours_merged,
        left_on='tour_id',
        right_index=True,
        how="left")
    assert trips_merged.index.equals(trips.index)

    # the skim time period is derived from the trip departure
    assert ('trip_period' not in trips_merged)
    trips_merged['trip_period'] = skim_time_period_label(trips_merged.depart)

    orig_col = 'origin'
    dest_col = 'destination'

    skims = {
        "odt_skims": skim_stack.wrap(left_key=orig_col, right_key=dest_col,
                                     skim_key='trip_period'),
        "od_skims": skim_dict.wrap('origin', 'destination'),
    }

    constants = config.get_model_constants(model_settings)
    constants.update({
        'ORIGIN': orig_col,
        'DESTINATION': dest_col
    })

    # simple_simulate returns positions into the spec columns; map them to mode names
    mode_by_position = dict(enumerate(model_spec.columns))

    choices_list = []
    for primary_purpose, trips_segment in trips_merged.groupby('primary_purpose'):

        segment_trace_label = tracing.extend_trace_label(trace_label, primary_purpose)

        logger.info("trip_mode_choice tour_type '%s' (%s trips)" %
                    (primary_purpose, len(trips_segment.index), ))

        # the index must be named so tracing knows how to slice
        assert trips_segment.index.name == 'trip_id'

        # coefficients for this purpose, evaluated against the constants
        locals_dict = assign.evaluate_constants(omnibus_coefficients[primary_purpose],
                                                constants=constants)
        locals_dict.update(constants)

        annotate_preprocessors(
            trips_segment, locals_dict, skims,
            model_settings, segment_trace_label)

        locals_dict.update(skims)
        segment_choices = simulate.simple_simulate(
            choosers=trips_segment,
            spec=model_spec,
            nest_spec=nest_spec,
            skims=skims,
            locals_d=locals_dict,
            chunk_size=chunk_size,
            trace_label=segment_trace_label,
            trace_choice_name='trip_mode_choice')

        segment_choices = segment_choices.map(mode_by_position)

        if trace_hh_id:
            # trace the coefficients
            tracing.trace_df(pd.Series(locals_dict),
                             label=tracing.extend_trace_label(segment_trace_label, 'constants'),
                             transpose=False,
                             slicer='NONE')

            # add the choice to the segment so it is traced with the annotations
            trips_segment['trip_mode'] = segment_choices
            tracing.trace_df(trips_segment,
                             label=tracing.extend_trace_label(segment_trace_label, 'trip_mode'),
                             slicer='tour_id',
                             index_label='tour_id',
                             warn_if_empty=True)

        choices_list.append(segment_choices)

        # FIXME - force garbage collection
        force_garbage_collect()

    choices = pd.concat(choices_list)

    trips_df = trips.to_frame()
    trips_df['trip_mode'] = choices

    tracing.print_summary('tour_modes',
                          trips_merged.tour_mode, value_counts=True)

    tracing.print_summary('trip_mode_choice choices',
                          choices, value_counts=True)

    assert not trips_df.trip_mode.isnull().any()

    pipeline.replace_table("trips", trips_df)

    if trace_hh_id:
        tracing.trace_df(trips_df,
                         label=tracing.extend_trace_label(trace_label, 'trip_mode'),
                         slicer='trip_id',
                         index_label='trip_id',
                         warn_if_empty=True)
def household_activity_choices(indiv_utils, interaction_coefficients, hhsize,
                               trace_hh_id=None, trace_label=None):
    """
    Choose an activity pattern for every household of size hhsize.

    The pattern of a household is coded as a string of activity codes,
    e.g. 'MNHH' for a 4 person household
    (Mandatory, NonMandatory, Home, Home).

    Parameters
    ----------
    indiv_utils : pandas.DataFrame
        CDAP utilities for each individual, ignoring interactions;
        includes the columns 'M', 'N', 'H' plus the household id and size columns
    interaction_coefficients : pandas.DataFrame
        Rules and coefficients for generating interaction specs for different household sizes
    hhsize : int
        the household size for which the activity pattern is calculated

    Returns
    -------
    choices : pandas.Series
        the chosen pattern (utils column label) for each row of utils, indexed like utils
        (an empty Series when there are no such households)
    """

    if hhsize > 1:
        choosers = hh_choosers(indiv_utils, hhsize=hhsize)

        spec = build_cdap_spec(interaction_coefficients, hhsize,
                               trace_spec=(trace_hh_id in choosers.index),
                               trace_label=trace_label)

        utils = simulate.eval_utilities(spec, choosers, trace_label=trace_label)
    elif hhsize == 1:
        # a one person household has no interactions,
        # so the household utils are simply the individual utils
        choosers = None

        is_single = indiv_utils[_hh_size_] == 1
        utils = indiv_utils.loc[is_single, [_hh_id_, 'M', 'N', 'H']]

        # index on household_id, not person_id
        set_hh_index(utils)
    else:
        # mirrors the original's else branch for any other hhsize
        choosers = hh_choosers(indiv_utils, hhsize=hhsize)

        spec = build_cdap_spec(interaction_coefficients, hhsize,
                               trace_spec=(trace_hh_id in choosers.index),
                               trace_label=trace_label)

        utils = simulate.eval_utilities(spec, choosers, trace_label=trace_label)

    if len(utils.index) == 0:
        return pd.Series()

    probs = logit.utils_to_probs(utils, trace_label=trace_label)

    # idx_choices holds, per household, the 0-based position of the chosen probs column
    idx_choices, rands = logit.make_choices(probs, trace_label=trace_label)

    # translate the column position into the alternative name (utils column label)
    chosen_patterns = utils.columns[idx_choices].values
    choices = pd.Series(chosen_patterns, index=utils.index)

    if trace_hh_id:
        household_labels = ['expression', 'household']

        if hhsize > 1:
            tracing.trace_df(choosers, '%s.hhsize%d_choosers' % (trace_label, hhsize),
                             column_labels=['expression', 'person'])

        tracing.trace_df(utils, '%s.hhsize%d_utils' % (trace_label, hhsize),
                         column_labels=household_labels)
        tracing.trace_df(probs, '%s.hhsize%d_probs' % (trace_label, hhsize),
                         column_labels=household_labels)
        tracing.trace_df(choices, '%s.hhsize%d_activity_choices' % (trace_label, hhsize),
                         column_labels=household_labels)
        tracing.trace_df(rands, '%s.hhsize%d_rands' % (trace_label, hhsize),
                         columns=[None, 'rand'])

    return choices
def assign_cdap_rank(persons, trace_hh_id=None, trace_label=None):
    """
    Assign a 1-based integer cdap_rank to every household member.

    Modifies the persons df in place (adds / overwrites the 'cdap_rank' column).

    Persons are first tagged with a transient category - selected worker,
    selected child, randomly chosen backfill, or unassigned - and are then
    numbered within each household in order of (category, age).
    Every person gets a rank, including 'extra' members beyond MAX_HHSIZE,
    so the pair _hh_id_, cdap_rank identifies each household member.

    Parameters
    ----------
    persons : pandas.DataFrame
        Table of persons data; the columns _hh_id_, _ptype_ and _age_ are read

    Returns
    -------
    cdap_rank : pandas.Series
        integer cdap_rank of every person, indexed like persons
    """

    # transient categories, replaced by the final rank at the end
    RANK_WORKER = 1
    RANK_CHILD = 2
    RANK_BACKFILL = 3
    RANK_UNASSIGNED = 9

    persons['cdap_rank'] = RANK_UNASSIGNED

    # up to 2 workers per household, lowest ptype first
    # (presumably lower ptype means full time - verify against WORKER_PTYPES)
    is_worker = persons[_ptype_].isin(WORKER_PTYPES)
    worker_candidates = persons.loc[is_worker, [_hh_id_, _ptype_]]
    worker_candidates = worker_candidates.sort_values(by=[_hh_id_, _ptype_],
                                                      ascending=[True, True])
    workers = worker_candidates.groupby(_hh_id_).head(2)

    # tag the selected workers
    persons.loc[workers.index, 'cdap_rank'] = RANK_WORKER
    del worker_candidates
    del workers

    # up to 3 children per household, lowest ptype first
    # NOTE(review): the intent is described as "preferring youngest", but the
    # sort key is ptype and _age_ is selected without being used - confirm
    is_child = persons[_ptype_].isin(CHILD_PTYPES)
    child_candidates = persons.loc[is_child, [_hh_id_, _ptype_, _age_]]
    child_candidates = child_candidates.sort_values(by=[_hh_id_, _ptype_],
                                                    ascending=[True, True])
    children = child_candidates.groupby(_hh_id_).head(3)

    # tag the selected children
    persons.loc[children.index, 'cdap_rank'] = RANK_CHILD
    del child_candidates
    del children

    # pick up to MAX_HHSIZE persons per household in random order
    others = persons[[_hh_id_, 'cdap_rank']].copy()
    others['random_order'] = pipeline.get_rn_generator().random_for_df(persons)
    others = others.sort_values(by=[_hh_id_, 'random_order'], ascending=[True, True])
    others = others.groupby(_hh_id_).head(MAX_HHSIZE)

    # of those picked, the still unassigned persons become backfill
    backfill_index = others[others.cdap_rank == RANK_UNASSIGNED].index
    persons.loc[backfill_index, 'cdap_rank'] = RANK_BACKFILL
    del others

    # convert the category into a position within the household.
    # groupby rank() is slow, so the positions are generated by hand;
    # only the needed columns are sorted (persons is big, and sort moves data)
    sort_cols = [_hh_id_, 'cdap_rank', _age_]
    p = persons[sort_cols].sort_values(by=sort_cols, ascending=[True, True, True])

    # p is ordered by household, so numbering 1..size per household lines up row for row
    household_sizes = p.groupby(_hh_id_).size()
    rank = [position
            for size in household_sizes
            for position in range(1, size + 1)]
    p['cdap_rank'] = rank

    # assignment aligns on index values
    persons['cdap_rank'] = p['cdap_rank']

    if trace_hh_id:
        tracing.trace_df(persons, '%s.cdap_rank' % trace_label)

    return persons['cdap_rank']
def cdap_simulate(persons_merged, persons, households,
                  cdap_indiv_spec,
                  cdap_interaction_coefficients,
                  cdap_fixed_relative_proportions,
                  chunk_size, trace_hh_id):
    """
    CDAP (Coordinated Daily Activity Pattern) model step.

    Chooses a high-level activity pattern for each person, coordinated with
    the other members of the person's household, by delegating to cdap.run_cdap.
    The resulting cdap_activity and cdap_rank are stored on the persons table;
    the persons and households tables are then annotated and replaced in the pipeline.
    """

    trace_label = 'cdap'
    model_settings = config.read_model_settings('cdap.yaml')

    persons_merged = persons_merged.to_frame()

    constants = config.get_model_constants(model_settings)

    cdap_interaction_coefficients = \
        cdap.preprocess_interaction_coefficients(cdap_interaction_coefficients)

    # specs are normally built on demand and cached;
    # building them up front lets us write them to the output directory
    # (also when multiprocessing locutor might not see all household sizes)
    logger.info("Pre-building cdap specs")
    household_sizes = range(2, cdap.MAX_HHSIZE + 1)
    for hhsize in household_sizes:
        spec = cdap.build_cdap_spec(cdap_interaction_coefficients, hhsize, cache=True)
        if inject.get_injectable('locutor', False):
            spec_file_name = 'cdap_spec_%s.csv' % hhsize
            spec.to_csv(config.output_file_path(spec_file_name), index=True)

    logger.info("Running cdap_simulate with %d persons", len(persons_merged.index))

    choices = cdap.run_cdap(
        persons=persons_merged,
        cdap_indiv_spec=cdap_indiv_spec,
        cdap_interaction_coefficients=cdap_interaction_coefficients,
        cdap_fixed_relative_proportions=cdap_fixed_relative_proportions,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_hh_id=trace_hh_id,
        trace_label=trace_label)

    # - store the results on the persons table and annotate it
    persons = persons.to_frame()

    choices = choices.reindex(persons.index)
    for result_column in ('cdap_activity', 'cdap_rank'):
        persons[result_column] = choices[result_column]

    expressions.assign_columns(
        df=persons,
        model_settings=model_settings.get('annotate_persons'),
        trace_label=tracing.extend_trace_label(trace_label, 'annotate_persons'))

    pipeline.replace_table("persons", persons)

    # - annotate households table
    households = households.to_frame()
    expressions.assign_columns(
        df=households,
        model_settings=model_settings.get('annotate_households'),
        trace_label=tracing.extend_trace_label(trace_label, 'annotate_households'))
    pipeline.replace_table("households", households)

    tracing.print_summary('cdap_activity', persons.cdap_activity, value_counts=True)

    activity_by_ptype = pd.crosstab(persons.ptype, persons.cdap_activity, margins=True)
    logger.info("cdap crosstabs:\n%s" % activity_by_ptype)

    if trace_hh_id:
        traced_persons = inject.get_table('persons_merged').to_frame()
        tracing.trace_df(traced_persons,
                         label="cdap",
                         columns=['ptype', 'cdap_rank', 'cdap_activity'],
                         warn_if_empty=True)
def best_transit_path(set_random_seed,
                      network_los,
                      best_transit_path_spec):
    """
    Exercise best-transit-path selection on a random sample of MAZ pairs.

    Draws VECTOR_TEST_SIZE origin and destination MAZs (with replacement) and
    a random 'AM'/'PM' time of day for each pair, explodes every pair into its
    candidate tap pairs, evaluates ``best_transit_path_spec`` on the exploded
    table and keeps, for each OD pair, the row with the highest utility.  The
    first sampled OD pair is always traced.

    ``set_random_seed`` is not referenced in the body.
    """

    model_settings = config.read_model_settings('best_transit_path.yaml')

    logger.info("best_transit_path VECTOR_TEST_SIZE %s", VECTOR_TEST_SIZE)

    maz_df = network_los.maz_df
    od_df = pd.DataFrame({
        'omaz': maz_df.sample(VECTOR_TEST_SIZE, replace=True).index,
        'dmaz': maz_df.sample(VECTOR_TEST_SIZE, replace=True).index,
        'tod': np.random.choice(['AM', 'PM'], VECTOR_TEST_SIZE),
    })

    # always trace the first sampled OD pair
    trace_od = (od_df.omaz[0], od_df.dmaz[0])
    logger.info("trace_od omaz %s dmaz %s" % trace_od)

    # build exploded atap_btap_df
    # FIXME - pathological knowledge about mode - should be parameterized
    # filter out rows with no drive time omaz-btap or no walk time from dmaz-atap
    tap_pairs = network_los.get_tappairs_mazpairs(od_df.omaz, od_df.dmaz,
                                                  ofilter='drive_time',
                                                  dfilter='walk_alightingActual')

    # attach each OD pair's time of day to its exploded rows
    tap_pairs = tap_pairs.merge(right=od_df[['tod']],
                                left_on='idx',
                                right_index=True,
                                how='left')

    od_count = len(od_df.index)
    logger.info("len od_df %s", od_count)
    logger.info("len atap_btap_df %s", len(tap_pairs.index))
    logger.info("avg explosion %s", (len(tap_pairs.index) / (1.0 * od_count)))

    trace_rows = None
    if trace_od:
        trace_orig, trace_dest = trace_od
        trace_rows = (tap_pairs.omaz == trace_orig) & (tap_pairs.dmaz == trace_dest)

    constants = config.get_model_constants(model_settings)
    locals_d = {'np': np, 'network_los': network_los}
    if constants is not None:
        locals_d.update(constants)

    results, trace_results, trace_assigned_locals = \
        assign.assign_variables(best_transit_path_spec, tap_pairs, locals_d,
                                trace_rows=trace_rows)

    # copy the assigned columns onto the exploded table
    for result_column in results.columns:
        tap_pairs[result_column] = results[result_column]

    # drop rows if no utility
    rows_before = len(tap_pairs.index)
    tap_pairs = tap_pairs.dropna(subset=['utility'])
    logger.info("Dropped %s of %s rows with null utility",
                rows_before - len(tap_pairs.index), rows_before)

    # choose max utility: after an ascending sort the last row of each group wins
    tap_pairs = tap_pairs.sort_values(by='utility').groupby('idx').tail(1)

    if not trace_od:
        return

    if not trace_rows.any():
        logger.warning("trace_od not found origin = %s, dest = %s", trace_orig, trace_dest)
        return

    tracing.trace_df(tap_pairs,
                     label='best_transit_path',
                     slicer='NONE',
                     transpose=False)

    tracing.trace_df(trace_results,
                     label='trace_best_transit_path',
                     slicer='NONE',
                     transpose=False)

    if trace_assigned_locals:
        tracing.write_csv(trace_assigned_locals, file_name="trace_best_transit_path_locals")
def atwork_subtour_scheduling(
        tours,
        persons_merged,
        tdd_alts,
        skim_dict,
        chunk_size,
        trace_hh_id):
    """
    Predict the departure time and duration of each at-work subtour.

    Only tours whose ``tour_category`` is 'atwork' are scheduled.  The chosen
    tdd values are assigned back into the full tours table, which then
    replaces the "tours" pipeline table.  When there are no at-work subtours
    the step reports no results and returns without touching the pipeline.
    """

    trace_label = 'atwork_subtour_scheduling'
    model_settings = config.read_model_settings('tour_scheduling_atwork.yaml')
    model_spec = simulate.read_model_spec(file_name='tour_scheduling_atwork.csv')

    persons_merged = persons_merged.to_frame()

    tours = tours.to_frame()
    is_atwork = tours.tour_category == 'atwork'
    subtours = tours[is_atwork]

    # - if no atwork subtours
    if subtours.shape[0] == 0:
        tracing.no_results(trace_label)
        return

    logger.info("Running %s with %d tours", trace_label, len(subtours))

    # preprocessor
    constants = config.get_model_constants(model_settings)
    skims = {
        "od_skims": skim_dict.wrap('origin', 'destination'),
        "do_skims": skim_dict.wrap('destination', 'origin'),
    }
    annotate_preprocessors(
        subtours, constants, skims,
        model_settings, trace_label)

    # parent_tours table with columns ['tour_id', 'tdd'] index = tour_id
    parent_ids = subtours.parent_tour_id.astype(int).unique()
    parent_tours = pd.DataFrame({'tour_id': parent_ids}, index=parent_ids)
    parent_tours = parent_tours.merge(tours[['tdd']], left_index=True, right_index=True)

    tdd_choices = vectorize_subtour_scheduling(
        parent_tours,
        subtours,
        persons_merged,
        tdd_alts, model_spec,
        model_settings,
        chunk_size=chunk_size,
        trace_label=trace_label)

    assign_in_place(tours, tdd_choices)
    pipeline.replace_table("tours", tours)

    if trace_hh_id:
        tracing.trace_df(tours[tours.tour_category == 'atwork'],
                         label="atwork_subtour_scheduling",
                         slicer='person_id',
                         index_label='tour_id',
                         columns=None)

    if DUMP:
        # re-slice from the updated tours table so the dumps show the new tdd values
        subtours = tours[tours.tour_category == 'atwork']
        parent_tours = tours[tours.index.isin(subtours.parent_tour_id)]

        tracing.dump_df(DUMP, subtours, trace_label, 'sub_tours')
        tracing.dump_df(DUMP, parent_tours, trace_label, 'parent_tours')

        # give each parent tour its own id as parent_tour_id before stacking
        # parents and subtours for the tour map
        parent_tours['parent_tour_id'] = parent_tours.index
        subtours = pd.concat([parent_tours, subtours])
        tour_map = tt.tour_map(parent_tours, subtours, tdd_alts,
                               persons_id_col='parent_tour_id')
        tracing.dump_df(DUMP, tour_map, trace_label, 'tour_map')
def compute_accessibility(accessibility, skim_dict, land_use, trace_od):
    """
    Compute accessibility for each zone in land use file using expressions from accessibility_spec

    The actual results depend on the expressions in accessibility_spec, but this is initially
    intended to permit implementation of the mtc accessibility calculation as implemented by
    Accessibility.job

    Compute measures of accessibility used by the automobile ownership model.
    The accessibility measure first multiplies an employment variable by a mode-specific decay
    function.  The product reflects the difficulty of accessing the activities the farther
    (in terms of round-trip travel time) the jobs are from the location in question. The products
    to each destination zone are next summed over each origin zone, and the logarithm of the
    product mutes large differences.  The decay function on the walk accessibility measure is
    steeper than automobile or transit.  The minimum accessibility is zero.

    Implementation notes
    --------------------
    An origin-major OD table is built with one row per (orig, dest) pair, where
    origins are the index of the accessibility table and destinations are the
    index of the land use table.  The assignment spec ('accessibility.csv') is
    evaluated on that table; each result column is then reshaped to
    (orig zones, dest zones), summed over destinations and stored on the
    accessibility table as ``log(sum + 1)``.  The "accessibility" pipeline
    table is replaced with the result.

    Parameters
    ----------
    accessibility : table wrapper exposing ``to_frame()``, one row per origin zone
    skim_dict : skim dictionary handed to AccessibilitySkims
    land_use : table wrapper exposing ``to_frame()``, one row per destination zone
    trace_od : (orig, dest) pair to trace, or a falsy value to disable tracing
    """

    trace_label = 'compute_accessibility'
    model_settings = config.read_model_settings('accessibility.yaml')
    assignment_spec = assign.read_assignment_spec(config.config_file_path('accessibility.csv'))

    accessibility_df = accessibility.to_frame()

    # accessibility_df rows are the ORIGIN zones (this message used to call them dest zones)
    logger.info("Running %s with %d orig zones" % (trace_label, len(accessibility_df)))

    constants = config.get_model_constants(model_settings)
    land_use_columns = model_settings.get('land_use_columns', [])

    land_use_df = land_use.to_frame()

    orig_zones = accessibility_df.index.values
    dest_zones = land_use_df.index.values

    orig_zone_count = len(orig_zones)
    dest_zone_count = len(dest_zones)

    logger.info("Running %s with %d dest zones %d orig zones" %
                (trace_label, dest_zone_count, orig_zone_count))

    # create OD dataframe (origin-major: all dests for the first orig, then the next orig, ...)
    od_df = pd.DataFrame(
        data={
            'orig': np.repeat(np.asanyarray(accessibility_df.index), dest_zone_count),
            'dest': np.tile(np.asanyarray(land_use_df.index), orig_zone_count)
        }
    )

    if trace_od:
        trace_orig, trace_dest = trace_od
        trace_od_rows = (od_df.orig == trace_orig) & (od_df.dest == trace_dest)
    else:
        trace_od_rows = None

    # merge land_use_columns into od_df
    # (sort_index restores the origin-major row order that the reshape below relies on)
    land_use_df = land_use_df[land_use_columns]
    od_df = pd.merge(od_df, land_use_df, left_on='dest', right_index=True).sort_index()

    locals_d = {
        'log': np.log,
        'exp': np.exp,
        'skim_od': AccessibilitySkims(skim_dict, orig_zones, dest_zones),
        'skim_do': AccessibilitySkims(skim_dict, orig_zones, dest_zones, transpose=True)
    }
    if constants is not None:
        locals_d.update(constants)

    results, trace_results, trace_assigned_locals \
        = assign.assign_variables(assignment_spec, od_df, locals_d, trace_rows=trace_od_rows)

    for column in results.columns:
        # reshape() rather than assigning to .shape: it never mutates the array
        # backing the results column and also works for non-contiguous data
        data = np.asanyarray(results[column]).reshape(orig_zone_count, dest_zone_count)
        accessibility_df[column] = np.log(np.sum(data, axis=1) + 1)

    # - write table to pipeline
    pipeline.replace_table("accessibility", accessibility_df)

    if trace_od:

        if not trace_od_rows.any():
            logger.warning("trace_od not found origin = %s, dest = %s" % (trace_orig, trace_dest))
        else:

            # add OD columns to trace results
            df = pd.concat([od_df[trace_od_rows], trace_results], axis=1)

            # dump the trace results table (with _temp variables) to aid debugging
            tracing.trace_df(df,
                             label='accessibility',
                             index_label='skim_offset',
                             slicer='NONE',
                             warn_if_empty=True)

            if trace_assigned_locals:
                tracing.write_csv(trace_assigned_locals, file_name="accessibility_locals")
def households(households_sample_size, override_hh_ids, trace_hh_id):
    """
    Load the households input table, optionally slice it, and register it.

    The table is reduced in one of three mutually exclusive ways (first match
    wins):

    - ``override_hh_ids`` given: keep only those households (raises if none
      of them are present)
    - tracing a household with ``households_sample_size == 1``: keep only the
      traced household
    - ``households_sample_size`` positive and smaller than the table: draw a
      repeatable random sample, forcing the traced household into it when it
      exists in the full table

    Otherwise the full table is used.  A ``chunk_id`` column is added, the
    'households_sliced' injectable is set, and the frame is registered as the
    'households' table and as a random-number channel.

    Returns
    -------
    pandas.DataFrame indexed by 'household_id'
    """

    df_full = read_input_table("households")
    full_count = df_full.shape[0]
    households_sliced = False

    logger.info("full household list contains %s households" % full_count)

    if override_hh_ids is not None:
        # only using households listed in override_hh_ids
        # (trace_hh_id will not be used if it is not in the list of override_hh_ids)
        override_count = len(override_hh_ids)
        logger.info("override household list containing %s households" % override_count)

        df = df_full[df_full.index.isin(override_hh_ids)]
        households_sliced = True

        found_count = df.shape[0]
        if found_count < override_count:
            logger.info("found %s of %s households in override household list" %
                        (found_count, override_count))

        if found_count == 0:
            raise RuntimeError('No override households found in store')

    elif trace_hh_id and households_sample_size == 1:
        # tracing one household exclusively:
        # df contains only trace_hh (or is empty if it is not in the full store)
        df = tracing.slice_ids(df_full, trace_hh_id)
        households_sliced = True

    elif households_sample_size > 0 and full_count > households_sample_size:
        # we need a subset of the full store
        logger.info("sampling %s of %s households" % (households_sample_size, full_count))

        # Because random seed is set differently for each step, sampling of households using
        # Random.global_rng would sample differently depending upon which step it was called from.
        # We use a one-off rng seeded with the pseudo step name 'sample_households' to provide
        # repeatable sampling no matter when the table is loaded.
        #
        # Note that the external_rng is also seeded with base_seed so the sample will (rightly)
        # change if the pipeline rng's base_seed is changed
        prng = pipeline.get_rn_generator().get_external_rng('sample_households')
        sampled_positions = prng.choice(len(df_full), size=households_sample_size, replace=False)
        df = df_full.take(sampled_positions)
        households_sliced = True

        # if tracing and we missed trace_hh in sample, but it is in full store
        if trace_hh_id and trace_hh_id not in df.index and trace_hh_id in df_full.index:
            # replace first hh in sample with trace_hh
            logger.debug("replacing household %s with %s in household sample" %
                         (df.index[0], trace_hh_id))
            trace_hh_row = df_full.loc[[trace_hh_id]]
            df = pd.concat([trace_hh_row, df[1:]])

    else:
        df = df_full

    # downstream table loaders read this flag
    inject.add_injectable('households_sliced', households_sliced)

    logger.info("loaded households %s" % (df.shape,))

    df.index.name = 'household_id'

    # FIXME - pathological knowledge of name of chunk_id column used by chunked_choosers_by_chunk_id
    assert 'chunk_id' not in df.columns
    chunk_ids = list(range(len(df)))
    df['chunk_id'] = pd.Series(chunk_ids, df.index)

    # replace table function with dataframe
    inject.add_table('households', df)

    pipeline.get_rn_generator().add_channel('households', df)

    if trace_hh_id:
        tracing.register_traceable_table('households', df)
        tracing.trace_df(df, "raw.households", warn_if_empty=True)

    return df
def tour_mode_choice_simulate(tours, persons_merged,
                              skim_dict, skim_stack,
                              chunk_size,
                              trace_hh_id):
    """
    Choose a tour mode for every tour in the tours table.

    The tours (none of which may be an 'atwork' subtour) are merged with
    persons_merged and simulated one ``tour_type`` segment at a time.  The
    resulting choices are written to the ``tour_mode`` column and the "tours"
    pipeline table is replaced.
    """

    trace_label = 'tour_mode_choice'
    model_settings = config.read_model_settings('tour_mode_choice.yaml')

    spec = tour_mode_choice_spec(model_settings)

    primary_tours = tours.to_frame()

    # at-work subtours are not expected in the tours table at this point
    assert not (primary_tours.tour_category == 'atwork').any()

    persons_merged = persons_merged.to_frame()

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    logger.info("Running %s with %d tours" % (trace_label, primary_tours.shape[0]))

    tracing.print_summary('tour_types',
                          primary_tours.tour_type, value_counts=True)

    primary_tours_merged = pd.merge(primary_tours, persons_merged, left_on='person_id',
                                    right_index=True, how='left', suffixes=('', '_r'))

    # setup skim keys
    orig_col_name = 'TAZ'
    dest_col_name = 'destination'

    skims = {
        "odt_skims": skim_stack.wrap(left_key=orig_col_name, right_key=dest_col_name,
                                     skim_key='out_period'),
        "dot_skims": skim_stack.wrap(left_key=dest_col_name, right_key=orig_col_name,
                                     skim_key='in_period'),
        "od_skims": skim_dict.wrap(orig_col_name, dest_col_name),
        'orig_col_name': orig_col_name,
        'dest_col_name': dest_col_name,
        'out_time_col_name': 'start',
        'in_time_col_name': 'end'
    }

    segment_choices = []
    for tour_type, segment in primary_tours_merged.groupby('tour_type'):

        logger.info("tour_mode_choice_simulate tour_type '%s' (%s tours)" %
                    (tour_type, len(segment.index), ))

        # name index so tracing knows how to slice
        assert segment.index.name == 'tour_id'

        type_choices = run_tour_mode_choice_simulate(
            segment,
            spec, tour_type, model_settings,
            skims=skims,
            constants=constants,
            nest_spec=nest_spec,
            chunk_size=chunk_size,
            trace_label=tracing.extend_trace_label(trace_label, tour_type),
            trace_choice_name='tour_mode_choice')

        tracing.print_summary('tour_mode_choice_simulate %s choices' % tour_type,
                              type_choices, value_counts=True)

        segment_choices.append(type_choices)

        # FIXME - force garbage collection
        force_garbage_collect()

    choices = pd.concat(segment_choices)

    tracing.print_summary('tour_mode_choice_simulate all tour type choices',
                          choices, value_counts=True)

    # so we can trace with annotations
    primary_tours['tour_mode'] = choices

    # but only keep mode choice col
    all_tours = tours.to_frame()
    # uncomment to save annotations to table
    # assign_in_place(all_tours, annotations)
    assign_in_place(all_tours, choices.to_frame('tour_mode'))

    pipeline.replace_table("tours", all_tours)

    if trace_hh_id:
        tracing.trace_df(primary_tours,
                         label=tracing.extend_trace_label(trace_label, 'tour_mode'),
                         slicer='tour_id',
                         index_label='tour_id',
                         warn_if_empty=True)
def atwork_subtour_mode_choice(
        tours,
        persons_merged,
        skim_dict, skim_stack,
        chunk_size,
        trace_hh_id):
    """
    Choose a tour mode for every at-work subtour.

    Only tours whose ``tour_category`` is 'atwork' are simulated, using the
    'atwork' tour purpose and ``workplace_taz`` as the origin column.  The
    choices are assigned to ``tour_mode`` in the full tours table, which then
    replaces the "tours" pipeline table.  When there are no at-work subtours
    the step reports no results and returns early.
    """

    trace_label = 'atwork_subtour_mode_choice'

    model_settings = config.read_model_settings('tour_mode_choice.yaml')

    spec = tour_mode_choice_spec(model_settings)

    tours = tours.to_frame()
    atwork_mask = tours.tour_category == 'atwork'
    subtours = tours[atwork_mask]

    # - if no atwork subtours
    if subtours.shape[0] == 0:
        tracing.no_results(trace_label)
        return

    subtours_merged = pd.merge(subtours, persons_merged.to_frame(),
                               left_on='person_id', right_index=True, how='left')

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    logger.info("Running %s with %d subtours" % (trace_label, subtours_merged.shape[0]))

    tracing.print_summary('%s tour_type' % trace_label,
                          subtours_merged.tour_type, value_counts=True)

    # setup skim keys
    orig_col_name = 'workplace_taz'
    dest_col_name = 'destination'

    skims = {
        "odt_skims": skim_stack.wrap(left_key=orig_col_name, right_key=dest_col_name,
                                     skim_key='out_period'),
        "dot_skims": skim_stack.wrap(left_key=dest_col_name, right_key=orig_col_name,
                                     skim_key='in_period'),
        "od_skims": skim_dict.wrap(orig_col_name, dest_col_name),
        'orig_col_name': orig_col_name,
        'dest_col_name': dest_col_name,
        'out_time_col_name': 'start',
        'in_time_col_name': 'end'
    }

    choices = run_tour_mode_choice_simulate(
        subtours_merged,
        spec, tour_purpose='atwork', model_settings=model_settings,
        skims=skims,
        constants=constants,
        nest_spec=nest_spec,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='tour_mode_choice')

    tracing.print_summary('%s choices' % trace_label, choices, value_counts=True)

    assign_in_place(tours, choices.to_frame('tour_mode'))
    pipeline.replace_table("tours", tours)

    if trace_hh_id:
        tracing.trace_df(tours[tours.tour_category == 'atwork'],
                         label=tracing.extend_trace_label(trace_label, 'tour_mode'),
                         slicer='tour_id',
                         index_label='tour_id')

    force_garbage_collect()
def non_mandatory_tour_frequency(persons, persons_merged,
                                 chunk_size,
                                 trace_hh_id):
    """
    This model predicts the frequency of making non-mandatory trips
    (alternatives for this model come from a separate csv file which is
    configured by the user) - these trips include escort, shopping, othmaint,
    othdiscr, eatout, and social trips in various combination.

    Only persons whose ``cdap_activity`` is 'M' or 'N' are simulated (segmented
    by ``ptype``); everyone else is backfilled with the alternative that has
    no tours.  The step

    - adds a ``non_mandatory_tour_frequency`` column to the persons table,
      applies the 'annotate_persons' settings and replaces the "persons"
      pipeline table
    - builds one row per generated tour and extends the "tours" pipeline table

    Raises
    ------
    RuntimeError
        if the alternatives file has no alternative with zero tours, since
        that alternative is needed to backfill persons who were not simulated
    """

    trace_label = 'non_mandatory_tour_frequency'
    model_settings = config.read_model_settings('non_mandatory_tour_frequency.yaml')
    model_spec = simulate.read_model_spec(file_name='non_mandatory_tour_frequency.csv')

    alternatives = simulate.read_model_alts(
        config.config_file_path('non_mandatory_tour_frequency_alternatives.csv'),
        set_index=None)

    choosers = persons_merged.to_frame()

    # FIXME kind of tacky both that we know to add this here and del it below
    # 'tot_tours' is used in model_spec expressions
    alternatives['tot_tours'] = alternatives.sum(axis=1)

    # - preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_dict = {
            'person_max_window': person_max_window
        }

        expressions.assign_columns(
            df=choosers,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)

    # filter based on results of CDAP
    choosers = choosers[choosers.cdap_activity.isin(['M', 'N'])]

    logger.info("Running non_mandatory_tour_frequency with %d persons", len(choosers))

    constants = config.get_model_constants(model_settings)

    choices_list = []
    # segment by person type and pick the right spec for each person type
    for ptype, segment in choosers.groupby('ptype'):

        name = PTYPE_NAME[ptype]

        # pick the spec column for the segment
        spec = model_spec[[name]]

        # drop any zero-valued rows
        spec = spec[spec[name] != 0]

        logger.info("Running segment '%s' of size %d", name, len(segment))

        choices = interaction_simulate(
            segment,
            alternatives,
            spec=spec,
            locals_d=constants,
            chunk_size=chunk_size,
            trace_label='non_mandatory_tour_frequency.%s' % name,
            trace_choice_name='non_mandatory_tour_frequency')

        choices_list.append(choices)

        # FIXME - force garbage collection?
        # force_garbage_collect()

    choices = pd.concat(choices_list)

    del alternatives['tot_tours']  # del tot_tours column we added above

    # - add non_mandatory_tour_frequency column to persons
    persons = persons.to_frame()

    # need to reindex as we only handled persons with cdap_activity in ['M', 'N']
    # (we expect there to be an alt with no tours - which we can use to backfill non-travelers)
    # Select the alt whose tour counts really sum to zero, instead of assuming
    # that it happens to be the first row of the alternatives file.
    alt_tour_totals = alternatives.sum(axis=1)
    no_tours_alts = alt_tour_totals.index[alt_tour_totals == 0]
    if len(no_tours_alts) == 0:
        raise RuntimeError("%s: no alternative with zero tours found in alternatives file"
                           % trace_label)
    no_tours_alt = no_tours_alts[0]

    persons['non_mandatory_tour_frequency'] = \
        choices.reindex(persons.index).fillna(no_tours_alt).astype(np.int8)

    # We have now generated non-mandatory tours, but they are attributes of the person table
    # Now we create a "tours" table which has one row per tour that has been generated
    # (and the person id it is associated with)

    # - get counts of each of the alternatives (so we can extend)
    # (choices is just the index values for the chosen alts)
    #
    #                escort  shopping  othmaint  othdiscr    eatout    social
    # parent_id
    # 2588676             2         0         0         1         1         0
    # 2588677             0         1         0         1         0         0
    tour_counts = alternatives.loc[choices]
    tour_counts.index = choices.index  # assign person ids to the index

    prev_tour_count = tour_counts.sum().sum()

    # - extend_tour_counts
    tour_counts = extend_tour_counts(choosers, tour_counts, alternatives,
                                     trace_hh_id,
                                     tracing.extend_trace_label(trace_label, 'extend_tour_counts'))

    extended_tour_count = tour_counts.sum().sum()

    # use the module logger (not the root logger) like every other message in this step
    logger.info("extend_tour_counts increased nmtf tour count by %s from %s to %s" %
                (extended_tour_count - prev_tour_count, prev_tour_count, extended_tour_count))

    # - create the non_mandatory tours
    non_mandatory_tours = process_non_mandatory_tours(persons, tour_counts)
    assert len(non_mandatory_tours) == extended_tour_count

    pipeline.extend_table("tours", non_mandatory_tours)

    tracing.register_traceable_table('tours', non_mandatory_tours)
    pipeline.get_rn_generator().add_channel('tours', non_mandatory_tours)

    expressions.assign_columns(
        df=persons,
        model_settings=model_settings.get('annotate_persons'),
        trace_label=trace_label)

    pipeline.replace_table("persons", persons)

    tracing.print_summary('non_mandatory_tour_frequency',
                          persons.non_mandatory_tour_frequency, value_counts=True)

    if trace_hh_id:
        tracing.trace_df(non_mandatory_tours,
                         label="non_mandatory_tour_frequency.non_mandatory_tours",
                         warn_if_empty=True)

        tracing.trace_df(choosers,
                         label="non_mandatory_tour_frequency.choosers",
                         warn_if_empty=True)

        tracing.trace_df(persons,
                         label="non_mandatory_tour_frequency.annotated_persons",
                         warn_if_empty=True)
def build_cdap_spec(interaction_coefficients, hhsize,
                    trace_spec=False, trace_label=None, cache=True):
    """
    Build a spec file for computing utilities of alternative household member interaction patterns
    for households of specified size.

    We generate this spec automatically from a table of rules and coefficients because the
    interaction rules are fairly simple and can be expressed compactly whereas
    there is a lot of redundancy between the spec files for different household sizes, as well as
    in the vectorized expression of the interaction alternatives within the spec file itself

    interaction_coefficients has five columns:
        activity
            A single character activity type name (M, N, or H)
        interaction_ptypes
            List of ptypes in the interaction (in order of increasing ptype) or empty for wildcards
            (meaning that the interaction applies to all ptypes in that size hh)
        cardinality
            the number of persons in the interaction (e.g. 3 for a 3-way interaction)
        slug
            a human friendly efficient name so we can dump a readable spec trace file for debugging
            this slug is replaced with the numerical coefficient value after we dump the trace file
        coefficient
            The coefficient to apply for all hh interactions for this activity and set of ptypes

    The generated spec will have the eval expression in the index, and a utility column for each
    alternative (e.g. ['HH', 'HM', 'HN', 'MH', 'MM', 'MN', 'NH', 'NM', 'NN'] for hhsize 2)

    In order to be able to dump the spec in a human-friendly fashion to facilitate debugging the
    cdap_interaction_coefficients table, we first populate utility columns in the spec file
    with the coefficient slugs, dump the spec file, and then replace the slugs with coefficients.

    Parameters
    ----------
    interaction_coefficients : pandas.DataFrame
        Rules and coefficients for generating interaction specs for different household sizes
    hhsize : int
        household size for which the spec should be built
        (values greater than MAX_HHSIZE are treated as MAX_HHSIZE)
    trace_spec : bool
        if True, dump the spec with tracing.trace_df both before and after the slugs
        are replaced with coefficients
    trace_label : str or None
        prefix for the labels of the traced spec tables (only used when trace_spec is True)
    cache : bool
        if True, return the spec from get_cached_spec(hhsize) when one is available,
        and pass a newly built spec to cache_spec before returning it

    Returns
    -------
    spec: pandas.DataFrame

    Raises
    ------
    RuntimeError
        if a non-wildcard row has a cardinality outside 0..MAX_INTERACTION_CARDINALITY
    """

    t0 = tracing.print_elapsed_time()

    # if DUMP:
    #     # dump the interaction_coefficients table because it has been preprocessed
    #     tracing.trace_df(interaction_coefficients,
    #                      '%s.hhsize%d_interaction_coefficients' % (trace_label, hhsize),
    #                      transpose=False, slicer='NONE')

    # cdap spec is same for all households of MAX_HHSIZE and greater
    hhsize = min(hhsize, MAX_HHSIZE)

    if cache:
        spec = get_cached_spec(hhsize)
        if spec is not None:
            return spec

    expression_name = "Expression"

    # generate a list of activity pattern alternatives for this hhsize
    # e.g. ['HH', 'HM', 'HN', 'MH', 'MM', 'MN', 'NH', 'NM', 'NN'] for hhsize=2
    alternatives = [''.join(tup) for tup in itertools.product('HMN', repeat=hhsize)]

    # spec df has expression column plus a column for each alternative
    spec = pd.DataFrame(columns=[expression_name] + alternatives)

    # Before processing the interaction_coefficients, we add rows to the spec to carry
    # the alternative utilities previously computed for each individual into all hh alternative
    # columns in which the individual is assigned that alternative. The Expression column contains
    # the name of the choosers column with that individual's utility for the individual alternative
    # and the hh alternative columns that should receive that utility are given a value of 1
    # e.g. M_p1 is a column in choosers with the individual utility to person p1 of alternative M
    #   Expression   MM   MN   MH   NM   NN   NH   HM   HN   HH
    #         M_p1  1.0  1.0  1.0  0.0  0.0  0.0  0.0  0.0  0.0
    #         N_p1  0.0  0.0  0.0  1.0  1.0  1.0  0.0  0.0  0.0
    for pnum in range(1, hhsize+1):
        for activity in ['M', 'N', 'H']:

            # rows are appended one at a time: the next free row label is the current length
            new_row_index = len(spec)
            spec.loc[new_row_index, expression_name] = add_pn(activity, pnum)

            # list of alternative columns where person pnum has expression activity
            # e.g. for M_p1 we want the columns where activity M is in position p1
            alternative_columns = [alt for alt in alternatives if alt[pnum - 1] == activity]
            spec.loc[new_row_index, alternative_columns] = 1

    # ignore rows whose cardinality exceeds hhsize
    relevant_rows = interaction_coefficients.cardinality <= hhsize

    # for each row in the interaction_coefficients table
    for row in interaction_coefficients[relevant_rows].itertuples():

        # if it is a wildcard all_people interaction
        if not row.interaction_ptypes:

            # wildcard interactions only apply if the interaction includes all household members
            # this will be the case if the cardinality of the wildcard equals the hhsize
            # conveniently, for wildcards, the slug has been assigned the name of the alternative
            # (e.g. HHHH) that it applies to, since the interaction includes all household members
            # and there are no ptypes to append to it

            # FIXME - should we be doing this for greater than HH_MAXSIZE households?
            if row.slug in alternatives:
                spec.loc[len(spec), [expression_name, row.slug]] = ['1', row.slug]

            # wildcard rows never generate per-combination rows below
            continue

        if not (0 <= row.cardinality <= MAX_INTERACTION_CARDINALITY):
            raise RuntimeError("Bad row cardinality %d for %s" % (row.cardinality, row.slug))

        # for all other interaction rules, we need to generate a row in the spec for each
        # possible combination of interacting persons
        # e.g. for (1, 2), (1,3), (2,3) for a coefficient with cardinality 2 in hhsize 3
        for tup in itertools.combinations(list(range(1, hhsize+1)), row.cardinality):

            # determine the name of the chooser column with the ptypes for this interaction
            if row.cardinality == 1:
                interaction_column = "ptype_p%d" % tup[0]
            else:
                # column named (e.g.) p1_p3 for an interaction between p1 and p3
                interaction_column = '_'.join(['p%s' % pnum for pnum in tup])

            # build expression that evaluates True iff the interaction is between specified ptypes
            # (e.g.) p1_p3==13 for an interaction between p1 and p3 of ptypes 1 and 3 (or 3 and 1)
            expression = "%s==%s" % (interaction_column, row.interaction_ptypes)

            # create list of columns with names matching activity for each of the persons in tup
            # e.g. ['MMM', 'MMN', 'MMH'] for an interaction between p1 and p3 with activity 'M'
            # alternative_columns = \
            #     filter(lambda alt: all([alt[p - 1] == row.activity for p in tup]), alternatives)
            alternative_columns = \
                [alt for alt in alternatives if all([alt[p - 1] == row.activity for p in tup])]

            # a row for this interaction may already exist,
            # e.g. if there are rules for both HH13 and MM13, we don't need to add rows for both
            # since they are triggered by the same expressions (e.g. p1_p2==13, p1_p3=13,...)
            existing_row_index = (spec[expression_name] == expression)
            if (existing_row_index).any():
                # if the rows exist, simply update the appropriate alternative columns in spec
                spec.loc[existing_row_index, alternative_columns] = row.slug
                spec.loc[existing_row_index, expression_name] = expression
            else:
                # otherwise, add a new row to spec
                new_row_index = len(spec)
                spec.loc[new_row_index, alternative_columns] = row.slug
                spec.loc[new_row_index, expression_name] = expression

    # eval expression goes in the index
    spec.set_index(expression_name, inplace=True)

    simulate.uniquify_spec_index(spec)

    if trace_spec:
        tracing.trace_df(spec, '%s.hhsize%d_spec' % (trace_label, hhsize),
                         transpose=False, slicer='NONE')

    # replace slug with coefficient
    # (cells that are not a known slug keep their value; falsy and missing cells become 0)
    d = interaction_coefficients.set_index('slug')['coefficient'].to_dict()
    for c in spec.columns:
        spec[c] =\
            spec[c].map(lambda x: d.get(x, x or 0.0)).fillna(0)

    if trace_spec:
        tracing.trace_df(spec, '%s.hhsize%d_spec_patched' % (trace_label, hhsize),
                         transpose=False, slicer='NONE')

    if cache:
        cache_spec(hhsize, spec)

    t0 = tracing.print_elapsed_time("build_cdap_spec hh_size %s" % hhsize, t0)

    return spec
def mandatory_tour_frequency(persons_merged,
                             chunk_size,
                             trace_hh_id):
    """
    Predict how many mandatory (work and/or school) tours each person makes.

    Only persons whose ``cdap_activity`` is 'M' are simulated.  The chosen
    alternative names are used to build the mandatory tours (appended to the
    "tours" pipeline table) and are stored in the ``mandatory_tour_frequency``
    column of the persons table, with an empty string for everyone who was
    not simulated.  When nobody has a mandatory pattern, null results are
    added and the step returns early.
    """

    trace_label = 'mandatory_tour_frequency'

    model_settings = config.read_model_settings('mandatory_tour_frequency.yaml')
    model_spec = simulate.read_model_spec(file_name='mandatory_tour_frequency.csv')
    alternatives = simulate.read_model_alts(
        config.config_file_path('mandatory_tour_frequency_alternatives.csv'), set_index='alt')

    all_persons = persons_merged.to_frame()

    # filter based on results of CDAP
    choosers = all_persons[all_persons.cdap_activity == 'M']
    logger.info("Running mandatory_tour_frequency with %d persons", len(choosers))

    # - if no mandatory tours
    if choosers.shape[0] == 0:
        add_null_results(trace_label, model_settings)
        return

    # - preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:
        expressions.assign_columns(
            df=choosers,
            model_settings=preprocessor_settings,
            locals_dict={},
            trace_label=trace_label)

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    chosen_positions = simulate.simple_simulate(
        choosers=choosers,
        spec=model_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='mandatory_tour_frequency')

    # convert indexes to alternative names
    alt_names = model_spec.columns[chosen_positions.values]
    choices = pd.Series(alt_names, index=chosen_positions.index)
    choices = choices.reindex(persons_merged.local.index)

    # - create mandatory tours
    # This reprocesses the choice of index of the mandatory tour frequency
    # alternatives into an actual dataframe of tours.  Ending format is
    # the same as got non_mandatory_tours except trip types are "work" and "school"
    choosers['mandatory_tour_frequency'] = choices
    mandatory_tours = process_mandatory_tours(
        persons=choosers,
        mandatory_tour_frequency_alts=alternatives
    )

    pipeline.extend_table("tours", mandatory_tours)
    tracing.register_traceable_table('tours', mandatory_tours)
    pipeline.get_rn_generator().add_channel('tours', mandatory_tours)

    # - annotate persons
    persons = inject.get_table('persons').to_frame()

    # need to reindex as we only handled persons with cdap_activity == 'M'
    frequency = choices.reindex(persons.index)
    persons['mandatory_tour_frequency'] = frequency.fillna('').astype(str)

    expressions.assign_columns(
        df=persons,
        model_settings=model_settings.get('annotate_persons'),
        trace_label=tracing.extend_trace_label(trace_label, 'annotate_persons'))

    pipeline.replace_table("persons", persons)

    tracing.print_summary('mandatory_tour_frequency', persons.mandatory_tour_frequency,
                          value_counts=True)

    if trace_hh_id:
        tracing.trace_df(mandatory_tours,
                         label="mandatory_tour_frequency.mandatory_tours",
                         warn_if_empty=True)

        tracing.trace_df(persons,
                         label="mandatory_tour_frequency.persons",
                         warn_if_empty=True)
def trip_purpose_and_destination(
        trips,
        tours_merged,
        chunk_size,
        trace_hh_id):
    """
    Run trip purpose and destination choice repeatedly until no trip fails.

    Each pass calls run_trip_purpose_and_destination on the trips still to be
    resolved. Between passes, the leg-mates of failed trips are flagged as
    failed too, and the failed set is retried, up to the MAX_ITERATIONS model
    setting (default 5). The collected result columns are then assigned to the
    full trips table, which is passed through cleanup_failed_trips before it
    replaces the "trips" pipeline table.
    """
    trace_label = "trip_purpose_and_destination"
    result_columns = ['purpose', 'destination', 'origin', 'failed']

    model_settings = config.read_model_settings('trip_purpose_and_destination.yaml')
    max_iterations = model_settings.get('MAX_ITERATIONS', 5)

    trips_df = trips.to_frame()
    tours_merged_df = tours_merged.to_frame()

    if trips_df.empty:
        logger.info("%s - no trips. Nothing to do." % trace_label)
        return

    # FIXME could allow MAX_ITERATIONS=0 to allow for cleanup-only run
    # in which case, we would need to drop bad trips, WITHOUT failing bad_trip leg_mates
    assert (max_iterations > 0)

    # if trip_destination has been run before, keep only failed trips (and leg_mates) to retry
    if 'destination' in trips_df:
        if not trips_df.failed.any():
            # no failed trips from prior run of trip_destination
            logger.info("%s - no failed trips from prior model run." % trace_label)
            del trips_df['failed']
            pipeline.replace_table("trips", trips_df)
            return

        logger.info('trip_destination has already been run. Rerunning failed trips')
        flag_failed_trip_leg_mates(trips_df, 'failed')
        trips_df = trips_df[trips_df.failed]
        tours_merged_df = tours_merged_df[tours_merged_df.index.isin(trips_df.tour_id)]
        logger.info('Rerunning %s failed trips and leg-mates' % trips_df.shape[0])

    collected = []
    iteration = 0
    while True:
        iteration += 1

        # drop result columns left over from an earlier pass or model run
        for column in result_columns:
            if column in trips_df:
                del trips_df[column]

        iteration_label = tracing.extend_trace_label(trace_label, "i%s" % iteration)
        trips_df = run_trip_purpose_and_destination(
            trips_df,
            tours_merged_df,
            chunk_size,
            trace_hh_id,
            trace_label=iteration_label)

        num_failed_trips = trips_df.failed.sum()

        # if there were no failed trips, we are done
        if num_failed_trips == 0:
            collected.append(trips_df[result_columns])
            break

        logger.warning("%s %s failed trips in iteration %s" %
                       (trace_label, num_failed_trips, iteration))
        file_name = "%s_i%s_failed_trips" % (trace_label, iteration)
        logger.info("writing failed trips to %s" % file_name)
        tracing.write_csv(trips_df[trips_df.failed], file_name=file_name, transpose=False)

        # if max iterations reached, add remaining trips to results and give up
        # note that we do this BEFORE failing leg_mates so resulting trip legs are complete
        if iteration >= max_iterations:
            logger.warning("%s too many iterations %s" % (trace_label, iteration))
            collected.append(trips_df[result_columns])
            break

        # otherwise, if any trips failed, then their leg-mates trips must also fail
        flag_failed_trip_leg_mates(trips_df, 'failed')

        # the mask must be read after leg-mates have been flagged
        still_failed = trips_df.failed

        # add the good trips to results
        collected.append(trips_df[~still_failed][result_columns])

        # and keep the failed ones to retry
        trips_df = trips_df[still_failed]
        tours_merged_df = tours_merged_df[tours_merged_df.index.isin(trips_df.tour_id)]

    # - assign result columns to trips
    results = pd.concat(collected)

    logger.info("%s %s failed trips after %s iterations" %
                (trace_label, results.failed.sum(), iteration))

    trips_df = trips.to_frame()
    assign_in_place(trips_df, results)

    trips_df = cleanup_failed_trips(trips_df)

    pipeline.replace_table("trips", trips_df)

    if trace_hh_id:
        tracing.trace_df(trips_df,
                         label=trace_label,
                         slicer='trip_id',
                         index_label='trip_id',
                         warn_if_empty=True)
def joint_tour_scheduling(
        tours,
        persons_merged,
        tdd_alts,
        chunk_size,
        trace_hh_id):
    """
    This model predicts the departure time and duration of each joint tour.

    The scheduling choices returned by vectorize_joint_tour_scheduling are
    assigned in place to the full tours frame, which then replaces the "tours"
    pipeline table. Returns early, after tracing.no_results, when there are no
    joint tours.
    """
    trace_label = 'joint_tour_scheduling'
    model_settings = config.read_model_settings('joint_tour_scheduling.yaml')
    model_spec = simulate.read_model_spec(file_name='tour_scheduling_joint.csv')

    all_tours = tours.to_frame()
    joint_tours = all_tours[all_tours.tour_category == 'joint']
    joint_tour_count = joint_tours.shape[0]

    # - if no joint tours
    if joint_tour_count == 0:
        tracing.no_results(trace_label)
        return

    # use inject.get_table as this won't exist if there are no joint_tours
    joint_tour_participants = inject.get_table('joint_tour_participants').to_frame()

    persons_merged = persons_merged.to_frame()

    logger.info("Running %s with %d joint tours", trace_label, joint_tour_count)

    # it may seem peculiar that we are concerned with persons rather than households
    # but every joint tour is (somewhat arbitrarily) assigned a "primary person"
    # some of whose characteristics are used in the spec
    # and we get household attributes along with person attributes in persons_merged
    persons_merged = persons_merged[persons_merged.num_hh_joint_tours > 0]

    # since a households joint tours each potentially different participants
    # they may also have different joint tour masks (free time of all participants)
    # so we have to either chunk processing by joint_tour_num and build timetable by household
    # or build timetables by unique joint_tour

    constants = config.get_model_constants(model_settings)

    # - run preprocessor to annotate choosers
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:
        # expressions see the model constants (if any) as local names
        locals_d = {} if constants is None else dict(constants)

        expressions.assign_columns(
            df=joint_tours,
            model_settings=preprocessor_settings,
            locals_dict=locals_d,
            trace_label=trace_label)

    tdd_choices, timetable = vectorize_joint_tour_scheduling(
        joint_tours,
        joint_tour_participants,
        persons_merged,
        tdd_alts,
        spec=model_spec,
        model_settings=model_settings,
        chunk_size=chunk_size,
        trace_label=trace_label)

    timetable.replace_table()

    assign_in_place(all_tours, tdd_choices)
    pipeline.replace_table("tours", all_tours)

    # updated df for tracing
    joint_tours = all_tours[all_tours.tour_category == 'joint']

    if trace_hh_id:
        tracing.trace_df(joint_tours,
                         label="joint_tour_scheduling",
                         slicer='household_id')
def stop_frequency(
        tours, tours_merged,
        stop_frequency_alts,
        skim_dict,
        chunk_size,
        trace_hh_id):
    """
    stop frequency model

    For each tour, choose a number of intermediate inbound stops and outbound stops.
    Create a trip table with inbound and outbound trips.

    Thus, a tour with stop_frequency '2out_0in' will have two outbound and zero inbound
    stops, and four corresponding trips: three outbound, and one inbound.

    The model is run separately for each primary_purpose segment of
    tours_merged, using the spec file 'stop_frequency_<segment>.csv'.

    Adds a stop_frequency str column to the tours table (and primary_purpose,
    if tours does not already have it), then extends the trips table with the
    trips built by process_trips. The original documentation lists these trip
    columns (not verifiable from this function alone):
    ::

        - person_id
        - household_id
        - tour_id
        - primary_purpose
        - atwork
        - trip_num
        - outbound
        - trip_count
    """
    trace_label = 'stop_frequency'
    model_settings = config.read_model_settings('stop_frequency.yaml')

    tours = tours.to_frame()
    tours_merged = tours_merged.to_frame()

    assert not tours_merged.household_id.isnull().any()
    assert not (tours_merged.origin == -1).any()
    assert not (tours_merged.destination == -1).any()

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    # stays None when no preprocessor is configured, so the tracing code at the
    # end can tell whether there are any annotations to trace
    annotations = None

    # - run preprocessor to annotate tours_merged
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        # hack: preprocessor adds origin column in place if it does not exist already
        od_skim_stack_wrapper = skim_dict.wrap('origin', 'destination')
        skims = [od_skim_stack_wrapper]

        locals_dict = {
            "od_skims": od_skim_stack_wrapper
        }
        if constants is not None:
            locals_dict.update(constants)

        simulate.set_skim_wrapper_targets(tours_merged, skims)

        # this should be pre-slice as some expressions may count tours by type
        annotations = expressions.compute_columns(
            df=tours_merged,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)

        assign_in_place(tours_merged, annotations)

    tracing.print_summary('stop_frequency segments',
                          tours_merged.primary_purpose, value_counts=True)

    choices_list = []
    for segment_type, choosers in tours_merged.groupby('primary_purpose'):

        # use the module logger (as the other model steps do), with lazy formatting
        logger.info("%s running segment %s with %s chooser rows",
                    trace_label, segment_type, choosers.shape[0])

        spec = simulate.read_model_spec(file_name='stop_frequency_%s.csv' % segment_type)

        assert spec is not None, "spec for segment_type %s not found" % segment_type

        choices = simulate.simple_simulate(
            choosers=choosers,
            spec=spec,
            nest_spec=nest_spec,
            locals_d=constants,
            chunk_size=chunk_size,
            trace_label=tracing.extend_trace_label(trace_label, segment_type),
            trace_choice_name='stops')

        # convert indexes to alternative names
        choices = pd.Series(spec.columns[choices.values], index=choices.index)

        choices_list.append(choices)

    choices = pd.concat(choices_list)

    tracing.print_summary('stop_frequency', choices, value_counts=True)

    # add stop_frequency choices to tours table
    assign_in_place(tours, choices.to_frame('stop_frequency'))

    if 'primary_purpose' not in tours.columns:
        assign_in_place(tours, tours_merged[['primary_purpose']])

    pipeline.replace_table("tours", tours)

    # create trips table
    trips = process_trips(tours, stop_frequency_alts)
    trips = pipeline.extend_table("trips", trips)
    tracing.register_traceable_table('trips', trips)
    pipeline.get_rn_generator().add_channel('trips', trips)

    if trace_hh_id:
        tracing.trace_df(tours,
                         label="stop_frequency.tours",
                         slicer='person_id',
                         columns=None)

        tracing.trace_df(trips,
                         label="stop_frequency.trips",
                         slicer='person_id',
                         columns=None)

        # annotations only exist when a preprocessor ran; referencing the name
        # unconditionally raised NameError for models without a preprocessor
        if annotations is not None:
            tracing.trace_df(annotations,
                             label="stop_frequency.annotations",
                             columns=None)

        tracing.trace_df(tours_merged,
                         label="stop_frequency.tours_merged",
                         slicer='person_id',
                         columns=None)
def school_location_simulate(persons_merged,
                             school_location_sample,
                             school_location_spec,
                             school_location_settings,
                             skim_dict,
                             destination_size_terms,
                             chunk_size,
                             trace_hh_id):
    """
    School location model on school_location_sample annotated with mode_choice logsum
    to select a school_taz from sample alternatives

    The choice is made separately for the 'university', 'highschool' and
    'gradeschool' segments. The combined choices are added to the "persons"
    table as the school_taz column, with -1 for persons without a choice.
    """
    choosers = persons_merged.to_frame()
    sample_df = school_location_sample.to_frame()
    size_terms_df = destination_size_terms.to_frame()

    trace_label = 'school_location_simulate'
    alt_col_name = school_location_settings["ALT_COL_NAME"]

    constants = config.get_model_constants(school_location_settings)

    # create wrapper with keys for this lookup - in this case there is a TAZ in the choosers
    # and a TAZ in the alternatives which get merged during interaction
    # the skims will be available under the name "skims" for any @ expressions
    skims = skim_dict.wrap("TAZ", alt_col_name)

    locals_d = dict(skims=skims)
    if constants is not None:
        locals_d.update(constants)

    # FIXME - MEMORY HACK - only include columns actually used in spec
    chooser_columns = school_location_settings['SIMULATE_CHOOSER_COLUMNS']
    choosers = choosers[chooser_columns]
    tracing.dump_df(DUMP, choosers, 'school_location_simulate', 'choosers')

    segment_choices = []
    for school_type in ('university', 'highschool', 'gradeschool'):

        # expose the current segment name to the spec expressions
        locals_d['segment'] = school_type

        in_segment = choosers["is_" + school_type]
        choosers_segment = choosers[in_segment]

        alts_segment = sample_df[sample_df['school_type'] == school_type]

        # alternatives are pre-sampled and annotated with logsums and pick_count
        # but we have to merge additional alt columns into alt sample list
        alts_segment = alts_segment.merge(
            size_terms_df,
            left_on=alt_col_name,
            right_index=True,
            how="left")

        tracing.dump_df(DUMP, alts_segment, trace_label, '%s_alternatives' % school_type)

        segment_trace_label = tracing.extend_trace_label(trace_label, school_type)
        segment_choices.append(
            interaction_sample_simulate(
                choosers_segment,
                alts_segment,
                spec=school_location_spec[[school_type]],
                choice_column=alt_col_name,
                skims=skims,
                locals_d=locals_d,
                chunk_size=chunk_size,
                trace_label=segment_trace_label,
                trace_choice_name='school_location'))

    choices = pd.concat(segment_choices)

    # We only chose school locations for the subset of persons who go to school
    # so we backfill the empty choices with -1 to code as no school location
    choices = choices.reindex(persons_merged.index).fillna(-1).astype(int)

    tracing.dump_df(DUMP, choices, trace_label, 'choices')

    tracing.print_summary('school_taz', choices, describe=True)

    inject.add_column("persons", "school_taz", choices)

    pipeline.add_dependent_columns("persons", "persons_school")

    if trace_hh_id:
        trace_columns = ['school_taz'] + inject.get_table('persons_school').columns
        tracing.trace_df(inject.get_table('persons_merged').to_frame(),
                         label="school_location",
                         columns=trace_columns,
                         warn_if_empty=True)
def joint_tour_composition(
        tours,
        households,
        persons,
        chunk_size,
        trace_hh_id):
    """
    This model predicts the makeup of the travel party (adults, children, or mixed).

    The chosen alternative name is written to the "tours" table as the
    composition column; tours that are not joint tours get an empty string.
    """
    trace_label = 'joint_tour_composition'

    model_settings = config.read_model_settings('joint_tour_composition.yaml')
    model_spec = simulate.read_model_spec(file_name='joint_tour_composition.csv')

    tours = tours.to_frame()
    joint_tours = tours[tours.tour_category == 'joint']
    joint_tour_count = joint_tours.shape[0]

    # - if no joint tours
    if joint_tour_count == 0:
        add_null_results(trace_label, tours)
        return

    # - only interested in households with joint_tours
    households = households.to_frame()
    households = households[households.num_hh_joint_tours > 0]

    persons = persons.to_frame()
    persons = persons[persons.household_id.isin(households.index)]

    logger.info("Running joint_tour_composition with %d joint tours" % joint_tour_count)

    # - run preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:
        # names made available to the preprocessor expressions
        preprocessor_locals = dict(
            persons=persons,
            hh_time_window_overlap=hh_time_window_overlap)

        expressions.assign_columns(
            df=households,
            model_settings=preprocessor_settings,
            locals_dict=preprocessor_locals,
            trace_label=trace_label)

    # attach (possibly annotated) household columns to each joint tour
    joint_tours_merged = joint_tours.merge(
        households,
        left_on='household_id',
        right_index=True,
        how='left')

    # - simple_simulate
    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    simulated = simulate.simple_simulate(
        choosers=joint_tours_merged,
        spec=model_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='composition')

    # convert indexes to alternative names
    alternative_names = model_spec.columns[simulated.values]
    choices = pd.Series(alternative_names, index=simulated.index)

    # add composition column to tours for tracing
    joint_tours['composition'] = choices

    # reindex since we ran model on a subset of households
    composition_for_all_tours = choices.reindex(tours.index).fillna('').astype(str)
    tours['composition'] = composition_for_all_tours
    pipeline.replace_table("tours", tours)

    tracing.print_summary('joint_tour_composition',
                          joint_tours.composition,
                          value_counts=True)

    if trace_hh_id:
        tracing.trace_df(joint_tours,
                         label="joint_tour_composition.joint_tours",
                         slicer='household_id')