def atwork_subtour_destination_logsums(
        persons_merged,
        destination_sample,
        skim_dict, skim_stack,
        chunk_size, trace_hh_id):
    """
    add logsum column to existing atwork_subtour_destination_sample table

    logsum is calculated by running the mode_choice model for each sample (person, dest_taz) pair
    in atwork_subtour_destination_sample, and computing the logsum of all the utilities

    +-----------+--------------+----------------+------------+----------------+
    | person_id | dest_TAZ     | rand           | pick_count | logsum (added) |
    +===========+==============+================+============+================+
    | 23750     | 14           | 0.565502716034 | 4          | 1.85659498857  |
    +-----------+--------------+----------------+------------+----------------+
    | 23750     | 16           | 0.711135838871 | 6          | 1.92315598631  |
    +-----------+--------------+----------------+------------+----------------+
    | ...       |              |                |            |                |
    +-----------+--------------+----------------+------------+----------------+
    | 23751     | 12           | 0.408038878552 | 1          | 2.40612135416  |
    +-----------+--------------+----------------+------------+----------------+
    | 23751     | 14           | 0.972732479292 | 2          | 1.44009018355  |
    +-----------+--------------+----------------+------------+----------------+
    """

    trace_label = 'atwork_subtour_destination_logsums'

    model_settings = config.read_model_settings('atwork_subtour_destination.yaml')
    logsum_settings = config.read_model_settings(model_settings['LOGSUM_SETTINGS'])

    # FIXME - MEMORY HACK - only include columns actually used in spec
    persons_merged = logsum.filter_chooser_columns(persons_merged, logsum_settings, model_settings)

    # merge persons into tours
    choosers = pd.merge(destination_sample,
                        persons_merged,
                        left_on='person_id',
                        right_index=True,
                        how="left")

    logger.info("Running %s with %s rows", trace_label, len(choosers))

    tracing.dump_df(DUMP, persons_merged, trace_label, 'persons_merged')
    tracing.dump_df(DUMP, choosers, trace_label, 'choosers')

    tour_purpose = 'atwork'
    logsums = logsum.compute_logsums(
        choosers,
        tour_purpose,
        logsum_settings, model_settings,
        skim_dict, skim_stack,
        chunk_size, trace_hh_id,
        trace_label)

    destination_sample['mode_choice_logsum'] = logsums

    return destination_sample
def run_location_logsums(
        segment_name,
        persons_merged_df,
        skim_dict, skim_stack,
        location_sample_df,
        model_settings,
        chunk_size, trace_hh_id, trace_label):
    """
    add logsum column to existing location_sample table

    logsum is calculated by running the mode_choice model for each sample (person, dest_taz) pair
    in location_sample, and computing the logsum of all the utilities

    +-----------+--------------+----------------+------------+----------------+
    | PERID     | dest_TAZ     | rand           | pick_count | logsum (added) |
    +===========+==============+================+============+================+
    | 23750     | 14           | 0.565502716034 | 4          | 1.85659498857  |
    +-----------+--------------+----------------+------------+----------------+
    | 23750     | 16           | 0.711135838871 | 6          | 1.92315598631  |
    +-----------+--------------+----------------+------------+----------------+
    | ...       |              |                |            |                |
    +-----------+--------------+----------------+------------+----------------+
    | 23751     | 12           | 0.408038878552 | 1          | 2.40612135416  |
    +-----------+--------------+----------------+------------+----------------+
    | 23751     | 14           | 0.972732479292 | 2          | 1.44009018355  |
    +-----------+--------------+----------------+------------+----------------+
    """

    assert not location_sample_df.empty

    logsum_settings = config.read_model_settings(model_settings['LOGSUM_SETTINGS'])

    # FIXME - MEMORY HACK - only include columns actually used in spec
    persons_merged_df = \
        logsum.filter_chooser_columns(persons_merged_df, logsum_settings, model_settings)

    logger.info("Running %s with %s rows" % (trace_label, len(location_sample_df.index)))

    choosers = location_sample_df.join(persons_merged_df, how='left')

    tour_purpose = model_settings['LOGSUM_TOUR_PURPOSE']
    if isinstance(tour_purpose, dict):
        tour_purpose = tour_purpose[segment_name]

    logsums = logsum.compute_logsums(
        choosers,
        tour_purpose,
        logsum_settings, model_settings,
        skim_dict, skim_stack,
        chunk_size, trace_hh_id,
        trace_label)

    # "add_column series should have an index matching the table to which it is being added"
    # when the index has duplicates, however, in the special case that the series index exactly
    # matches the table index, then the series value order is preserved.
    # logsums now has such an index, since location_sample_df was on the left side of the
    # de-duplicating merge
    location_sample_df['mode_choice_logsum'] = logsums

    return location_sample_df
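# A minimal sketch (illustration only, not called by the model) of the duplicate-index
# caveat documented above: a series whose index contains duplicates can still be assigned
# into a table when the series index matches the table index exactly, in which case pandas
# preserves the series value order. The names below (df, logsums) are hypothetical.
def _demo_duplicate_index_assignment():
    import pandas as pd

    df = pd.DataFrame({'dest_TAZ': [14, 16, 12]}, index=[23750, 23750, 23751])
    logsums = pd.Series([1.86, 1.92, 2.41], index=df.index)  # same (duplicated) index
    df['mode_choice_logsum'] = logsums  # row order preserved because indexes match exactly
    return df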
def free_parking(
        persons_merged, persons, households,
        skim_dict, skim_stack,
        chunk_size, trace_hh_id, locutor):
    """
    Free parking at work choice model - predicts whether a worker has free parking available
    at the workplace, and adds a boolean free_parking_at_work column to the persons table
    """

    trace_label = 'free_parking'
    model_settings = config.read_model_settings('free_parking.yaml')

    choosers = persons_merged.to_frame()
    choosers = choosers[choosers.workplace_taz > -1]

    logger.info("Running %s with %d persons", trace_label, len(choosers))

    constants = config.get_model_constants(model_settings)

    # - preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_d = {}
        if constants is not None:
            locals_d.update(constants)

        expressions.assign_columns(
            df=choosers,
            model_settings=preprocessor_settings,
            locals_dict=locals_d,
            trace_label=trace_label)

    model_spec = simulate.read_model_spec(file_name='free_parking.csv')
    nest_spec = config.get_logit_model_settings(model_settings)

    choices = simulate.simple_simulate(
        choosers=choosers,
        spec=model_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='free_parking_at_work')

    persons = persons.to_frame()

    free_parking_alt = model_settings['FREE_PARKING_ALT']
    choices = (choices == free_parking_alt)

    # reindex since we only modeled persons with a workplace
    persons['free_parking_at_work'] = choices.reindex(persons.index).fillna(0).astype(bool)

    pipeline.replace_table("persons", persons)

    tracing.print_summary('free_parking', persons.free_parking_at_work, value_counts=True)

    if trace_hh_id:
        tracing.trace_df(persons, label=trace_label, warn_if_empty=True)
def atwork_subtour_destination_sample(
        tours, persons_merged,
        skim_dict,
        destination_size_terms,
        chunk_size, trace_hh_id):

    trace_label = 'atwork_subtour_destination_sample'
    model_settings = config.read_model_settings('atwork_subtour_destination.yaml')
    model_spec = simulate.read_model_spec(file_name='atwork_subtour_destination_sample.csv')

    # merge persons into tours
    choosers = pd.merge(tours, persons_merged, left_on='person_id', right_index=True)

    # FIXME - MEMORY HACK - only include columns actually used in spec
    chooser_columns = model_settings['SIMULATE_CHOOSER_COLUMNS']
    choosers = choosers[chooser_columns]

    constants = config.get_model_constants(model_settings)

    sample_size = model_settings["SAMPLE_SIZE"]
    alt_dest_col_name = model_settings["ALT_DEST_COL_NAME"]

    logger.info("Running %s with %d tours", trace_label, len(choosers))

    # create wrapper with keys for this lookup - in this case there is a workplace_taz
    # in the choosers and a TAZ in the alternatives which get merged during interaction
    # the skims will be available under the name "skims" for any @ expressions
    skims = skim_dict.wrap('workplace_taz', 'TAZ')

    locals_d = {
        'skims': skims
    }
    if constants is not None:
        locals_d.update(constants)

    choices = interaction_sample(
        choosers,
        alternatives=destination_size_terms,
        sample_size=sample_size,
        alt_col_name=alt_dest_col_name,
        spec=model_spec,
        skims=skims,
        locals_d=locals_d,
        chunk_size=chunk_size,
        trace_label=trace_label)

    # remember person_id in chosen alts so we can merge with persons in subsequent steps
    choices['person_id'] = choosers.person_id

    return choices
def non_mandatory_tour_destination(
        tours, persons_merged,
        skim_dict, skim_stack,
        chunk_size, trace_hh_id):
    """
    Given the tour generation from the above, each tour needs to have a destination, so
    in this case tours are the choosers (with the associated person that's making the tour)
    """

    trace_label = 'non_mandatory_tour_destination'
    model_settings = config.read_model_settings('non_mandatory_tour_destination.yaml')

    tours = tours.to_frame()
    persons_merged = persons_merged.to_frame()

    # choosers are tours - in a sense tours are choosing their destination
    non_mandatory_tours = tours[tours.tour_category == 'non_mandatory']

    # - if no non_mandatory_tours
    if non_mandatory_tours.shape[0] == 0:
        tracing.no_results(trace_label)
        return

    choices = tour_destination.run_tour_destination(
        tours,
        persons_merged,
        model_settings,
        skim_dict,
        skim_stack,
        chunk_size, trace_hh_id, trace_label)

    non_mandatory_tours['destination'] = choices

    assign_in_place(tours, non_mandatory_tours[['destination']])

    pipeline.replace_table("tours", tours)

    if trace_hh_id:
        tracing.trace_df(tours[tours.tour_category == 'non_mandatory'],
                         label="non_mandatory_tour_destination",
                         slicer='person_id',
                         index_label='tour',
                         columns=None,
                         warn_if_empty=True)
def trip_destination(
        trips,
        tours_merged,
        chunk_size, trace_hh_id):
    """
    Choose a destination for all 'intermediate' trips based on trip purpose.

    Final trips already have a destination (the primary tour destination for outbound trips,
    and home for inbound trips.)
    """
    trace_label = 'trip_destination'
    model_settings = config.read_model_settings('trip_destination.yaml')
    CLEANUP = model_settings.get('CLEANUP', True)

    trips_df = trips.to_frame()
    tours_merged_df = tours_merged.to_frame()

    logger.info("Running %s with %d trips", trace_label, trips_df.shape[0])

    trips_df = run_trip_destination(
        trips_df,
        tours_merged_df,
        chunk_size, trace_hh_id,
        trace_label)

    if trips_df.failed.any():
        logger.warning("%s %s failed trips", trace_label, trips_df.failed.sum())
        file_name = "%s_failed_trips" % trace_label
        logger.info("writing failed trips to %s", file_name)
        tracing.write_csv(trips_df[trips_df.failed], file_name=file_name, transpose=False)

    if CLEANUP:
        trips_df = cleanup_failed_trips(trips_df)
    elif trips_df.failed.any():
        logger.warning("%s keeping %s sidelined failed trips" %
                       (trace_label, trips_df.failed.sum()))

    pipeline.replace_table("trips", trips_df)

    if trace_hh_id:
        tracing.trace_df(trips_df,
                         label=trace_label,
                         slicer='trip_id',
                         index_label='trip_id',
                         warn_if_empty=True)
def initialize_households():

    trace_label = 'initialize_households'

    model_settings = config.read_model_settings('initialize_households.yaml', mandatory=True)
    annotate_tables(model_settings, trace_label)

    # - initialize shadow_pricing size tables after annotating household and person tables
    # since these are scaled to model size, they have to be created while single-process
    shadow_pricing.add_size_tables()

    # - preload person_windows
    t0 = tracing.print_elapsed_time()
    inject.get_table('person_windows').to_frame()
    t0 = tracing.print_elapsed_time("preload person_windows", t0, debug=True)
def initialize_landuse():

    trace_label = 'initialize_landuse'

    model_settings = config.read_model_settings('initialize_landuse.yaml', mandatory=True)
    annotate_tables(model_settings, trace_label)

    # create accessibility
    land_use = pipeline.get_table('land_use')
    accessibility_df = pd.DataFrame(index=land_use.index)

    # - write table to pipeline
    pipeline.replace_table("accessibility", accessibility_df)
def workplace_location(
        persons_merged, persons, households,
        skim_dict, skim_stack,
        chunk_size, trace_hh_id, locutor):
    """
    workplace location choice model

    iterate_location_choice adds location choice column and annotations to persons table
    """

    trace_label = 'workplace_location'
    model_settings = config.read_model_settings('workplace_location.yaml')

    iterate_location_choice(
        model_settings,
        persons_merged, persons, households,
        skim_dict, skim_stack,
        chunk_size, trace_hh_id, locutor, trace_label
    )
def get_shadow_pricing_info():
    """
    return dict with info about dtype and shapes of desired and modeled size tables

    block shape is (num_zones, num_segments + 1)

    Returns
    -------
    shadow_pricing_info: dict
        dtype: <sp_dtype>,
        block_shapes: dict {<model_selector>: <block_shape>}
    """

    land_use = inject.get_table('land_use')
    size_terms = inject.get_injectable('size_terms')

    shadow_settings = config.read_model_settings('shadow_pricing.yaml')

    # shadow_pricing_models is dict of {<model_selector>: <model_name>}
    shadow_pricing_models = shadow_settings['shadow_pricing_models']

    blocks = OrderedDict()
    for model_selector in shadow_pricing_models:

        sp_rows = len(land_use)
        sp_cols = len(size_terms[size_terms.model_selector == model_selector])

        # extra tally column for TALLY_CHECKIN and TALLY_CHECKOUT semaphores
        blocks[block_name(model_selector)] = (sp_rows, sp_cols + 1)

    sp_dtype = np.int64

    shadow_pricing_info = {
        'dtype': sp_dtype,
        'block_shapes': blocks,
    }

    for k in shadow_pricing_info:
        logger.debug("shadow_pricing_info %s: %s" % (k, shadow_pricing_info.get(k)))

    return shadow_pricing_info
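# A minimal sketch (hypothetical helper, not ActivitySim's actual API) of how the dict
# returned by get_shadow_pricing_info() could be used to allocate one shared buffer per
# model_selector before forking sub-processes. Each block is rows * cols elements, where
# the extra column is the TALLY semaphore column described above.
def _demo_allocate_shadow_price_buffers(shadow_pricing_info):
    import multiprocessing

    import numpy as np

    dtype = shadow_pricing_info['dtype']
    buffers = {}
    for block_key, block_shape in shadow_pricing_info['block_shapes'].items():
        # flat RawArray; sub-processes would later wrap it as a 2-D numpy array
        buffers[block_key] = multiprocessing.RawArray(
            np.ctypeslib.as_ctypes_type(dtype), block_shape[0] * block_shape[1])
    return buffers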
def run_destination_logsums(
        tour_purpose,
        persons_merged,
        destination_sample,
        model_settings,
        skim_dict, skim_stack,
        chunk_size, trace_hh_id, trace_label):
    """
    add logsum column to existing tour_destination_sample table

    logsum is calculated by running the mode_choice model for each sample (person, dest_taz) pair
    in destination_sample, and computing the logsum of all the utilities
    """

    logsum_settings = config.read_model_settings(model_settings['LOGSUM_SETTINGS'])

    # FIXME - MEMORY HACK - only include columns actually used in spec
    persons_merged = logsum.filter_chooser_columns(persons_merged, logsum_settings, model_settings)

    # merge persons into tours
    choosers = pd.merge(destination_sample,
                        persons_merged,
                        left_on='person_id',
                        right_index=True,
                        how="left")

    logger.info("Running %s with %s rows", trace_label, len(choosers))

    tracing.dump_df(DUMP, persons_merged, trace_label, 'persons_merged')
    tracing.dump_df(DUMP, choosers, trace_label, 'choosers')

    logsums = logsum.compute_logsums(
        choosers,
        tour_purpose,
        logsum_settings, model_settings,
        skim_dict, skim_stack,
        chunk_size, trace_hh_id,
        trace_label)

    destination_sample['mode_choice_logsum'] = logsums

    return destination_sample
def auto_ownership_simulate(households,
                            households_merged,
                            chunk_size,
                            trace_hh_id):
    """
    Auto ownership is a standard model which predicts how many cars a household
    with given characteristics owns
    """
    trace_label = 'auto_ownership_simulate'
    model_settings = config.read_model_settings('auto_ownership.yaml')

    logger.info("Running %s with %d households", trace_label, len(households_merged))

    model_spec = simulate.read_model_spec(file_name='auto_ownership.csv')

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    choices = simulate.simple_simulate(
        choosers=households_merged.to_frame(),
        spec=model_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='auto_ownership')

    households = households.to_frame()

    # no need to reindex as we used all households
    households['auto_ownership'] = choices

    pipeline.replace_table("households", households)

    tracing.print_summary('auto_ownership', households.auto_ownership, value_counts=True)

    if trace_hh_id:
        tracing.trace_df(households,
                         label='auto_ownership',
                         warn_if_empty=True)
def trip_purpose_and_destination(trips, tours_merged, chunk_size, trace_hh_id):

    trace_label = "trip_purpose_and_destination"
    model_settings = config.read_model_settings('trip_purpose_and_destination.yaml')

    MAX_ITERATIONS = model_settings.get('MAX_ITERATIONS', 5)

    trips_df = trips.to_frame()
    tours_merged_df = tours_merged.to_frame()

    if trips_df.empty:
        logger.info("%s - no trips. Nothing to do." % trace_label)
        return

    # FIXME could allow MAX_ITERATIONS=0 to allow for cleanup-only run
    # in which case, we would need to drop bad trips, WITHOUT failing bad_trip leg_mates
    assert (MAX_ITERATIONS > 0)

    # if trip_destination has been run before, keep only failed trips (and leg_mates) to retry
    if 'destination' in trips_df:

        if trips_df.failed.any():
            logger.info('trip_destination has already been run. Rerunning failed trips')
            flag_failed_trip_leg_mates(trips_df, 'failed')
            trips_df = trips_df[trips_df.failed]
            tours_merged_df = tours_merged_df[tours_merged_df.index.isin(trips_df.tour_id)]
            logger.info('Rerunning %s failed trips and leg-mates' % trips_df.shape[0])
        else:
            # no failed trips from prior run of trip_destination
            logger.info("%s - no failed trips from prior model run." % trace_label)
            del trips_df['failed']
            pipeline.replace_table("trips", trips_df)
            return

    results = []
    i = 0
    RESULT_COLUMNS = ['purpose', 'destination', 'origin', 'failed']
    while True:

        i += 1

        for c in RESULT_COLUMNS:
            if c in trips_df:
                del trips_df[c]

        trips_df = run_trip_purpose_and_destination(
            trips_df,
            tours_merged_df,
            chunk_size,
            trace_hh_id,
            trace_label=tracing.extend_trace_label(trace_label, "i%s" % i))

        num_failed_trips = trips_df.failed.sum()

        # if there were no failed trips, we are done
        if num_failed_trips == 0:
            results.append(trips_df[RESULT_COLUMNS])
            break

        logger.warning("%s %s failed trips in iteration %s" %
                       (trace_label, num_failed_trips, i))
        file_name = "%s_i%s_failed_trips" % (trace_label, i)
        logger.info("writing failed trips to %s" % file_name)
        tracing.write_csv(trips_df[trips_df.failed], file_name=file_name, transpose=False)

        # if max iterations reached, add remaining trips to results and give up
        # note that we do this BEFORE failing leg_mates so resulting trip legs are complete
        if i >= MAX_ITERATIONS:
            logger.warning("%s too many iterations %s" % (trace_label, i))
            results.append(trips_df[RESULT_COLUMNS])
            break

        # otherwise, if any trips failed, then their leg-mates trips must also fail
        flag_failed_trip_leg_mates(trips_df, 'failed')

        # add the good trips to results
        results.append(trips_df[~trips_df.failed][RESULT_COLUMNS])

        # and keep the failed ones to retry
        trips_df = trips_df[trips_df.failed]
        tours_merged_df = tours_merged_df[tours_merged_df.index.isin(trips_df.tour_id)]

    # - assign result columns to trips
    results = pd.concat(results)

    logger.info("%s %s failed trips after %s iterations" %
                (trace_label, results.failed.sum(), i))

    trips_df = trips.to_frame()
    assign_in_place(trips_df, results)

    trips_df = cleanup_failed_trips(trips_df)

    pipeline.replace_table("trips", trips_df)

    if trace_hh_id:
        tracing.trace_df(trips_df,
                         label=trace_label,
                         slicer='trip_id',
                         index_label='trip_id',
                         warn_if_empty=True)
def mandatory_tour_scheduling(tours,
                              persons_merged,
                              tdd_alts,
                              chunk_size,
                              trace_hh_id):
    """
    This model predicts the departure time and duration of each activity for mandatory tours
    """
    trace_label = 'mandatory_tour_scheduling'
    model_settings = config.read_model_settings('mandatory_tour_scheduling.yaml')
    logsum_settings = config.read_model_settings(model_settings['LOGSUM_SETTINGS'])

    tours = tours.to_frame()
    mandatory_tours = tours[tours.tour_category == 'mandatory']

    # - if no mandatory_tours
    if mandatory_tours.shape[0] == 0:
        tracing.no_results(trace_label)
        return

    persons_merged = persons_merged.to_frame()

    # - filter chooser columns for both logsums and simulate
    logsum_columns = logsum_settings.get('LOGSUM_CHOOSER_COLUMNS', [])
    model_columns = model_settings.get('SIMULATE_CHOOSER_COLUMNS', [])
    chooser_columns = logsum_columns + [c for c in model_columns if c not in logsum_columns]
    persons_merged = expressions.filter_chooser_columns(persons_merged, chooser_columns)

    # - add primary_purpose column
    # mtctm1 segments mandatory_scheduling spec by tour_type
    # (i.e. there are different specs for work and school tour_types)
    # mtctm1 logsum coefficients are segmented by primary_purpose
    # (i.e. there are different logsum coefficients for work, school, univ primary_purposes)
    # for simplicity managing these different segmentation schemes,
    # we conflate them by segmenting the skims to align with primary_purpose
    segment_col = 'primary_purpose'
    if segment_col not in mandatory_tours:

        is_university_tour = \
            (mandatory_tours.tour_type == 'school') & \
            reindex(persons_merged.is_university, mandatory_tours.person_id)

        mandatory_tours['primary_purpose'] = \
            mandatory_tours.tour_type.where(~is_university_tour, 'univ')

    # - spec dict segmented by primary_purpose
    specs = model_settings.get('SPEC', [])
    segment_specs = {segment: simulate.read_model_spec(file_name=spec)
                     for segment, spec in specs.items()}

    logger.info("Running mandatory_tour_scheduling with %d tours", len(tours))
    tdd_choices, timetable = vts.vectorize_tour_scheduling(
        mandatory_tours, persons_merged,
        tdd_alts,
        spec=segment_specs, segment_col=segment_col,
        model_settings=model_settings,
        chunk_size=chunk_size,
        trace_label=trace_label)

    timetable.replace_table()

    assign_in_place(tours, tdd_choices)
    pipeline.replace_table("tours", tours)

    # updated df for tracing
    mandatory_tours = tours[tours.tour_category == 'mandatory']

    tracing.dump_df(DUMP,
                    tt.tour_map(persons_merged, mandatory_tours, tdd_alts),
                    trace_label, 'tour_map')

    if trace_hh_id:
        tracing.trace_df(mandatory_tours,
                         label="mandatory_tour_scheduling",
                         slicer='person_id',
                         index_label='tour',
                         columns=None,
                         warn_if_empty=True)
def atwork_subtour_destination(
        tours, persons_merged,
        skim_dict, skim_stack,
        land_use, size_terms,
        chunk_size, trace_hh_id):

    trace_label = 'atwork_subtour_destination'
    model_settings_file_name = 'atwork_subtour_destination.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)

    destination_column_name = 'destination'
    logsum_column_name = model_settings.get('DEST_CHOICE_LOGSUM_COLUMN_NAME')
    want_logsums = logsum_column_name is not None

    sample_table_name = model_settings.get('DEST_CHOICE_SAMPLE_TABLE_NAME')
    want_sample_table = config.setting('want_dest_choice_sample_tables') \
        and sample_table_name is not None

    persons_merged = persons_merged.to_frame()

    tours = tours.to_frame()
    subtours = tours[tours.tour_category == 'atwork']

    # interaction_sample_simulate insists choosers appear in same order as alts
    subtours = subtours.sort_index()

    # - if no atwork subtours
    if subtours.shape[0] == 0:
        tracing.no_results('atwork_subtour_destination')
        return

    estimator = estimation.manager.begin_estimation('atwork_subtour_destination')
    if estimator:
        estimator.write_coefficients(simulate.read_model_coefficients(model_settings))
        # estimator.write_spec(model_settings, tag='SAMPLE_SPEC')
        estimator.write_spec(model_settings, tag='SPEC')
        estimator.set_alt_id(model_settings["ALT_DEST_COL_NAME"])
        estimator.write_table(inject.get_injectable('size_terms'), 'size_terms', append=False)
        estimator.write_table(inject.get_table('land_use').to_frame(), 'landuse', append=False)
        estimator.write_model_settings(model_settings, model_settings_file_name)

    destination_size_terms = tour_destination_size_terms(land_use, size_terms, 'atwork')

    destination_sample_df = atwork_subtour_destination_sample(
        subtours,
        persons_merged,
        model_settings,
        skim_dict,
        destination_size_terms,
        estimator=estimator,
        chunk_size=chunk_size,
        trace_label=tracing.extend_trace_label(trace_label, 'sample'))

    destination_sample_df = atwork_subtour_destination_logsums(
        persons_merged,
        destination_sample_df,
        model_settings,
        skim_dict, skim_stack,
        chunk_size=chunk_size,
        trace_label=tracing.extend_trace_label(trace_label, 'logsums'))

    choices_df = atwork_subtour_destination_simulate(
        subtours,
        persons_merged,
        destination_sample_df,
        want_logsums,
        model_settings,
        skim_dict,
        destination_size_terms,
        estimator=estimator,
        chunk_size=chunk_size,
        trace_label=tracing.extend_trace_label(trace_label, 'simulate'))

    if estimator:
        estimator.write_choices(choices_df['choice'])
        choices_df['choice'] = estimator.get_survey_values(choices_df['choice'],
                                                           'tours', 'destination')
        estimator.write_override_choices(choices_df['choice'])
        estimator.end_estimation()

    subtours[destination_column_name] = choices_df['choice']
    assign_in_place(tours, subtours[[destination_column_name]])

    if want_logsums:
        subtours[logsum_column_name] = choices_df['logsum']
        assign_in_place(tours, subtours[[logsum_column_name]])

    pipeline.replace_table("tours", tours)

    if want_sample_table:
        # FIXME - sample_table
        assert len(destination_sample_df.index.unique()) == len(choices_df)
        destination_sample_df.set_index(model_settings['ALT_DEST_COL_NAME'],
                                        append=True, inplace=True)
        pipeline.extend_table(sample_table_name, destination_sample_df)

    tracing.print_summary(destination_column_name,
                          subtours[destination_column_name],
                          describe=True)

    if trace_hh_id:
        tracing.trace_df(tours, label='atwork_subtour_destination', columns=['destination'])
def __init__(self, model_settings, num_processes, shared_data=None, shared_data_lock=None):
    """
    Presence of shared_data is used as a flag for multiprocessing
    If we are multiprocessing, shared_data should be a multiprocessing.RawArray buffer
    to aggregate modeled_size across all sub-processes, and shared_data_lock should be
    a multiprocessing.Lock object to coordinate access to that buffer.

    Optionally load saved shadow_prices from data_dir if config setting use_shadow_pricing
    and shadow_setting LOAD_SAVED_SHADOW_PRICES are both True

    Parameters
    ----------
    model_settings : dict
    num_processes : int
    shared_data : numpy array wrapping multiprocessing.RawArray or None (if single process)
    shared_data_lock : multiprocessing.Lock or None (if single process)
    """

    self.num_processes = num_processes
    self.use_shadow_pricing = bool(config.setting('use_shadow_pricing'))
    self.saved_shadow_price_file_path = None  # set by read_saved_shadow_prices if loaded

    self.model_selector = model_settings['MODEL_SELECTOR']

    full_model_run = config.setting('households_sample_size') == 0
    if self.use_shadow_pricing and not full_model_run:
        logger.warning("deprecated combination of use_shadow_pricing and not full_model_run")

    if (self.num_processes > 1) and not config.setting('fail_fast'):
        # if we are multiprocessing, then fail_fast should be true
        # or we will wait forever for failed processes
        logger.warning("deprecated combination of multiprocessing and not fail_fast")
        raise RuntimeError("Shadow pricing requires fail_fast setting in multiprocessing mode")

    self.segment_ids = model_settings['SEGMENT_IDS']

    # - modeled_size (set by call to set_choices/synchronize_choices)
    self.modeled_size = None

    if self.use_shadow_pricing:
        self.shadow_settings = config.read_model_settings('shadow_pricing.yaml')

        for k in self.shadow_settings:
            logger.debug("shadow_settings %s: %s" % (k, self.shadow_settings.get(k)))

    # - destination_size_table (desired_size)
    self.desired_size = inject.get_table(size_table_name(self.model_selector)).to_frame()
    self.desired_size = self.desired_size.sort_index()
    assert self.desired_size.index.is_monotonic_increasing, \
        f"{size_table_name(self.model_selector)} not is_monotonic_increasing"

    # - shared_data
    if shared_data is not None:
        assert shared_data.shape[0] == self.desired_size.shape[0]
        assert shared_data.shape[1] == self.desired_size.shape[1] + 1  # tally column
        assert shared_data_lock is not None
    self.shared_data = shared_data
    self.shared_data_lock = shared_data_lock

    # - load saved shadow_prices (if available) and set max_iterations accordingly
    if self.use_shadow_pricing:
        self.shadow_prices = None
        self.shadow_price_method = self.shadow_settings['SHADOW_PRICE_METHOD']
        assert self.shadow_price_method in ['daysim', 'ctramp']

        if self.shadow_settings['LOAD_SAVED_SHADOW_PRICES']:
            # read_saved_shadow_prices logs error and returns None if file not found
            self.shadow_prices = self.read_saved_shadow_prices(model_settings)

        if self.shadow_prices is None:
            self.max_iterations = self.shadow_settings.get('MAX_ITERATIONS', 5)
        else:
            self.max_iterations = self.shadow_settings.get('MAX_ITERATIONS_SAVED', 1)

        # initial_shadow_price if we did not load
        if self.shadow_prices is None:
            # initial value depends on method
            initial_shadow_price = 1.0 if self.shadow_price_method == 'ctramp' else 0.0
            self.shadow_prices = \
                pd.DataFrame(data=initial_shadow_price,
                             columns=self.desired_size.columns,
                             index=self.desired_size.index)
    else:
        self.max_iterations = 1

    self.num_fail = pd.DataFrame(index=self.desired_size.columns)
    self.max_abs_diff = pd.DataFrame(index=self.desired_size.columns)
    self.max_rel_diff = pd.DataFrame(index=self.desired_size.columns)
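# A minimal sketch (assumption-labeled, not the class's actual helper) of how a flat
# multiprocessing.RawArray could be wrapped as the 2-D numpy array whose shape __init__
# asserts above, with shared_data_lock guarding cross-process updates. The names
# raw_buffer, num_zones, and num_segments are hypothetical.
def _demo_wrap_shared_data(raw_buffer, shared_data_lock, num_zones, num_segments):
    import numpy as np

    # zero-copy view over the shared buffer; +1 column is the tally column
    data = np.frombuffer(raw_buffer, dtype=np.int64).reshape((num_zones, num_segments + 1))
    with shared_data_lock:
        data[:, -1] += 1  # e.g. bump the tally column used as a checkin semaphore
    return data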
def run_trip_destination(
        trips,
        tours_merged,
        chunk_size, trace_hh_id,
        trace_label):
    """
    trip destination - main functionality separated from model step so it can be called iteratively

    Run the trip_destination model, assigning destinations for each (intermediate) trip
    (last trips already have a destination - either the tour primary destination or Home)

    Set trip destination and origin columns, and a boolean failed flag for any failed trips
    (destination for flagged failed trips will be set to -1)

    Parameters
    ----------
    trips
    tours_merged
    chunk_size
    trace_hh_id
    trace_label

    Returns
    -------

    """

    model_settings = config.read_model_settings('trip_destination.yaml')
    preprocessor_settings = model_settings.get('preprocessor', None)
    logsum_settings = config.read_model_settings(model_settings['LOGSUM_SETTINGS'])

    land_use = inject.get_table('land_use')
    size_terms = inject.get_injectable('size_terms')

    # - initialize trip origin and destination to those of half-tour
    # (we will sequentially adjust intermediate trips origin and destination as we choose them)
    tour_destination = reindex(tours_merged.destination, trips.tour_id).astype(int)
    tour_origin = reindex(tours_merged.origin, trips.tour_id).astype(int)
    trips['destination'] = np.where(trips.outbound, tour_destination, tour_origin)
    trips['origin'] = np.where(trips.outbound, tour_origin, tour_destination)
    trips['failed'] = False

    trips = trips.sort_index()
    trips['next_trip_id'] = np.roll(trips.index, -1)
    trips.next_trip_id = trips.next_trip_id.where(trips.trip_num < trips.trip_count, 0)

    # - filter tours_merged (AFTER copying destination and origin columns to trips)
    # tours_merged is used for logsums, we filter it here upfront to save space and time
    tours_merged_cols = logsum_settings['TOURS_MERGED_CHOOSER_COLUMNS']
    if 'REDUNDANT_TOURS_MERGED_CHOOSER_COLUMNS' in model_settings:
        redundant_cols = model_settings['REDUNDANT_TOURS_MERGED_CHOOSER_COLUMNS']
        tours_merged_cols = [c for c in tours_merged_cols if c not in redundant_cols]
    tours_merged = tours_merged[tours_merged_cols]

    # - skims
    skims = wrap_skims(model_settings)

    # - size_terms and alternatives
    alternatives = tour_destination_size_terms(land_use, size_terms, 'trip')

    # DataFrameMatrix allows us to treat the dataframe as a virtual 2-D array,
    # indexed by TAZ, purpose
    # e.g. size_terms.get(df.dest_taz, df.purpose)
    # returns a series of size_terms for each chooser's dest_taz and purpose,
    # with the chooser index
    size_term_matrix = DataFrameMatrix(alternatives)

    # don't need size terms in alternatives, just TAZ index
    alternatives = alternatives.drop(alternatives.columns, axis=1)
    alternatives.index.name = model_settings['ALT_DEST']

    # - process intermediate trips in ascending trip_num order
    intermediate = trips.trip_num < trips.trip_count
    if intermediate.any():

        first_trip_num = trips[intermediate].trip_num.min()
        last_trip_num = trips[intermediate].trip_num.max()

        # iterate over trips in ascending trip_num order
        for trip_num in range(first_trip_num, last_trip_num + 1):

            nth_trips = trips[intermediate & (trips.trip_num == trip_num)]
            nth_trace_label = tracing.extend_trace_label(trace_label, 'trip_num_%s' % trip_num)

            # - annotate nth_trips
            if preprocessor_settings:
                expressions.assign_columns(
                    df=nth_trips,
                    model_settings=preprocessor_settings,
                    locals_dict=config.get_model_constants(model_settings),
                    trace_label=nth_trace_label)

            logger.info("Running %s with %d trips", nth_trace_label, nth_trips.shape[0])

            # - choose destination for nth_trips, segmented by primary_purpose
            choices_list = []
            for primary_purpose, trips_segment in nth_trips.groupby('primary_purpose'):
                choices = choose_trip_destination(
                    primary_purpose,
                    trips_segment,
                    alternatives,
                    tours_merged,
                    model_settings,
                    size_term_matrix, skims,
                    chunk_size, trace_hh_id,
                    trace_label=tracing.extend_trace_label(nth_trace_label, primary_purpose))
                choices_list.append(choices)

            destinations = pd.concat(choices_list)

            failed_trip_ids = nth_trips.index.difference(destinations.index)
            if failed_trip_ids.any():
                logger.warning("%s sidelining %s trips without viable destination alternatives" %
                               (nth_trace_label, failed_trip_ids.shape[0]))
                next_trip_ids = nth_trips.next_trip_id.reindex(failed_trip_ids)
                trips.loc[failed_trip_ids, 'failed'] = True
                trips.loc[failed_trip_ids, 'destination'] = -1
                trips.loc[next_trip_ids, 'origin'] = trips.loc[failed_trip_ids].origin.values

            # - assign choices to these trips destinations and to next trips origin
            assign_in_place(trips, destinations.to_frame('destination'))
            destinations.index = nth_trips.next_trip_id.reindex(destinations.index)
            assign_in_place(trips, destinations.to_frame('origin'))

    del trips['next_trip_id']

    return trips
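# A minimal sketch (illustration only, with made-up numbers) of the DataFrameMatrix lookup
# semantics used above: a (zone x purpose) dataframe treated as a virtual 2-D array, so that
# each chooser's (dest_taz, purpose) pair pulls one size term, returned with the chooser index.
def _demo_size_term_lookup():
    import pandas as pd

    size_terms = pd.DataFrame({'work': [10.0, 20.0], 'shopping': [5.0, 2.0]}, index=[1, 2])
    dest_taz = pd.Series([2, 1], index=['trip_a', 'trip_b'])
    purpose = pd.Series(['work', 'shopping'], index=dest_taz.index)

    # positional row/column lookups, vectorized over all choosers
    rows = size_terms.index.get_indexer(dest_taz)
    cols = size_terms.columns.get_indexer(purpose)
    # trip_a -> 20.0 (TAZ 2, work), trip_b -> 5.0 (TAZ 1, shopping)
    return pd.Series(size_terms.to_numpy()[rows, cols], index=dest_taz.index)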
def transit_pass_subsidy(persons_merged, persons, chunk_size, trace_hh_id):
    """
    Transit pass subsidy model.
    """

    trace_label = 'transit_pass_subsidy'
    model_settings_file_name = 'transit_pass_subsidy.yaml'

    choosers = persons_merged.to_frame()
    logger.info("Running %s with %d persons", trace_label, len(choosers))

    model_settings = config.read_model_settings(model_settings_file_name)
    estimator = estimation.manager.begin_estimation('transit_pass_subsidy')

    constants = config.get_model_constants(model_settings)

    # - preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_d = {}
        if constants is not None:
            locals_d.update(constants)

        expressions.assign_columns(
            df=choosers,
            model_settings=preprocessor_settings,
            locals_dict=locals_d,
            trace_label=trace_label)

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients_df = simulate.read_model_coefficients(model_settings)
    model_spec = simulate.eval_coefficients(model_spec, coefficients_df, estimator)

    nest_spec = config.get_logit_model_settings(model_settings)

    if estimator:
        estimator.write_model_settings(model_settings, model_settings_file_name)
        estimator.write_spec(model_settings)
        estimator.write_coefficients(coefficients_df)
        estimator.write_choosers(choosers)

    choices = simulate.simple_simulate(
        choosers=choosers,
        spec=model_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='transit_pass_subsidy',
        estimator=estimator)

    if estimator:
        estimator.write_choices(choices)
        choices = estimator.get_survey_values(choices, 'persons', 'transit_pass_subsidy')
        estimator.write_override_choices(choices)
        estimator.end_estimation()

    persons = persons.to_frame()
    persons['transit_pass_subsidy'] = choices.reindex(persons.index)

    pipeline.replace_table("persons", persons)

    tracing.print_summary('transit_pass_subsidy', persons.transit_pass_subsidy,
                          value_counts=True)

    if trace_hh_id:
        tracing.trace_df(persons, label=trace_label, warn_if_empty=True)
def atwork_subtour_destination_logsums(persons_merged,
                                       land_use, skim_dict, skim_stack,
                                       atwork_subtour_destination_sample,
                                       configs_dir, chunk_size, trace_hh_id):
    """
    add logsum column to existing workplace_location_sample table

    logsum is calculated by running the mode_choice model for each sample (person, dest_taz) pair
    in workplace_location_sample, and computing the logsum of all the utilities

    +-------+--------------+----------------+------------+----------------+
    | PERID | dest_TAZ     | rand           | pick_count | logsum (added) |
    +=======+==============+================+============+================+
    | 23750 | 14           | 0.565502716034 | 4          | 1.85659498857  |
    +-------+--------------+----------------+------------+----------------+
    | 23750 | 16           | 0.711135838871 | 6          | 1.92315598631  |
    +-------+--------------+----------------+------------+----------------+
    | ...   |              |                |            |                |
    +-------+--------------+----------------+------------+----------------+
    | 23751 | 12           | 0.408038878552 | 1          | 2.40612135416  |
    +-------+--------------+----------------+------------+----------------+
    | 23751 | 14           | 0.972732479292 | 2          | 1.44009018355  |
    +-------+--------------+----------------+------------+----------------+
    """

    trace_label = 'atwork_subtour_destination_logsums'

    model_settings = inject.get_injectable('atwork_subtour_destination_settings')
    logsums_spec = mode_choice_logsums_spec(configs_dir, 'work')

    alt_col_name = model_settings["ALT_COL_NAME"]
    chooser_col_name = 'workplace_taz'

    # FIXME - just using settings from tour_mode_choice
    logsum_settings = config.read_model_settings(configs_dir, 'tour_mode_choice.yaml')

    persons_merged = persons_merged.to_frame()
    atwork_subtour_destination_sample = atwork_subtour_destination_sample.to_frame()

    # FIXME - MEMORY HACK - only include columns actually used in spec
    chooser_columns = model_settings['LOGSUM_CHOOSER_COLUMNS']
    persons_merged = persons_merged[chooser_columns]

    # merge persons into tours
    choosers = pd.merge(atwork_subtour_destination_sample,
                        persons_merged,
                        left_on='person_id',
                        right_index=True,
                        how="left")

    choosers['in_period'] = skim_time_period_label(model_settings['IN_PERIOD'])
    choosers['out_period'] = skim_time_period_label(model_settings['OUT_PERIOD'])

    # FIXME - should do this in expression file?
    choosers['dest_topology'] = reindex(land_use.TOPOLOGY, choosers[alt_col_name])
    choosers['dest_density_index'] = reindex(land_use.density_index, choosers[alt_col_name])

    logger.info("Running atwork_subtour_destination_logsums with %s rows" % len(choosers))

    tracing.dump_df(DUMP, persons_merged, trace_label, 'persons_merged')
    tracing.dump_df(DUMP, choosers, trace_label, 'choosers')

    logsums = compute_logsums(
        choosers, logsums_spec, logsum_settings,
        skim_dict, skim_stack, chooser_col_name, alt_col_name,
        chunk_size, trace_hh_id, trace_label)

    # "add_column series should have an index matching the table to which it is being added"
    # when the index has duplicates, however, in the special case that the series index exactly
    # matches the table index, then the series value order is preserved. logsums does have a
    # matching index, since atwork_subtour_destination_sample was on the left side of the
    # de-duplicating merge
    inject.add_column("atwork_subtour_destination_sample", "mode_choice_logsum", logsums)
def add_size_tables():
    """
    inject tour_destination_size_terms tables for each model_selector (e.g. school, workplace)

    Size tables are pandas dataframes with location counts for model_selector by zone and segment
    tour_destination_size_terms

    if using shadow pricing, we scale size_table counts to sample population
    (in which case, they have to be created while single-process)

    Scaling is problematic as it breaks household result replicability across sample sizes

    It also changes the magnitude of the size terms so if they are used as utilities in
    expression files, their importance will diminish relative to other utilities as the
    sample size decreases.

    Scaling makes most sense for a full sample in conjunction with shadow pricing, where
    shadow prices can be adjusted iteratively to bring modelled counts into line with
    desired (size table) counts.
    """

    use_shadow_pricing = bool(config.setting('use_shadow_pricing'))

    shadow_settings = config.read_model_settings('shadow_pricing.yaml')
    shadow_pricing_models = shadow_settings['shadow_pricing_models']

    # probably ought not scale if not shadow_pricing (breaks partial sample replicability)
    # but this allows compatibility with existing CTRAMP behavior...
    scale_size_table = shadow_settings.get('SCALE_SIZE_TABLE', False)

    if shadow_pricing_models is None:
        logger.warning('shadow_pricing_models list not found in shadow_pricing settings')
        return

    # shadow_pricing_models is dict of {<model_selector>: <model_name>}
    # since these are scaled to model size, they have to be created while single-process

    for model_selector, model_name in iteritems(shadow_pricing_models):

        model_settings = config.read_model_settings(model_name)

        assert model_selector == model_settings['MODEL_SELECTOR']

        segment_ids = model_settings['SEGMENT_IDS']
        chooser_table_name = model_settings['CHOOSER_TABLE_NAME']
        chooser_segment_column = model_settings['CHOOSER_SEGMENT_COLUMN_NAME']

        choosers_df = inject.get_table(chooser_table_name).to_frame()
        if 'CHOOSER_FILTER_COLUMN_NAME' in model_settings:
            choosers_df = \
                choosers_df[choosers_df[model_settings['CHOOSER_FILTER_COLUMN_NAME']] != 0]

        # - raw_desired_size
        land_use = inject.get_table('land_use')
        size_terms = inject.get_injectable('size_terms')
        raw_size = tour_destination_size_terms(land_use, size_terms, model_selector)
        assert set(raw_size.columns) == set(segment_ids.keys())

        if use_shadow_pricing or scale_size_table:

            inject.add_table('raw_' + size_table_name(model_selector), raw_size)

            # - scale size_table counts to sample population
            # scaled_size = zone_size * (total_segment_modeled / total_segment_desired)

            # segment scale factor (modeled / desired) keyed by segment_name
            segment_scale_factors = {}
            for c in raw_size:
                # number of zone demographics desired destination choices
                segment_desired_size = raw_size[c].astype(np.float64).sum()

                # number of synthetic population choosers in segment
                segment_chooser_count = \
                    (choosers_df[chooser_segment_column] == segment_ids[c]).sum()

                segment_scale_factors[c] = \
                    segment_chooser_count / np.maximum(segment_desired_size, 1)

                logger.info("add_desired_size_tables %s segment %s "
                            "desired %s modeled %s scale_factor %s" %
                            (chooser_table_name, c,
                             segment_desired_size,
                             segment_chooser_count,
                             segment_scale_factors[c]))

            # FIXME - should we be rounding?
            scaled_size = (raw_size * segment_scale_factors).round()
        else:
            scaled_size = raw_size

        inject.add_table(size_table_name(model_selector), scaled_size)
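# A minimal worked example (hypothetical numbers, not part of the model) of the scaling
# performed above: scaled_size = zone_size * (total_segment_modeled / total_segment_desired).
def _demo_scale_size_table():
    import numpy as np
    import pandas as pd

    raw_size = pd.DataFrame({'work_low': [100.0, 300.0]}, index=[1, 2])  # desired total = 400
    segment_chooser_count = 100  # modeled choosers in this segment
    scale_factor = segment_chooser_count / np.maximum(raw_size['work_low'].sum(), 1)
    return (raw_size * scale_factor).round()  # work_low becomes [25.0, 75.0]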
def write_trip_matrices(trips, network_los):
    """
    Write trip matrices step.

    Adds boolean columns to local trips table via annotation expressions,
    then aggregates trip counts and writes OD matrices to OMX.  Save annotated
    trips table to pipeline if desired.

    Writes taz trip tables for one and two zone system.  Writes taz and tap
    trip tables for three zone system.  Add ``is_tap:True`` to the settings file
    to identify an output matrix as tap level trips as opposed to taz level trips.

    For one zone system, uses the land use table for the set of possible tazs.
    For two zone system, uses the taz skim zone names for the set of possible tazs.
    For three zone system, uses the taz skim zone names for the set of possible tazs
    and uses the tap skim zone names for the set of possible taps.
    """

    model_settings = config.read_model_settings('write_trip_matrices.yaml')
    trips_df = annotate_trips(trips, network_los, model_settings)

    if bool(model_settings.get('SAVE_TRIPS_TABLE')):
        pipeline.replace_table('trips', trips_df)

    if 'parking_location' in config.setting('models'):
        parking_settings = config.read_model_settings('parking_location_choice.yaml')
        parking_taz_col_name = parking_settings['ALT_DEST_COL_NAME']
        if parking_taz_col_name in trips_df:
            trips_df.loc[trips_df[parking_taz_col_name] > 0, 'destination'] = \
                trips_df[parking_taz_col_name]
        # also need to address the return trip

    # write matrices by zone system type
    if network_los.zone_system == los.ONE_ZONE:  # taz trips written to taz matrices

        logger.info('aggregating trips one zone...')
        aggregate_trips = trips_df.groupby(['origin', 'destination'], sort=False).sum()

        # use the average household weight for all trips in the origin destination pair
        hh_weight_col = model_settings.get('HH_EXPANSION_WEIGHT_COL')
        aggregate_weight = \
            trips_df[['origin', 'destination', hh_weight_col]].groupby(
                ['origin', 'destination'], sort=False).mean()
        aggregate_trips[hh_weight_col] = aggregate_weight[hh_weight_col]

        orig_vals = aggregate_trips.index.get_level_values('origin')
        dest_vals = aggregate_trips.index.get_level_values('destination')

        # use the land use table for the set of possible tazs
        zone_index = pipeline.get_table('land_use').index
        assert all(zone in zone_index for zone in orig_vals)
        assert all(zone in zone_index for zone in dest_vals)

        _, orig_index = zone_index.reindex(orig_vals)
        _, dest_index = zone_index.reindex(dest_vals)

        write_matrices(aggregate_trips, zone_index, orig_index, dest_index, model_settings)

    elif network_los.zone_system == los.TWO_ZONE:  # maz trips written to taz matrices

        logger.info('aggregating trips two zone...')
        trips_df["otaz"] = \
            pipeline.get_table('land_use').reindex(trips_df['origin']).TAZ.tolist()
        trips_df["dtaz"] = \
            pipeline.get_table('land_use').reindex(trips_df['destination']).TAZ.tolist()
        aggregate_trips = trips_df.groupby(['otaz', 'dtaz'], sort=False).sum()

        # use the average household weight for all trips in the origin destination pair
        hh_weight_col = model_settings.get('HH_EXPANSION_WEIGHT_COL')
        aggregate_weight = \
            trips_df[['otaz', 'dtaz', hh_weight_col]].groupby(
                ['otaz', 'dtaz'], sort=False).mean()
        aggregate_trips[hh_weight_col] = aggregate_weight[hh_weight_col]

        orig_vals = aggregate_trips.index.get_level_values('otaz')
        dest_vals = aggregate_trips.index.get_level_values('dtaz')

        # use the taz skim zone names for the set of possible tazs
        zone_index = pd.Index(network_los.skims_info['taz'].offset_map,
                              name=network_los.skims_info['taz'].offset_map_name)
        assert all(zone in zone_index for zone in orig_vals)
        assert all(zone in zone_index for zone in dest_vals)

        _, orig_index = zone_index.reindex(orig_vals)
        _, dest_index = zone_index.reindex(dest_vals)

        write_matrices(aggregate_trips, zone_index, orig_index, dest_index, model_settings)

    elif network_los.zone_system == los.THREE_ZONE:  # maz trips written to taz and tap matrices

        logger.info('aggregating trips three zone taz...')
        trips_df["otaz"] = \
            pipeline.get_table('land_use').reindex(trips_df['origin']).TAZ.tolist()
        trips_df["dtaz"] = \
            pipeline.get_table('land_use').reindex(trips_df['destination']).TAZ.tolist()
        aggregate_trips = trips_df.groupby(['otaz', 'dtaz'], sort=False).sum()

        # use the average household weight for all trips in the origin destination pair
        hh_weight_col = model_settings.get('HH_EXPANSION_WEIGHT_COL')
        aggregate_weight = \
            trips_df[['otaz', 'dtaz', hh_weight_col]].groupby(
                ['otaz', 'dtaz'], sort=False).mean()
        aggregate_trips[hh_weight_col] = aggregate_weight[hh_weight_col]

        orig_vals = aggregate_trips.index.get_level_values('otaz')
        dest_vals = aggregate_trips.index.get_level_values('dtaz')

        # use the taz skim zone names for the set of possible tazs
        zone_index = pd.Index(network_los.skims_info['taz'].offset_map,
                              name=network_los.skims_info['taz'].offset_map_name)
        assert all(zone in zone_index for zone in orig_vals)
        assert all(zone in zone_index for zone in dest_vals)

        _, orig_index = zone_index.reindex(orig_vals)
        _, dest_index = zone_index.reindex(dest_vals)

        write_matrices(aggregate_trips, zone_index, orig_index, dest_index, model_settings)

        logger.info('aggregating trips three zone tap...')
        aggregate_trips = trips_df.groupby(['btap', 'atap'], sort=False).sum()

        # use the average household weight for all trips in the origin destination pair
        hh_weight_col = model_settings.get('HH_EXPANSION_WEIGHT_COL')
        aggregate_weight = \
            trips_df[['btap', 'atap', hh_weight_col]].groupby(
                ['btap', 'atap'], sort=False).mean()
        aggregate_trips[hh_weight_col] = aggregate_weight[hh_weight_col]

        orig_vals = aggregate_trips.index.get_level_values('btap')
        dest_vals = aggregate_trips.index.get_level_values('atap')

        # use the tap skim zone names for the set of possible taps
        zone_index = pd.Index(network_los.skims_info['tap'].offset_map,
                              name=network_los.skims_info['tap'].offset_map_name)
        assert all(zone in zone_index for zone in orig_vals)
        assert all(zone in zone_index for zone in dest_vals)

        _, orig_index = zone_index.reindex(orig_vals)
        _, dest_index = zone_index.reindex(dest_vals)

        write_matrices(aggregate_trips, zone_index, orig_index, dest_index, model_settings, True)
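# A minimal sketch (illustration only, with made-up zones) of the aggregation pattern used
# above: trips grouped by (origin, destination) are positioned in a dense zone x zone matrix
# via the integer indexers returned by zone_index.reindex().
def _demo_od_matrix():
    import numpy as np
    import pandas as pd

    zone_index = pd.Index([101, 102, 103])
    trips = pd.DataFrame({'origin': [101, 101, 103], 'destination': [102, 102, 101]})
    aggregate = trips.groupby(['origin', 'destination']).size()

    _, orig_index = zone_index.reindex(aggregate.index.get_level_values('origin'))
    _, dest_index = zone_index.reindex(aggregate.index.get_level_values('destination'))

    matrix = np.zeros((len(zone_index), len(zone_index)))
    matrix[orig_index, dest_index] = aggregate.to_numpy()
    return matrix  # matrix[0, 1] == 2.0 and matrix[2, 0] == 1.0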
def atwork_subtour_scheduling(
        tours,
        persons_merged,
        tdd_alts,
        skim_dict,
        chunk_size,
        trace_hh_id):
    """
    This model predicts the departure time and duration of each activity for at-work subtours
    """

    trace_label = 'atwork_subtour_scheduling'
    model_settings = config.read_model_settings('tour_scheduling_atwork.yaml')
    model_spec = simulate.read_model_spec(file_name='tour_scheduling_atwork.csv')

    persons_merged = persons_merged.to_frame()

    tours = tours.to_frame()
    subtours = tours[tours.tour_category == 'atwork']

    # - if no atwork subtours
    if subtours.shape[0] == 0:
        tracing.no_results(trace_label)
        return

    logger.info("Running %s with %d tours", trace_label, len(subtours))

    # preprocessor
    constants = config.get_model_constants(model_settings)
    od_skim_wrapper = skim_dict.wrap('origin', 'destination')
    do_skim_wrapper = skim_dict.wrap('destination', 'origin')
    skims = {
        "od_skims": od_skim_wrapper,
        "do_skims": do_skim_wrapper,
    }
    annotate_preprocessors(
        subtours, constants, skims,
        model_settings, trace_label)

    # parent_tours table with columns ['tour_id', 'tdd'] index = tour_id
    parent_tour_ids = subtours.parent_tour_id.astype(int).unique()
    parent_tours = pd.DataFrame({'tour_id': parent_tour_ids}, index=parent_tour_ids)
    parent_tours = parent_tours.merge(tours[['tdd']], left_index=True, right_index=True)

    tdd_choices = vectorize_subtour_scheduling(
        parent_tours,
        subtours,
        persons_merged,
        tdd_alts, model_spec,
        model_settings,
        chunk_size=chunk_size,
        trace_label=trace_label)

    assign_in_place(tours, tdd_choices)
    pipeline.replace_table("tours", tours)

    if trace_hh_id:
        tracing.trace_df(tours[tours.tour_category == 'atwork'],
                         label="atwork_subtour_scheduling",
                         slicer='person_id',
                         index_label='tour_id',
                         columns=None)

    if DUMP:
        subtours = tours[tours.tour_category == 'atwork']
        parent_tours = tours[tours.index.isin(subtours.parent_tour_id)]

        tracing.dump_df(DUMP, subtours, trace_label, 'sub_tours')
        tracing.dump_df(DUMP, parent_tours, trace_label, 'parent_tours')

        parent_tours['parent_tour_id'] = parent_tours.index
        subtours = pd.concat([parent_tours, subtours])
        tracing.dump_df(DUMP,
                        tt.tour_map(parent_tours, subtours, tdd_alts,
                                    persons_id_col='parent_tour_id'),
                        trace_label, 'tour_map')
def trip_scheduling(trips, tours, chunk_size, trace_hh_id):
    """
    Trip scheduling assigns depart times for trips within the start, end limits of the tour.

    The algorithm is simplistic:

    The first outbound trip starts at the tour start time, and subsequent outbound trips are
    processed in trip_num order, to ensure that subsequent trips do not depart before the
    trip that precedes them.

    Inbound trips are handled similarly, except in reverse order, starting with the last trip,
    and working backwards to ensure that inbound trips do not depart after the trip that
    succeeds them.

    The probability spec assigns probabilities for depart times, but those possible departs must
    be clipped to disallow depart times outside the tour limits, the departs of prior trips, and
    in the case of work tours, the start/end times of any atwork subtours.

    Scheduling can fail if the probability table assigns zero probabilities to all the available
    depart times in a trip's depart window. (This could be avoided by giving every window a small
    probability, rather than zero, but the existing mtctm1 prob spec does not do this. I believe
    this is due to its having been generated from a small household travel survey sample that
    lacked any departs for some time periods.)

    Rescheduling the trips that fail (along with their inbound or outbound leg-mates) can
    sometimes fix this problem, if it was caused by an earlier trip's depart choice blocking
    a subsequent trip's ability to schedule a depart within the resulting window. But it can
    also happen if a tour is very short (e.g. one time period) and the prob spec assigns zero
    probability to that depart hour.

    Therefore we need to handle trips that could not be scheduled. There are two ways
    (at least) to solve this problem:

    1) CHOOSE_MOST_INITIAL
    simply assign a depart time to the trip, even if it has a zero probability. It makes
    most sense, in this case, to assign the 'most initial' depart time, so that subsequent
    trips are minimally impacted. This can be done in the final iteration, thus affecting
    only the trips that could not be scheduled by the standard approach

    2) drop_and_cleanup
    drop trips that could not be scheduled, and adjust their leg mates, as is done for failed
    trips in trip_destination.

    For now we are choosing among these approaches with a manifest constant, but this could
    be made a model setting...
    """
    trace_label = "trip_scheduling"

    model_settings = config.read_model_settings('trip_scheduling.yaml')
    assert 'DEPART_ALT_BASE' in model_settings

    failfix = model_settings.get(FAILFIX, FAILFIX_DEFAULT)

    probs_spec = pd.read_csv(config.config_file_path('trip_scheduling_probs.csv'), comment='#')

    trips_df = trips.to_frame()
    tours = tours.to_frame()

    # add tour-based chunk_id so we can chunk all trips in tour together
    trips_df['chunk_id'] = \
        reindex(pd.Series(list(range(tours.shape[0])), tours.index), trips_df.tour_id)

    max_iterations = model_settings.get('MAX_ITERATIONS', 1)
    assert max_iterations > 0

    choices_list = []
    i = 0
    while (i < max_iterations) and not trips_df.empty:

        i += 1
        last_iteration = (i == max_iterations)

        trace_label_i = tracing.extend_trace_label(trace_label, "i%s" % i)
        logger.info("%s scheduling %s trips", trace_label_i, trips_df.shape[0])

        choices = \
            run_trip_scheduling(
                trips_df,
                tours,
                probs_spec,
                model_settings,
                last_iteration=last_iteration,
                trace_hh_id=trace_hh_id,
                chunk_size=chunk_size,
                trace_label=trace_label_i)

        # boolean series of trips whose individual trip scheduling failed
        failed = choices.reindex(trips_df.index).isnull()
        logger.info("%s %s failed", trace_label_i, failed.sum())

        if not last_iteration:
            # boolean series of trips whose leg scheduling failed
            failed_cohorts = failed_trip_cohorts(trips_df, failed)
            trips_df = trips_df[failed_cohorts]
            choices = choices[~failed_cohorts]

        choices_list.append(choices)

    trips_df = trips.to_frame()

    choices = pd.concat(choices_list)
    choices = choices.reindex(trips_df.index)
    if choices.isnull().any():
        logger.warning("%s of %s trips could not be scheduled after %s iterations" %
                       (choices.isnull().sum(), trips_df.shape[0], i))

        if failfix != FAILFIX_DROP_AND_CLEANUP:
            raise RuntimeError("%s setting '%s' not enabled in settings" %
                               (FAILFIX, FAILFIX_DROP_AND_CLEANUP))

        trips_df['failed'] = choices.isnull()
        trips_df = cleanup_failed_trips(trips_df)
        choices = choices.reindex(trips_df.index)

    trips_df['depart'] = choices

    assert not trips_df.depart.isnull().any()

    pipeline.replace_table("trips", trips_df)
def trip_mode_choice(trips, tours_merged, network_los, chunk_size, trace_hh_id): """ Trip mode choice - compute trip_mode (same values as for tour_mode) for each trip. Modes for each primary tour putpose are calculated separately because they have different coefficient values (stored in trip_mode_choice_coefficients.csv coefficient file.) Adds trip_mode column to trip table """ trace_label = 'trip_mode_choice' model_settings_file_name = 'trip_mode_choice.yaml' model_settings = config.read_model_settings(model_settings_file_name) logsum_column_name = model_settings.get('MODE_CHOICE_LOGSUM_COLUMN_NAME') mode_column_name = 'trip_mode' trips_df = trips.to_frame() logger.info("Running %s with %d trips", trace_label, trips_df.shape[0]) tours_merged = tours_merged.to_frame() tours_merged = tours_merged[model_settings['TOURS_MERGED_CHOOSER_COLUMNS']] tracing.print_summary('primary_purpose', trips_df.primary_purpose, value_counts=True) # - trips_merged - merge trips and tours_merged trips_merged = pd.merge(trips_df, tours_merged, left_on='tour_id', right_index=True, how="left") assert trips_merged.index.equals(trips.index) # setup skim keys assert ('trip_period' not in trips_merged) trips_merged['trip_period'] = network_los.skim_time_period_label( trips_merged.depart) orig_col = 'origin' dest_col = 'destination' constants = {} constants.update(config.get_model_constants(model_settings)) constants.update({'ORIGIN': orig_col, 'DESTINATION': dest_col}) skim_dict = network_los.get_default_skim_dict() odt_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=orig_col, dest_key=dest_col, dim3_key='trip_period') dot_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=dest_col, dest_key=orig_col, dim3_key='trip_period') od_skim_wrapper = skim_dict.wrap('origin', 'destination') skims = { "odt_skims": odt_skim_stack_wrapper, "dot_skims": dot_skim_stack_wrapper, "od_skims": od_skim_wrapper, } if network_los.zone_system == los.THREE_ZONE: # fixme - is this a lightweight object? 
tvpb = network_los.tvpb tvpb_logsum_odt = tvpb.wrap_logsum(orig_key=orig_col, dest_key=dest_col, tod_key='trip_period', segment_key='demographic_segment', cache_choices=True, trace_label=trace_label, tag='tvpb_logsum_odt') skims.update({ 'tvpb_logsum_odt': tvpb_logsum_odt, # 'tvpb_logsum_dot': tvpb_logsum_dot }) # TVPB constants can appear in expressions constants.update( network_los.setting('TVPB_SETTINGS.tour_mode_choice.CONSTANTS')) estimator = estimation.manager.begin_estimation('trip_mode_choice') if estimator: estimator.write_coefficients(model_settings=model_settings) estimator.write_coefficients_template(model_settings=model_settings) estimator.write_spec(model_settings) estimator.write_model_settings(model_settings, model_settings_file_name) model_spec = simulate.read_model_spec(file_name=model_settings['SPEC']) nest_spec = config.get_logit_model_settings(model_settings) choices_list = [] for primary_purpose, trips_segment in trips_merged.groupby( 'primary_purpose'): segment_trace_label = tracing.extend_trace_label( trace_label, primary_purpose) logger.info("trip_mode_choice tour_type '%s' (%s trips)" % ( primary_purpose, len(trips_segment.index), )) # name index so tracing knows how to slice assert trips_segment.index.name == 'trip_id' if network_los.zone_system == los.THREE_ZONE: tvpb_logsum_odt.extend_trace_label(primary_purpose) # tvpb_logsum_dot.extend_trace_label(primary_purpose) coefficients = simulate.get_segment_coefficients( model_settings, primary_purpose) locals_dict = {} locals_dict.update(constants) locals_dict.update(coefficients) expressions.annotate_preprocessors(trips_segment, locals_dict, skims, model_settings, segment_trace_label) if estimator: # write choosers after annotation estimator.write_choosers(trips_segment) locals_dict.update(skims) choices = mode_choice_simulate( choosers=trips_segment, spec=simulate.eval_coefficients(model_spec, coefficients, estimator), nest_spec=simulate.eval_nest_coefficients(nest_spec, coefficients, segment_trace_label), skims=skims, locals_d=locals_dict, chunk_size=chunk_size, mode_column_name=mode_column_name, logsum_column_name=logsum_column_name, trace_label=trace_label, trace_choice_name='trip_mode_choice', estimator=estimator) if trace_hh_id: # trace the coefficients tracing.trace_df(pd.Series(locals_dict), label=tracing.extend_trace_label( segment_trace_label, 'constants'), transpose=False, slicer='NONE') # so we can trace with annotations assign_in_place(trips_segment, choices) tracing.trace_df(trips_segment, label=tracing.extend_trace_label( segment_trace_label, 'trip_mode'), slicer='tour_id', index_label='tour_id', warn_if_empty=True) choices_list.append(choices) # FIXME - force garbage collection force_garbage_collect() choices_df = pd.concat(choices_list) # add cached tvpb_logsum tap choices for modes specified in tvpb_mode_path_types if network_los.zone_system == los.THREE_ZONE: tvpb_mode_path_types = model_settings.get('tvpb_mode_path_types') for mode, path_type in tvpb_mode_path_types.items(): skim_cache = tvpb_logsum_odt.cache[path_type] for c in skim_cache: dest_col = c if dest_col not in choices_df: choices_df[ dest_col] = np.nan if pd.api.types.is_numeric_dtype( skim_cache[c]) else '' choices_df[dest_col].where( choices_df[mode_column_name] != mode, skim_cache[c], inplace=True) if estimator: estimator.write_choices(choices_df.trip_mode) choices_df.trip_mode = estimator.get_survey_values( choices_df.trip_mode, 'trips', 'trip_mode') estimator.write_override_choices(choices_df.trip_mode) 
        estimator.end_estimation()

    # update trips table with choices (and optionally logsums)
    trips_df = trips.to_frame()
    assign_in_place(trips_df, choices_df)

    tracing.print_summary('tour_modes',
                          trips_merged.tour_mode, value_counts=True)

    tracing.print_summary('trip_mode_choice choices',
                          trips_df[mode_column_name], value_counts=True)

    assert not trips_df[mode_column_name].isnull().any()

    pipeline.replace_table("trips", trips_df)

    if trace_hh_id:
        tracing.trace_df(trips_df,
                         label=tracing.extend_trace_label(trace_label, 'trip_mode'),
                         slicer='trip_id',
                         index_label='trip_id',
                         warn_if_empty=True)
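# Illustrative sketch (standalone, not part of the model above): the
# Series.where call used to merge cached TVPB TAP choices back in keeps
# existing values where the condition holds and substitutes the cached value
# where it fails. The mode name, TAP ids, and column names here are invented
# toy data, not ActivitySim objects.
import pandas as pd

demo = pd.DataFrame({'trip_mode': ['WALK_TRN', 'DRIVE', 'WALK_TRN']})
cached_btap = pd.Series([101, 102, 103], index=demo.index)

demo['btap'] = pd.NA
# keep NA where trip_mode != 'WALK_TRN', otherwise take the cached TAP choice
demo['btap'] = demo['btap'].where(demo.trip_mode != 'WALK_TRN', cached_btap)
print(demo)   # rows 0 and 2 get TAPs 101 and 103; the DRIVE row stays NA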
def tour_mode_choice_simulate(tours, persons_merged, network_los, chunk_size, trace_hh_id): """ Tour mode choice simulate """ trace_label = 'tour_mode_choice' model_settings_file_name = 'tour_mode_choice.yaml' model_settings = config.read_model_settings(model_settings_file_name) logsum_column_name = model_settings.get('MODE_CHOICE_LOGSUM_COLUMN_NAME') mode_column_name = 'tour_mode' primary_tours = tours.to_frame() assert not (primary_tours.tour_category == 'atwork').any() logger.info("Running %s with %d tours" % (trace_label, primary_tours.shape[0])) tracing.print_summary('tour_types', primary_tours.tour_type, value_counts=True) persons_merged = persons_merged.to_frame() primary_tours_merged = pd.merge(primary_tours, persons_merged, left_on='person_id', right_index=True, how='left', suffixes=('', '_r')) constants = {} # model_constants can appear in expressions constants.update(config.get_model_constants(model_settings)) skim_dict = network_los.get_default_skim_dict() # setup skim keys orig_col_name = 'home_zone_id' dest_col_name = 'destination' out_time_col_name = 'start' in_time_col_name = 'end' odt_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=orig_col_name, dest_key=dest_col_name, dim3_key='out_period') dot_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=dest_col_name, dest_key=orig_col_name, dim3_key='in_period') odr_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=orig_col_name, dest_key=dest_col_name, dim3_key='in_period') dor_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=dest_col_name, dest_key=orig_col_name, dim3_key='out_period') od_skim_stack_wrapper = skim_dict.wrap(orig_col_name, dest_col_name) skims = { "odt_skims": odt_skim_stack_wrapper, "dot_skims": dot_skim_stack_wrapper, "odr_skims": odr_skim_stack_wrapper, # dot return skims for e.g. TNC bridge return fare "dor_skims": dor_skim_stack_wrapper, # odt return skims for e.g. TNC bridge return fare "od_skims": od_skim_stack_wrapper, 'orig_col_name': orig_col_name, 'dest_col_name': dest_col_name, 'out_time_col_name': out_time_col_name, 'in_time_col_name': in_time_col_name } if network_los.zone_system == los.THREE_ZONE: # fixme - is this a lightweight object? 
tvpb = network_los.tvpb tvpb_logsum_odt = tvpb.wrap_logsum(orig_key=orig_col_name, dest_key=dest_col_name, tod_key='out_period', segment_key='demographic_segment', cache_choices=True, trace_label=trace_label, tag='tvpb_logsum_odt') tvpb_logsum_dot = tvpb.wrap_logsum(orig_key=dest_col_name, dest_key=orig_col_name, tod_key='in_period', segment_key='demographic_segment', cache_choices=True, trace_label=trace_label, tag='tvpb_logsum_dot') skims.update({ 'tvpb_logsum_odt': tvpb_logsum_odt, 'tvpb_logsum_dot': tvpb_logsum_dot }) # TVPB constants can appear in expressions constants.update( network_los.setting('TVPB_SETTINGS.tour_mode_choice.CONSTANTS')) estimator = estimation.manager.begin_estimation('tour_mode_choice') if estimator: estimator.write_coefficients(model_settings=model_settings) estimator.write_coefficients_template(model_settings=model_settings) estimator.write_spec(model_settings) estimator.write_model_settings(model_settings, model_settings_file_name) # (run_tour_mode_choice_simulate writes choosers post-annotation) # FIXME should normalize handling of tour_type and tour_purpose # mtctm1 school tour_type includes univ, which has different coefficients from elementary and HS # we should either add this column when tours created or add univ to tour_types not_university = (primary_tours_merged.tour_type != 'school') | ~primary_tours_merged.is_university primary_tours_merged['tour_purpose'] = \ primary_tours_merged.tour_type.where(not_university, 'univ') choices_list = [] for tour_purpose, tours_segment in primary_tours_merged.groupby( 'tour_purpose'): logger.info("tour_mode_choice_simulate tour_type '%s' (%s tours)" % ( tour_purpose, len(tours_segment.index), )) if network_los.zone_system == los.THREE_ZONE: tvpb_logsum_odt.extend_trace_label(tour_purpose) tvpb_logsum_dot.extend_trace_label(tour_purpose) # name index so tracing knows how to slice assert tours_segment.index.name == 'tour_id' choices_df = run_tour_mode_choice_simulate( tours_segment, tour_purpose, model_settings, mode_column_name=mode_column_name, logsum_column_name=logsum_column_name, network_los=network_los, skims=skims, constants=constants, estimator=estimator, chunk_size=chunk_size, trace_label=tracing.extend_trace_label(trace_label, tour_purpose), trace_choice_name='tour_mode_choice') tracing.print_summary('tour_mode_choice_simulate %s choices_df' % tour_purpose, choices_df.tour_mode, value_counts=True) choices_list.append(choices_df) # FIXME - force garbage collection force_garbage_collect() choices_df = pd.concat(choices_list) # add cached tvpb_logsum tap choices for modes specified in tvpb_mode_path_types if network_los.zone_system == los.THREE_ZONE: tvpb_mode_path_types = model_settings.get('tvpb_mode_path_types') for mode, path_types in tvpb_mode_path_types.items(): for direction, skim in zip(['od', 'do'], [tvpb_logsum_odt, tvpb_logsum_dot]): path_type = path_types[direction] skim_cache = skim.cache[path_type] print( f"mode {mode} direction {direction} path_type {path_type}") for c in skim_cache: dest_col = f'{direction}_{c}' if dest_col not in choices_df: choices_df[ dest_col] = np.nan if pd.api.types.is_numeric_dtype( skim_cache[c]) else '' choices_df[dest_col].where(choices_df.tour_mode != mode, skim_cache[c], inplace=True) if estimator: estimator.write_choices(choices_df.tour_mode) choices_df.tour_mode = estimator.get_survey_values( choices_df.tour_mode, 'tours', 'tour_mode') estimator.write_override_choices(choices_df.tour_mode) estimator.end_estimation() tracing.print_summary('tour_mode_choice_simulate 
all tour type choices', choices_df.tour_mode, value_counts=True) # so we can trace with annotations assign_in_place(primary_tours, choices_df) # update tours table with mode choice (and optionally logsums) all_tours = tours.to_frame() assign_in_place(all_tours, choices_df) pipeline.replace_table("tours", all_tours) if trace_hh_id: tracing.trace_df(primary_tours, label=tracing.extend_trace_label( trace_label, mode_column_name), slicer='tour_id', index_label='tour_id', warn_if_empty=True)
def atwork_subtour_destination_simulate( subtours, persons_merged, destination_sample, skim_dict, destination_size_terms, chunk_size, trace_hh_id): """ atwork_subtour_destination model on atwork_subtour_destination_sample annotated with mode_choice logsum to select a destination from sample alternatives """ trace_label = 'atwork_subtour_destination_simulate' model_settings = config.read_model_settings('atwork_subtour_destination.yaml') model_spec = simulate.read_model_spec(file_name='atwork_subtour_destination.csv') # interaction_sample_simulate insists choosers appear in same order as alts subtours = subtours.sort_index() # merge persons into tours choosers = pd.merge(subtours, persons_merged, left_on='person_id', right_index=True) # FIXME - MEMORY HACK - only include columns actually used in spec chooser_columns = model_settings['SIMULATE_CHOOSER_COLUMNS'] choosers = choosers[chooser_columns] alt_dest_col_name = model_settings["ALT_DEST_COL_NAME"] chooser_col_name = 'workplace_taz' # alternatives are pre-sampled and annotated with logsums and pick_count # but we have to merge destination_size_terms columns into alt sample list alternatives = \ pd.merge(destination_sample, destination_size_terms, left_on=alt_dest_col_name, right_index=True, how="left") tracing.dump_df(DUMP, alternatives, trace_label, 'alternatives') constants = config.get_model_constants(model_settings) logger.info("Running atwork_subtour_destination_simulate with %d persons", len(choosers)) # create wrapper with keys for this lookup - in this case there is a TAZ in the choosers # and a TAZ in the alternatives which get merged during interaction # the skims will be available under the name "skims" for any @ expressions skims = skim_dict.wrap(chooser_col_name, alt_dest_col_name) locals_d = { 'skims': skims, } if constants is not None: locals_d.update(constants) tracing.dump_df(DUMP, choosers, trace_label, 'choosers') choices = interaction_sample_simulate( choosers, alternatives, spec=model_spec, choice_column=alt_dest_col_name, skims=skims, locals_d=locals_d, chunk_size=chunk_size, trace_label=trace_label, trace_choice_name='workplace_location') return choices
def atwork_subtour_destination_settings(configs_dir): return config.read_model_settings(configs_dir, 'atwork_subtour_destination.yaml')
def joint_tour_composition( tours, households, persons, chunk_size, trace_hh_id): """ This model predicts the makeup of the travel party (adults, children, or mixed). """ trace_label = 'joint_tour_composition' model_settings = config.read_model_settings('joint_tour_composition.yaml') model_spec = simulate.read_model_spec(file_name='joint_tour_composition.csv') tours = tours.to_frame() joint_tours = tours[tours.tour_category == 'joint'] # - if no joint tours if joint_tours.shape[0] == 0: add_null_results(trace_label, tours) return # - only interested in households with joint_tours households = households.to_frame() households = households[households.num_hh_joint_tours > 0] persons = persons.to_frame() persons = persons[persons.household_id.isin(households.index)] logger.info("Running joint_tour_composition with %d joint tours" % joint_tours.shape[0]) # - run preprocessor preprocessor_settings = model_settings.get('preprocessor', None) if preprocessor_settings: locals_dict = { 'persons': persons, 'hh_time_window_overlap': hh_time_window_overlap } expressions.assign_columns( df=households, model_settings=preprocessor_settings, locals_dict=locals_dict, trace_label=trace_label) joint_tours_merged = pd.merge(joint_tours, households, left_on='household_id', right_index=True, how='left') # - simple_simulate nest_spec = config.get_logit_model_settings(model_settings) constants = config.get_model_constants(model_settings) choices = simulate.simple_simulate( choosers=joint_tours_merged, spec=model_spec, nest_spec=nest_spec, locals_d=constants, chunk_size=chunk_size, trace_label=trace_label, trace_choice_name='composition') # convert indexes to alternative names choices = pd.Series(model_spec.columns[choices.values], index=choices.index) # add composition column to tours for tracing joint_tours['composition'] = choices # reindex since we ran model on a subset of households tours['composition'] = choices.reindex(tours.index).fillna('').astype(str) pipeline.replace_table("tours", tours) tracing.print_summary('joint_tour_composition', joint_tours.composition, value_counts=True) if trace_hh_id: tracing.trace_df(joint_tours, label="joint_tour_composition.joint_tours", slicer='household_id')
def accessibility_settings(configs_dir): return config.read_model_settings(configs_dir, 'accessibility.yaml')
def tour_mode_choice_simulate(tours, persons_merged, skim_dict, skim_stack, chunk_size, trace_hh_id): """ Tour mode choice simulate """ trace_label = 'tour_mode_choice' model_settings = config.read_model_settings('tour_mode_choice.yaml') spec = tour_mode_choice_spec(model_settings) primary_tours = tours.to_frame() assert not (primary_tours.tour_category == 'atwork').any() persons_merged = persons_merged.to_frame() nest_spec = config.get_logit_model_settings(model_settings) constants = config.get_model_constants(model_settings) logger.info("Running %s with %d tours" % (trace_label, primary_tours.shape[0])) tracing.print_summary('tour_types', primary_tours.tour_type, value_counts=True) primary_tours_merged = pd.merge(primary_tours, persons_merged, left_on='person_id', right_index=True, how='left', suffixes=('', '_r')) # setup skim keys orig_col_name = 'TAZ' dest_col_name = 'destination' out_time_col_name = 'start' in_time_col_name = 'end' odt_skim_stack_wrapper = skim_stack.wrap(left_key=orig_col_name, right_key=dest_col_name, skim_key='out_period') dot_skim_stack_wrapper = skim_stack.wrap(left_key=dest_col_name, right_key=orig_col_name, skim_key='in_period') od_skim_stack_wrapper = skim_dict.wrap(orig_col_name, dest_col_name) skims = { "odt_skims": odt_skim_stack_wrapper, "dot_skims": dot_skim_stack_wrapper, "od_skims": od_skim_stack_wrapper, 'orig_col_name': orig_col_name, 'dest_col_name': dest_col_name, 'out_time_col_name': out_time_col_name, 'in_time_col_name': in_time_col_name } choices_list = [] for tour_type, segment in primary_tours_merged.groupby('tour_type'): logger.info("tour_mode_choice_simulate tour_type '%s' (%s tours)" % (tour_type, len(segment.index), )) # name index so tracing knows how to slice assert segment.index.name == 'tour_id' choices = run_tour_mode_choice_simulate( segment, spec, tour_type, model_settings, skims=skims, constants=constants, nest_spec=nest_spec, chunk_size=chunk_size, trace_label=tracing.extend_trace_label(trace_label, tour_type), trace_choice_name='tour_mode_choice') tracing.print_summary('tour_mode_choice_simulate %s choices' % tour_type, choices, value_counts=True) choices_list.append(choices) # FIXME - force garbage collection force_garbage_collect() choices = pd.concat(choices_list) tracing.print_summary('tour_mode_choice_simulate all tour type choices', choices, value_counts=True) # so we can trace with annotations primary_tours['tour_mode'] = choices # but only keep mode choice col all_tours = tours.to_frame() # uncomment to save annotations to table # assign_in_place(all_tours, annotations) assign_in_place(all_tours, choices.to_frame('tour_mode')) pipeline.replace_table("tours", all_tours) if trace_hh_id: tracing.trace_df(primary_tours, label=tracing.extend_trace_label(trace_label, 'tour_mode'), slicer='tour_id', index_label='tour_id', warn_if_empty=True)
def run_trip_purpose(
        trips_df,
        chunk_size,
        trace_hh_id,
        trace_label):
    """
    trip purpose - main functionality separated from model step so it can be called iteratively

    For each intermediate stop on a tour (i.e. trip other than the last trip outbound or inbound),
    each trip is assigned a purpose based on an observed frequency distribution.
    The distribution is segmented by tour purpose, tour direction, person type,
    and, optionally, trip depart time.

    Returns
    -------
    purpose: pandas.Series of purpose (str) indexed by trip_id
    """

    model_settings = config.read_model_settings('trip_purpose.yaml')
    probs_spec = trip_purpose_probs()

    result_list = []

    # - last trip of outbound tour gets primary_purpose
    last_trip = (trips_df.trip_num == trips_df.trip_count)
    purpose = trips_df.primary_purpose[last_trip & trips_df.outbound]
    result_list.append(purpose)
    logger.info("assign purpose to %s last outbound trips", purpose.shape[0])

    # - last trip of inbound tour gets home (or work for atwork subtours)
    purpose = trips_df.primary_purpose[last_trip & ~trips_df.outbound]
    purpose = pd.Series(np.where(purpose == 'atwork', 'Work', 'Home'), index=purpose.index)
    result_list.append(purpose)
    logger.info("assign purpose to %s last inbound trips", purpose.shape[0])

    # - intermediate stops (non-last trips) purpose assigned by probability table
    trips_df = trips_df[~last_trip]
    logger.info("assign purpose to %s intermediate trips", trips_df.shape[0])

    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:
        locals_dict = config.get_model_constants(model_settings)
        expressions.assign_columns(
            df=trips_df,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)

    rows_per_chunk, effective_chunk_size = \
        trip_purpose_rpc(chunk_size, trips_df, probs_spec, trace_label=trace_label)

    for i, num_chunks, trips_chunk in chunk.chunked_choosers(trips_df, rows_per_chunk):

        logger.info("Running chunk %s of %s size %d", i, num_chunks, len(trips_chunk))

        chunk_trace_label = tracing.extend_trace_label(trace_label, 'chunk_%s' % i) \
            if num_chunks > 1 else trace_label

        chunk.log_open(chunk_trace_label, chunk_size, effective_chunk_size)

        choices = choose_intermediate_trip_purpose(
            trips_chunk,
            probs_spec,
            trace_hh_id,
            trace_label=chunk_trace_label)

        chunk.log_close(chunk_trace_label)

        result_list.append(choices)

    if len(result_list) > 1:
        choices = pd.concat(result_list)

    return choices
def atwork_subtour_frequency_settings(configs_dir): return config.read_model_settings(configs_dir, 'atwork_subtour_frequency.yaml')
def run_trip_purpose(trips_df, estimator, chunk_size, trace_hh_id, trace_label):
    """
    trip purpose - main functionality separated from model step so it can be called iteratively

    For each intermediate stop on a tour (i.e. trip other than the last trip outbound or inbound),
    each trip is assigned a purpose based on an observed frequency distribution.
    The distribution is segmented by tour purpose, tour direction, person type,
    and, optionally, trip depart time.

    Returns
    -------
    purpose: pandas.Series of purpose (str) indexed by trip_id
    """

    # uniform across trip_purpose
    chunk_tag = 'trip_purpose'

    model_settings_file_name = 'trip_purpose.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)

    spec_file_name = model_settings.get('PROBS_SPEC', 'trip_purpose_probs.csv')
    probs_spec = pd.read_csv(config.config_file_path(spec_file_name), comment='#')
    # FIXME for now, not really doing estimation for probabilistic model - just overwriting choices
    # besides, it isn't clear that named coefficients would be helpful if we had some form of estimation
    # coefficients_df = simulate.read_model_coefficients(model_settings)
    # probs_spec = map_coefficients(probs_spec, coefficients_df)

    if estimator:
        estimator.write_spec(model_settings, tag='PROBS_SPEC')
        estimator.write_model_settings(model_settings, model_settings_file_name)
        # estimator.write_coefficients(coefficients_df, model_settings)

    result_list = []

    # - last trip of outbound tour gets primary_purpose
    last_trip = (trips_df.trip_num == trips_df.trip_count)
    purpose = trips_df.primary_purpose[last_trip & trips_df.outbound]
    result_list.append(purpose)
    logger.info("assign purpose to %s last outbound trips", purpose.shape[0])

    # - last trip of inbound tour gets home (or work for atwork subtours)
    purpose = trips_df.primary_purpose[last_trip & ~trips_df.outbound]
    # FIXME should be lower case for consistency?
    purpose = pd.Series(np.where(purpose == 'atwork', 'Work', 'Home'), index=purpose.index)
    result_list.append(purpose)
    logger.info("assign purpose to %s last inbound trips", purpose.shape[0])

    # - intermediate stops (non-last trips) purpose assigned by probability table
    trips_df = trips_df[~last_trip]
    logger.info("assign purpose to %s intermediate trips", trips_df.shape[0])

    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:
        locals_dict = config.get_model_constants(model_settings)
        expressions.assign_columns(df=trips_df,
                                   model_settings=preprocessor_settings,
                                   locals_dict=locals_dict,
                                   trace_label=trace_label)

    for i, trips_chunk, chunk_trace_label in \
            chunk.adaptive_chunked_choosers(trips_df, chunk_size, chunk_tag, trace_label):

        choices = choose_intermediate_trip_purpose(
            trips_chunk,
            probs_spec,
            estimator,
            trace_hh_id,
            trace_label=chunk_trace_label)

        result_list.append(choices)

        chunk.log_df(trace_label, 'result_list', result_list)

    if len(result_list) > 1:
        choices = pd.concat(result_list)

    return choices
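# Hedged sketch of the probability-table choice that
# choose_intermediate_trip_purpose (not shown here) presumably performs for
# intermediate stops: each chooser draws a uniform random number and takes the
# first alternative whose cumulative probability exceeds the draw. The
# purposes and probabilities below are invented toy values, not the
# configured PROBS_SPEC.
import numpy as np
import pandas as pd

probs = pd.DataFrame({'escort': [0.2], 'shopping': [0.5], 'othmaint': [0.3]})
cum_probs = probs.cumsum(axis=1).iloc[0].values   # [0.2, 0.7, 1.0]

rands = np.random.default_rng(0).uniform(size=5)
choice_idx = np.searchsorted(cum_probs, rands)    # each draw -> first bin exceeding it
print(list(probs.columns[choice_idx]))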
def compute_columns(df, model_settings, locals_dict={}, trace_label=None):
    """
    Evaluate expressions_spec in context of df, with optional additional pipeline tables in locals

    Parameters
    ----------
    df : pandas DataFrame
        or if None, expect name of pipeline table to be specified by DF in model_settings
    model_settings : dict or str
        dict with keys:
            DF - df_alias and (additionally, if df is None) name of pipeline table to load as df
            SPEC - name of expressions file (csv suffix optional) if different from model_settings
            TABLES - list of pipeline tables to load and make available as (read only) locals
        str: name of yaml file in configs_dir to load dict from
    locals_dict : dict
        dict of locals (e.g. utility functions) to add to the execution environment
    trace_label

    Returns
    -------
    results: pandas.DataFrame
        one column for each expression (except temps with ALL_CAP target names)
        same index as df
    """

    if isinstance(model_settings, str):
        model_settings_name = model_settings
        model_settings = config.read_model_settings('%s.yaml' % model_settings)
        assert model_settings, "Found no model settings for %s" % model_settings_name
    else:
        model_settings_name = 'dict'
        assert isinstance(model_settings, dict)

    assert 'DF' in model_settings, \
        "Expected to find 'DF' in %s" % model_settings_name

    df_name = model_settings.get('DF')
    helper_table_names = model_settings.get('TABLES', [])
    expressions_spec_name = model_settings.get('SPEC', model_settings_name)

    assert expressions_spec_name is not None, \
        "Expected to find 'SPEC' in %s" % model_settings_name

    trace_label = tracing.extend_trace_label(trace_label or '', expressions_spec_name)

    if not expressions_spec_name.endswith(".csv"):
        expressions_spec_name = '%s.csv' % expressions_spec_name
    expressions_spec = assign.read_assignment_spec(config.config_file_path(expressions_spec_name))

    assert expressions_spec.shape[0] > 0, \
        "Expected to find some assignment expressions in %s" % expressions_spec_name

    tables = {t: inject.get_table(t).to_frame() for t in helper_table_names}

    # if df was passed in, df might be a slice, or any other table, but DF is its local alias
    assert df_name not in tables, "Did not expect to find df '%s' in TABLES" % df_name
    tables[df_name] = df

    # be nice and also give it to them as df?
    tables['df'] = df

    _locals_dict = local_utilities()
    _locals_dict.update(locals_dict)
    _locals_dict.update(tables)

    results, trace_results, trace_assigned_locals \
        = assign.assign_variables(expressions_spec,
                                  df,
                                  _locals_dict,
                                  trace_rows=tracing.trace_targets(df))

    if trace_results is not None:
        tracing.trace_df(trace_results, label=trace_label, slicer='NONE')

    if trace_assigned_locals:
        tracing.write_csv(trace_assigned_locals, file_name="%s_locals" % trace_label)

    return results
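# Hypothetical usage sketch for compute_columns, following the docstring
# above. It assumes a pipeline context: persons_df, the 'annotate_persons'
# spec file, and the 'households' helper table are invented names for
# illustration, not real configuration.
annotations = compute_columns(
    df=persons_df,                      # hypothetical chooser table (or slice)
    model_settings={
        'DF': 'persons',                # local alias for df in the expressions
        'SPEC': 'annotate_persons',     # reads configs/annotate_persons.csv
        'TABLES': ['households'],       # extra pipeline tables exposed as locals
    },
    trace_label='annotate_persons')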
def work_from_home(persons_merged, persons, chunk_size, trace_hh_id): """ This model predicts whether a person (worker) works from home. The output from this model is TRUE (if works from home) or FALSE (works away from home). The workplace location choice is overridden for workers who work from home and set to -1. """ trace_label = 'work_from_home' model_settings_file_name = 'work_from_home.yaml' choosers = persons_merged.to_frame() choosers = choosers[choosers.workplace_zone_id > -1] logger.info("Running %s with %d persons", trace_label, len(choosers)) model_settings = config.read_model_settings(model_settings_file_name) estimator = estimation.manager.begin_estimation('work_from_home') constants = config.get_model_constants(model_settings) work_from_home_alt = model_settings['WORK_FROM_HOME_ALT'] # - preprocessor preprocessor_settings = model_settings.get('preprocessor', None) if preprocessor_settings: locals_d = {} if constants is not None: locals_d.update(constants) expressions.assign_columns(df=choosers, model_settings=preprocessor_settings, locals_dict=locals_d, trace_label=trace_label) model_spec = simulate.read_model_spec(file_name=model_settings['SPEC']) coefficients_df = simulate.read_model_coefficients(model_settings) nest_spec = config.get_logit_model_settings(model_settings) if estimator: estimator.write_model_settings(model_settings, model_settings_file_name) estimator.write_spec(model_settings) estimator.write_coefficients(coefficients_df) estimator.write_choosers(choosers) # - iterative what-if if specified iterations = model_settings.get('WORK_FROM_HOME_ITERATIONS', 1) iterations_coefficient_constant = model_settings.get( 'WORK_FROM_HOME_COEFFICIENT_CONSTANT', None) iterations_target_percent = model_settings.get( 'WORK_FROM_HOME_TARGET_PERCENT', None) iterations_target_percent_tolerance = model_settings.get( 'WORK_FROM_HOME_TARGET_PERCENT_TOLERANCE', None) for iteration in range(iterations): logger.info("Running %s with %d persons iteration %d", trace_label, len(choosers), iteration) # re-read spec to reset substitution model_spec = simulate.read_model_spec(file_name=model_settings['SPEC']) model_spec = simulate.eval_coefficients(model_spec, coefficients_df, estimator) choices = simulate.simple_simulate(choosers=choosers, spec=model_spec, nest_spec=nest_spec, locals_d=constants, chunk_size=chunk_size, trace_label=trace_label, trace_choice_name='work_from_home', estimator=estimator) if iterations_target_percent is not None: current_percent = ((choices == work_from_home_alt).sum() / len(choices)) logger.info( "Running %s iteration %i current percent %f target percent %f", trace_label, iteration, current_percent, iterations_target_percent) if current_percent <= (iterations_target_percent + iterations_target_percent_tolerance ) and current_percent >= ( iterations_target_percent - iterations_target_percent_tolerance): logger.info( "Running %s iteration %i converged with coefficient %f", trace_label, iteration, coefficients_df.value[iterations_coefficient_constant]) break else: new_value = np.log( iterations_target_percent / np.maximum(current_percent, 0.0001) ) + coefficients_df.value[iterations_coefficient_constant] coefficients_df.value[ iterations_coefficient_constant] = new_value logger.info( "Running %s iteration %i new coefficient for next iteration %f", trace_label, iteration, new_value) iteration = iteration + 1 choices = (choices == work_from_home_alt) dest_choice_column_name = model_settings['DEST_CHOICE_COLUMN_NAME'] if estimator: estimator.write_choices(choices) 
        choices = estimator.get_survey_values(choices, 'persons', 'work_from_home')
        estimator.write_override_choices(choices)
        estimator.end_estimation()

    persons = persons.to_frame()
    persons['work_from_home'] = choices.reindex(persons.index).fillna(0).astype(bool)
    # work_from_home is a boolean Series, so use it directly as the condition
    persons[dest_choice_column_name] = np.where(
        persons.work_from_home, -1, persons[dest_choice_column_name])

    pipeline.replace_table("persons", persons)

    tracing.print_summary('work_from_home', persons.work_from_home, value_counts=True)

    if trace_hh_id:
        tracing.trace_df(persons,
                         label=trace_label,
                         warn_if_empty=True)
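# Standalone numeric sketch (not ActivitySim code) of the coefficient update
# used in the calibration loop above: new_coef = log(target / current) + coef.
# Under a simple binary logit share p = exp(v + coef) / (1 + exp(v + coef)),
# repeated updates drive the modeled share toward the target. The base
# utility v and the target share are invented toy values.
import numpy as np

v, coef, target = -1.5, 0.0, 0.30
for i in range(5):
    p = np.exp(v + coef) / (1.0 + np.exp(v + coef))
    print(f"iter {i}: coef={coef:.4f} share={p:.4f}")
    coef = np.log(target / max(p, 0.0001)) + coef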
def non_mandatory_tour_frequency_settings(configs_dir): return config.read_model_settings(configs_dir, 'non_mandatory_tour_frequency.yaml')
def non_mandatory_tour_frequency(persons, persons_merged, chunk_size, trace_hh_id):
    """
    This model predicts the frequency of making non-mandatory trips
    (alternatives for this model come from a separate csv file which is
    configured by the user) - these trips include escort, shopping, othmaint,
    othdiscr, eatout, and social trips in various combinations.
    """

    trace_label = 'non_mandatory_tour_frequency'
    model_settings_file_name = 'non_mandatory_tour_frequency.yaml'

    model_settings = config.read_model_settings(model_settings_file_name)

    # FIXME kind of tacky both that we know to add this here and del it below
    # 'tot_tours' is used in model_spec expressions
    alternatives = simulate.read_model_alts('non_mandatory_tour_frequency_alternatives.csv', set_index=None)
    alternatives['tot_tours'] = alternatives.sum(axis=1)

    # filter based on results of CDAP
    choosers = persons_merged.to_frame()
    choosers = choosers[choosers.cdap_activity.isin(['M', 'N'])]

    # - preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_dict = {
            'person_max_window': person_max_window
        }

        expressions.assign_columns(
            df=choosers,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)

    logger.info("Running non_mandatory_tour_frequency with %d persons", len(choosers))

    constants = config.get_model_constants(model_settings)

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    spec_segments = model_settings.get('SPEC_SEGMENTS', {})

    # segment by person type and pick the right spec for each person type
    choices_list = []
    for segment_settings in spec_segments:

        segment_name = segment_settings['NAME']
        ptype = segment_settings['PTYPE']

        # pick the spec column for the segment
        segment_spec = model_spec[[segment_name]]

        chooser_segment = choosers[choosers.ptype == ptype]

        logger.info("Running segment '%s' of size %d", segment_name, len(chooser_segment))

        if len(chooser_segment) == 0:
            # skip empty segments
            continue

        estimator = \
            estimation.manager.begin_estimation(model_name='non_mandatory_tour_frequency_%s' % segment_name,
                                                bundle_name='non_mandatory_tour_frequency')

        coefficients_df = simulate.read_model_coefficients(file_name=segment_settings['COEFFICIENTS'])
        segment_spec = simulate.eval_coefficients(segment_spec, coefficients_df, estimator)

        if estimator:
            estimator.write_spec(model_settings)
            estimator.write_model_settings(model_settings, model_settings_file_name)
            estimator.write_coefficients(coefficients_df)
            estimator.write_choosers(chooser_segment)
            estimator.write_alternatives(alternatives)

            # FIXME estimation_requires_chooser_id_in_df_column do it here or have interaction_simulate do it?
            # chooser index must be duplicated in column or it will be omitted from interaction_dataset
            # estimation requires that chooser_id is either in index or a column of interaction_dataset
            # so it can be reformatted (melted) and indexed by chooser_id and alt_id
            assert chooser_segment.index.name == 'person_id'
            assert 'person_id' not in chooser_segment.columns
            chooser_segment['person_id'] = chooser_segment.index

            # FIXME set_alt_id - do we need this for interaction_simulate estimation bundle tables?
            estimator.set_alt_id('alt_id')
            estimator.set_chooser_id(chooser_segment.index.name)

        choices = interaction_simulate(
            chooser_segment,
            alternatives,
            spec=segment_spec,
            locals_d=constants,
            chunk_size=chunk_size,
            trace_label='non_mandatory_tour_frequency.%s' % segment_name,
            trace_choice_name='non_mandatory_tour_frequency',
            estimator=estimator)

        if estimator:
            estimator.write_choices(choices)
            choices = estimator.get_survey_values(choices, 'persons', 'non_mandatory_tour_frequency')
            estimator.write_override_choices(choices)
            estimator.end_estimation()

        choices_list.append(choices)

        # FIXME - force garbage collection?
        force_garbage_collect()

    del alternatives['tot_tours']  # del tot_tours column we added above

    # The choice value 'non_mandatory_tour_frequency' assigned by interaction_simulate
    # is the index value of the chosen alternative in the alternatives table.
    choices = pd.concat(choices_list).sort_index()

    # add non_mandatory_tour_frequency column to persons
    persons = persons.to_frame()

    # we expect there to be an alt with no tours - which we can use to backfill non-travelers
    no_tours_alt = alternatives[alternatives.sum(axis=1) == 0].index[0]

    # need to reindex as we only handled persons with cdap_activity in ['M', 'N']
    persons['non_mandatory_tour_frequency'] = \
        choices.reindex(persons.index).fillna(no_tours_alt).astype(np.int8)

    """
    We have now generated non-mandatory tour frequencies, but they are attributes of the person table
    Now we create a "tours" table which has one row per tour that has been generated
    (and the person id it is associated with)

    But before we do that, we run an additional probabilistic step to extend/increase tour counts
    beyond the strict limits of the tour_frequency alternatives chosen above
    (which are currently limited to at most 2 escort tours and 1 each of shopping, othmaint,
    othdiscr, eatout, and social tours)

    The choice value 'non_mandatory_tour_frequency' assigned by interaction_simulate is simply
    the index value of the chosen alternative in the alternatives table.
    get counts of each of the tour type alternatives (so we can extend)
               escort  shopping  othmaint  othdiscr  eatout  social
    parent_id
    2588676         2         0         0         1       1       0
    2588677         0         1         0         1       0       0
    """

    # counts of each of the tour type alternatives (so we can extend)
    modeled_tour_counts = alternatives.loc[choices]
    modeled_tour_counts.index = choices.index  # assign person ids to the index

    # - extend_tour_counts - probabilistic
    extended_tour_counts = \
        extend_tour_counts(choosers, modeled_tour_counts.copy(), alternatives,
                           trace_hh_id,
                           tracing.extend_trace_label(trace_label, 'extend_tour_counts'))

    num_modeled_tours = modeled_tour_counts.sum().sum()
    num_extended_tours = extended_tour_counts.sum().sum()
    logger.info("extend_tour_counts increased tour count by %s from %s to %s" %
                (num_extended_tours - num_modeled_tours, num_modeled_tours, num_extended_tours))

    if estimator:
        override_tour_counts = \
            estimation.manager.get_survey_values(extended_tour_counts,
                                                 table_name='persons',
                                                 column_names=['_%s' % c for c in extended_tour_counts.columns])
        override_tour_counts = \
            override_tour_counts.rename(columns={('_%s' % c): c for c in extended_tour_counts.columns})
        logger.info("estimation get_survey_values override_tour_counts %s changed cells" %
                    (override_tour_counts != extended_tour_counts).sum().sum())
        extended_tour_counts = override_tour_counts

    """
    create the non_mandatory tours based on extended_tour_counts
    """
    non_mandatory_tours = process_non_mandatory_tours(persons, extended_tour_counts)
    assert len(non_mandatory_tours) == extended_tour_counts.sum().sum()

    if estimator:
        # make sure they created tours with the expected tour_ids
        columns = ['person_id', 'household_id', 'tour_type', 'tour_category']
        survey_tours = \
            estimation.manager.get_survey_values(non_mandatory_tours,
                                                 table_name='tours',
                                                 column_names=columns)

        tours_differ = (non_mandatory_tours[columns] != survey_tours[columns]).any(axis=1)

        if tours_differ.any():
            print("tours_differ\n%s" % tours_differ)
            print("%s of %s tours differ" % (tours_differ.sum(), len(tours_differ)))
            print("differing survey_tours\n%s" % survey_tours[tours_differ])
            print("differing modeled_tours\n%s" % non_mandatory_tours[columns][tours_differ])

        assert not tours_differ.any()

    pipeline.extend_table("tours", non_mandatory_tours)

    tracing.register_traceable_table('tours', non_mandatory_tours)
    pipeline.get_rn_generator().add_channel('tours', non_mandatory_tours)

    expressions.assign_columns(
        df=persons,
        model_settings=model_settings.get('annotate_persons'),
        trace_label=trace_label)

    pipeline.replace_table("persons", persons)

    tracing.print_summary('non_mandatory_tour_frequency',
                          persons.non_mandatory_tour_frequency, value_counts=True)

    if trace_hh_id:
        tracing.trace_df(non_mandatory_tours,
                         label="non_mandatory_tour_frequency.non_mandatory_tours",
                         warn_if_empty=True)

        tracing.trace_df(choosers,
                         label="non_mandatory_tour_frequency.choosers",
                         warn_if_empty=True)

        tracing.trace_df(persons,
                         label="non_mandatory_tour_frequency.annotated_persons",
                         warn_if_empty=True)
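# Toy illustration (invented alternative ids and counts) of the indexing idiom
# used above: alternatives.loc[choices] pulls one alternatives row per
# chooser, and re-assigning the index re-keys the tour counts by person
# rather than by alternative.
import pandas as pd

alts = pd.DataFrame({'escort': [0, 1, 2], 'shopping': [0, 0, 1]},
                    index=pd.Index([0, 1, 2], name='alt_id'))
choices = pd.Series([2, 0, 1], index=pd.Index([10, 11, 12], name='person_id'))

tour_counts = alts.loc[choices]
tour_counts.index = choices.index   # person_id index, one row of counts each
print(tour_counts)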
def add_size_tables():
    """
    inject tour_destination_size_terms tables for each model_selector (e.g. school, workplace)

    Size tables are pandas dataframes with location counts for model_selector by zone and segment
    tour_destination_size_terms

    if using shadow pricing, we scale size_table counts to sample population
    (in which case, they have to be created while single-process)

    Scaling is problematic as it breaks household result replicability across sample sizes
    It also changes the magnitude of the size terms so if they are used as utilities in
    expression files, their importance will diminish relative to other utilities as the
    sample size decreases.

    Scaling makes most sense for a full sample in conjunction with shadow pricing, where
    shadow prices can be adjusted iteratively to bring modeled counts into line with
    desired (size table) counts.
    """

    use_shadow_pricing = bool(config.setting('use_shadow_pricing'))

    shadow_settings = config.read_model_settings('shadow_pricing.yaml')
    shadow_pricing_models = shadow_settings.get('shadow_pricing_models')

    if shadow_pricing_models is None:
        logger.warning('shadow_pricing_models list not found in shadow_pricing settings')
        return

    # probably ought not scale if not shadow_pricing (breaks partial sample replicability)
    # but this allows compatibility with existing CTRAMP behavior...
    scale_size_table = shadow_settings.get('SCALE_SIZE_TABLE', False)

    # shadow_pricing_models is dict of {<model_selector>: <model_name>}
    # since these are scaled to model size, they have to be created while single-process

    for model_selector, model_name in shadow_pricing_models.items():

        model_settings = config.read_model_settings(model_name)

        assert model_selector == model_settings['MODEL_SELECTOR']

        assert 'SEGMENT_IDS' in model_settings, f"missing SEGMENT_IDS setting in {model_name} model_settings"
        segment_ids = model_settings['SEGMENT_IDS']
        chooser_table_name = model_settings['CHOOSER_TABLE_NAME']
        chooser_segment_column = model_settings['CHOOSER_SEGMENT_COLUMN_NAME']

        choosers_df = inject.get_table(chooser_table_name).to_frame()
        if 'CHOOSER_FILTER_COLUMN_NAME' in model_settings:
            choosers_df = \
                choosers_df[choosers_df[model_settings['CHOOSER_FILTER_COLUMN_NAME']] != 0]

        # - raw_desired_size
        land_use = inject.get_table('land_use')
        size_terms = inject.get_injectable('size_terms')
        raw_size = tour_destination_size_terms(land_use, size_terms, model_selector)
        assert set(raw_size.columns) == set(segment_ids.keys())

        if use_shadow_pricing or scale_size_table:

            # - scale size_table counts to sample population
            # scaled_size = zone_size * (total_segment_modeled / total_segment_desired)

            # segment scale factor (modeled / desired) keyed by segment_name
            segment_scale_factors = {}
            for c in raw_size:
                # number of zone demographics desired destination choices
                segment_desired_size = raw_size[c].astype(np.float64).sum()

                # number of synthetic population choosers in segment
                segment_chooser_count = \
                    (choosers_df[chooser_segment_column] == segment_ids[c]).sum()

                segment_scale_factors[c] = \
                    segment_chooser_count / np.maximum(segment_desired_size, 1)

                logger.info("add_desired_size_tables %s segment %s "
                            "desired %s modeled %s scale_factor %s" %
                            (chooser_table_name, c,
                             segment_desired_size,
                             segment_chooser_count,
                             segment_scale_factors[c]))

            # FIXME - should we be rounding?
scaled_size = (raw_size * segment_scale_factors).round() else: scaled_size = raw_size logger.debug( f"add_size_table {size_table_name(model_selector)} ({scaled_size.shape}) for {model_selector}" ) assert scaled_size.index.is_monotonic_increasing, \ f"size table {size_table_name(model_selector)} not is_monotonic_increasing" inject.add_table(size_table_name(model_selector), scaled_size)
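# Toy sketch of the scaling step above: each segment's zonal size terms are
# multiplied by (synthetic choosers in segment) / (total desired size), so the
# scaled totals match the modeled population. All numbers are invented.
import numpy as np
import pandas as pd

raw_size = pd.DataFrame({'work_low': [100.0, 300.0], 'work_high': [50.0, 150.0]},
                        index=pd.Index([1, 2], name='zone_id'))
segment_chooser_counts = {'work_low': 200, 'work_high': 500}

factors = pd.Series({c: segment_chooser_counts[c] / np.maximum(raw_size[c].sum(), 1)
                     for c in raw_size})
scaled_size = (raw_size * factors).round()
print(scaled_size)   # work_low scaled by 0.5, work_high by 2.5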
def tour_od_choice(
        tours,
        persons,
        households,
        land_use,
        network_los,
        chunk_size,
        trace_hh_id):

    """Simulates joint origin/destination choice for all tours.

    Given a set of previously generated tours, each tour needs to have an
    origin and a destination. In this case tours are the choosers, but
    the associated person that's making the tour does not necessarily have
    a home location assigned already. So we choose a tour origin at the same
    time as we choose a tour destination, and assign the tour origin as that
    person's home location.

    Parameters
    ----------
    tours : orca.DataFrameWrapper
        lazy-loaded tours table
    persons : orca.DataFrameWrapper
        lazy-loaded persons table
    households : orca.DataFrameWrapper
        lazy-loaded households table
    land_use : orca.DataFrameWrapper
        lazy-loaded land use data table
    network_los : orca._InjectableFuncWrapper
        lazy-loaded activitysim.los.Network_LOS object
    chunk_size
        simulation chunk size, set in main settings.yaml
    trace_hh_id : int
        households to trace, set in main settings.yaml
    """
    trace_label = 'tour_od_choice'
    model_settings_file_name = 'tour_od_choice.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)
    origin_col_name = model_settings['ORIG_COL_NAME']
    dest_col_name = model_settings['DEST_COL_NAME']
    alt_id_col = tour_od.get_od_id_col(origin_col_name, dest_col_name)

    sample_table_name = model_settings.get('OD_CHOICE_SAMPLE_TABLE_NAME')
    want_sample_table = config.setting('want_dest_choice_sample_tables') and sample_table_name is not None

    logsum_column_name = model_settings.get('OD_CHOICE_LOGSUM_COLUMN_NAME', None)
    want_logsums = logsum_column_name is not None

    tours = tours.to_frame()

    # interaction_sample_simulate insists choosers appear in same order as alts
    tours = tours.sort_index()

    estimator = estimation.manager.begin_estimation('tour_od_choice')
    if estimator:
        estimator.write_coefficients(model_settings=model_settings)
        estimator.write_spec(model_settings, tag='SAMPLE_SPEC')
        estimator.write_spec(model_settings, tag='SPEC')
        estimator.set_alt_id(alt_id_col)
        estimator.write_table(inject.get_injectable('size_terms'), 'size_terms', append=False)
        estimator.write_table(inject.get_table('land_use').to_frame(), 'landuse', append=False)
        estimator.write_model_settings(model_settings, model_settings_file_name)

    choices_df, save_sample_df = tour_od.run_tour_od(
        tours,
        persons,
        want_logsums,
        want_sample_table,
        model_settings,
        network_los,
        estimator,
        chunk_size,
        trace_hh_id,
        trace_label)

    if estimator:
        assert estimator.want_unsampled_alternatives
        estimator.write_choices(choices_df.choice)
        survey_od = estimator.get_survey_values(
            choices_df.choice, 'tours', [origin_col_name, dest_col_name])
        choices_df[origin_col_name] = survey_od[origin_col_name]
        choices_df[dest_col_name] = survey_od[dest_col_name]
        survey_od[alt_id_col] = tour_od.create_od_id_col(
            survey_od, origin_col_name, dest_col_name)
        choices_df.choice = survey_od[alt_id_col]
        estimator.write_override_choices(choices_df.choice)
        estimator.end_estimation()

    tours[origin_col_name] = choices_df[origin_col_name].reindex(tours.index)
    tours[dest_col_name] = choices_df[dest_col_name].reindex(tours.index)
    if want_logsums:
        tours[logsum_column_name] = \
            choices_df['logsum'].reindex(tours.index).astype('float')
    tours['poe_id'] = tours[origin_col_name].map(land_use.to_frame(columns='poe_id').poe_id)

    households = households.to_frame()
    persons = persons.to_frame()
    households[origin_col_name] = \
tours.set_index('household_id')[origin_col_name].reindex(households.index) persons[origin_col_name] = households[origin_col_name].reindex(persons.household_id).values # Downstream steps require that persons and households have a 'home_zone_id' # column. We assume that if the tour_od_choice model is used, this field is # missing from the population data, so it gets inherited from the tour origin households['home_zone_id'] = households[origin_col_name] persons['home_zone_id'] = persons[origin_col_name] pipeline.replace_table("tours", tours) pipeline.replace_table("persons", persons) pipeline.replace_table("households", households) if want_sample_table: assert len(save_sample_df.index.get_level_values(0).unique()) == len(choices_df) pipeline.extend_table(sample_table_name, save_sample_df) if trace_hh_id: tracing.trace_df(tours, label="tours_od_choice", slicer='person_id', index_label='tour', columns=None, warn_if_empty=True)
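# Toy sketch (invented ids) of the index-alignment idiom above that pushes a
# tour's chosen origin up to its household and then out to that household's
# persons via reindex.
import pandas as pd

tours = pd.DataFrame({'household_id': [1, 2], 'origin': [11, 22]})
households = pd.DataFrame(index=pd.Index([1, 2, 3], name='household_id'))
persons = pd.DataFrame({'household_id': [1, 1, 2]},
                       index=pd.Index([100, 101, 102], name='person_id'))

households['origin'] = tours.set_index('household_id')['origin'].reindex(households.index)
persons['origin'] = households['origin'].reindex(persons.household_id).values
print(households.origin.tolist())   # [11.0, 22.0, nan]
print(persons.origin.tolist())      # [11.0, 11.0, 22.0]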
def tour_mode_choice_simulate(tours, persons_merged, skim_dict, skim_stack, chunk_size, trace_hh_id): """ Tour mode choice simulate """ trace_label = 'tour_mode_choice' model_settings_file_name = 'tour_mode_choice.yaml' model_settings = config.read_model_settings(model_settings_file_name) logsum_column_name = model_settings.get('MODE_CHOICE_LOGSUM_COLUMN_NAME') mode_column_name = 'tour_mode' # FIXME - should be passed in? primary_tours = tours.to_frame() assert not (primary_tours.tour_category == 'atwork').any() persons_merged = persons_merged.to_frame() constants = config.get_model_constants(model_settings) logger.info("Running %s with %d tours" % (trace_label, primary_tours.shape[0])) tracing.print_summary('tour_types', primary_tours.tour_type, value_counts=True) primary_tours_merged = pd.merge(primary_tours, persons_merged, left_on='person_id', right_index=True, how='left', suffixes=('', '_r')) # setup skim keys orig_col_name = 'TAZ' dest_col_name = 'destination' out_time_col_name = 'start' in_time_col_name = 'end' odt_skim_stack_wrapper = skim_stack.wrap(left_key=orig_col_name, right_key=dest_col_name, skim_key='out_period') dot_skim_stack_wrapper = skim_stack.wrap(left_key=dest_col_name, right_key=orig_col_name, skim_key='in_period') odr_skim_stack_wrapper = skim_stack.wrap(left_key=orig_col_name, right_key=dest_col_name, skim_key='in_period') dor_skim_stack_wrapper = skim_stack.wrap(left_key=dest_col_name, right_key=orig_col_name, skim_key='out_period') od_skim_stack_wrapper = skim_dict.wrap(orig_col_name, dest_col_name) skims = { "odt_skims": odt_skim_stack_wrapper, "dot_skims": dot_skim_stack_wrapper, "odr_skims": odr_skim_stack_wrapper, "dor_skims": dor_skim_stack_wrapper, "od_skims": od_skim_stack_wrapper, 'orig_col_name': orig_col_name, 'dest_col_name': dest_col_name, 'out_time_col_name': out_time_col_name, 'in_time_col_name': in_time_col_name } estimator = estimation.manager.begin_estimation('tour_mode_choice') if estimator: estimator.write_coefficients( simulate.read_model_coefficients(model_settings)) estimator.write_coefficients_template( simulate.read_model_coefficient_template(model_settings)) estimator.write_spec(model_settings) estimator.write_model_settings(model_settings, model_settings_file_name) # FIXME run_tour_mode_choice_simulate writes choosers post-annotation choices_list = [] primary_tours_merged['primary_purpose'] = \ primary_tours_merged.tour_type.where((primary_tours_merged.tour_type != 'school') | ~primary_tours_merged.is_university, 'univ') for primary_purpose, tours_segment in primary_tours_merged.groupby( 'primary_purpose'): logger.info( "tour_mode_choice_simulate primary_purpose '%s' (%s tours)" % ( primary_purpose, len(tours_segment.index), )) # name index so tracing knows how to slice assert tours_segment.index.name == 'tour_id' choices_df = run_tour_mode_choice_simulate( tours_segment, primary_purpose, model_settings, mode_column_name=mode_column_name, logsum_column_name=logsum_column_name, skims=skims, constants=constants, estimator=estimator, chunk_size=chunk_size, trace_label=tracing.extend_trace_label(trace_label, primary_purpose), trace_choice_name='tour_mode_choice') tracing.print_summary('tour_mode_choice_simulate %s choices_df' % primary_purpose, choices_df.tour_mode, value_counts=True) choices_list.append(choices_df) # FIXME - force garbage collection force_garbage_collect() choices_df = pd.concat(choices_list) if estimator: estimator.write_choices(choices_df.tour_mode) choices_df.tour_mode = estimator.get_survey_values( 
choices_df.tour_mode, 'tours', 'tour_mode') estimator.write_override_choices(choices_df.tour_mode) estimator.end_estimation() tracing.print_summary('tour_mode_choice_simulate all tour type choices', choices_df.tour_mode, value_counts=True) # so we can trace with annotations assign_in_place(primary_tours, choices_df) # but only keep mode choice col all_tours = tours.to_frame() assign_in_place(all_tours, choices_df) pipeline.replace_table("tours", all_tours) if trace_hh_id: tracing.trace_df(primary_tours, label=tracing.extend_trace_label( trace_label, mode_column_name), slicer='tour_id', index_label='tour_id', warn_if_empty=True)
def atwork_subtour_mode_choice(tours, persons_merged, network_los, chunk_size, trace_hh_id): """ At-work subtour mode choice simulate """ trace_label = 'atwork_subtour_mode_choice' model_settings_file_name = 'tour_mode_choice.yaml' model_settings = config.read_model_settings(model_settings_file_name) logsum_column_name = model_settings.get('MODE_CHOICE_LOGSUM_COLUMN_NAME') mode_column_name = 'tour_mode' tours = tours.to_frame() subtours = tours[tours.tour_category == 'atwork'] # - if no atwork subtours if subtours.shape[0] == 0: tracing.no_results(trace_label) return subtours_merged = \ pd.merge(subtours, persons_merged.to_frame(), left_on='person_id', right_index=True, how='left') logger.info("Running %s with %d subtours" % (trace_label, subtours_merged.shape[0])) tracing.print_summary('%s tour_type' % trace_label, subtours_merged.tour_type, value_counts=True) constants = {} constants.update(config.get_model_constants(model_settings)) skim_dict = network_los.get_default_skim_dict() # setup skim keys orig_col_name = 'workplace_zone_id' dest_col_name = 'destination' out_time_col_name = 'start' in_time_col_name = 'end' odt_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=orig_col_name, dest_key=dest_col_name, dim3_key='out_period') dot_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=dest_col_name, dest_key=orig_col_name, dim3_key='in_period') odr_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=orig_col_name, dest_key=dest_col_name, dim3_key='in_period') dor_skim_stack_wrapper = skim_dict.wrap_3d(orig_key=dest_col_name, dest_key=orig_col_name, dim3_key='out_period') od_skim_stack_wrapper = skim_dict.wrap(orig_col_name, dest_col_name) skims = { "odt_skims": odt_skim_stack_wrapper, "dot_skims": dot_skim_stack_wrapper, "odr_skims": odr_skim_stack_wrapper, "dor_skims": dor_skim_stack_wrapper, "od_skims": od_skim_stack_wrapper, 'orig_col_name': orig_col_name, 'dest_col_name': dest_col_name, 'out_time_col_name': out_time_col_name, 'in_time_col_name': in_time_col_name } if network_los.zone_system == los.THREE_ZONE: # fixme - is this a lightweight object? 
tvpb = network_los.tvpb tvpb_logsum_odt = tvpb.wrap_logsum(orig_key=orig_col_name, dest_key=dest_col_name, tod_key='out_period', segment_key='demographic_segment', cache_choices=True, trace_label=trace_label, tag='tvpb_logsum_odt') tvpb_logsum_dot = tvpb.wrap_logsum(orig_key=dest_col_name, dest_key=orig_col_name, tod_key='in_period', segment_key='demographic_segment', cache_choices=True, trace_label=trace_label, tag='tvpb_logsum_dot') skims.update({ 'tvpb_logsum_odt': tvpb_logsum_odt, 'tvpb_logsum_dot': tvpb_logsum_dot }) # TVPB constants can appear in expressions constants.update( network_los.setting('TVPB_SETTINGS.tour_mode_choice.CONSTANTS')) estimator = estimation.manager.begin_estimation( 'atwork_subtour_mode_choice') if estimator: estimator.write_coefficients(model_settings=model_settings) estimator.write_coefficients_template(model_settings=model_settings) estimator.write_spec(model_settings) estimator.write_model_settings(model_settings, model_settings_file_name) # FIXME run_tour_mode_choice_simulate writes choosers post-annotation choices_df = run_tour_mode_choice_simulate( subtours_merged, tour_purpose='atwork', model_settings=model_settings, mode_column_name=mode_column_name, logsum_column_name=logsum_column_name, network_los=network_los, skims=skims, constants=constants, estimator=estimator, chunk_size=chunk_size, trace_label=trace_label, trace_choice_name='tour_mode_choice') # add cached tvpb_logsum tap choices for modes specified in tvpb_mode_path_types if network_los.zone_system == los.THREE_ZONE: tvpb_mode_path_types = model_settings.get('tvpb_mode_path_types') for mode, path_types in tvpb_mode_path_types.items(): for direction, skim in zip(['od', 'do'], [tvpb_logsum_odt, tvpb_logsum_dot]): path_type = path_types[direction] skim_cache = skim.cache[path_type] print( f"mode {mode} direction {direction} path_type {path_type}") for c in skim_cache: dest_col = f'{direction}_{c}' if dest_col not in choices_df: choices_df[ dest_col] = np.nan if pd.api.types.is_numeric_dtype( skim_cache[c]) else '' choices_df[dest_col].where(choices_df.tour_mode != mode, skim_cache[c], inplace=True) if estimator: estimator.write_choices(choices_df[mode_column_name]) choices_df[mode_column_name] = \ estimator.get_survey_values(choices_df[mode_column_name], 'tours', mode_column_name) estimator.write_override_choices(choices_df[mode_column_name]) estimator.end_estimation() tracing.print_summary('%s choices' % trace_label, choices_df[mode_column_name], value_counts=True) assign_in_place(tours, choices_df) pipeline.replace_table("tours", tours) if trace_hh_id: tracing.trace_df(tours[tours.tour_category == 'atwork'], label=tracing.extend_trace_label( trace_label, mode_column_name), slicer='tour_id', index_label='tour_id')
def trip_mode_choice(
        trips,
        tours_merged,
        skim_dict, skim_stack,
        chunk_size, trace_hh_id):
    """
    Trip mode choice - compute trip_mode (same values as for tour_mode) for each trip.

    Modes for each primary tour purpose are calculated separately because they have
    different coefficient values (stored in trip_mode_choice_coeffs.csv coefficient file).

    Adds trip_mode column to trip table
    """
    trace_label = 'trip_mode_choice'
    model_settings = config.read_model_settings('trip_mode_choice.yaml')

    model_spec = \
        simulate.read_model_spec(file_name=model_settings['SPEC'])
    omnibus_coefficients = \
        assign.read_constant_spec(config.config_file_path(model_settings['COEFFS']))

    trips_df = trips.to_frame()
    logger.info("Running %s with %d trips", trace_label, trips_df.shape[0])

    tours_merged = tours_merged.to_frame()
    tours_merged = tours_merged[model_settings['TOURS_MERGED_CHOOSER_COLUMNS']]

    nest_spec = config.get_logit_model_settings(model_settings)

    tracing.print_summary('primary_purpose',
                          trips_df.primary_purpose,
                          value_counts=True)

    # - trips_merged - merge trips and tours_merged
    trips_merged = pd.merge(
        trips_df,
        tours_merged,
        left_on='tour_id',
        right_index=True,
        how="left")
    assert trips_merged.index.equals(trips.index)

    # setup skim keys
    assert ('trip_period' not in trips_merged)
    trips_merged['trip_period'] = skim_time_period_label(trips_merged.depart)

    orig_col = 'origin'
    dest_col = 'destination'

    odt_skim_stack_wrapper = skim_stack.wrap(left_key=orig_col, right_key=dest_col,
                                             skim_key='trip_period')
    od_skim_wrapper = skim_dict.wrap('origin', 'destination')

    skims = {
        "odt_skims": odt_skim_stack_wrapper,
        "od_skims": od_skim_wrapper,
    }

    constants = config.get_model_constants(model_settings)
    constants.update({
        'ORIGIN': orig_col,
        'DESTINATION': dest_col
    })

    choices_list = []
    for primary_purpose, trips_segment in trips_merged.groupby('primary_purpose'):

        segment_trace_label = tracing.extend_trace_label(trace_label, primary_purpose)

        logger.info("trip_mode_choice tour_type '%s' (%s trips)" %
                    (primary_purpose, len(trips_segment.index), ))

        # name index so tracing knows how to slice
        assert trips_segment.index.name == 'trip_id'

        locals_dict = assign.evaluate_constants(omnibus_coefficients[primary_purpose],
                                                constants=constants)
        locals_dict.update(constants)

        annotate_preprocessors(
            trips_segment, locals_dict, skims,
            model_settings, segment_trace_label)

        locals_dict.update(skims)

        choices = simulate.simple_simulate(
            choosers=trips_segment,
            spec=model_spec,
            nest_spec=nest_spec,
            skims=skims,
            locals_d=locals_dict,
            chunk_size=chunk_size,
            trace_label=segment_trace_label,
            trace_choice_name='trip_mode_choice')

        alts = model_spec.columns
        choices = choices.map(dict(list(zip(list(range(len(alts))), alts))))

        # tracing.print_summary('trip_mode_choice %s choices' % primary_purpose,
        #                       choices, value_counts=True)

        if trace_hh_id:
            # trace the coefficients
            tracing.trace_df(pd.Series(locals_dict),
                             label=tracing.extend_trace_label(segment_trace_label, 'constants'),
                             transpose=False,
                             slicer='NONE')

            # so we can trace with annotations
            trips_segment['trip_mode'] = choices

            tracing.trace_df(trips_segment,
                             label=tracing.extend_trace_label(segment_trace_label, 'trip_mode'),
                             slicer='tour_id',
                             index_label='tour_id',
                             warn_if_empty=True)

        choices_list.append(choices)

        # FIXME - force garbage collection
        force_garbage_collect()

    choices = pd.concat(choices_list)

    trips_df = trips.to_frame()
    trips_df['trip_mode'] = choices

    tracing.print_summary('tour_modes',
                          trips_merged.tour_mode, value_counts=True)

    tracing.print_summary('trip_mode_choice choices',
                          choices,
value_counts=True) assert not trips_df.trip_mode.isnull().any() pipeline.replace_table("trips", trips_df) if trace_hh_id: tracing.trace_df(trips_df, label=tracing.extend_trace_label(trace_label, 'trip_mode'), slicer='trip_id', index_label='trip_id', warn_if_empty=True)
def best_transit_path(set_random_seed,
                      network_los,
                      best_transit_path_spec):

    model_settings = config.read_model_settings('best_transit_path.yaml')

    logger.info("best_transit_path VECTOR_TEST_SIZE %s", VECTOR_TEST_SIZE)

    omaz = network_los.maz_df.sample(VECTOR_TEST_SIZE, replace=True).index
    dmaz = network_los.maz_df.sample(VECTOR_TEST_SIZE, replace=True).index
    tod = np.random.choice(['AM', 'PM'], VECTOR_TEST_SIZE)

    od_df = pd.DataFrame({'omaz': omaz, 'dmaz': dmaz, 'tod': tod})

    trace_od = (od_df.omaz[0], od_df.dmaz[0])
    logger.info("trace_od omaz %s dmaz %s" % trace_od)

    # build exploded atap_btap_df
    # FIXME - pathological knowledge about mode - should be parameterized
    # filter out rows with no drive time omaz-btap or no walk time from dmaz-atap
    atap_btap_df = network_los.get_tappairs_mazpairs(od_df.omaz, od_df.dmaz,
                                                     ofilter='drive_time',
                                                     dfilter='walk_alighting')

    # add in tod column
    atap_btap_df = atap_btap_df.merge(
        right=od_df[['tod']],
        left_on='idx',
        right_index=True,
        how='left')

    logger.info("len od_df %s", len(od_df.index))
    logger.info("len atap_btap_df %s", len(atap_btap_df.index))
    logger.info("avg explosion %s", (len(atap_btap_df.index) / (1.0 * len(od_df.index))))

    if trace_od:
        trace_orig, trace_dest = trace_od
        trace_oabd_rows = (atap_btap_df.omaz == trace_orig) & (atap_btap_df.dmaz == trace_dest)
    else:
        trace_oabd_rows = None

    constants = config.get_model_constants(model_settings)
    locals_d = {
        'np': np,
        'network_los': network_los
    }
    if constants is not None:
        locals_d.update(constants)

    results, trace_results, trace_assigned_locals \
        = assign.assign_variables(best_transit_path_spec, atap_btap_df, locals_d,
                                  trace_rows=trace_oabd_rows)

    # copy results
    for column in results.columns:
        atap_btap_df[column] = results[column]

    # drop rows if no utility
    n = len(atap_btap_df.index)
    atap_btap_df = atap_btap_df.dropna(subset=['utility'])

    logger.info("Dropped %s of %s rows with null utility", n - len(atap_btap_df.index), n)

    # choose max utility
    atap_btap_df = atap_btap_df.sort_values(by='utility').groupby('idx').tail(1)

    if trace_od:

        if not trace_oabd_rows.any():
            logger.warning("trace_od not found origin = %s, dest = %s",
                           trace_orig, trace_dest)
        else:
            tracing.trace_df(atap_btap_df,
                             label='best_transit_path',
                             slicer='NONE',
                             transpose=False)

            tracing.trace_df(trace_results,
                             label='trace_best_transit_path',
                             slicer='NONE',
                             transpose=False)

            if trace_assigned_locals:
                tracing.write_csv(trace_assigned_locals, file_name="trace_best_transit_path_locals")
def joint_tour_scheduling(
        tours, persons_merged,
        tdd_alts,
        chunk_size,
        trace_hh_id):
    """
    This model predicts the departure time and duration of each joint tour
    """
    trace_label = 'joint_tour_scheduling'
    model_settings = config.read_model_settings('joint_tour_scheduling.yaml')
    model_spec = simulate.read_model_spec(file_name='tour_scheduling_joint.csv')

    tours = tours.to_frame()
    joint_tours = tours[tours.tour_category == 'joint']

    # - if no joint tours
    if joint_tours.shape[0] == 0:
        tracing.no_results(trace_label)
        return

    # use inject.get_table as this won't exist if there are no joint_tours
    joint_tour_participants = inject.get_table('joint_tour_participants').to_frame()

    persons_merged = persons_merged.to_frame()

    logger.info("Running %s with %d joint tours", trace_label, joint_tours.shape[0])

    # it may seem peculiar that we are concerned with persons rather than households
    # but every joint tour is (somewhat arbitrarily) assigned a "primary person"
    # some of whose characteristics are used in the spec
    # and we get household attributes along with person attributes in persons_merged
    persons_merged = persons_merged[persons_merged.num_hh_joint_tours > 0]

    # since a household's joint tours each have potentially different participants,
    # they may also have different joint tour masks (free time of all participants),
    # so we have to either chunk processing by joint_tour_num and build the timetable
    # by household, or build timetables by unique joint_tour
    constants = config.get_model_constants(model_settings)

    # - run preprocessor to annotate choosers
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_d = {}
        if constants is not None:
            locals_d.update(constants)

        expressions.assign_columns(
            df=joint_tours,
            model_settings=preprocessor_settings,
            locals_dict=locals_d,
            trace_label=trace_label)

    tdd_choices, timetable = vectorize_joint_tour_scheduling(
        joint_tours, joint_tour_participants,
        persons_merged,
        tdd_alts,
        spec=model_spec,
        model_settings=model_settings,
        chunk_size=chunk_size,
        trace_label=trace_label)

    timetable.replace_table()

    assign_in_place(tours, tdd_choices)
    pipeline.replace_table("tours", tours)

    # updated df for tracing
    joint_tours = tours[tours.tour_category == 'joint']

    if trace_hh_id:
        tracing.trace_df(joint_tours,
                         label="joint_tour_scheduling",
                         slicer='household_id')
def compute_accessibility(accessibility, skim_dict, land_use, trace_od):
    """
    Compute accessibility for each zone in land use file using expressions from accessibility_spec

    The actual results depend on the expressions in accessibility_spec, but this is initially
    intended to permit implementation of the mtc accessibility calculation as implemented by
    Accessibility.job

    Compute measures of accessibility used by the automobile ownership model.
    The accessibility measure first multiplies an employment variable by a mode-specific decay
    function. The product reflects the difficulty of accessing the activities the farther
    (in terms of round-trip travel time) the jobs are from the location in question. The products
    to each destination zone are next summed over each origin zone, and the logarithm of the sum
    mutes large differences. The decay function on the walk accessibility measure is steeper than
    automobile or transit. The minimum accessibility is zero.
    """

    trace_label = 'compute_accessibility'
    model_settings = config.read_model_settings('accessibility.yaml')
    assignment_spec = assign.read_assignment_spec(config.config_file_path('accessibility.csv'))

    accessibility_df = accessibility.to_frame()

    constants = config.get_model_constants(model_settings)
    land_use_columns = model_settings.get('land_use_columns', [])

    land_use_df = land_use.to_frame()

    orig_zones = accessibility_df.index.values
    dest_zones = land_use_df.index.values

    orig_zone_count = len(orig_zones)
    dest_zone_count = len(dest_zones)

    logger.info("Running %s with %d dest zones %d orig zones" %
                (trace_label, dest_zone_count, orig_zone_count))

    # create OD dataframe
    od_df = pd.DataFrame(
        data={
            'orig': np.repeat(np.asanyarray(accessibility_df.index), dest_zone_count),
            'dest': np.tile(np.asanyarray(land_use_df.index), orig_zone_count)
        }
    )

    if trace_od:
        trace_orig, trace_dest = trace_od
        trace_od_rows = (od_df.orig == trace_orig) & (od_df.dest == trace_dest)
    else:
        trace_od_rows = None

    # merge land_use_columns into od_df
    land_use_df = land_use_df[land_use_columns]
    od_df = pd.merge(od_df, land_use_df, left_on='dest', right_index=True).sort_index()

    locals_d = {
        'log': np.log,
        'exp': np.exp,
        'skim_od': AccessibilitySkims(skim_dict, orig_zones, dest_zones),
        'skim_do': AccessibilitySkims(skim_dict, orig_zones, dest_zones, transpose=True)
    }
    if constants is not None:
        locals_d.update(constants)

    results, trace_results, trace_assigned_locals \
        = assign.assign_variables(assignment_spec, od_df, locals_d, trace_rows=trace_od_rows)

    for column in results.columns:
        data = np.asanyarray(results[column])
        data.shape = (orig_zone_count, dest_zone_count)
        accessibility_df[column] = np.log(np.sum(data, axis=1) + 1)

    # - write table to pipeline
    pipeline.replace_table("accessibility", accessibility_df)

    if trace_od:

        if not trace_od_rows.any():
            logger.warning("trace_od not found origin = %s, dest = %s" %
                           (trace_orig, trace_dest))
        else:

            # add OD columns to trace results
            df = pd.concat([od_df[trace_od_rows], trace_results], axis=1)

            # dump the trace results table (with _temp variables) to aid debugging
            tracing.trace_df(df,
                             label='accessibility',
                             index_label='skim_offset',
                             slicer='NONE',
                             warn_if_empty=True)

            if trace_assigned_locals:
                tracing.write_csv(trace_assigned_locals, file_name="accessibility_locals")
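# compute_accessibility evaluates each spec expression over the exploded
# origin x destination table, then reshapes the flat result to
# (orig_zone_count, dest_zone_count) and aggregates with log(sum + 1).
# A minimal numeric sketch of that aggregation (values are made up):

import numpy as np

orig_zone_count, dest_zone_count = 2, 3
flat_results = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0])  # one value per od pair

data = flat_results.reshape(orig_zone_count, dest_zone_count)
accessibility = np.log(data.sum(axis=1) + 1)
print(accessibility)  # one value per origin zone: log(6 + 1), log(15 + 1)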
def atwork_subtour_scheduling(
        tours, persons_merged, tdd_alts, skim_dict, chunk_size, trace_hh_id):
    """
    This model predicts the departure time and duration of each at-work subtour
    """

    trace_label = 'atwork_subtour_scheduling'
    model_settings_file_name = 'tour_scheduling_atwork.yaml'

    tours = tours.to_frame()
    subtours = tours[tours.tour_category == 'atwork']

    # - if no atwork subtours
    if subtours.shape[0] == 0:
        tracing.no_results(trace_label)
        return

    model_settings = config.read_model_settings(model_settings_file_name)
    estimator = estimation.manager.begin_estimation('atwork_subtour_scheduling')

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients_df = simulate.read_model_coefficients(model_settings)
    model_spec = simulate.eval_coefficients(model_spec, coefficients_df, estimator)

    persons_merged = persons_merged.to_frame()

    logger.info("Running %s with %d tours", trace_label, len(subtours))

    # preprocessor
    constants = config.get_model_constants(model_settings)
    od_skim_wrapper = skim_dict.wrap('origin', 'destination')
    skims = {
        "od_skims": od_skim_wrapper,
    }
    expressions.annotate_preprocessors(
        subtours, constants, skims,
        model_settings, trace_label)

    # parent_tours table with columns ['tour_id', 'tdd'] index = tour_id
    parent_tour_ids = subtours.parent_tour_id.astype(np.int64).unique()
    parent_tours = pd.DataFrame({'tour_id': parent_tour_ids}, index=parent_tour_ids)
    parent_tours = parent_tours.merge(tours[['tdd']], left_index=True, right_index=True)

    if estimator:
        estimator.write_model_settings(model_settings, model_settings_file_name)
        estimator.write_spec(model_settings)
        estimator.write_coefficients(coefficients_df)

    # we don't need to update timetable because subtours are scheduled inside work trip windows
    choices = vectorize_subtour_scheduling(
        parent_tours,
        subtours,
        persons_merged,
        tdd_alts, model_spec,
        model_settings,
        estimator=estimator,
        chunk_size=chunk_size,
        trace_label=trace_label)

    if estimator:
        estimator.write_choices(choices)
        choices = estimator.get_survey_values(choices, 'tours', 'tdd')
        estimator.write_override_choices(choices)
        estimator.end_estimation()

    # choices are tdd alternative ids
    # we want to add start, end, and duration columns to tours, which we have in tdd_alts table
    tdd_choices = pd.merge(choices.to_frame('tdd'), tdd_alts, left_on=['tdd'],
                           right_index=True, how='left')

    assign_in_place(tours, tdd_choices)
    pipeline.replace_table("tours", tours)

    if trace_hh_id:
        tracing.trace_df(tours[tours.tour_category == 'atwork'],
                         label="atwork_subtour_scheduling",
                         slicer='person_id',
                         index_label='tour_id',
                         columns=None)

    if DUMP:
        subtours = tours[tours.tour_category == 'atwork']
        parent_tours = tours[tours.index.isin(subtours.parent_tour_id)]

        tracing.dump_df(DUMP, subtours, trace_label, 'sub_tours')
        tracing.dump_df(DUMP, parent_tours, trace_label, 'parent_tours')

        parent_tours['parent_tour_id'] = parent_tours.index
        subtours = pd.concat([parent_tours, subtours])
        tracing.dump_df(
            DUMP,
            tt.tour_map(parent_tours, subtours, tdd_alts, persons_id_col='parent_tour_id'),
            trace_label, 'tour_map')
def mandatory_tour_frequency(persons_merged, chunk_size, trace_hh_id):
    """
    This model predicts the frequency of making mandatory trips (see the
    alternatives above) - these trips include work and school in some combination.
    """
    trace_label = 'mandatory_tour_frequency'

    model_settings = config.read_model_settings('mandatory_tour_frequency.yaml')
    model_spec = simulate.read_model_spec(file_name='mandatory_tour_frequency.csv')
    alternatives = simulate.read_model_alts(
        config.config_file_path('mandatory_tour_frequency_alternatives.csv'), set_index='alt')

    choosers = persons_merged.to_frame()
    # filter based on results of CDAP
    choosers = choosers[choosers.cdap_activity == 'M']
    logger.info("Running mandatory_tour_frequency with %d persons", len(choosers))

    # - if no mandatory tours
    if choosers.shape[0] == 0:
        add_null_results(trace_label, model_settings)
        return

    # - preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_dict = {}

        expressions.assign_columns(
            df=choosers,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    choices = simulate.simple_simulate(
        choosers=choosers,
        spec=model_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='mandatory_tour_frequency')

    # convert indexes to alternative names
    choices = pd.Series(
        model_spec.columns[choices.values],
        index=choices.index).reindex(persons_merged.local.index)

    # - create mandatory tours
    """
    This reprocesses the choice of index of the mandatory tour frequency
    alternatives into an actual dataframe of tours. The resulting format is the
    same as for non_mandatory_tours, except trip types are "work" and "school"
    """
    choosers['mandatory_tour_frequency'] = choices
    mandatory_tours = process_mandatory_tours(
        persons=choosers,
        mandatory_tour_frequency_alts=alternatives
    )

    tours = pipeline.extend_table("tours", mandatory_tours)
    tracing.register_traceable_table('tours', mandatory_tours)
    pipeline.get_rn_generator().add_channel('tours', mandatory_tours)

    # - annotate persons
    persons = inject.get_table('persons').to_frame()

    # need to reindex as we only handled persons with cdap_activity == 'M'
    persons['mandatory_tour_frequency'] = choices.reindex(persons.index).fillna('').astype(str)

    expressions.assign_columns(
        df=persons,
        model_settings=model_settings.get('annotate_persons'),
        trace_label=tracing.extend_trace_label(trace_label, 'annotate_persons'))

    pipeline.replace_table("persons", persons)

    tracing.print_summary('mandatory_tour_frequency', persons.mandatory_tour_frequency,
                          value_counts=True)

    if trace_hh_id:
        tracing.trace_df(mandatory_tours,
                         label="mandatory_tour_frequency.mandatory_tours",
                         warn_if_empty=True)

        tracing.trace_df(persons,
                         label="mandatory_tour_frequency.persons",
                         warn_if_empty=True)
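# mandatory_tour_frequency only simulates persons with cdap_activity == 'M', so the
# choices series must be reindexed to the full persons index before being written back;
# persons outside the chooser subset get an empty string. A toy sketch of that reindex:

import pandas as pd

persons_index = pd.Index([1, 2, 3, 4], name='person_id')
choices = pd.Series(['work1', 'work_and_school'],
                    index=pd.Index([2, 4], name='person_id'))

full = choices.reindex(persons_index).fillna('').astype(str)
print(full)  # persons 1 and 3 get '' because they made no choice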
def __init__(self, model_settings, num_processes, shared_data=None, shared_data_lock=None):
    """
    Presence of shared_data is used as a flag for multiprocessing
    If we are multiprocessing, shared_data should be a multiprocessing.RawArray buffer
    to aggregate modeled_size across all sub-processes, and shared_data_lock should be
    a multiprocessing.Lock object to coordinate access to that buffer.

    Optionally load saved shadow_prices from data_dir if config setting use_shadow_pricing
    and shadow_pricing setting LOAD_SAVED_SHADOW_PRICES are both True

    Parameters
    ----------
    model_settings : dict
    num_processes : int
    shared_data : numpy array wrapping multiprocessing.RawArray, or None (if single process)
    shared_data_lock : multiprocessing.Lock, or None (if single process)
    """

    self.num_processes = num_processes
    self.use_shadow_pricing = bool(config.setting('use_shadow_pricing'))
    self.saved_shadow_price_file_path = None  # set by read_saved_shadow_prices if loaded

    self.model_selector = model_settings['MODEL_SELECTOR']

    full_model_run = config.setting('households_sample_size') == 0
    if self.use_shadow_pricing and not full_model_run:
        logger.warning("deprecated combination of use_shadow_pricing and not full_model_run")

    self.segment_ids = model_settings['SEGMENT_IDS']

    # - modeled_size (set by call to set_choices/synchronize_choices)
    self.modeled_size = None

    if self.use_shadow_pricing:
        self.shadow_settings = config.read_model_settings('shadow_pricing.yaml')

        for k in self.shadow_settings:
            logger.debug("shadow_settings %s: %s" % (k, self.shadow_settings.get(k)))

    # - destination_size_table (desired_size)
    self.desired_size = inject.get_table(size_table_name(self.model_selector)).to_frame()

    # - shared_data
    if shared_data is not None:
        assert shared_data.shape[0] == self.desired_size.shape[0]
        assert shared_data.shape[1] == self.desired_size.shape[1] + 1  # tally column
        assert shared_data_lock is not None
    self.shared_data = shared_data
    self.shared_data_lock = shared_data_lock

    # - load saved shadow_prices (if available) and set max_iterations accordingly
    if self.use_shadow_pricing:
        self.shadow_prices = None
        self.shadow_price_method = self.shadow_settings['SHADOW_PRICE_METHOD']
        assert self.shadow_price_method in ['daysim', 'ctramp']

        if self.shadow_settings['LOAD_SAVED_SHADOW_PRICES']:
            # read_saved_shadow_prices logs error and returns None if file not found
            self.shadow_prices = self.read_saved_shadow_prices(model_settings)

        if self.shadow_prices is None:
            self.max_iterations = self.shadow_settings.get('MAX_ITERATIONS', 5)
        else:
            self.max_iterations = self.shadow_settings.get('MAX_ITERATIONS_SAVED', 1)

        # initial_shadow_price if we did not load
        if self.shadow_prices is None:
            # initial value depends on method
            initial_shadow_price = 1.0 if self.shadow_price_method == 'ctramp' else 0.0
            self.shadow_prices = \
                pd.DataFrame(data=initial_shadow_price,
                             columns=self.desired_size.columns,
                             index=self.desired_size.index)
    else:
        self.max_iterations = 1

    self.num_fail = pd.DataFrame(index=self.desired_size.columns)
    self.max_abs_diff = pd.DataFrame(index=self.desired_size.columns)
    self.max_rel_diff = pd.DataFrame(index=self.desired_size.columns)
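# When multiprocessing, shared_data is expected to be a numpy array wrapping a
# multiprocessing.RawArray, shaped (num_zones, num_segments + 1) -- one column per
# segment plus a final tally column -- guarded by shared_data_lock. A hypothetical
# sketch of how such a buffer might be allocated (names and layout are assumptions,
# not the actual allocation code):

import multiprocessing

import numpy as np


def allocate_shared_shadow_price_buffer(num_zones, num_segments):
    # float64 buffer shared across processes, plus a lock to serialize access
    raw = multiprocessing.RawArray('d', num_zones * (num_segments + 1))
    shared = np.frombuffer(raw, dtype=np.float64).reshape(num_zones, num_segments + 1)
    lock = multiprocessing.Lock()
    return shared, lock


shared_data, shared_data_lock = allocate_shared_shadow_price_buffer(10, 3)
assert shared_data.shape == (10, 3 + 1)  # tally column makes the second dimension num_segments + 1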
def parking_location(trips, trips_merged, land_use, network_los, chunk_size, trace_hh_id): """ Given a set of trips, each trip needs to have a parking location if it is eligible for remote parking. """ trace_label = 'parking_location' model_settings = config.read_model_settings('parking_location_choice.yaml') alt_destination_col_name = model_settings['ALT_DEST_COL_NAME'] preprocessor_settings = model_settings.get('PREPROCESSOR', None) trips_df = trips.to_frame() trips_merged_df = trips_merged.to_frame() land_use_df = land_use.to_frame() locals_dict = {'network_los': network_los} locals_dict.update(config.get_model_constants(model_settings)) if preprocessor_settings: expressions.assign_columns(df=trips_merged_df, model_settings=preprocessor_settings, locals_dict=locals_dict, trace_label=trace_label) parking_locations, save_sample_df = run_parking_destination( model_settings, trips_merged_df, land_use_df, chunk_size=chunk_size, trace_hh_id=trace_hh_id, trace_label=trace_label, ) assign_in_place(trips_df, parking_locations.to_frame(alt_destination_col_name)) pipeline.replace_table("trips", trips_df) if trace_hh_id: tracing.trace_df(trips_df, label=trace_label, slicer='trip_id', index_label='trip_id', warn_if_empty=True) if save_sample_df is not None: assert len(save_sample_df.index.get_level_values(0).unique()) == \ len(trips_df[trips_df.trip_num < trips_df.trip_count]) sample_table_name = model_settings.get( 'PARKING_LOCATION_SAMPLE_TABLE_NAME') assert sample_table_name is not None logger.info("adding %s samples to %s" % (len(save_sample_df), sample_table_name)) # lest they try to put tour samples into the same table if pipeline.is_table(sample_table_name): raise RuntimeError("sample table %s already exists" % sample_table_name) pipeline.extend_table(sample_table_name, save_sample_df)
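# assign_in_place (used above and throughout these steps) updates matching columns on
# the left table and adds any new ones, aligning on index. A hypothetical, simplified
# stand-in illustrating the intended behavior (the real helper has more careful dtype
# handling):

import pandas as pd


def assign_in_place_sketch(df, df2):
    # assign df2's columns into df, aligned on df's index (toy version)
    for c in df2.columns:
        df[c] = df2[c].reindex(df.index)


trips = pd.DataFrame({'depart': [8, 9]}, index=pd.Index([1, 2], name='trip_id'))
parking = pd.DataFrame({'parking_zone': [42]}, index=pd.Index([2], name='trip_id'))

assign_in_place_sketch(trips, parking)
print(trips)  # trip 1 gets NaN parking_zone; trip 2 gets 42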
def compute_logsums(primary_purpose,
                    trips,
                    destination_sample,
                    tours_merged,
                    model_settings,
                    skims,
                    chunk_size,
                    trace_hh_id,
                    trace_label):
    """
    Calculate mode choice logsums using the same recipe as for trip_mode_choice, but do it
    twice for each alternative since we need out-of-direction logsums
    (i.e. origin to alt_dest, and alt_dest to half-tour destination)

    Returns
    -------
    adds od_logsum and dp_logsum columns to trips (in place)
    """
    trace_label = tracing.extend_trace_label(trace_label, 'compute_logsums')
    logger.info("Running %s with %d samples", trace_label, destination_sample.shape[0])

    # - trips_merged - merge trips and tours_merged
    trips_merged = pd.merge(
        trips,
        tours_merged,
        left_on='tour_id',
        right_index=True,
        how="left")
    assert trips_merged.index.equals(trips.index)

    # - choosers - merge destination_sample and trips_merged
    # re/set index because pandas merge does not preserve left index if it has duplicate values!
    choosers = pd.merge(destination_sample,
                        trips_merged.reset_index(),
                        left_index=True,
                        right_on='trip_id',
                        how="left",
                        suffixes=('', '_r')).set_index('trip_id')
    assert choosers.index.equals(destination_sample.index)

    logsum_settings = config.read_model_settings(model_settings['LOGSUM_SETTINGS'])
    omnibus_coefficient_spec = \
        assign.read_constant_spec(config.config_file_path(logsum_settings['COEFFS']))

    coefficient_spec = omnibus_coefficient_spec[primary_purpose]

    constants = config.get_model_constants(logsum_settings)
    locals_dict = assign.evaluate_constants(coefficient_spec, constants=constants)
    locals_dict.update(constants)

    # - od_logsums
    od_skims = {
        'ORIGIN': model_settings['TRIP_ORIGIN'],
        'DESTINATION': model_settings['ALT_DEST'],
        "odt_skims": skims['odt_skims'],
        "od_skims": skims['od_skims'],
    }
    destination_sample['od_logsum'] = compute_ood_logsums(
        choosers,
        logsum_settings,
        od_skims,
        locals_dict,
        chunk_size,
        trace_label=tracing.extend_trace_label(trace_label, 'od'))

    # - dp_logsums
    dp_skims = {
        'ORIGIN': model_settings['ALT_DEST'],
        'DESTINATION': model_settings['PRIMARY_DEST'],
        "odt_skims": skims['dpt_skims'],
        "od_skims": skims['dp_skims'],
    }
    destination_sample['dp_logsum'] = compute_ood_logsums(
        choosers,
        logsum_settings,
        dp_skims,
        locals_dict,
        chunk_size,
        trace_label=tracing.extend_trace_label(trace_label, 'dp'))
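# The reset_index/set_index dance above matters because destination_sample has
# duplicate trip_id index values (one row per sampled alternative) and pandas merge
# does not preserve a duplicated left index. A toy demonstration:

import pandas as pd

sample = pd.DataFrame({'alt_dest': [5, 9, 7]},
                      index=pd.Index([1, 1, 2], name='trip_id'))
trips = pd.DataFrame({'depart': [8, 17]},
                     index=pd.Index([1, 2], name='trip_id'))

choosers = pd.merge(sample, trips.reset_index(),
                    left_index=True, right_on='trip_id',
                    how='left', suffixes=('', '_r')).set_index('trip_id')

# row order and the duplicated index survive the merge
assert choosers.index.equals(sample.index)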
def mandatory_scheduling_settings(configs_dir): return config.read_model_settings(configs_dir, 'mandatory_scheduling.yaml')
def atwork_subtour_frequency(tours, persons_merged, chunk_size, trace_hh_id):
    """
    This model predicts the frequency of making at-work subtours
    (alternatives for this model come from a separate csv file which is
    configured by the user).
    """

    trace_label = 'atwork_subtour_frequency'
    model_settings_file_name = 'atwork_subtour_frequency.yaml'

    tours = tours.to_frame()
    work_tours = tours[tours.tour_type == 'work']

    # - if no work_tours
    if len(work_tours) == 0:
        add_null_results(trace_label, tours)
        return

    model_settings = config.read_model_settings(model_settings_file_name)
    estimator = estimation.manager.begin_estimation('atwork_subtour_frequency')

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients_df = simulate.read_model_coefficients(model_settings)
    model_spec = simulate.eval_coefficients(model_spec, coefficients_df, estimator)

    alternatives = simulate.read_model_alts('atwork_subtour_frequency_alternatives.csv',
                                            set_index='alt')

    # merge persons into work_tours
    persons_merged = persons_merged.to_frame()
    work_tours = pd.merge(work_tours, persons_merged,
                          left_on='person_id', right_index=True)

    logger.info("Running atwork_subtour_frequency with %d work tours", len(work_tours))

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    # - preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:
        expressions.assign_columns(
            df=work_tours,
            model_settings=preprocessor_settings,
            trace_label=trace_label)

    if estimator:
        estimator.write_spec(model_settings)
        estimator.write_model_settings(model_settings, model_settings_file_name)
        estimator.write_coefficients(coefficients_df, model_settings)
        estimator.write_choosers(work_tours)

    choices = simulate.simple_simulate(
        choosers=work_tours,
        spec=model_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='atwork_subtour_frequency',
        estimator=estimator)

    # convert indexes to alternative names
    choices = pd.Series(model_spec.columns[choices.values], index=choices.index)

    if estimator:
        estimator.write_choices(choices)
        choices = estimator.get_survey_values(choices, 'tours', 'atwork_subtour_frequency')
        estimator.write_override_choices(choices)
        estimator.end_estimation()

    # add atwork_subtour_frequency column to tours
    # reindex since we are working with a subset of tours
    tours['atwork_subtour_frequency'] = choices.reindex(tours.index)
    pipeline.replace_table("tours", tours)

    # - create atwork_subtours based on atwork_subtour_frequency choice names
    work_tours = tours[tours.tour_type == 'work']
    assert not work_tours.atwork_subtour_frequency.isnull().any()

    subtours = process_atwork_subtours(work_tours, alternatives)

    tours = pipeline.extend_table("tours", subtours)

    tracing.register_traceable_table('tours', subtours)
    pipeline.get_rn_generator().add_channel('tours', subtours)

    tracing.print_summary('atwork_subtour_frequency', tours.atwork_subtour_frequency,
                          value_counts=True)

    if trace_hh_id:
        tracing.trace_df(tours, label='atwork_subtour_frequency.tours')
def joint_tour_scheduling(
        tours, persons_merged,
        tdd_alts,
        chunk_size,
        trace_hh_id):
    """
    This model predicts the departure time and duration of each joint tour
    """
    trace_label = 'joint_tour_scheduling'

    model_settings_file_name = 'joint_tour_scheduling.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)

    tours = tours.to_frame()
    joint_tours = tours[tours.tour_category == 'joint']

    # - if no joint tours
    if joint_tours.shape[0] == 0:
        tracing.no_results(trace_label)
        return

    # use inject.get_table as this won't exist if there are no joint_tours
    joint_tour_participants = inject.get_table('joint_tour_participants').to_frame()

    persons_merged = persons_merged.to_frame()

    logger.info("Running %s with %d joint tours", trace_label, joint_tours.shape[0])

    # it may seem peculiar that we are concerned with persons rather than households
    # but every joint tour is (somewhat arbitrarily) assigned a "primary person"
    # some of whose characteristics are used in the spec
    # and we get household attributes along with person attributes in persons_merged
    persons_merged = persons_merged[persons_merged.num_hh_joint_tours > 0]

    # since a household's joint tours each have potentially different participants,
    # they may also have different joint tour masks (free time of all participants),
    # so we have to either chunk processing by joint_tour_num and build the timetable
    # by household, or build timetables by unique joint_tour
    constants = config.get_model_constants(model_settings)

    # - run preprocessor to annotate choosers
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_d = {}
        if constants is not None:
            locals_d.update(constants)

        expressions.assign_columns(
            df=joint_tours,
            model_settings=preprocessor_settings,
            locals_dict=locals_d,
            trace_label=trace_label)

    timetable = inject.get_injectable("timetable")

    estimator = estimation.manager.begin_estimation('joint_tour_scheduling')

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients_df = simulate.read_model_coefficients(model_settings)
    model_spec = simulate.eval_coefficients(model_spec, coefficients_df, estimator)

    if estimator:
        estimator.write_model_settings(model_settings, model_settings_file_name)
        estimator.write_spec(model_settings)
        estimator.write_coefficients(coefficients_df)
        timetable.begin_transaction(estimator)

    choices = vectorize_joint_tour_scheduling(
        joint_tours, joint_tour_participants,
        persons_merged,
        tdd_alts, timetable,
        spec=model_spec,
        model_settings=model_settings,
        estimator=estimator,
        chunk_size=chunk_size,
        trace_label=trace_label)

    if estimator:
        estimator.write_choices(choices)
        choices = estimator.get_survey_values(choices, 'tours', 'tdd')
        estimator.write_override_choices(choices)
        estimator.end_estimation()

        # update timetable to reflect the override choices (assign tours in tour_num order)
        timetable.rollback()
        for tour_num, nth_tours in joint_tours.groupby('tour_num', sort=True):
            nth_participants = \
                joint_tour_participants[joint_tour_participants.tour_id.isin(nth_tours.index)]

            estimator.log("assign timetable for %s participants in %s tours with tour_num %s"
                          % (len(nth_participants), len(nth_tours), tour_num))

            # - update timetables of all joint tour participants
            timetable.assign(nth_participants.person_id,
                             reindex(choices, nth_participants.tour_id))

    timetable.replace_table()

    # choices are tdd alternative ids
    # we want to add start, end, and duration columns to tours, which we have in tdd_alts table
    choices = pd.merge(choices.to_frame('tdd'), tdd_alts, left_on=['tdd'],
                       right_index=True, how='left')

    assign_in_place(tours, choices)
    pipeline.replace_table("tours", tours)

    # updated df for tracing
    joint_tours = tours[tours.tour_category == 'joint']

    if trace_hh_id:
        tracing.trace_df(joint_tours,
                         label="joint_tour_scheduling",
                         slicer='household_id')
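# tdd choices are bare alternative ids; merging them against the tdd_alts table is
# what attaches the start, end and duration columns those ids stand for. A toy
# example of that merge (alternative ids and times are made up):

import pandas as pd

tdd_alts = pd.DataFrame({'start': [5, 5], 'end': [9, 17], 'duration': [4, 12]},
                        index=pd.Index([0, 1], name='tdd'))
choices = pd.Series([1, 0], index=pd.Index([11, 12], name='tour_id'))

tdd_choices = pd.merge(choices.to_frame('tdd'), tdd_alts,
                       left_on=['tdd'], right_index=True, how='left')
print(tdd_choices)  # tour_id index with tdd, start, end and duration columns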
def trip_purpose_and_destination( trips, tours_merged, chunk_size, trace_hh_id): trace_label = "trip_purpose_and_destination" model_settings = config.read_model_settings('trip_purpose_and_destination.yaml') MAX_ITERATIONS = model_settings.get('MAX_ITERATIONS', 5) trips_df = trips.to_frame() tours_merged_df = tours_merged.to_frame() if trips_df.empty: logger.info("%s - no trips. Nothing to do." % trace_label) return # FIXME could allow MAX_ITERATIONS=0 to allow for cleanup-only run # in which case, we would need to drop bad trips, WITHOUT failing bad_trip leg_mates assert (MAX_ITERATIONS > 0) # if trip_destination has been run before, keep only failed trips (and leg_mates) to retry if 'destination' in trips_df: if trips_df.failed.any(): logger.info('trip_destination has already been run. Rerunning failed trips') flag_failed_trip_leg_mates(trips_df, 'failed') trips_df = trips_df[trips_df.failed] tours_merged_df = tours_merged_df[tours_merged_df.index.isin(trips_df.tour_id)] logger.info('Rerunning %s failed trips and leg-mates' % trips_df.shape[0]) else: # no failed trips from prior run of trip_destination logger.info("%s - no failed trips from prior model run." % trace_label) del trips_df['failed'] pipeline.replace_table("trips", trips_df) return results = [] i = 0 RESULT_COLUMNS = ['purpose', 'destination', 'origin', 'failed'] while True: i += 1 for c in RESULT_COLUMNS: if c in trips_df: del trips_df[c] trips_df = run_trip_purpose_and_destination( trips_df, tours_merged_df, chunk_size, trace_hh_id, trace_label=tracing.extend_trace_label(trace_label, "i%s" % i)) num_failed_trips = trips_df.failed.sum() # if there were no failed trips, we are done if num_failed_trips == 0: results.append(trips_df[RESULT_COLUMNS]) break logger.warning("%s %s failed trips in iteration %s" % (trace_label, num_failed_trips, i)) file_name = "%s_i%s_failed_trips" % (trace_label, i) logger.info("writing failed trips to %s" % file_name) tracing.write_csv(trips_df[trips_df.failed], file_name=file_name, transpose=False) # if max iterations reached, add remaining trips to results and give up # note that we do this BEFORE failing leg_mates so resulting trip legs are complete if i >= MAX_ITERATIONS: logger.warning("%s too many iterations %s" % (trace_label, i)) results.append(trips_df[RESULT_COLUMNS]) break # otherwise, if any trips failed, then their leg-mates trips must also fail flag_failed_trip_leg_mates(trips_df, 'failed') # add the good trips to results results.append(trips_df[~trips_df.failed][RESULT_COLUMNS]) # and keep the failed ones to retry trips_df = trips_df[trips_df.failed] tours_merged_df = tours_merged_df[tours_merged_df.index.isin(trips_df.tour_id)] # - assign result columns to trips results = pd.concat(results) logger.info("%s %s failed trips after %s iterations" % (trace_label, results.failed.sum(), i)) trips_df = trips.to_frame() assign_in_place(trips_df, results) trips_df = cleanup_failed_trips(trips_df) pipeline.replace_table("trips", trips_df) if trace_hh_id: tracing.trace_df(trips_df, label=trace_label, slicer='trip_id', index_label='trip_id', warn_if_empty=True)
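# flag_failed_trip_leg_mates marks the other trips on a failed trip's leg, since a
# leg with a failed trip can no longer be assembled into a complete set of trips.
# A hypothetical, simplified stand-in (the real helper's grouping logic may differ):

import pandas as pd


def flag_failed_leg_mates_sketch(trips_df, fail_col='failed'):
    # mark every trip on any (tour_id, outbound) leg containing a failed trip
    leg_failed = trips_df.groupby(['tour_id', 'outbound'])[fail_col].transform('any')
    trips_df[fail_col] = leg_failed


trips = pd.DataFrame({
    'tour_id': [1, 1, 1, 2],
    'outbound': [True, True, False, True],
    'failed': [False, True, False, False],
})

flag_failed_leg_mates_sketch(trips)
print(trips)  # both outbound trips on tour 1 are now failed; the others are untouched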
def _compute_logsums(alt_tdd, tours_merged, tour_purpose, model_settings, network_los, skims, trace_label): """ compute logsums for tours using skims for alt_tdd out_period and in_period """ trace_label = tracing.extend_trace_label(trace_label, 'logsums') with chunk.chunk_log(trace_label): logsum_settings = config.read_model_settings( model_settings['LOGSUM_SETTINGS']) choosers = alt_tdd.join(tours_merged, how='left', rsuffix='_chooser') logger.info( f"{trace_label} compute_logsums for {choosers.shape[0]} choosers {alt_tdd.shape[0]} alts" ) # - locals_dict constants = config.get_model_constants(logsum_settings) locals_dict = {} locals_dict.update(constants) if network_los.zone_system == los.THREE_ZONE: # TVPB constants can appear in expressions locals_dict.update( network_los.setting( 'TVPB_SETTINGS.tour_mode_choice.CONSTANTS')) locals_dict.update(skims) # constrained coefficients can appear in expressions coefficients = simulate.get_segment_coefficients( logsum_settings, tour_purpose) locals_dict.update(coefficients) # - run preprocessor to annotate choosers # allow specification of alternate preprocessor for nontour choosers preprocessor = model_settings.get('LOGSUM_PREPROCESSOR', 'preprocessor') preprocessor_settings = logsum_settings[preprocessor] if preprocessor_settings: simulate.set_skim_wrapper_targets(choosers, skims) expressions.assign_columns(df=choosers, model_settings=preprocessor_settings, locals_dict=locals_dict, trace_label=trace_label) # - compute logsums logsum_spec = simulate.read_model_spec( file_name=logsum_settings['SPEC']) logsum_spec = simulate.eval_coefficients(logsum_spec, coefficients, estimator=None) nest_spec = config.get_logit_model_settings(logsum_settings) nest_spec = simulate.eval_nest_coefficients(nest_spec, coefficients, trace_label) logsums = simulate.simple_simulate_logsums(choosers, logsum_spec, nest_spec, skims=skims, locals_d=locals_dict, chunk_size=0, trace_label=trace_label) return logsums
def cdap_simulate(persons_merged, persons, households, cdap_indiv_spec, cdap_interaction_coefficients, cdap_fixed_relative_proportions, chunk_size, trace_hh_id): """ CDAP stands for Coordinated Daily Activity Pattern, which is a choice of high-level activity pattern for each person, in a coordinated way with other members of a person's household. Because Python requires vectorization of computation, there are some specialized routines in the cdap directory of activitysim for this purpose. This module simply applies those utilities using the simulation framework. """ trace_label = 'cdap' model_settings = config.read_model_settings('cdap.yaml') persons_merged = persons_merged.to_frame() constants = config.get_model_constants(model_settings) cdap_interaction_coefficients = \ cdap.preprocess_interaction_coefficients(cdap_interaction_coefficients) # specs are built just-in-time on demand and cached as injectables # prebuilding here allows us to write them to the output directory # (also when multiprocessing locutor might not see all household sizes) logger.info("Pre-building cdap specs") for hhsize in range(2, cdap.MAX_HHSIZE + 1): spec = cdap.build_cdap_spec(cdap_interaction_coefficients, hhsize, cache=True) if inject.get_injectable('locutor', False): spec.to_csv(config.output_file_path('cdap_spec_%s.csv' % hhsize), index=True) logger.info("Running cdap_simulate with %d persons", len(persons_merged.index)) choices = cdap.run_cdap( persons=persons_merged, cdap_indiv_spec=cdap_indiv_spec, cdap_interaction_coefficients=cdap_interaction_coefficients, cdap_fixed_relative_proportions=cdap_fixed_relative_proportions, locals_d=constants, chunk_size=chunk_size, trace_hh_id=trace_hh_id, trace_label=trace_label) # - assign results to persons table and annotate persons = persons.to_frame() choices = choices.reindex(persons.index) persons['cdap_activity'] = choices.cdap_activity persons['cdap_rank'] = choices.cdap_rank expressions.assign_columns( df=persons, model_settings=model_settings.get('annotate_persons'), trace_label=tracing.extend_trace_label(trace_label, 'annotate_persons')) pipeline.replace_table("persons", persons) # - annotate households table households = households.to_frame() expressions.assign_columns( df=households, model_settings=model_settings.get('annotate_households'), trace_label=tracing.extend_trace_label(trace_label, 'annotate_households')) pipeline.replace_table("households", households) tracing.print_summary('cdap_activity', persons.cdap_activity, value_counts=True) logger.info("cdap crosstabs:\n%s" % pd.crosstab(persons.ptype, persons.cdap_activity, margins=True)) if trace_hh_id: tracing.trace_df(inject.get_table('persons_merged').to_frame(), label="cdap", columns=['ptype', 'cdap_rank', 'cdap_activity'], warn_if_empty=True)
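# The crosstab logged above tabulates person type against chosen activity pattern.
# A toy standalone example of the same pd.crosstab call (values are made up):

import pandas as pd

persons = pd.DataFrame({
    'ptype': [1, 1, 2, 2, 2],
    'cdap_activity': ['M', 'N', 'M', 'H', 'M'],
})

# rows are ptype, columns are cdap_activity, margins add 'All' totals
print(pd.crosstab(persons.ptype, persons.cdap_activity, margins=True))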
def stop_frequency(
        tours, tours_merged,
        stop_frequency_alts,
        skim_dict,
        chunk_size,
        trace_hh_id):
    """
    stop frequency model

    For each tour, choose a number of intermediate inbound stops and outbound stops.
    Create a trip table with inbound and outbound trips.

    Thus, a tour with stop_frequency '2out_0in' will have two outbound and zero inbound stops,
    and four corresponding trips: three outbound, and one inbound.

    Adds stop_frequency str column to the tours table.

    Creates trips table with columns:

    ::

        - person_id
        - household_id
        - tour_id
        - primary_purpose
        - atwork
        - trip_num
        - outbound
        - trip_count
    """

    trace_label = 'stop_frequency'
    model_settings = config.read_model_settings('stop_frequency.yaml')

    tours = tours.to_frame()
    tours_merged = tours_merged.to_frame()

    assert not tours_merged.household_id.isnull().any()
    assert not (tours_merged.origin == -1).any()
    assert not (tours_merged.destination == -1).any()

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    # - run preprocessor to annotate tours_merged
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        # hack: preprocessor adds origin column in place if it does not exist already
        od_skim_stack_wrapper = skim_dict.wrap('origin', 'destination')
        skims = [od_skim_stack_wrapper]

        locals_dict = {
            "od_skims": od_skim_stack_wrapper
        }
        if constants is not None:
            locals_dict.update(constants)

        simulate.set_skim_wrapper_targets(tours_merged, skims)

        # this should be pre-slice as some expressions may count tours by type
        annotations = expressions.compute_columns(
            df=tours_merged,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)

        assign_in_place(tours_merged, annotations)

    tracing.print_summary('stop_frequency segments',
                          tours_merged.primary_purpose, value_counts=True)

    choices_list = []
    for segment_type, choosers in tours_merged.groupby('primary_purpose'):

        logger.info("%s running segment %s with %s chooser rows" %
                    (trace_label, segment_type, choosers.shape[0]))

        spec = simulate.read_model_spec(file_name='stop_frequency_%s.csv' % segment_type)

        assert spec is not None, "spec for segment_type %s not found" % segment_type

        choices = simulate.simple_simulate(
            choosers=choosers,
            spec=spec,
            nest_spec=nest_spec,
            locals_d=constants,
            chunk_size=chunk_size,
            trace_label=tracing.extend_trace_label(trace_label, segment_type),
            trace_choice_name='stops')

        # convert indexes to alternative names
        choices = pd.Series(spec.columns[choices.values], index=choices.index)

        choices_list.append(choices)

    choices = pd.concat(choices_list)

    tracing.print_summary('stop_frequency', choices, value_counts=True)

    # add stop_frequency choices to tours table
    assign_in_place(tours, choices.to_frame('stop_frequency'))

    if 'primary_purpose' not in tours.columns:
        assign_in_place(tours, tours_merged[['primary_purpose']])

    pipeline.replace_table("tours", tours)

    # create trips table
    trips = process_trips(tours, stop_frequency_alts)
    trips = pipeline.extend_table("trips", trips)
    tracing.register_traceable_table('trips', trips)
    pipeline.get_rn_generator().add_channel('trips', trips)

    if trace_hh_id:
        tracing.trace_df(tours,
                         label="stop_frequency.tours",
                         slicer='person_id',
                         columns=None)

        tracing.trace_df(trips,
                         label="stop_frequency.trips",
                         slicer='person_id',
                         columns=None)

        tracing.trace_df(annotations,
                         label="stop_frequency.annotations",
                         columns=None)

        tracing.trace_df(tours_merged,
                         label="stop_frequency.tours_merged",
                         slicer='person_id',
                         columns=None)
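# A stop_frequency alternative name like '2out_0in' encodes the number of outbound and
# inbound intermediate stops, and each half-tour yields stops + 1 trips. A hypothetical
# parser illustrating the arithmetic in the docstring above (not the model's actual
# trip-expansion code):

import re


def trips_for_alt(alt):
    # return (outbound_trips, inbound_trips) for an alt like '2out_0in'
    match = re.fullmatch(r'(\d+)out_(\d+)in', alt)
    out_stops, in_stops = int(match.group(1)), int(match.group(2))
    return out_stops + 1, in_stops + 1


assert trips_for_alt('2out_0in') == (3, 1)  # four trips: three outbound, one inbound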
def _compute_logsums(alt_tdd, tours_merged, tour_purpose, model_settings, trace_label):
    """
    compute logsums for tours using skims for alt_tdd out_period and in_period
    """

    trace_label = tracing.extend_trace_label(trace_label, 'logsums')

    logsum_settings = config.read_model_settings(model_settings['LOGSUM_SETTINGS'])

    choosers = alt_tdd.join(tours_merged, how='left', rsuffix='_chooser')
    logger.info("%s compute_logsums for %d choosers %d alts" %
                (trace_label, choosers.shape[0], alt_tdd.shape[0]))

    # - setup skims
    skim_dict = inject.get_injectable('skim_dict')
    skim_stack = inject.get_injectable('skim_stack')

    orig_col_name = 'TAZ'
    dest_col_name = model_settings.get('DESTINATION_FOR_TOUR_PURPOSE').get(tour_purpose)

    odt_skim_stack_wrapper = skim_stack.wrap(left_key=orig_col_name, right_key=dest_col_name,
                                             skim_key='out_period')
    dot_skim_stack_wrapper = skim_stack.wrap(left_key=dest_col_name, right_key=orig_col_name,
                                             skim_key='in_period')
    od_skim_stack_wrapper = skim_dict.wrap(orig_col_name, dest_col_name)

    skims = {
        "odt_skims": odt_skim_stack_wrapper,
        "dot_skims": dot_skim_stack_wrapper,
        "od_skims": od_skim_stack_wrapper,
        'orig_col_name': orig_col_name,
        'dest_col_name': dest_col_name,
    }

    # - locals_dict
    constants = config.get_model_constants(logsum_settings)

    omnibus_coefficient_spec = get_coeffecients_spec(logsum_settings)
    coefficient_spec = omnibus_coefficient_spec[tour_purpose]
    coefficients = assign.evaluate_constants(coefficient_spec, constants=constants)

    locals_dict = {}
    locals_dict.update(coefficients)
    locals_dict.update(constants)
    locals_dict.update(skims)

    # - run preprocessor to annotate choosers
    # allow specification of alternate preprocessor for nontour choosers
    preprocessor = model_settings.get('LOGSUM_PREPROCESSOR', 'preprocessor')
    preprocessor_settings = logsum_settings[preprocessor]

    if preprocessor_settings:

        simulate.set_skim_wrapper_targets(choosers, skims)

        expressions.assign_columns(
            df=choosers,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)

    # - compute logsums
    logsum_spec = get_logsum_spec(logsum_settings)
    nest_spec = config.get_logit_model_settings(logsum_settings)

    logsums = simulate.simple_simulate_logsums(
        choosers,
        logsum_spec,
        nest_spec,
        skims=skims,
        locals_d=locals_dict,
        chunk_size=0,
        trace_label=trace_label)

    return logsums
def telecommute_frequency(
        persons_merged, persons,
        chunk_size, trace_hh_id):
    """
    This model predicts the frequency of telecommuting for a person (worker) who
    does not work from home. The alternatives of this model are 'No Telecommute',
    '1 day per week', '2 to 3 days per week' and '4 days per week'. This model
    reflects the choices of people who prefer a combination of working from home
    and at the office during the week.
    """

    trace_label = 'telecommute_frequency'
    model_settings_file_name = 'telecommute_frequency.yaml'

    choosers = persons_merged.to_frame()
    choosers = choosers[choosers.workplace_zone_id > -1]

    logger.info("Running %s with %d persons", trace_label, len(choosers))

    model_settings = config.read_model_settings(model_settings_file_name)
    estimator = estimation.manager.begin_estimation('telecommute_frequency')

    constants = config.get_model_constants(model_settings)

    # - preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_d = {}
        if constants is not None:
            locals_d.update(constants)

        expressions.assign_columns(
            df=choosers,
            model_settings=preprocessor_settings,
            locals_dict=locals_d,
            trace_label=trace_label)

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients_df = simulate.read_model_coefficients(model_settings)
    model_spec = simulate.eval_coefficients(model_spec, coefficients_df, estimator)

    nest_spec = config.get_logit_model_settings(model_settings)

    if estimator:
        estimator.write_model_settings(model_settings, model_settings_file_name)
        estimator.write_spec(model_settings)
        estimator.write_coefficients(coefficients_df)
        estimator.write_choosers(choosers)

    choices = simulate.simple_simulate(
        choosers=choosers,
        spec=model_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='telecommute_frequency',
        estimator=estimator)

    choices = pd.Series(model_spec.columns[choices.values], index=choices.index)

    if estimator:
        estimator.write_choices(choices)
        choices = estimator.get_survey_values(choices, 'persons', 'telecommute_frequency')
        estimator.write_override_choices(choices)
        estimator.end_estimation()

    persons = persons.to_frame()
    persons['telecommute_frequency'] = choices.reindex(persons.index).fillna('').astype(str)

    pipeline.replace_table("persons", persons)

    tracing.print_summary('telecommute_frequency', persons.telecommute_frequency,
                          value_counts=True)

    if trace_hh_id:
        tracing.trace_df(persons,
                         label=trace_label,
                         warn_if_empty=True)