def trip_scheduling(trips, tours, chunk_size, trace_hh_id):
    """
    Trip scheduling assigns depart times for trips within the start, end limits of the tour.

    The algorithm is simplistic:

    The first outbound trip starts at the tour start time, and subsequent outbound trips are
    processed in trip_num order, to ensure that subsequent trips do not depart before the
    trip that precedes them.

    Inbound trips are handled similarly, except in reverse order, starting with the last trip,
    and working backwards to ensure that inbound trips do not depart after the trip that
    succeeds them.

    The probability spec assigns probabilities for depart times, but those possible departs must
    be clipped to disallow depart times outside the tour limits, the departs of prior trips, and
    in the case of work tours, the start/end times of any atwork subtours.

    Scheduling can fail if the probability table assigns zero probabilities to all the available
    depart times in a trip's depart window. (This could be avoided by giving every window a small
    probability, rather than zero, but the existing mtctm1 prob spec does not do this. I believe
    this is due to its having been generated from a small household travel survey sample
    that lacked any departs for some time periods.)

    Rescheduling the trips that fail (along with their inbound or outbound leg-mates) can sometimes
    fix this problem, if it was caused by an earlier trip's depart choice blocking a subsequent
    trip's ability to schedule a depart within the resulting window. But it can also happen if
    a tour is very short (e.g. one time period) and the prob spec has a zero probability for
    that tour hour.

    Therefore we need to handle trips that could not be scheduled. There are two ways (at least)
    to solve this problem:

    1) choose_most_initial
    simply assign a depart time to the trip, even if it has a zero probability. It makes
    most sense, in this case, to assign the 'most initial' depart time, so that subsequent trips
    are minimally impacted. This can be done in the final iteration, thus affecting only the
    trips that could not be scheduled by the standard approach

    2) drop_and_cleanup
    drop trips that could not be scheduled, and adjust their leg mates, as is done for failed
    trips in trip_destination.

    Which option is applied is determined by the FAILFIX model setting

    Parameters
    ----------
    trips : table wrapper providing ``to_frame()``
        trips to schedule; the resulting frame must have a ``tour_id`` column
    tours : table wrapper providing ``to_frame()``
        tours the trips belong to (indexed so that ``trips.tour_id`` can be looked up)
    chunk_size : passed through to the chunker and to run_trip_scheduling
    trace_hh_id : passed through to run_trip_scheduling

    Returns
    -------
    None
        the pipeline "trips" table is replaced by a copy with a 'depart' column added

    Raises
    ------
    RuntimeError
        if some trips could not be scheduled and the FAILFIX setting is not
        FAILFIX_DROP_AND_CLEANUP
    """
    trace_label = "trip_scheduling"
    model_settings_file_name = 'trip_scheduling.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)

    trips_df = trips.to_frame()
    tours = tours.to_frame()

    # add columns 'tour_hour', 'earliest', 'latest' to trips
    set_tour_hour(trips_df, tours)

    # trip_scheduling is a probabilistic model and we don't support estimation,
    # but we do need to override choices in estimation mode
    estimator = estimation.manager.begin_estimation('trip_scheduling')
    if estimator:
        estimator.write_spec(model_settings, tag='PROBS_SPEC')
        estimator.write_model_settings(model_settings, model_settings_file_name)
        chooser_cols_for_estimation = [
            'person_id', 'household_id', 'tour_id', 'trip_num', 'trip_count',
            'primary_purpose', 'outbound', 'earliest', 'latest', 'tour_hour',
        ]
        estimator.write_choosers(trips_df[chooser_cols_for_estimation])

    probs_spec = pd.read_csv(
        config.config_file_path('trip_scheduling_probs.csv'), comment='#')
    # FIXME for now, not really doing estimation for probabilistic model - just overwriting choices
    # besides, it isn't clear that named coefficients would be helpful if we had some form of estimation
    # coefficients_df = simulate.read_model_coefficients(model_settings)
    # probs_spec = map_coefficients(probs_spec, coefficients_df)

    # add tour-based chunk_id so we can chunk all trips in tour together
    trips_df['chunk_id'] = reindex(
        pd.Series(list(range(len(tours))), tours.index), trips_df.tour_id)

    assert 'DEPART_ALT_BASE' in model_settings, \
        "DEPART_ALT_BASE missing from %s" % model_settings_file_name
    failfix = model_settings.get(FAILFIX, FAILFIX_DEFAULT)

    max_iterations = model_settings.get('MAX_ITERATIONS', 1)
    assert max_iterations > 0, "MAX_ITERATIONS must be greater than zero"

    choices_list = []

    for chunk_i, trips_chunk, chunk_trace_label in \
            chunk.adaptive_chunked_choosers_by_chunk_id(
                trips_df, chunk_size, trace_label, trace_label):

        i = 0
        while (i < max_iterations) and not trips_chunk.empty:

            # only chunk log first iteration since memory use declines with each iteration
            with chunk.chunk_log(trace_label) if i == 0 else chunk.chunk_log_skip():

                i += 1
                is_last_iteration = (i == max_iterations)

                trace_label_i = tracing.extend_trace_label(trace_label, "i%s" % i)
                logger.info("%s scheduling %s trips within chunk %s",
                            trace_label_i, trips_chunk.shape[0], chunk_i)

                choices = \
                    run_trip_scheduling(
                        trips_chunk,
                        tours,
                        probs_spec,
                        model_settings,
                        estimator=estimator,
                        is_last_iteration=is_last_iteration,
                        trace_hh_id=trace_hh_id,
                        chunk_size=chunk_size,
                        chunk_tag=trace_label,
                        trace_label=trace_label_i)

                # boolean series of trips whose individual trip scheduling failed
                failed = choices.reindex(trips_chunk.index).isnull()
                logger.info("%s %s failed", trace_label_i, failed.sum())

                if not is_last_iteration:
                    # boolean series of trips whose leg scheduling failed
                    failed_cohorts = failed_trip_cohorts(trips_chunk, failed)
                    # retry the failed legs in the next iteration, keep the rest
                    trips_chunk = trips_chunk[failed_cohorts]
                    choices = choices[~failed_cohorts]

                choices_list.append(choices)

    # start again from a fresh copy of trips (without the helper columns added above)
    trips_df = trips.to_frame()

    if choices_list:
        choices = pd.concat(choices_list)
    else:
        # pd.concat raises ValueError on an empty list (e.g. there were no trips to schedule),
        # so fall back to an all-null series over the trips index
        choices = pd.Series(index=trips_df.index, dtype='float64')
    choices = choices.reindex(trips_df.index)

    if estimator:
        estimator.write_choices(choices)
        choices = estimator.get_survey_values(choices, 'trips', 'depart')  # override choices
        estimator.write_override_choices(choices)
        estimator.end_estimation()
        # after overriding, no trip may be left without a depart
        assert not choices.isnull().any()

    if choices.isnull().any():
        # unscheduled trips can only survive the final iteration, so report the iteration
        # limit rather than the loop counter of whichever chunk happened to run last
        logger.warning(
            "%s of %s trips could not be scheduled after %s iterations",
            choices.isnull().sum(), trips_df.shape[0], max_iterations)

        if failfix != FAILFIX_DROP_AND_CLEANUP:
            raise RuntimeError("%s setting '%s' not enabled in settings" %
                               (FAILFIX, FAILFIX_DROP_AND_CLEANUP))

        trips_df['failed'] = choices.isnull()
        trips_df = cleanup_failed_trips(trips_df)
        # realign choices with the trips that remain after cleanup
        choices = choices.reindex(trips_df.index)

    trips_df['depart'] = choices

    assert not trips_df.depart.isnull().any()

    pipeline.replace_table("trips", trips_df)
def trip_scheduling(trips, tours, chunk_size, trace_hh_id):
    """
    Trip scheduling assigns depart times for trips within the start, end limits of the tour.

    The algorithm is simplistic:

    The first outbound trip starts at the tour start time, and subsequent outbound trips are
    processed in trip_num order, to ensure that subsequent trips do not depart before the
    trip that precedes them.

    Inbound trips are handled similarly, except in reverse order, starting with the last trip,
    and working backwards to ensure that inbound trips do not depart after the trip that
    succeeds them.

    The probability spec assigns probabilities for depart times, but those possible departs must
    be clipped to disallow depart times outside the tour limits, the departs of prior trips, and
    in the case of work tours, the start/end times of any atwork subtours.

    Scheduling can fail if the probability table assigns zero probabilities to all the available
    depart times in a trip's depart window. (This could be avoided by giving every window a small
    probability, rather than zero, but the existing mtctm1 prob spec does not do this. I believe
    this is due to its having been generated from a small household travel survey sample
    that lacked any departs for some time periods.)

    Rescheduling the trips that fail (along with their inbound or outbound leg-mates) can sometimes
    fix this problem, if it was caused by an earlier trip's depart choice blocking a subsequent
    trip's ability to schedule a depart within the resulting window. But it can also happen if
    a tour is very short (e.g. one time period) and the prob spec has a zero probability for
    that tour hour.

    Therefore we need to handle trips that could not be scheduled. There are two ways (at least)
    to solve this problem:

    1) CHOOSE_MOST_INITIAL
    simply assign a depart time to the trip, even if it has a zero probability. It makes
    most sense, in this case, to assign the 'most initial' depart time, so that subsequent trips
    are minimally impacted. This can be done in the final iteration, thus affecting only the
    trips that could not be scheduled by the standard approach

    2) drop_and_cleanup
    drop trips that could not be scheduled, and adjust their leg mates, as is done for failed
    trips in trip_destination.

    Which approach is applied is determined by the FAILFIX model setting

    Parameters
    ----------
    trips : table wrapper providing ``to_frame()``
        trips to schedule; the resulting frame must have a ``tour_id`` column
    tours : table wrapper providing ``to_frame()``
        tours the trips belong to (indexed so that ``trips.tour_id`` can be looked up)
    chunk_size : passed through to run_trip_scheduling
    trace_hh_id : passed through to run_trip_scheduling

    Returns
    -------
    None
        the pipeline "trips" table is replaced by a copy with a 'depart' column added

    Raises
    ------
    RuntimeError
        if some trips could not be scheduled and the FAILFIX setting is not
        FAILFIX_DROP_AND_CLEANUP
    """
    trace_label = "trip_scheduling"

    model_settings = config.read_model_settings('trip_scheduling.yaml')
    assert 'DEPART_ALT_BASE' in model_settings, \
        "DEPART_ALT_BASE missing from trip_scheduling.yaml"

    failfix = model_settings.get(FAILFIX, FAILFIX_DEFAULT)

    probs_spec = pd.read_csv(
        config.config_file_path('trip_scheduling_probs.csv'), comment='#')

    trips_df = trips.to_frame()
    tours = tours.to_frame()

    # add tour-based chunk_id so we can chunk all trips in tour together
    trips_df['chunk_id'] = \
        reindex(pd.Series(list(range(tours.shape[0])), tours.index), trips_df.tour_id)

    max_iterations = model_settings.get('MAX_ITERATIONS', 1)
    assert max_iterations > 0, "MAX_ITERATIONS must be greater than zero"

    choices_list = []
    i = 0
    while (i < max_iterations) and not trips_df.empty:

        i += 1
        last_iteration = (i == max_iterations)

        trace_label_i = tracing.extend_trace_label(trace_label, "i%s" % i)
        logger.info("%s scheduling %s trips", trace_label_i, trips_df.shape[0])

        choices = \
            run_trip_scheduling(
                trips_df,
                tours,
                probs_spec,
                model_settings,
                last_iteration=last_iteration,
                trace_hh_id=trace_hh_id,
                chunk_size=chunk_size,
                trace_label=trace_label_i)

        # boolean series of trips whose individual trip scheduling failed
        failed = choices.reindex(trips_df.index).isnull()
        logger.info("%s %s failed", trace_label_i, failed.sum())

        if not last_iteration:
            # boolean series of trips whose leg scheduling failed
            failed_cohorts = failed_trip_cohorts(trips_df, failed)
            # retry the failed legs in the next iteration, keep the rest
            trips_df = trips_df[failed_cohorts]
            choices = choices[~failed_cohorts]

        choices_list.append(choices)

    # start again from a fresh copy of trips (the working copy was whittled down above)
    trips_df = trips.to_frame()

    if choices_list:
        choices = pd.concat(choices_list)
    else:
        # pd.concat raises ValueError on an empty list (the loop never ran because there
        # were no trips to schedule), so fall back to an all-null series over the trips index
        choices = pd.Series(index=trips_df.index, dtype='float64')
    choices = choices.reindex(trips_df.index)

    if choices.isnull().any():
        logger.warning(
            "%s of %s trips could not be scheduled after %s iterations",
            choices.isnull().sum(), trips_df.shape[0], i)

        if failfix != FAILFIX_DROP_AND_CLEANUP:
            raise RuntimeError("%s setting '%s' not enabled in settings" %
                               (FAILFIX, FAILFIX_DROP_AND_CLEANUP))

        trips_df['failed'] = choices.isnull()
        trips_df = cleanup_failed_trips(trips_df)
        # realign choices with the trips that remain after cleanup
        choices = choices.reindex(trips_df.index)

    trips_df['depart'] = choices

    assert not trips_df.depart.isnull().any()

    pipeline.replace_table("trips", trips_df)