def run_trip_scheduling(
        trips,
        tours,
        probs_spec,
        model_settings,
        estimator,
        is_last_iteration,
        chunk_size,
        chunk_tag,
        trace_hh_id,
        trace_label):
    """
    Schedule trips chunk by chunk, one leg direction at a time.

    Each chunk yielded by ``chunk.adaptive_chunked_choosers_by_chunk_id`` is
    split on its ``outbound`` column; the outbound rows (if any) are passed to
    ``schedule_trips_in_leg`` first, then the inbound rows (if any).

    Parameters
    ----------
    trips : pandas.DataFrame
        trips to schedule; must have a boolean ``outbound`` column
    tours
        not used by this function (kept for signature compatibility)
    probs_spec, model_settings, is_last_iteration, trace_hh_id
        forwarded unchanged to ``schedule_trips_in_leg``
    estimator
        not used by this function (kept for signature compatibility)
    chunk_size, chunk_tag
        forwarded to the adaptive chunker
    trace_label : str
        label for the chunker and for the ``chunk.log_df`` bookkeeping calls

    Returns
    -------
    The ``pd.concat`` of every per-leg result returned by
    ``schedule_trips_in_leg``, in the order the legs were processed.
    """

    # only non-initial trips require scheduling, segment handing first such trip in tour will use most space
    # is_outbound_chooser = (trips.trip_num > 1) & trips.outbound & (trips.primary_purpose != 'atwork')
    # is_inbound_chooser = (trips.trip_num < trips.trip_count) & ~trips.outbound & (trips.primary_purpose != 'atwork')
    # num_choosers = (is_inbound_chooser | is_outbound_chooser).sum()

    leg_results = []
    chunks = chunk.adaptive_chunked_choosers_by_chunk_id(
        trips, chunk_size, trace_label, chunk_tag)

    for _, trips_chunk, chunk_trace_label in chunks:

        # outbound leg is always handled before the inbound leg of the same chunk
        for is_outbound, leg_name in ((True, 'outbound'), (False, 'inbound')):

            leg_mask = trips_chunk.outbound if is_outbound else ~trips_chunk.outbound
            if not leg_mask.any():
                continue

            leg_results.append(
                schedule_trips_in_leg(
                    outbound=is_outbound,
                    trips=trips_chunk[leg_mask],
                    probs_spec=probs_spec,
                    model_settings=model_settings,
                    is_last_iteration=is_last_iteration,
                    trace_hh_id=trace_hh_id,
                    trace_label=tracing.extend_trace_label(chunk_trace_label, leg_name)))

            chunk.log_df(trace_label, 'result_list', leg_results)

    return pd.concat(leg_results)
def trip_scheduling(trips, tours, chunk_size, trace_hh_id):
    """
    Trip scheduling assigns depart times for trips within the start, end limits of the tour.

    The algorithm is simplistic:

    The first outbound trip starts at the tour start time, and subsequent outbound trips are
    processed in trip_num order, to ensure that subsequent trips do not depart before the
    trip that precedes them.

    Inbound trips are handled similarly, except in reverse order, starting with the last trip,
    and working backwards to ensure that inbound trips do not depart after the trip that
    succeeds them.

    The probability spec assigns probabilities for depart times, but those possible departs must
    be clipped to disallow depart times outside the tour limits, the departs of prior trips, and
    in the case of work tours, the start/end times of any atwork subtours.

    Scheduling can fail if the probability table assigns zero probabilities to all the available
    depart times in a trip's depart window. (This could be avoided by giving every window a small
    probability, rather than zero, but the existing mtctm1 prob spec does not do this. I believe
    this is due to its having been generated from a small household travel survey sample
    that lacked any departs for some time periods.)

    Rescheduling the trips that fail (along with their inbound or outbound leg-mates) can sometimes
    fix this problem, if it was caused by an earlier trip's depart choice blocking a subsequent
    trip's ability to schedule a depart within the resulting window. But it can also happen if
    a tour is very short (e.g. one time period) and the prob spec having a zero probability for
    that tour hour.

    Therefore we need to handle trips that could not be scheduled. There are two ways (at least)
    to solve this problem:

    1) choose_most_initial
    simply assign a depart time to the trip, even if it has a zero probability. It makes
    most sense, in this case, to assign the 'most initial' depart time, so that subsequent trips
    are minimally impacted. This can be done in the final iteration, thus affecting only the
    trips that could not be scheduled by the standard approach

    2) drop_and_cleanup
    drop trips that could not be scheduled, and adjust their leg mates, as is done for failed
    trips in trip_destination.

    Which option is applied is determined by the FAILFIX model setting

    Parameters
    ----------
    trips, tours
        table wrappers exposing ``to_frame()``
    chunk_size
        forwarded to the adaptive chunker and to ``run_trip_scheduling``
    trace_hh_id
        forwarded to ``run_trip_scheduling``

    Returns
    -------
    None. The "trips" pipeline table is replaced with a frame carrying a new
    ``depart`` column (and with unschedulable trips removed when the
    drop-and-cleanup failfix is applied).

    Raises
    ------
    RuntimeError
        if trips remain unscheduled and FAILFIX is not FAILFIX_DROP_AND_CLEANUP
    """

    trace_label = "trip_scheduling"
    model_settings_file_name = 'trip_scheduling.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)

    trips_df = trips.to_frame()
    tours = tours.to_frame()

    # add columns 'tour_hour', 'earliest', 'latest' to trips
    set_tour_hour(trips_df, tours)

    # trip_scheduling is a probabilistic model and we don't support estimation,
    # but we do need to override choices in estimation mode
    estimator = estimation.manager.begin_estimation('trip_scheduling')
    if estimator:
        estimator.write_spec(model_settings, tag='PROBS_SPEC')
        estimator.write_model_settings(model_settings, model_settings_file_name)
        chooser_cols_for_estimation = [
            'person_id', 'household_id', 'tour_id', 'trip_num', 'trip_count',
            'primary_purpose', 'outbound', 'earliest', 'latest', 'tour_hour',
        ]
        estimator.write_choosers(trips_df[chooser_cols_for_estimation])

    probs_spec = pd.read_csv(
        config.config_file_path('trip_scheduling_probs.csv'), comment='#')
    # FIXME for now, not really doing estimation for probabilistic model - just overwriting choices
    # besides, it isn't clear that named coefficients would be helpful if we had some form of estimation
    # coefficients_df = simulate.read_model_coefficients(model_settings)
    # probs_spec = map_coefficients(probs_spec, coefficients_df)

    # add tour-based chunk_id so we can chunk all trips in tour together
    trips_df['chunk_id'] = reindex(
        pd.Series(list(range(len(tours))), tours.index), trips_df.tour_id)

    assert 'DEPART_ALT_BASE' in model_settings
    failfix = model_settings.get(FAILFIX, FAILFIX_DEFAULT)

    max_iterations = model_settings.get('MAX_ITERATIONS', 1)
    assert max_iterations > 0

    choices_list = []

    for chunk_i, trips_chunk, chunk_trace_label in chunk.adaptive_chunked_choosers_by_chunk_id(
            trips_df, chunk_size, trace_label, trace_label):

        i = 0
        while (i < max_iterations) and not trips_chunk.empty:

            # only chunk log first iteration since memory use declines with each iteration
            with chunk.chunk_log(trace_label) if i == 0 else chunk.chunk_log_skip():

                i += 1
                is_last_iteration = (i == max_iterations)

                trace_label_i = tracing.extend_trace_label(trace_label, "i%s" % i)
                logger.info("%s scheduling %s trips within chunk %s",
                            trace_label_i, trips_chunk.shape[0], chunk_i)

                choices = \
                    run_trip_scheduling(
                        trips_chunk,
                        tours,
                        probs_spec,
                        model_settings,
                        estimator=estimator,
                        is_last_iteration=is_last_iteration,
                        trace_hh_id=trace_hh_id,
                        chunk_size=chunk_size,
                        chunk_tag=trace_label,
                        trace_label=trace_label_i)

                # boolean series of trips whose individual trip scheduling failed
                failed = choices.reindex(trips_chunk.index).isnull()
                logger.info("%s %s failed", trace_label_i, failed.sum())

                if not is_last_iteration:
                    # boolean series of trips whose leg scheduling failed
                    failed_cohorts = failed_trip_cohorts(trips_chunk, failed)
                    # retry whole failed legs next iteration; keep only the
                    # choices of legs that scheduled cleanly
                    trips_chunk = trips_chunk[failed_cohorts]
                    choices = choices[~failed_cohorts]

                choices_list.append(choices)

    # start again from a fresh frame so the temporary scheduling columns
    # added above (e.g. 'chunk_id') are not written back to the pipeline
    trips_df = trips.to_frame()

    choices = pd.concat(choices_list)
    choices = choices.reindex(trips_df.index)

    if estimator:
        estimator.write_choices(choices)
        choices = estimator.get_survey_values(choices, 'trips', 'depart')  # override choices
        estimator.write_override_choices(choices)
        estimator.end_estimation()
        # The no-null check applies to the overridden (survey) choices only.
        # Applying it unconditionally would make the failfix handling below
        # unreachable.
        assert not choices.isnull().any()

    unscheduled = choices.isnull()
    if unscheduled.any():
        # A trip can only still be unscheduled if its chunk ran all
        # max_iterations, so report that rather than the loop counter, which
        # only reflects the last chunk processed.
        logger.warning(
            "%s of %s trips could not be scheduled after %s iterations",
            unscheduled.sum(), trips_df.shape[0], max_iterations)

        if failfix != FAILFIX_DROP_AND_CLEANUP:
            raise RuntimeError("%s setting '%s' not enabled in settings" %
                               (FAILFIX, FAILFIX_DROP_AND_CLEANUP))

        trips_df['failed'] = unscheduled
        trips_df = cleanup_failed_trips(trips_df)
        choices = choices.reindex(trips_df.index)

    trips_df['depart'] = choices

    assert not trips_df.depart.isnull().any()

    pipeline.replace_table("trips", trips_df)
def run_cdap(
        persons,
        person_type_map,
        cdap_indiv_spec,
        cdap_interaction_coefficients,
        cdap_fixed_relative_proportions,
        locals_d,
        chunk_size=0, trace_hh_id=None, trace_label=None):
    """
    Choose individual activity patterns for persons.

    The persons table is processed in adaptive chunks; each chunk is handed
    to ``_run_cdap`` and the per-chunk results are combined.

    Parameters
    ----------
    persons : pandas.DataFrame
        Table of persons data. Must contain at least a household ID,
        household size, person type category, and age, plus any columns
        used in cdap_indiv_spec
    person_type_map
        forwarded unchanged to ``_run_cdap``
    cdap_indiv_spec : pandas.DataFrame
        CDAP spec for individuals without taking any interactions into account.
    cdap_interaction_coefficients : pandas.DataFrame
        Rules and coefficients for generating interaction specs for different household sizes
    cdap_fixed_relative_proportions : pandas.DataFrame
        Spec for the relative proportions of each activity (M, N, H)
        to choose activities for additional household members not handled by CDAP
    locals_d : Dict
        This is a dictionary of local variables that will be the environment
        for an evaluation of an expression that begins with @
        in either the cdap_indiv_spec or cdap_fixed_relative_proportions expression files
    chunk_size: int
        Chunk size or 0 for no chunking
    trace_hh_id : int
        hh_id to trace or None if no hh tracing
    trace_label : str
        label for tracing or None if no tracing

    Returns
    -------
    choices : pandas.Series
        the ``cdap_activity`` column of the combined per-chunk results
        (activity for each person expressed as 'M', 'N', 'H')
    """

    trace_label = tracing.extend_trace_label(trace_label, 'cdap')

    chunk_results = []
    person_chunks = chunk.adaptive_chunked_choosers_by_chunk_id(
        persons, chunk_size, trace_label)

    for _, persons_chunk, chunk_trace_label in person_chunks:

        cdap_results = _run_cdap(
            persons_chunk,
            person_type_map,
            cdap_indiv_spec,
            cdap_interaction_coefficients,
            cdap_fixed_relative_proportions,
            locals_d,
            trace_hh_id,
            chunk_trace_label)

        chunk_results.append(cdap_results)
        chunk.log_df(trace_label, 'result_list', chunk_results)

    # FIXME: this will require 2X RAM
    # if necessary, could append to hdf5 store on disk:
    # http://pandas.pydata.org/pandas-docs/stable/io.html#id2
    # With a single chunk the last per-chunk frame is used as-is, so no
    # concatenated copy is made.
    if len(chunk_results) > 1:
        cdap_results = pd.concat(chunk_results)

    if trace_hh_id:
        traced_columns = ['cdap_rank', 'cdap_activity']
        tracing.trace_df(cdap_results,
                         label="cdap",
                         columns=traced_columns,
                         warn_if_empty=True)

    # return choices column as series
    return cdap_results['cdap_activity']
def apply_stage_two_model(omnibus_spec, trips, chunk_size, trace_label):
    """
    Assign a depart time to every trip, choosing stop-duration patterns for
    the intermediate ("side") trips of each tour leg.

    First and last trips of each leg get their depart time directly from the
    tour ``start``/``end`` and the leg durations. The remaining trips are
    chunked, split by direction, and passed to ``choose_tour_leg_pattern``;
    the chosen ``stop_time_duration`` values are then accumulated along each
    (tour_id, outbound) leg to produce depart times.

    Parameters
    ----------
    omnibus_spec
        passed to ``get_spec_for_segment`` together with the leg direction
    trips : pandas.DataFrame
        must provide the OUTBOUND, OB_DURATION, IB_DURATION and TRIP_COUNT
        columns plus 'trip_num', 'trip_count', 'start', 'end', 'tour_id' and
        'outbound'. Working columns (TRIP_DURATION, 'depart',
        'stop_time_duration') are added to this frame; if its index is not
        already sorted a sorted copy is used instead.
    chunk_size
        forwarded to the adaptive chunker
    trace_label : str
        forwarded to the adaptive chunker

    Returns
    -------
    pandas.Series
        integer ``depart`` values indexed like the (index-sorted) trips
    """

    # Index.is_monotonic was deprecated in pandas 1.5 and removed in 2.0;
    # is_monotonic_increasing is the equivalent supported spelling.
    if not trips.index.is_monotonic_increasing:
        trips = trips.sort_index()

    # Assign the duration of the appropriate leg to the trip
    trips[TRIP_DURATION] = np.where(trips[OUTBOUND], trips[OB_DURATION], trips[IB_DURATION])

    trips['depart'] = -1

    is_first = trips['trip_num'] == 1
    is_last = trips['trip_count'] == trips['trip_num']

    # If this is the first outbound trip, the choice is easy, assign the depart time
    # to equal the tour start time.
    trips.loc[is_first & (trips[OUTBOUND]), 'depart'] = trips['start']

    # If its the first return leg, it is easy too. Just assign the trip start time to the
    # end time minus the IB duration
    trips.loc[is_first & (~trips[OUTBOUND]), 'depart'] = trips['end'] - trips[IB_DURATION]

    # The last leg of the outbound tour needs to begin at the start plus OB duration
    trips.loc[is_last & (trips[OUTBOUND]), 'depart'] = \
        trips['start'] + trips[OB_DURATION]

    # The last leg of the inbound tour needs to begin at the end time of the tour
    trips.loc[is_last & (~trips[OUTBOUND]), 'depart'] = \
        trips['end']

    # Slice off the remaining trips with an intermediate stops to deal with.
    # Hopefully, with the tricks above we've sliced off a lot of choices.
    # This slice only includes trips that are neither the first nor the last
    # trip of their leg, since those were assigned directly above.
    side_trips = trips[(trips['trip_num'] != 1) & (trips['trip_count'] != trips['trip_num'])]

    # No processing needs to be done because we have simple trips / tours
    if side_trips.empty:
        # call .all() -- the bare bound method is always truthy
        assert trips['depart'].notnull().all()
        return trips['depart'].astype(int)

    # Get the potential time windows
    time_windows = get_time_windows(side_trips[TRIP_DURATION].max(), side_trips[TRIP_COUNT].max() - 1)

    trip_list = []

    for _, chooser_chunk, chunk_trace_label in \
            chunk.adaptive_chunked_choosers_by_chunk_id(side_trips, chunk_size, trace_label):

        for is_outbound, trip_segment in chooser_chunk.groupby(OUTBOUND):
            direction = OUTBOUND if is_outbound else 'inbound'
            spec = get_spec_for_segment(omnibus_spec, direction)
            segment_trace_label = '{}_{}'.format(direction, chunk_trace_label)

            patterns = build_patterns(trip_segment, time_windows)

            choices = choose_tour_leg_pattern(trip_segment, patterns, spec,
                                              trace_label=segment_trace_label)

            # attach the chosen pattern's per-trip rows to the choices
            choices = pd.merge(choices.reset_index(), patterns.reset_index(),
                               on=[TOUR_LEG_ID, PATTERN_ID], how='left')

            choices = choices[['trip_id', 'stop_time_duration']].copy()

            trip_list.append(choices)

    trip_list = pd.concat(trip_list, sort=True).set_index('trip_id')
    trips['stop_time_duration'] = 0
    trips.update(trip_list)
    trips.loc[trips['trip_num'] == 1, 'stop_time_duration'] = trips['depart']

    # The running total must follow trip_num order within each leg.
    # sort_values returns a new frame, so accumulate on the sorted copy and
    # let the assignment align back on the index; the row order of `trips`
    # (and of the returned series) is left untouched.
    # NOTE(review): relies on the trips index being unique -- confirm.
    ordered = trips.sort_values(['tour_id', 'outbound', 'trip_num'])
    trips['stop_time_duration'] = \
        ordered.groupby(['tour_id', 'outbound'])['stop_time_duration'].cumsum()

    trips.loc[trips['trip_num'] != trips['trip_count'], 'depart'] = trips['stop_time_duration']
    return trips['depart'].astype(int)