def trip_destination(trips, tours_merged, chunk_size, trace_hh_id):
    """
    Choose a destination for all 'intermediate' trips based on trip purpose.

    Final trips already have a destination (the primary tour destination
    for outbound trips, and home for inbound trips.)
    """
    trace_label = 'trip_destination'
    model_settings_file_name = 'trip_destination.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)

    CLEANUP = model_settings.get('CLEANUP', True)
    fail_some_trips_for_testing = model_settings.get('fail_some_trips_for_testing', False)

    trips_df = trips.to_frame()
    tours_merged_df = tours_merged.to_frame()

    estimator = estimation.manager.begin_estimation('trip_destination')
    if estimator:
        estimator.write_coefficients(model_settings=model_settings)
        # estimator.write_spec(model_settings, tag='SAMPLE_SPEC')
        estimator.write_spec(model_settings, tag='SPEC')
        estimator.set_alt_id(model_settings["ALT_DEST_COL_NAME"])
        estimator.write_table(inject.get_injectable('size_terms'), 'size_terms', append=False)
        estimator.write_table(inject.get_table('land_use').to_frame(), 'landuse', append=False)
        estimator.write_model_settings(model_settings, model_settings_file_name)

    logger.info("Running %s with %d trips", trace_label, trips_df.shape[0])

    trips_df, save_sample_df = run_trip_destination(
        trips_df,
        tours_merged_df,
        estimator=estimator,
        chunk_size=chunk_size,
        trace_hh_id=trace_hh_id,
        trace_label=trace_label,
        fail_some_trips_for_testing=fail_some_trips_for_testing)

    # testing feature to make sure at least one trip fails so trip_purpose_and_destination model is run
    if config.setting('testing_fail_trip_destination', False) and not trips_df.failed.any():
        if (trips_df.trip_num < trips_df.trip_count).sum() == 0:
            raise RuntimeError(
                "can't honor 'testing_fail_trip_destination' setting because there are no intermediate trips")

        fail_o = trips_df[trips_df.trip_num < trips_df.trip_count].origin.max()
        trips_df.failed = (trips_df.origin == fail_o) & \
                          (trips_df.trip_num < trips_df.trip_count)

    if trips_df.failed.any():
        logger.warning("%s %s failed trips", trace_label, trips_df.failed.sum())

        if inject.get_injectable('pipeline_file_prefix', None):
            file_name = f"{trace_label}_failed_trips_{inject.get_injectable('pipeline_file_prefix')}"
        else:
            file_name = f"{trace_label}_failed_trips"

        logger.info("writing failed trips to %s", file_name)
        tracing.write_csv(trips_df[trips_df.failed], file_name=file_name, transpose=False)

    if estimator:
        estimator.end_estimation()
        # no trips should have failed since we overwrite choices, and the sample should have no failed trips
        assert not trips_df.failed.any()

    if CLEANUP:
        if trips_df.failed.any():
            flag_failed_trip_leg_mates(trips_df, 'failed')

            if save_sample_df is not None:
                save_sample_df.drop(trips_df.index[trips_df.failed], level='trip_id', inplace=True)

            trips_df = cleanup_failed_trips(trips_df)

        trips_df.drop(columns='failed', inplace=True, errors='ignore')

    pipeline.replace_table("trips", trips_df)

    if trace_hh_id:
        tracing.trace_df(trips_df,
                         label=trace_label,
                         slicer='trip_id',
                         index_label='trip_id',
                         warn_if_empty=True)

    if save_sample_df is not None:
        # might be None if want_sample_table but there are no intermediate trips
        # expect samples only for intermediate trip destinations
        assert len(save_sample_df.index.get_level_values(0).unique()) == \
               len(trips_df[trips_df.trip_num < trips_df.trip_count])

        sample_table_name = model_settings.get('DEST_CHOICE_SAMPLE_TABLE_NAME')
        assert sample_table_name is not None

        logger.info("adding %s samples to %s" % (len(save_sample_df), sample_table_name))

        # lest they try to put tour samples into the same table
        if pipeline.is_table(sample_table_name):
            raise RuntimeError("sample table %s already exists" % sample_table_name)
        pipeline.extend_table(sample_table_name, save_sample_df)
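# Illustrative sketch, not called by the pipeline: how this step distinguishes
# 'intermediate' trips (which need a destination choice) from final trips
# (which already have one). The toy frame below is hypothetical; the column
# names match the mask used above (trip_num < trip_count).
def _demo_intermediate_trip_mask():
    import pandas as pd

    trips = pd.DataFrame(
        {'trip_num': [1, 2, 3, 1],
         'trip_count': [3, 3, 3, 1]},
        index=pd.Index([101, 102, 103, 104], name='trip_id'))

    # trips 101 and 102 are intermediate; 103 and 104 are the final trips of
    # their legs and keep their pre-assigned destinations
    return trips.trip_num < trips.trip_count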
def trip_purpose_and_destination(trips, tours_merged, chunk_size, trace_hh_id):

    trace_label = "trip_purpose_and_destination"
    model_settings = config.read_model_settings('trip_purpose_and_destination.yaml')

    # for consistency, read sample_table_name setting from trip_destination settings file
    trip_destination_model_settings = config.read_model_settings('trip_destination.yaml')
    sample_table_name = trip_destination_model_settings.get('DEST_CHOICE_SAMPLE_TABLE_NAME')
    want_sample_table = config.setting('want_dest_choice_sample_tables') and sample_table_name is not None

    MAX_ITERATIONS = model_settings.get('MAX_ITERATIONS', 5)

    trips_df = trips.to_frame()
    tours_merged_df = tours_merged.to_frame()

    if trips_df.empty:
        logger.info("%s - no trips. Nothing to do." % trace_label)
        return

    # FIXME could allow MAX_ITERATIONS=0 to allow for cleanup-only run
    # in which case, we would need to drop bad trips, WITHOUT failing bad_trip leg_mates
    assert (MAX_ITERATIONS > 0)

    # if trip_destination has been run before, keep only failed trips (and leg_mates) to retry
    if 'destination' in trips_df:

        if 'failed' not in trips_df.columns:
            # trip_destination model cleaned up any failed trips
            logger.info("%s - no failed column from prior model run." % trace_label)
            return

        elif not trips_df.failed.any():
            # 'failed' column but no failed trips from prior run of trip_destination
            logger.info("%s - no failed trips from prior model run." % trace_label)
            trips_df.drop(columns='failed', inplace=True)
            pipeline.replace_table("trips", trips_df)
            return

        else:
            logger.info("trip_destination has already been run. Rerunning failed trips")
            flag_failed_trip_leg_mates(trips_df, 'failed')
            trips_df = trips_df[trips_df.failed]
            tours_merged_df = tours_merged_df[tours_merged_df.index.isin(trips_df.tour_id)]
            logger.info("Rerunning %s failed trips and leg-mates" % trips_df.shape[0])

            # drop any previously saved samples of failed trips
            if want_sample_table and pipeline.is_table(sample_table_name):
                logger.info("Dropping any previously saved samples of failed trips")
                save_sample_df = pipeline.get_table(sample_table_name)
                save_sample_df.drop(trips_df.index, level='trip_id', inplace=True)
                pipeline.replace_table(sample_table_name, save_sample_df)
                del save_sample_df

    # if we estimated trip_destination, there should have been no failed trips
    # if we didn't, but it is enabled, it is probably a configuration error
    # if we just estimated trip_purpose, it isn't clear what they are trying to do, nor how to handle it
    assert not (estimation.manager.begin_estimation('trip_purpose')
                or estimation.manager.begin_estimation('trip_destination'))

    processed_trips = []
    save_samples = []
    i = 0
    TRIP_RESULT_COLUMNS = ['purpose', 'destination', 'origin', 'failed']
    while True:

        i += 1

        for c in TRIP_RESULT_COLUMNS:
            if c in trips_df:
                del trips_df[c]

        trips_df, save_sample_df = run_trip_purpose_and_destination(
            trips_df,
            tours_merged_df,
            chunk_size=chunk_size,
            trace_hh_id=trace_hh_id,
            trace_label=tracing.extend_trace_label(trace_label, "i%s" % i))

        # if testing, make sure at least one trip fails
        if config.setting('testing_fail_trip_destination', False) \
                and (i == 1) and not trips_df.failed.any():
            fail_o = trips_df[trips_df.trip_num < trips_df.trip_count].origin.max()
            trips_df.failed = (trips_df.origin == fail_o) & \
                              (trips_df.trip_num < trips_df.trip_count)

        num_failed_trips = trips_df.failed.sum()

        # if there were no failed trips, we are done
        if num_failed_trips == 0:
            processed_trips.append(trips_df[TRIP_RESULT_COLUMNS])
            if save_sample_df is not None:
                save_samples.append(save_sample_df)
            break

        logger.warning("%s %s failed trips in iteration %s" % (trace_label, num_failed_trips, i))
        file_name = "%s_i%s_failed_trips" % (trace_label, i)
        logger.info("writing failed trips to %s" % file_name)
        tracing.write_csv(trips_df[trips_df.failed], file_name=file_name, transpose=False)

        # if max iterations reached, add remaining trips to processed_trips and give up
        # note that we do this BEFORE failing leg_mates so resulting trip legs are complete
        if i >= MAX_ITERATIONS:
            logger.warning("%s too many iterations %s" % (trace_label, i))
            processed_trips.append(trips_df[TRIP_RESULT_COLUMNS])
            if save_sample_df is not None:
                save_sample_df.drop(trips_df[trips_df.failed].index, level='trip_id', inplace=True)
                save_samples.append(save_sample_df)
            break

        # otherwise, if any trips failed, then their leg-mate trips must also fail
        flag_failed_trip_leg_mates(trips_df, 'failed')

        # add the good trips to processed_trips
        processed_trips.append(trips_df[~trips_df.failed][TRIP_RESULT_COLUMNS])

        # and keep the failed ones to retry
        trips_df = trips_df[trips_df.failed]
        tours_merged_df = tours_merged_df[tours_merged_df.index.isin(trips_df.tour_id)]

        # add trip samples of processed_trips to processed_samples
        if save_sample_df is not None:
            # drop failed trip samples
            save_sample_df.drop(trips_df.index, level='trip_id', inplace=True)
            save_samples.append(save_sample_df)

    # - assign result columns to trips
    processed_trips = pd.concat(processed_trips)

    if len(save_samples) > 0:
        save_sample_df = pd.concat(save_samples)
        logger.info("adding %s samples to %s" % (len(save_sample_df), sample_table_name))
        pipeline.extend_table(sample_table_name, save_sample_df)

    logger.info("%s %s failed trips after %s iterations" % (trace_label, processed_trips.failed.sum(), i))

    trips_df = trips.to_frame()
    assign_in_place(trips_df, processed_trips)

    trips_df = cleanup_failed_trips(trips_df)

    pipeline.replace_table("trips", trips_df)

    # check to make sure we wrote the sample file if requested
    if want_sample_table and len(trips_df) > 0:
        assert pipeline.is_table(sample_table_name)
        # since we have saved samples for all successful trips,
        # once we discard failed trips, we should have samples for all trips
        save_sample_df = pipeline.get_table(sample_table_name)
        # expect samples only for intermediate trip destinations
        assert \
            len(save_sample_df.index.get_level_values(0).unique()) == \
            len(trips_df[trips_df.trip_num < trips_df.trip_count])
        del save_sample_df

    if trace_hh_id:
        tracing.trace_df(trips_df,
                         label=trace_label,
                         slicer='trip_id',
                         index_label='trip_id',
                         warn_if_empty=True)
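# Illustrative sketch, not part of ActivitySim: what flagging 'leg-mates' means
# in the retry loop above. If any trip on a tour leg fails, the downstream
# trips on that leg no longer have a valid origin, so the whole leg must be
# retried. This is a hypothetical stand-in for the real
# flag_failed_trip_leg_mates helper, whose actual implementation may differ;
# it assumes the trips frame has 'tour_id', 'outbound', and 'failed' columns.
def _demo_flag_leg_mates(trips_df):
    # a 'leg' is the set of trips sharing a tour and direction; if any trip
    # in the group failed, mark every trip in that group as failed
    leg_failed = trips_df.groupby(['tour_id', 'outbound']).failed.transform('any')
    trips_df['failed'] = leg_failed
    return trips_df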
def parking_location(trips, trips_merged, land_use, network_los, chunk_size, trace_hh_id):
    """
    Given a set of trips, each trip needs to have a parking location if
    it is eligible for remote parking.
    """

    trace_label = 'parking_location'
    model_settings = config.read_model_settings('parking_location_choice.yaml')
    alt_destination_col_name = model_settings['ALT_DEST_COL_NAME']

    preprocessor_settings = model_settings.get('PREPROCESSOR', None)

    trips_df = trips.to_frame()
    trips_merged_df = trips_merged.to_frame()
    land_use_df = land_use.to_frame()

    locals_dict = {'network_los': network_los}
    locals_dict.update(config.get_model_constants(model_settings))

    if preprocessor_settings:
        expressions.assign_columns(
            df=trips_merged_df,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)

    parking_locations, save_sample_df = run_parking_destination(
        model_settings,
        trips_merged_df,
        land_use_df,
        chunk_size=chunk_size,
        trace_hh_id=trace_hh_id,
        trace_label=trace_label,
    )

    assign_in_place(trips_df, parking_locations.to_frame(alt_destination_col_name))

    pipeline.replace_table("trips", trips_df)

    if trace_hh_id:
        tracing.trace_df(trips_df,
                         label=trace_label,
                         slicer='trip_id',
                         index_label='trip_id',
                         warn_if_empty=True)

    if save_sample_df is not None:
        assert len(save_sample_df.index.get_level_values(0).unique()) == \
               len(trips_df[trips_df.trip_num < trips_df.trip_count])

        sample_table_name = model_settings.get('PARKING_LOCATION_SAMPLE_TABLE_NAME')
        assert sample_table_name is not None

        logger.info("adding %s samples to %s" % (len(save_sample_df), sample_table_name))

        # lest they try to put tour samples into the same table
        if pipeline.is_table(sample_table_name):
            raise RuntimeError("sample table %s already exists" % sample_table_name)
        pipeline.extend_table(sample_table_name, save_sample_df)
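# Illustrative sketch, not part of ActivitySim: how the chosen parking zones
# are merged back onto the trips table above. This is a hypothetical minimal
# version of what the real assign_in_place helper does for this step (the
# actual helper is more general); parking_locations is a Series of zone ids
# indexed by trip_id.
def _demo_assign_parking(trips_df, parking_locations, alt_dest_col):
    # trips without a parking choice keep whatever value the column
    # already had (or NaN if the column is new)
    trips_df.loc[parking_locations.index, alt_dest_col] = parking_locations
    return trips_df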
def iterate_location_choice(
        model_settings,
        persons_merged, persons, households,
        network_los,
        estimator,
        chunk_size, trace_hh_id, locutor,
        trace_label):
    """
    iterate run_location_choice updating shadow pricing until convergence
    criteria satisfied or max_iterations reached.

    (If use_shadow_pricing not enabled, then just iterate once)

    Parameters
    ----------
    model_settings : dict
    persons_merged : injected table
    persons : injected table
    households : injected table
    network_los : los.Network_LOS
    chunk_size : int
    trace_hh_id : int
    locutor : bool
        whether this process is the privileged logger of shadow_pricing when multiprocessing
    trace_label : str

    Returns
    -------
    adds choice column model_settings['DEST_CHOICE_COLUMN_NAME']
    adds logsum column model_settings['DEST_CHOICE_LOGSUM_COLUMN_NAME'] - if provided
    adds annotations to persons table
    """

    chunk_tag = trace_label

    # boolean column to filter out persons not needing location modeling (e.g. is_worker, is_student)
    chooser_filter_column = model_settings['CHOOSER_FILTER_COLUMN_NAME']

    dest_choice_column_name = model_settings['DEST_CHOICE_COLUMN_NAME']
    logsum_column_name = model_settings.get('DEST_CHOICE_LOGSUM_COLUMN_NAME')

    sample_table_name = model_settings.get('DEST_CHOICE_SAMPLE_TABLE_NAME')
    want_sample_table = config.setting('want_dest_choice_sample_tables') and sample_table_name is not None

    persons_merged_df = persons_merged.to_frame()

    persons_merged_df = persons_merged_df[persons_merged[chooser_filter_column]]

    # interaction_sample expects chooser index to be monotonic increasing
    persons_merged_df.sort_index(inplace=True)

    # chooser segmentation allows different sets of coefficients for e.g. different income_segments or tour_types
    chooser_segment_column = model_settings['CHOOSER_SEGMENT_COLUMN_NAME']

    assert chooser_segment_column in persons_merged_df, \
        f"CHOOSER_SEGMENT_COLUMN '{chooser_segment_column}' not in persons_merged table."

    spc = shadow_pricing.load_shadow_price_calculator(model_settings)
    max_iterations = spc.max_iterations
    assert not (spc.use_shadow_pricing and estimator)

    logger.debug("%s max_iterations: %s" % (trace_label, max_iterations))

    for iteration in range(1, max_iterations + 1):

        if spc.use_shadow_pricing and iteration > 1:
            spc.update_shadow_prices()

        choices_df, save_sample_df = run_location_choice(
            persons_merged_df,
            network_los,
            shadow_price_calculator=spc,
            want_logsums=logsum_column_name is not None,
            want_sample_table=want_sample_table,
            estimator=estimator,
            model_settings=model_settings,
            chunk_size=chunk_size,
            chunk_tag=chunk_tag,
            trace_hh_id=trace_hh_id,
            trace_label=tracing.extend_trace_label(trace_label, 'i%s' % iteration))

        # choices_df is a pandas DataFrame with columns 'choice' and (optionally) 'logsum'
        if choices_df is None:
            break

        spc.set_choices(
            choices=choices_df['choice'],
            segment_ids=persons_merged_df[chooser_segment_column].reindex(choices_df.index))

        if locutor:
            spc.write_trace_files(iteration)

        if spc.use_shadow_pricing and spc.check_fit(iteration):
            logger.info("%s converged after iteration %s" % (trace_label, iteration))
            break

    # - shadow price table
    if locutor:
        if spc.use_shadow_pricing and 'SHADOW_PRICE_TABLE' in model_settings:
            inject.add_table(model_settings['SHADOW_PRICE_TABLE'], spc.shadow_prices)
        if 'MODELED_SIZE_TABLE' in model_settings:
            inject.add_table(model_settings['MODELED_SIZE_TABLE'], spc.modeled_size)

    persons_df = persons.to_frame()

    # add the choice values to the dest_choice_column in persons dataframe
    # we only chose school locations for the subset of persons who go to school,
    # so we backfill the empty choices with -1 to code as no school location
    # names for location choice and (optional) logsums columns
    NO_DEST_ZONE = -1
    persons_df[dest_choice_column_name] = \
        choices_df['choice'].reindex(persons_df.index).fillna(NO_DEST_ZONE).astype(int)

    # add the dest_choice_logsum column to persons dataframe
    if logsum_column_name:
        persons_df[logsum_column_name] = \
            choices_df['logsum'].reindex(persons_df.index).astype('float')

    if save_sample_df is not None:
        # might be None for tiny samples even if sample_table_name was specified
        assert len(save_sample_df.index.get_level_values(0).unique()) == len(choices_df)

        # lest they try to put school and workplace samples into the same table
        if pipeline.is_table(sample_table_name):
            raise RuntimeError("dest choice sample table %s already exists" % sample_table_name)
        pipeline.extend_table(sample_table_name, save_sample_df)

    # - annotate persons table
    if 'annotate_persons' in model_settings:
        expressions.assign_columns(
            df=persons_df,
            model_settings=model_settings.get('annotate_persons'),
            trace_label=tracing.extend_trace_label(trace_label, 'annotate_persons'))

    pipeline.replace_table("persons", persons_df)

    if trace_hh_id:
        tracing.trace_df(persons_df,
                         label=trace_label,
                         warn_if_empty=True)

    # - annotate households table
    if 'annotate_households' in model_settings:

        households_df = households.to_frame()
        expressions.assign_columns(
            df=households_df,
            model_settings=model_settings.get('annotate_households'),
            trace_label=tracing.extend_trace_label(trace_label, 'annotate_households'))

        pipeline.replace_table("households", households_df)

        if trace_hh_id:
            tracing.trace_df(households_df,
                             label=trace_label,
                             warn_if_empty=True)

    if logsum_column_name:
        tracing.print_summary(logsum_column_name, choices_df['logsum'], value_counts=True)

    return persons_df
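# Illustrative sketch, not called by the pipeline: the NO_DEST_ZONE backfill
# above in toy form. Persons who are not choosers (e.g. non-students for
# school location) get -1 so the destination column stays integer-typed
# instead of becoming float via NaN. The frames below are hypothetical.
def _demo_backfill_no_dest():
    import pandas as pd

    persons = pd.DataFrame(index=pd.Index([1, 2, 3], name='person_id'))
    choices = pd.Series({1: 17, 3: 42})  # person 2 made no choice

    NO_DEST_ZONE = -1
    # reindex aligns choices to the full persons index, leaving NaN for
    # non-choosers, which is then coded as NO_DEST_ZONE
    return choices.reindex(persons.index).fillna(NO_DEST_ZONE).astype(int)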