def create_mandatory_tours():
    """
    Build the mandatory-tour rows implied by each person's
    mandatory_tour_frequency choice, annotate them with destinations, and
    append them to the pipeline 'tours' table (registering the new rows for
    tracing and with the random-number generator).
    """
    # FIXME - move this to body?

    persons_table = inject.get_table('persons')
    configs_dir = inject.get_injectable('configs_dir')

    # pull only the columns the tour builder and annotator need
    persons_df = persons_table.to_frame(columns=[
        "mandatory_tour_frequency", "is_worker", "school_taz", "workplace_taz"
    ])

    # persons with no mandatory_tour_frequency choice generate no tours
    persons_df = persons_df[~persons_df.mandatory_tour_frequency.isnull()]

    alternatives = inject.get_injectable(
        'mandatory_tour_frequency_alternatives')

    mandatory_tours = process_mandatory_tours(persons_df, alternatives)

    # annotate each new tour with its destination
    expressions.assign_columns(df=mandatory_tours,
                               model_settings='annotate_tours_with_dest',
                               configs_dir=configs_dir,
                               trace_label='create_mandatory_tours')

    pipeline.extend_table("tours", mandatory_tours)
    tracing.register_traceable_table('tours', mandatory_tours)
    # NOTE(review): argument order here is (df, channel_name); other blocks in
    # this file call add_channel(channel_name, df) — confirm which signature
    # this codebase's pipeline expects.
    pipeline.get_rn_generator().add_channel(mandatory_tours, 'tours')
def create_non_mandatory_tours():
    """
    Convert the non_mandatory_tour_frequency attribute on the persons table
    into a "tours" table with one row per generated tour (carrying the id of
    the person it is associated with), and register the new rows with the
    pipeline, tracing, and the random-number generator.
    """
    persons_table = inject.get_table('persons')
    alternatives = inject.get_injectable('non_mandatory_tour_frequency_alts')

    # only persons with a non-null frequency choice generate tours
    frequencies = persons_table.non_mandatory_tour_frequency.dropna()
    non_mandatory_tours = process_non_mandatory_tours(frequencies, alternatives)

    pipeline.extend_table("tours", non_mandatory_tours)
    tracing.register_traceable_table('tours', non_mandatory_tours)
    pipeline.get_rn_generator().add_channel(non_mandatory_tours, 'tours')
def create_non_mandatory_tours(trace_hh_id):
    """
    Convert the non_mandatory_tour_frequency attribute on the persons table
    into a "tours" table with one row per generated tour (and the person id it
    is associated with), register the new rows for tracing and random-number
    generation, and optionally trace them.
    """
    persons_table = inject.get_table('persons')
    alternatives = inject.get_injectable('non_mandatory_tour_frequency_alts')

    non_mandatory_tours = process_non_mandatory_tours(
        persons_table.non_mandatory_tour_frequency.dropna(),
        alternatives
    )

    # extend_table returns the full (extended) tours table
    tours = pipeline.extend_table("tours", non_mandatory_tours)
    tracing.register_traceable_table('tours', tours)
    pipeline.get_rn_generator().add_channel(non_mandatory_tours, 'tours')

    if trace_hh_id:
        tracing.trace_df(non_mandatory_tours,
                         label="non_mandatory_tour_frequency.non_mandatory_tours",
                         warn_if_empty=True)
def atwork_subtour_frequency(tours, persons_merged, chunk_size, trace_hh_id):
    """
    This model predicts the frequency of making at-work subtour tours
    (alternatives for this model come from a separate csv file which is
    configured by the user).

    Parameters
    ----------
    tours : injected table wrapper
        pipeline tours table; work tours are the choosers here
    persons_merged : injected table wrapper
        merged persons table, joined onto work tours by person_id
    chunk_size : int
    trace_hh_id : int
        household id to trace (falsy disables tracing)
    """
    trace_label = 'atwork_subtour_frequency'
    model_settings = config.read_model_settings('atwork_subtour_frequency.yaml')
    model_spec = simulate.read_model_spec(file_name='atwork_subtour_frequency.csv')
    alternatives = simulate.read_model_alts(
        config.config_file_path('atwork_subtour_frequency_alternatives.csv'),
        set_index='alt')

    tours = tours.to_frame()
    persons_merged = persons_merged.to_frame()

    # only work tours can have at-work subtours
    work_tours = tours[tours.tour_type == 'work']

    # - if no work_tours
    if len(work_tours) == 0:
        add_null_results(trace_label, tours)
        return

    # merge persons into work_tours
    work_tours = pd.merge(work_tours, persons_merged,
                          left_on='person_id', right_index=True)

    logger.info("Running atwork_subtour_frequency with %d work tours", len(work_tours))

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    # - preprocessor: annotate choosers in place before simulation
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:
        assign_columns(df=work_tours,
                       model_settings=preprocessor_settings,
                       trace_label=trace_label)

    choices = simulate.simple_simulate(
        choosers=work_tours,
        spec=model_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='atwork_subtour_frequency')

    # convert indexes to alternative names
    choices = pd.Series(model_spec.columns[choices.values], index=choices.index)

    tracing.print_summary('atwork_subtour_frequency', choices, value_counts=True)

    # add atwork_subtour_frequency column to tours
    # reindex since we are working with a subset of tours
    # (non-work tours get NaN in this column)
    tours['atwork_subtour_frequency'] = choices.reindex(tours.index)
    pipeline.replace_table("tours", tours)

    # - create atwork_subtours based on atwork_subtour_frequency choice names
    work_tours = tours[tours.tour_type == 'work']
    # every work tour must have received a frequency choice above
    assert not work_tours.atwork_subtour_frequency.isnull().any()

    subtours = process_atwork_subtours(work_tours, alternatives)

    # extend_table returns the full (extended) tours table
    tours = pipeline.extend_table("tours", subtours)

    # register only the newly created subtour rows for tracing / rng
    tracing.register_traceable_table('tours', subtours)
    pipeline.get_rn_generator().add_channel('tours', subtours)

    if trace_hh_id:
        tracing.trace_df(tours,
                         label='atwork_subtour_frequency.tours')
def trip_purpose_and_destination(trips, tours_merged, chunk_size, trace_hh_id):
    """
    Iteratively run trip purpose and destination choice, retrying failed trips
    (and their leg-mates) until none fail or MAX_ITERATIONS is reached, then
    write the resulting purpose/destination/origin/failed columns back to the
    pipeline trips table (and optionally a destination-choice sample table).

    If trip_destination was already run, only previously-failed trips (and
    their leg-mates) are retried.
    """
    trace_label = "trip_purpose_and_destination"
    model_settings = config.read_model_settings('trip_purpose_and_destination.yaml')

    # for consistency, read sample_table_name setting from trip_destination settings file
    trip_destination_model_settings = config.read_model_settings('trip_destination.yaml')
    sample_table_name = trip_destination_model_settings.get('DEST_CHOICE_SAMPLE_TABLE_NAME')
    want_sample_table = config.setting('want_dest_choice_sample_tables') and sample_table_name is not None

    MAX_ITERATIONS = model_settings.get('MAX_ITERATIONS', 5)

    trips_df = trips.to_frame()
    tours_merged_df = tours_merged.to_frame()

    if trips_df.empty:
        logger.info("%s - no trips. Nothing to do." % trace_label)
        return

    # FIXME could allow MAX_ITERATIONS=0 to allow for cleanup-only run
    # in which case, we would need to drop bad trips, WITHOUT failing bad_trip leg_mates
    assert (MAX_ITERATIONS > 0)

    # if trip_destination has been run before, keep only failed trips (and leg_mates) to retry
    if 'destination' in trips_df:

        if 'failed' not in trips_df.columns:
            # trip_destination model cleaned up any failed trips
            logger.info("%s - no failed column from prior model run." % trace_label)
            return

        elif not trips_df.failed.any():
            # 'failed' column but no failed trips from prior run of trip_destination
            logger.info("%s - no failed trips from prior model run." % trace_label)
            trips_df.drop(columns='failed', inplace=True)
            pipeline.replace_table("trips", trips_df)
            return

        else:
            logger.info("trip_destination has already been run. Rerunning failed trips")
            flag_failed_trip_leg_mates(trips_df, 'failed')
            trips_df = trips_df[trips_df.failed]
            tours_merged_df = tours_merged_df[tours_merged_df.index.isin(trips_df.tour_id)]
            logger.info("Rerunning %s failed trips and leg-mates" % trips_df.shape[0])

            # drop any previously saved samples of failed trips
            if want_sample_table and pipeline.is_table(sample_table_name):
                logger.info("Dropping any previously saved samples of failed trips")
                save_sample_df = pipeline.get_table(sample_table_name)
                save_sample_df.drop(trips_df.index, level='trip_id', inplace=True)
                pipeline.replace_table(sample_table_name, save_sample_df)
                del save_sample_df

    # if we estimated trip_destination, there should have been no failed trips
    # if we didn't, but it is enabled, it is probably a configuration error
    # if we just estimated trip_purpose, it isn't clear what they are trying to do, nor how to handle it
    assert not (estimation.manager.begin_estimation('trip_purpose')
                or estimation.manager.begin_estimation('trip_destination'))

    processed_trips = []
    save_samples = []
    i = 0
    TRIP_RESULT_COLUMNS = ['purpose', 'destination', 'origin', 'failed']
    while True:

        i += 1

        # clear stale result columns before each retry pass
        for c in TRIP_RESULT_COLUMNS:
            if c in trips_df:
                del trips_df[c]

        trips_df, save_sample_df = run_trip_purpose_and_destination(
            trips_df,
            tours_merged_df,
            chunk_size=chunk_size,
            trace_hh_id=trace_hh_id,
            trace_label=tracing.extend_trace_label(trace_label, "i%s" % i))

        # # if testing, make sure at least one trip fails
        if config.setting('testing_fail_trip_destination', False) \
                and (i == 1) and not trips_df.failed.any():
            fail_o = trips_df[trips_df.trip_num < trips_df.trip_count].origin.max()
            trips_df.failed = (trips_df.origin == fail_o) & \
                              (trips_df.trip_num < trips_df.trip_count)

        num_failed_trips = trips_df.failed.sum()

        # if there were no failed trips, we are done
        if num_failed_trips == 0:
            processed_trips.append(trips_df[TRIP_RESULT_COLUMNS])
            if save_sample_df is not None:
                save_samples.append(save_sample_df)
            break

        logger.warning("%s %s failed trips in iteration %s" % (trace_label, num_failed_trips, i))
        file_name = "%s_i%s_failed_trips" % (trace_label, i)
        logger.info("writing failed trips to %s" % file_name)
        tracing.write_csv(trips_df[trips_df.failed], file_name=file_name, transpose=False)

        # if max iterations reached, add remaining trips to processed_trips and give up
        # note that we do this BEFORE failing leg_mates so resulting trip legs are complete
        if i >= MAX_ITERATIONS:
            logger.warning("%s too many iterations %s" % (trace_label, i))
            processed_trips.append(trips_df[TRIP_RESULT_COLUMNS])
            if save_sample_df is not None:
                save_sample_df.drop(trips_df[trips_df.failed].index,
                                    level='trip_id', inplace=True)
                save_samples.append(save_sample_df)
            break

        # otherwise, if any trips failed, then their leg-mates trips must also fail
        flag_failed_trip_leg_mates(trips_df, 'failed')

        # add the good trips to processed_trips
        processed_trips.append(trips_df[~trips_df.failed][TRIP_RESULT_COLUMNS])

        # and keep the failed ones to retry
        trips_df = trips_df[trips_df.failed]
        tours_merged_df = tours_merged_df[tours_merged_df.index.isin(trips_df.tour_id)]

        # add trip samples of processed_trips to processed_samples
        if save_sample_df is not None:
            # drop failed trip samples
            save_sample_df.drop(trips_df.index, level='trip_id', inplace=True)
            save_samples.append(save_sample_df)

    # - assign result columns to trips
    processed_trips = pd.concat(processed_trips)

    if len(save_samples) > 0:
        save_sample_df = pd.concat(save_samples)
        logger.info("adding %s samples to %s" % (len(save_sample_df), sample_table_name))
        pipeline.extend_table(sample_table_name, save_sample_df)

    logger.info("%s %s failed trips after %s iterations" %
                (trace_label, processed_trips.failed.sum(), i))

    # re-read the full trips table and merge the iterated results into it
    trips_df = trips.to_frame()
    assign_in_place(trips_df, processed_trips)

    trips_df = cleanup_failed_trips(trips_df)

    pipeline.replace_table("trips", trips_df)

    # check to make sure we wrote sample file if requested
    if want_sample_table and len(trips_df) > 0:
        assert pipeline.is_table(sample_table_name)
        # since we have saved samples for all successful trips
        # once we discard failed trips, we should have samples for all trips
        save_sample_df = pipeline.get_table(sample_table_name)
        # expect samples only for intermediate trip destinations
        assert \
            len(save_sample_df.index.get_level_values(0).unique()) == \
            len(trips_df[trips_df.trip_num < trips_df.trip_count])
        del save_sample_df

    if trace_hh_id:
        tracing.trace_df(trips_df,
                         label=trace_label,
                         slicer='trip_id',
                         index_label='trip_id',
                         warn_if_empty=True)
def stop_frequency(
        tours, tours_merged,
        stop_frequency_alts,
        skim_dict,
        chunk_size,
        trace_hh_id):
    """
    stop frequency model

    For each tour, choose a number of intermediate inbound stops and outbound
    stops. Create a trip table with inbound and outbound trips.

    Thus, a tour with stop_frequency '2out_0in' will have two outbound and zero
    inbound stops, and four corresponding trips: three outbound, and one inbound.

    Adds stop_frequency str column to trips, with fields

    creates trips table with columns:

    ::

        - person_id
        - household_id
        - tour_id
        - primary_purpose
        - atwork
        - trip_num
        - outbound
        - trip_count
    """

    trace_label = 'stop_frequency'
    model_settings = config.read_model_settings('stop_frequency.yaml')

    tours = tours.to_frame()
    tours_merged = tours_merged.to_frame()

    assert not tours_merged.household_id.isnull().any()
    assert not (tours_merged.origin == -1).any()
    assert not (tours_merged.destination == -1).any()

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    # - run preprocessor to annotate tours_merged
    # FIX: initialize so the tracing block below does not raise NameError
    # when no preprocessor is configured
    annotations = None
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        # hack: preprocessor adds origin column in place if it does not exist already
        od_skim_stack_wrapper = skim_dict.wrap('origin', 'destination')
        skims = [od_skim_stack_wrapper]

        locals_dict = {
            "od_skims": od_skim_stack_wrapper
        }
        if constants is not None:
            locals_dict.update(constants)

        simulate.set_skim_wrapper_targets(tours_merged, skims)

        # this should be pre-slice as some expressions may count tours by type
        annotations = expressions.compute_columns(
            df=tours_merged,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)

        assign_in_place(tours_merged, annotations)

    tracing.print_summary('stop_frequency segments',
                          tours_merged.primary_purpose, value_counts=True)

    choices_list = []
    for segment_type, choosers in tours_merged.groupby('primary_purpose'):

        # FIX: use the module-level logger (as elsewhere in this file)
        # instead of the root logger via logging.info
        logger.info("%s running segment %s with %s chooser rows" %
                    (trace_label, segment_type, choosers.shape[0]))

        spec = simulate.read_model_spec(file_name='stop_frequency_%s.csv' % segment_type)

        assert spec is not None, "spec for segment_type %s not found" % segment_type

        choices = simulate.simple_simulate(
            choosers=choosers,
            spec=spec,
            nest_spec=nest_spec,
            locals_d=constants,
            chunk_size=chunk_size,
            trace_label=tracing.extend_trace_label(trace_label, segment_type),
            trace_choice_name='stops')

        # convert indexes to alternative names
        choices = pd.Series(spec.columns[choices.values], index=choices.index)

        choices_list.append(choices)

    choices = pd.concat(choices_list)

    tracing.print_summary('stop_frequency', choices, value_counts=True)

    # add stop_frequency choices to tours table
    assign_in_place(tours, choices.to_frame('stop_frequency'))

    if 'primary_purpose' not in tours.columns:
        assign_in_place(tours, tours_merged[['primary_purpose']])

    pipeline.replace_table("tours", tours)

    # create trips table
    trips = process_trips(tours, stop_frequency_alts)
    trips = pipeline.extend_table("trips", trips)
    tracing.register_traceable_table('trips', trips)
    pipeline.get_rn_generator().add_channel('trips', trips)

    if trace_hh_id:
        tracing.trace_df(tours,
                         label="stop_frequency.tours",
                         slicer='person_id',
                         columns=None)

        tracing.trace_df(trips,
                         label="stop_frequency.trips",
                         slicer='person_id',
                         columns=None)

        # FIX: annotations exists only if a preprocessor ran
        if annotations is not None:
            tracing.trace_df(annotations,
                             label="stop_frequency.annotations",
                             columns=None)

        tracing.trace_df(tours_merged,
                         label="stop_frequency.tours_merged",
                         slicer='person_id',
                         columns=None)
def mandatory_tour_frequency(persons_merged, chunk_size, trace_hh_id):
    """
    This model predicts the frequency of making mandatory trips (see the
    alternatives above) - these trips include work and school in some combination.

    Choosers are persons with cdap_activity == 'M'. Resulting mandatory tours
    are appended to the pipeline tours table, and the persons table is
    annotated with each person's mandatory_tour_frequency choice.
    """

    trace_label = 'mandatory_tour_frequency'
    model_settings = config.read_model_settings('mandatory_tour_frequency.yaml')
    model_spec = simulate.read_model_spec(file_name='mandatory_tour_frequency.csv')
    alternatives = simulate.read_model_alts(
        config.config_file_path('mandatory_tour_frequency_alternatives.csv'),
        set_index='alt')

    choosers = persons_merged.to_frame()
    # filter based on results of CDAP
    choosers = choosers[choosers.cdap_activity == 'M']
    logger.info("Running mandatory_tour_frequency with %d persons", len(choosers))

    # - if no mandatory tours
    if choosers.shape[0] == 0:
        add_null_results(trace_label, model_settings)
        return

    # - preprocessor: annotate choosers in place before simulation
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_dict = {}

        expressions.assign_columns(
            df=choosers,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    choices = simulate.simple_simulate(
        choosers=choosers,
        spec=model_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='mandatory_tour_frequency')

    # convert indexes to alternative names
    # (reindexed over the full persons index; non-choosers get NaN)
    choices = pd.Series(
        model_spec.columns[choices.values],
        index=choices.index).reindex(persons_merged.local.index)

    # - create mandatory tours
    """
    This reprocesses the choice of index of the mandatory tour frequency
    alternatives into an actual dataframe of tours.  Ending format is
    the same as got non_mandatory_tours except trip types are "work" and "school"
    """
    # pandas aligns on index, so choosers (a subset) pick up their own choices
    choosers['mandatory_tour_frequency'] = choices

    mandatory_tours = process_mandatory_tours(
        persons=choosers,
        mandatory_tour_frequency_alts=alternatives
    )

    # extend_table returns the full (extended) tours table
    tours = pipeline.extend_table("tours", mandatory_tours)
    tracing.register_traceable_table('tours', mandatory_tours)
    pipeline.get_rn_generator().add_channel('tours', mandatory_tours)

    # - annotate persons
    persons = inject.get_table('persons').to_frame()

    # need to reindex as we only handled persons with cdap_activity == 'M'
    persons['mandatory_tour_frequency'] = \
        choices.reindex(persons.index).fillna('').astype(str)

    expressions.assign_columns(
        df=persons,
        model_settings=model_settings.get('annotate_persons'),
        trace_label=tracing.extend_trace_label(trace_label, 'annotate_persons'))

    pipeline.replace_table("persons", persons)

    tracing.print_summary('mandatory_tour_frequency',
                          persons.mandatory_tour_frequency, value_counts=True)

    if trace_hh_id:
        tracing.trace_df(mandatory_tours,
                         label="mandatory_tour_frequency.mandatory_tours",
                         warn_if_empty=True)

        tracing.trace_df(persons,
                         label="mandatory_tour_frequency.persons",
                         warn_if_empty=True)
def atwork_subtour_destination(
        tours,
        persons_merged,
        network_los,
        chunk_size,
        trace_hh_id):
    """
    Choose destinations for at-work subtours and write them into the pipeline
    tours table (optionally with destination-choice logsums and a sample table).

    Supports estimation mode via the estimation manager.
    """

    trace_label = 'atwork_subtour_destination'
    model_settings_file_name = 'atwork_subtour_destination.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)

    # settings that will become defaults in a future release
    future_settings = {
        'SIZE_TERM_SELECTOR': 'atwork',
        'SEGMENTS': ['atwork'],
        'ORIG_ZONE_ID': 'workplace_zone_id'
    }
    model_settings = config.future_model_settings(
        model_settings_file_name, model_settings, future_settings)

    destination_column_name = 'destination'
    logsum_column_name = model_settings.get('DEST_CHOICE_LOGSUM_COLUMN_NAME')
    want_logsums = logsum_column_name is not None

    sample_table_name = model_settings.get('DEST_CHOICE_SAMPLE_TABLE_NAME')
    want_sample_table = config.setting(
        'want_dest_choice_sample_tables') and sample_table_name is not None

    persons_merged = persons_merged.to_frame()

    tours = tours.to_frame()
    subtours = tours[tours.tour_category == 'atwork']

    # - if no atwork subtours
    if subtours.shape[0] == 0:
        tracing.no_results('atwork_subtour_destination')
        return

    estimator = estimation.manager.begin_estimation('atwork_subtour_destination')
    if estimator:
        estimator.write_coefficients(model_settings=model_settings)
        # estimator.write_spec(model_settings, tag='SAMPLE_SPEC')
        estimator.write_spec(model_settings, tag='SPEC')
        estimator.set_alt_id(model_settings["ALT_DEST_COL_NAME"])
        estimator.write_table(inject.get_injectable('size_terms'), 'size_terms', append=False)
        estimator.write_table(inject.get_table('land_use').to_frame(), 'landuse', append=False)
        estimator.write_model_settings(model_settings, model_settings_file_name)

    choices_df, save_sample_df = tour_destination.run_tour_destination(
        subtours,
        persons_merged,
        want_logsums,
        want_sample_table,
        model_settings,
        network_los,
        estimator,
        chunk_size, trace_hh_id, trace_label)

    if estimator:
        estimator.write_choices(choices_df['choice'])
        # in estimation mode, override modeled choices with survey values
        choices_df['choice'] = estimator.get_survey_values(
            choices_df['choice'], 'tours', 'destination')
        estimator.write_override_choices(choices_df['choice'])
        estimator.end_estimation()

    # NOTE(review): subtours is a slice of tours, so this assignment may hit
    # pandas SettingWithCopyWarning; results are merged back via assign_in_place
    subtours[destination_column_name] = choices_df['choice']
    assign_in_place(tours, subtours[[destination_column_name]])

    if want_logsums:
        subtours[logsum_column_name] = choices_df['logsum']
        assign_in_place(tours, subtours[[logsum_column_name]])

    pipeline.replace_table("tours", tours)

    tracing.print_summary(destination_column_name,
                          subtours[destination_column_name],
                          describe=True)

    if want_sample_table:
        # one group of sample rows per chooser
        assert len(save_sample_df.index.get_level_values(0).unique()) == len(choices_df)
        # save_sample_df.set_index(model_settings['ALT_DEST_COL_NAME'], append=True, inplace=True)
        pipeline.extend_table(sample_table_name, save_sample_df)

    if trace_hh_id:
        tracing.trace_df(tours,
                         label='atwork_subtour_destination',
                         columns=['destination'])
def stop_frequency(tours, tours_merged, stop_frequency_alts, skim_dict, chunk_size, trace_hh_id):
    """
    stop frequency model

    For each tour, choose a number of intermediate inbound stops and outbound
    stops. Create a trip table with inbound and outbound trips.

    Thus, a tour with stop_frequency '2out_0in' will have two outbound and zero
    inbound stops, and four corresponding trips: three outbound, and one inbound.

    Adds stop_frequency str column to trips, with fields

    creates trips table with columns:

    ::

        - person_id
        - household_id
        - tour_id
        - primary_purpose
        - atwork
        - trip_num
        - outbound
        - trip_count
    """

    trace_label = 'stop_frequency'
    model_settings = config.read_model_settings('stop_frequency.yaml')

    tours = tours.to_frame()
    tours_merged = tours_merged.to_frame()

    assert not tours_merged.household_id.isnull().any()
    assert not (tours_merged.origin == -1).any()
    assert not (tours_merged.destination == -1).any()

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    # - run preprocessor to annotate tours_merged
    # FIX: initialize so the tracing block below does not raise NameError
    # when no preprocessor is configured
    annotations = None
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        # hack: preprocessor adds origin column in place if it does not exist already
        od_skim_stack_wrapper = skim_dict.wrap('origin', 'destination')
        skims = [od_skim_stack_wrapper]

        locals_dict = {"od_skims": od_skim_stack_wrapper}
        if constants is not None:
            locals_dict.update(constants)

        simulate.set_skim_wrapper_targets(tours_merged, skims)

        # this should be pre-slice as some expressions may count tours by type
        annotations = expressions.compute_columns(
            df=tours_merged,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)

        assign_in_place(tours_merged, annotations)

    tracing.print_summary('stop_frequency segments',
                          tours_merged.primary_purpose, value_counts=True)

    choices_list = []
    for segment_type, choosers in tours_merged.groupby('primary_purpose'):

        # FIX: use the module-level logger (as elsewhere in this file)
        # instead of the root logger via logging.info
        logger.info("%s running segment %s with %s chooser rows" %
                    (trace_label, segment_type, choosers.shape[0]))

        spec = simulate.read_model_spec(file_name='stop_frequency_%s.csv' % segment_type)

        assert spec is not None, "spec for segment_type %s not found" % segment_type

        choices = simulate.simple_simulate(
            choosers=choosers,
            spec=spec,
            nest_spec=nest_spec,
            locals_d=constants,
            chunk_size=chunk_size,
            trace_label=tracing.extend_trace_label(trace_label, segment_type),
            trace_choice_name='stops')

        # convert indexes to alternative names
        choices = pd.Series(spec.columns[choices.values], index=choices.index)

        choices_list.append(choices)

    choices = pd.concat(choices_list)

    tracing.print_summary('stop_frequency', choices, value_counts=True)

    # add stop_frequency choices to tours table
    assign_in_place(tours, choices.to_frame('stop_frequency'))

    if 'primary_purpose' not in tours.columns:
        assign_in_place(tours, tours_merged[['primary_purpose']])

    pipeline.replace_table("tours", tours)

    # create trips table
    trips = process_trips(tours, stop_frequency_alts)
    trips = pipeline.extend_table("trips", trips)
    tracing.register_traceable_table('trips', trips)
    pipeline.get_rn_generator().add_channel('trips', trips)

    if trace_hh_id:
        tracing.trace_df(tours,
                         label="stop_frequency.tours",
                         slicer='person_id',
                         columns=None)

        tracing.trace_df(trips,
                         label="stop_frequency.trips",
                         slicer='person_id',
                         columns=None)

        # FIX: annotations exists only if a preprocessor ran
        if annotations is not None:
            tracing.trace_df(annotations,
                             label="stop_frequency.annotations",
                             columns=None)

        tracing.trace_df(tours_merged,
                         label="stop_frequency.tours_merged",
                         slicer='person_id',
                         columns=None)
def iterate_location_choice(
        model_settings,
        persons_merged, persons, households,
        network_los,
        estimator,
        chunk_size, trace_hh_id, locutor,
        trace_label):
    """
    iterate run_location_choice updating shadow pricing until convergence
    criteria satisfied or max_iterations reached.

    (If use_shadow_pricing not enabled, then just iterate once)

    Parameters
    ----------
    model_settings : dict
    persons_merged : injected table
    persons : injected table
    households : injected table
    network_los : los.Network_LOS
    estimator : estimation manager object or None
    chunk_size : int
    trace_hh_id : int
    locutor : bool
        whether this process is the privileged logger of shadow_pricing when multiprocessing
    trace_label : str

    Returns
    -------
    adds choice column model_settings['DEST_CHOICE_COLUMN_NAME']
    adds logsum column model_settings['DEST_CHOICE_LOGSUM_COLUMN_NAME']- if provided
    adds annotations to persons table
    """

    chunk_tag = trace_label

    # boolean to filter out persons not needing location modeling (e.g. is_worker, is_student)
    chooser_filter_column = model_settings['CHOOSER_FILTER_COLUMN_NAME']

    dest_choice_column_name = model_settings['DEST_CHOICE_COLUMN_NAME']
    logsum_column_name = model_settings.get('DEST_CHOICE_LOGSUM_COLUMN_NAME')

    sample_table_name = model_settings.get('DEST_CHOICE_SAMPLE_TABLE_NAME')
    want_sample_table = config.setting(
        'want_dest_choice_sample_tables') and sample_table_name is not None

    persons_merged_df = persons_merged.to_frame()
    persons_merged_df = persons_merged_df[persons_merged[chooser_filter_column]]

    # interaction_sample expects chooser index to be monotonic increasing
    persons_merged_df.sort_index(inplace=True)

    # chooser segmentation allows different sets coefficients for e.g. different income_segments or tour_types
    chooser_segment_column = model_settings['CHOOSER_SEGMENT_COLUMN_NAME']

    assert chooser_segment_column in persons_merged_df, \
        f"CHOOSER_SEGMENT_COLUMN '{chooser_segment_column}' not in persons_merged table."

    spc = shadow_pricing.load_shadow_price_calculator(model_settings)
    max_iterations = spc.max_iterations
    # shadow pricing is incompatible with estimation mode
    assert not (spc.use_shadow_pricing and estimator)

    logger.debug("%s max_iterations: %s" % (trace_label, max_iterations))

    for iteration in range(1, max_iterations + 1):

        if spc.use_shadow_pricing and iteration > 1:
            spc.update_shadow_prices()

        choices_df, save_sample_df = run_location_choice(
            persons_merged_df,
            network_los,
            shadow_price_calculator=spc,
            want_logsums=logsum_column_name is not None,
            want_sample_table=want_sample_table,
            estimator=estimator,
            model_settings=model_settings,
            chunk_size=chunk_size,
            chunk_tag=chunk_tag,
            trace_hh_id=trace_hh_id,
            trace_label=tracing.extend_trace_label(trace_label, 'i%s' % iteration))

        # choices_df is a pandas DataFrame with columns 'choice' and (optionally) 'logsum'
        if choices_df is None:
            break

        spc.set_choices(
            choices=choices_df['choice'],
            segment_ids=persons_merged_df[chooser_segment_column].reindex(choices_df.index))

        if locutor:
            spc.write_trace_files(iteration)

        if spc.use_shadow_pricing and spc.check_fit(iteration):
            # FIX: use the module-level logger (used two statements above via
            # logger.debug) instead of the root logger via logging.info
            logger.info("%s converged after iteration %s" % (trace_label, iteration,))
            break

    # - shadow price table
    if locutor:
        if spc.use_shadow_pricing and 'SHADOW_PRICE_TABLE' in model_settings:
            inject.add_table(model_settings['SHADOW_PRICE_TABLE'], spc.shadow_prices)
        if 'MODELED_SIZE_TABLE' in model_settings:
            inject.add_table(model_settings['MODELED_SIZE_TABLE'], spc.modeled_size)

    persons_df = persons.to_frame()

    # add the choice values to the dest_choice_column in persons dataframe
    # We only chose school locations for the subset of persons who go to school
    # so we backfill the empty choices with -1 to code as no school location
    # names for location choice and (optional) logsums columns
    NO_DEST_ZONE = -1
    persons_df[dest_choice_column_name] = \
        choices_df['choice'].reindex(persons_df.index).fillna(NO_DEST_ZONE).astype(int)

    # add the dest_choice_logsum column to persons dataframe
    if logsum_column_name:
        persons_df[logsum_column_name] = \
            choices_df['logsum'].reindex(persons_df.index).astype('float')

    if save_sample_df is not None:
        # might be None for tiny samples even if sample_table_name was specified
        assert len(save_sample_df.index.get_level_values(0).unique()) == len(choices_df)
        # lest they try to put school and workplace samples into the same table
        if pipeline.is_table(sample_table_name):
            raise RuntimeError("dest choice sample table %s already exists" % sample_table_name)
        pipeline.extend_table(sample_table_name, save_sample_df)

    # - annotate persons table
    if 'annotate_persons' in model_settings:
        expressions.assign_columns(
            df=persons_df,
            model_settings=model_settings.get('annotate_persons'),
            trace_label=tracing.extend_trace_label(trace_label, 'annotate_persons'))

    pipeline.replace_table("persons", persons_df)

    if trace_hh_id:
        tracing.trace_df(persons_df,
                         label=trace_label,
                         warn_if_empty=True)

    # - annotate households table
    if 'annotate_households' in model_settings:

        households_df = households.to_frame()
        expressions.assign_columns(
            df=households_df,
            model_settings=model_settings.get('annotate_households'),
            trace_label=tracing.extend_trace_label(trace_label, 'annotate_households'))
        pipeline.replace_table("households", households_df)

        if trace_hh_id:
            tracing.trace_df(households_df,
                             label=trace_label,
                             warn_if_empty=True)

    if logsum_column_name:
        tracing.print_summary(logsum_column_name,
                              choices_df['logsum'],
                              value_counts=True)

    return persons_df
def atwork_subtour_destination(
        tours, persons_merged, skim_dict, skim_stack,
        land_use, size_terms,
        chunk_size, trace_hh_id):
    """
    Choose destinations for at-work subtours via the three-step
    sample / logsums / simulate sequence, and write the chosen destinations
    (and optional logsums / sample table) back to the pipeline tours table.

    Supports estimation mode via the estimation manager.
    """

    trace_label = 'atwork_subtour_destination'
    model_settings_file_name = 'atwork_subtour_destination.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)

    destination_column_name = 'destination'
    logsum_column_name = model_settings.get('DEST_CHOICE_LOGSUM_COLUMN_NAME')
    want_logsums = logsum_column_name is not None

    sample_table_name = model_settings.get('DEST_CHOICE_SAMPLE_TABLE_NAME')
    want_sample_table = config.setting(
        'want_dest_choice_sample_tables') and sample_table_name is not None

    persons_merged = persons_merged.to_frame()

    tours = tours.to_frame()
    subtours = tours[tours.tour_category == 'atwork']

    # interaction_sample_simulate insists choosers appear in same order as alts
    subtours = subtours.sort_index()

    # - if no atwork subtours
    if subtours.shape[0] == 0:
        tracing.no_results('atwork_subtour_destination')
        return

    estimator = estimation.manager.begin_estimation('atwork_subtour_destination')
    if estimator:
        estimator.write_coefficients(simulate.read_model_coefficients(model_settings))
        # estimator.write_spec(model_settings, tag='SAMPLE_SPEC')
        estimator.write_spec(model_settings, tag='SPEC')
        estimator.set_alt_id(model_settings["ALT_DEST_COL_NAME"])
        estimator.write_table(inject.get_injectable('size_terms'), 'size_terms', append=False)
        estimator.write_table(inject.get_table('land_use').to_frame(), 'landuse', append=False)
        estimator.write_model_settings(model_settings, model_settings_file_name)

    destination_size_terms = tour_destination_size_terms(land_use, size_terms, 'atwork')

    # step 1 - sample candidate destinations for each subtour
    destination_sample_df = atwork_subtour_destination_sample(
        subtours,
        persons_merged,
        model_settings,
        skim_dict,
        destination_size_terms,
        estimator=estimator,
        chunk_size=chunk_size,
        trace_label=tracing.extend_trace_label(trace_label, 'sample'))

    # step 2 - compute mode-choice logsums for the sampled destinations
    destination_sample_df = atwork_subtour_destination_logsums(
        persons_merged,
        destination_sample_df,
        model_settings,
        skim_dict, skim_stack,
        chunk_size=chunk_size,
        trace_label=tracing.extend_trace_label(trace_label, 'logsums'))

    # step 3 - choose a destination from the sample
    choices_df = atwork_subtour_destination_simulate(
        subtours,
        persons_merged,
        destination_sample_df,
        want_logsums,
        model_settings,
        skim_dict,
        destination_size_terms,
        estimator=estimator,
        chunk_size=chunk_size,
        trace_label=tracing.extend_trace_label(trace_label, 'simulate'))

    if estimator:
        estimator.write_choices(choices_df['choice'])
        # in estimation mode, override modeled choices with survey values
        choices_df['choice'] = estimator.get_survey_values(
            choices_df['choice'], 'tours', 'destination')
        estimator.write_override_choices(choices_df['choice'])
        estimator.end_estimation()

    # merge chosen destinations back into the full tours table
    subtours[destination_column_name] = choices_df['choice']
    assign_in_place(tours, subtours[[destination_column_name]])

    if want_logsums:
        subtours[logsum_column_name] = choices_df['logsum']
        assign_in_place(tours, subtours[[logsum_column_name]])

    pipeline.replace_table("tours", tours)

    if want_sample_table:
        # FIXME - sample_table
        assert len(destination_sample_df.index.unique()) == len(choices_df)
        destination_sample_df.set_index(model_settings['ALT_DEST_COL_NAME'],
                                        append=True, inplace=True)
        pipeline.extend_table(sample_table_name, destination_sample_df)

    tracing.print_summary(destination_column_name,
                          subtours[destination_column_name],
                          describe=True)

    if trace_hh_id:
        tracing.trace_df(tours,
                         label='atwork_subtour_destination',
                         columns=['destination'])
def tour_od_choice(
        tours,
        persons,
        households,
        land_use,
        network_los,
        chunk_size,
        trace_hh_id):
    """Simulates joint origin/destination choice for all tours.

    Given a set of previously generated tours, each tour needs to have an
    origin and a destination. In this case tours are the choosers, but
    the associated person that's making the tour does not necessarily have
    a home location assigned already. So we choose a tour origin at the same
    time as we choose a tour destination, and assign the tour origin as that
    person's home location.

    Parameters
    ----------
    tours : orca.DataFrameWrapper
        lazy-loaded tours table
    persons : orca.DataFrameWrapper
        lazy-loaded persons table
    households : orca.DataFrameWrapper
        lazy-loaded households table
    land_use : orca.DataFrameWrapper
        lazy-loaded land use data table
    network_los : orca._InjectableFuncWrapper
        lazy-loaded activitysim.los.Network_LOS object
    chunk_size
        simulation chunk size, set in main settings.yaml
    trace_hh_id : int
        households to trace, set in main settings.yaml
    """
    trace_label = 'tour_od_choice'
    model_settings_file_name = 'tour_od_choice.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)
    origin_col_name = model_settings['ORIG_COL_NAME']
    dest_col_name = model_settings['DEST_COL_NAME']
    # alternatives are (origin, destination) pairs identified by a combined id
    alt_id_col = tour_od.get_od_id_col(origin_col_name, dest_col_name)

    sample_table_name = model_settings.get('OD_CHOICE_SAMPLE_TABLE_NAME')
    want_sample_table = config.setting(
        'want_dest_choice_sample_tables') and sample_table_name is not None

    logsum_column_name = model_settings.get('OD_CHOICE_LOGSUM_COLUMN_NAME', None)
    want_logsums = logsum_column_name is not None

    tours = tours.to_frame()

    # interaction_sample_simulate insists choosers appear in same order as alts
    tours = tours.sort_index()

    estimator = estimation.manager.begin_estimation('tour_od_choice')
    if estimator:
        estimator.write_coefficients(model_settings=model_settings)
        estimator.write_spec(model_settings, tag='SAMPLE_SPEC')
        estimator.write_spec(model_settings, tag='SPEC')
        estimator.set_alt_id(alt_id_col)
        estimator.write_table(inject.get_injectable('size_terms'),
                              'size_terms', append=False)
        estimator.write_table(inject.get_table('land_use').to_frame(),
                              'landuse', append=False)
        estimator.write_model_settings(model_settings, model_settings_file_name)

    choices_df, save_sample_df = tour_od.run_tour_od(
        tours,
        persons,
        want_logsums,
        want_sample_table,
        model_settings,
        network_los,
        estimator,
        chunk_size, trace_hh_id, trace_label)

    if estimator:
        # in estimation mode, override modeled O/D with the observed survey O/D
        assert estimator.want_unsampled_alternatives
        estimator.write_choices(choices_df.choice)
        survey_od = estimator.get_survey_values(
            choices_df.choice, 'tours', [origin_col_name, dest_col_name])
        choices_df[origin_col_name] = survey_od[origin_col_name]
        choices_df[dest_col_name] = survey_od[dest_col_name]
        survey_od[alt_id_col] = tour_od.create_od_id_col(
            survey_od, origin_col_name, dest_col_name)
        choices_df.choice = survey_od[alt_id_col]
        estimator.write_override_choices(choices_df.choice)
        estimator.end_estimation()

    tours[origin_col_name] = choices_df[origin_col_name].reindex(tours.index)
    tours[dest_col_name] = choices_df[dest_col_name].reindex(tours.index)
    if want_logsums:
        tours[logsum_column_name] = \
            choices_df['logsum'].reindex(tours.index).astype('float')

    # NOTE(review): tour origins appear to be ports of entry here — poe_id is
    # looked up from land use by the chosen origin zone; confirm against config
    tours['poe_id'] = tours[origin_col_name].map(
        land_use.to_frame(columns='poe_id').poe_id)

    # propagate the chosen tour origin to the tour-maker's household and persons
    households = households.to_frame()
    persons = persons.to_frame()
    households[origin_col_name] = tours.set_index(
        'household_id')[origin_col_name].reindex(households.index)
    persons[origin_col_name] = households[origin_col_name].reindex(
        persons.household_id).values

    # Downstream steps require that persons and households have a 'home_zone_id'
    # column. We assume that if the tour_od_choice model is used, this field is
    # missing from the population data, so it gets inherited from the tour origin
    households['home_zone_id'] = households[origin_col_name]
    persons['home_zone_id'] = persons[origin_col_name]

    pipeline.replace_table("tours", tours)
    pipeline.replace_table("persons", persons)
    pipeline.replace_table("households", households)

    if want_sample_table:
        assert len(save_sample_df.index.get_level_values(0).unique()) == len(choices_df)
        pipeline.extend_table(sample_table_name, save_sample_df)

    if trace_hh_id:
        tracing.trace_df(tours,
                         label="tours_od_choice",
                         slicer='person_id',
                         index_label='tour',
                         columns=None,
                         warn_if_empty=True)
def joint_tour_frequency(households, persons, chunk_size, trace_hh_id):
    """
    This model predicts the frequency of making fully joint trips
    (see the alternatives above).

    Choosers are households with more than one cdap travel-active person.
    Adds 'joint_tour_frequency' and 'num_hh_joint_tours' to the households
    table and extends the pipeline 'tours' table with the new joint tours.
    """
    trace_label = 'joint_tour_frequency'
    model_settings = config.read_model_settings('joint_tour_frequency.yaml')
    model_spec = simulate.read_model_spec(file_name='joint_tour_frequency.csv')

    alternatives = simulate.read_model_alts(
        config.config_file_path('joint_tour_frequency_alternatives.csv'),
        set_index='alt')

    # - only interested in households with more than one cdap travel_active person
    households = households.to_frame()
    multi_person_households = households[
        households.num_travel_active > 1].copy()

    # - only interested in persons in multi_person_households
    # FIXME - gratuitous pathological efficiency move, just let yaml specify persons?
    persons = persons.to_frame()
    persons = persons[persons.household_id.isin(multi_person_households.index)]

    logger.info(
        "Running joint_tour_frequency with %d multi-person households" %
        multi_person_households.shape[0])

    # - preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:
        locals_dict = {
            'persons': persons,
            'hh_time_window_overlap': hh_time_window_overlap
        }
        expressions.assign_columns(df=multi_person_households,
                                   model_settings=preprocessor_settings,
                                   locals_dict=locals_dict,
                                   trace_label=trace_label)

    # - simple_simulate
    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    choices = simulate.simple_simulate(
        choosers=multi_person_households,
        spec=model_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='joint_tour_frequency')

    # convert indexes to alternative names
    choices = pd.Series(model_spec.columns[choices.values], index=choices.index)

    # - create joint_tours based on joint_tour_frequency choices

    # - we need a person_id in order to generate the tour index (and for register_traceable_table)
    # - but we don't know the tour participants yet
    # - so we arbitrarily choose the first person in the household
    # - to be point person for the purpose of generating an index and setting origin
    temp_point_persons = persons.loc[persons.PNUM == 1]
    temp_point_persons['person_id'] = temp_point_persons.index
    temp_point_persons = temp_point_persons.set_index('household_id')
    temp_point_persons = temp_point_persons[['person_id', 'home_taz']]

    joint_tours = \
        process_joint_tours(choices, alternatives, temp_point_persons)

    tours = pipeline.extend_table("tours", joint_tours)

    tracing.register_traceable_table('tours', joint_tours)
    pipeline.get_rn_generator().add_channel('tours', joint_tours)

    # - annotate households
    # add joint_tour_frequency and num_hh_joint_tours columns to households
    # reindex since we ran model on a subset of households
    households['joint_tour_frequency'] = choices.reindex(
        households.index).fillna('').astype(str)

    households['num_hh_joint_tours'] = joint_tours.groupby('household_id').size().\
        reindex(households.index).fillna(0).astype(np.int8)

    pipeline.replace_table("households", households)

    tracing.print_summary('joint_tour_frequency',
                          households.joint_tour_frequency,
                          value_counts=True)

    if trace_hh_id:
        tracing.trace_df(households,
                         label="joint_tour_frequency.households")

        tracing.trace_df(joint_tours,
                         label="joint_tour_frequency.joint_tours",
                         slicer='household_id')
def mandatory_tour_frequency(persons_merged, chunk_size, trace_hh_id):
    """
    This model predicts the frequency of making mandatory trips (see the
    alternatives above) - these trips include work and school in some combination.

    Choosers are persons with cdap_activity == 'M'.  Extends the pipeline
    'tours' table with the generated mandatory tours and annotates the
    persons table with 'mandatory_tour_frequency'.
    """
    trace_label = 'mandatory_tour_frequency'

    model_settings = config.read_model_settings('mandatory_tour_frequency.yaml')
    model_spec = simulate.read_model_spec(file_name='mandatory_tour_frequency.csv')
    alternatives = simulate.read_model_alts(
        config.config_file_path('mandatory_tour_frequency_alternatives.csv'),
        set_index='alt')

    choosers = persons_merged.to_frame()
    # filter based on results of CDAP
    choosers = choosers[choosers.cdap_activity == 'M']
    logger.info("Running mandatory_tour_frequency with %d persons", len(choosers))

    # - if no mandatory tours
    if choosers.shape[0] == 0:
        add_null_results(trace_label, model_settings)
        return

    # - preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:
        locals_dict = {}

        expressions.assign_columns(
            df=choosers,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    choices = simulate.simple_simulate(
        choosers=choosers,
        spec=model_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='mandatory_tour_frequency')

    # convert indexes to alternative names
    # reindex to full persons_merged index (non-choosers get NaN)
    choices = pd.Series(
        model_spec.columns[choices.values],
        index=choices.index).reindex(persons_merged.local.index)

    # - create mandatory tours
    """
    This reprocesses the choice of index of the mandatory tour frequency
    alternatives into an actual dataframe of tours.  Ending format is
    the same as got non_mandatory_tours except trip types are "work" and "school"
    """
    choosers['mandatory_tour_frequency'] = choices
    mandatory_tours = process_mandatory_tours(
        persons=choosers,
        mandatory_tour_frequency_alts=alternatives
    )

    tours = pipeline.extend_table("tours", mandatory_tours)
    tracing.register_traceable_table('tours', mandatory_tours)
    pipeline.get_rn_generator().add_channel('tours', mandatory_tours)

    # - annotate persons
    persons = inject.get_table('persons').to_frame()

    # need to reindex as we only handled persons with cdap_activity == 'M'
    persons['mandatory_tour_frequency'] = choices.reindex(
        persons.index).fillna('').astype(str)

    expressions.assign_columns(
        df=persons,
        model_settings=model_settings.get('annotate_persons'),
        trace_label=tracing.extend_trace_label(trace_label, 'annotate_persons'))

    pipeline.replace_table("persons", persons)

    tracing.print_summary('mandatory_tour_frequency',
                          persons.mandatory_tour_frequency,
                          value_counts=True)

    if trace_hh_id:
        tracing.trace_df(mandatory_tours,
                         label="mandatory_tour_frequency.mandatory_tours",
                         warn_if_empty=True)

        tracing.trace_df(persons,
                         label="mandatory_tour_frequency.persons",
                         warn_if_empty=True)
def joint_tour_destination(tours, persons_merged, households_merged,
                           network_los, chunk_size, trace_hh_id):
    """
    Given the tour generation from the above, each tour needs to have a
    destination, so in this case tours are the choosers (with the associated
    person that's making the tour)

    Writes the chosen 'destination' (and optionally a logsum column) for
    tours with tour_category == 'joint' back to the pipeline 'tours' table.
    """
    trace_label = 'joint_tour_destination'
    model_settings_file_name = 'joint_tour_destination.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)

    # logsum column is only written if named in the model settings
    logsum_column_name = model_settings.get('DEST_CHOICE_LOGSUM_COLUMN_NAME')
    want_logsums = logsum_column_name is not None

    # sample table is only saved if globally enabled AND named in settings
    sample_table_name = model_settings.get('DEST_CHOICE_SAMPLE_TABLE_NAME')
    want_sample_table = config.setting(
        'want_dest_choice_sample_tables') and sample_table_name is not None

    # choosers are tours - in a sense tours are choosing their destination
    tours = tours.to_frame()
    joint_tours = tours[tours.tour_category == 'joint']

    persons_merged = persons_merged.to_frame()

    # - if no joint tours
    if joint_tours.shape[0] == 0:
        tracing.no_results('joint_tour_destination')
        return

    estimator = estimation.manager.begin_estimation('joint_tour_destination')
    if estimator:
        estimator.write_coefficients(model_settings=model_settings)
        # estimator.write_spec(model_settings, tag='SAMPLE_SPEC')
        estimator.write_spec(model_settings, tag='SPEC')
        estimator.set_alt_id(model_settings["ALT_DEST_COL_NAME"])
        estimator.write_table(inject.get_injectable('size_terms'),
                              'size_terms', append=False)
        estimator.write_table(inject.get_table('land_use').to_frame(),
                              'landuse', append=False)
        estimator.write_model_settings(model_settings, model_settings_file_name)

    choices_df, save_sample_df = tour_destination.run_tour_destination(
        tours,
        persons_merged,
        want_logsums,
        want_sample_table,
        model_settings,
        network_los,
        estimator,
        chunk_size, trace_hh_id, trace_label)

    if estimator:
        # in estimation mode, override model choices with observed survey values
        estimator.write_choices(choices_df.choice)
        choices_df.choice = estimator.get_survey_values(
            choices_df.choice, 'tours', 'destination')
        estimator.write_override_choices(choices_df.choice)
        estimator.end_estimation()

    # add column as we want joint_tours table for tracing.
    joint_tours['destination'] = choices_df.choice
    assign_in_place(tours, joint_tours[['destination']])
    pipeline.replace_table("tours", tours)

    if want_logsums:
        joint_tours[logsum_column_name] = choices_df['logsum']
        assign_in_place(tours, joint_tours[[logsum_column_name]])

    tracing.print_summary('destination',
                          joint_tours.destination,
                          describe=True)

    if want_sample_table:
        assert len(save_sample_df.index.get_level_values(0).unique()) == len(
            choices_df)
        # save_sample_df.set_index(model_settings['ALT_DEST_COL_NAME'], append=True, inplace=True)
        pipeline.extend_table(sample_table_name, save_sample_df)

    if trace_hh_id:
        tracing.trace_df(joint_tours,
                         label="joint_tour_destination.joint_tours")
def atwork_subtour_frequency(tours, persons_merged,
                             atwork_subtour_frequency_spec,
                             atwork_subtour_frequency_settings,
                             atwork_subtour_frequency_alternatives,
                             chunk_size, trace_hh_id):
    """
    This model predicts the frequency of making at-work subtour tours
    (alternatives for this model come from a separate csv file which is
    configured by the user).

    Adds an 'atwork_subtour_frequency' column to the tours table and extends
    the pipeline 'tours' table with the generated at-work subtours.
    """
    trace_label = 'atwork_subtour_frequency'

    tours_df = tours.to_frame()
    persons_df = persons_merged.to_frame()

    # choosers are work tours, merged with attributes of the tour-maker
    choosers = tours_df[tours_df.tour_type == 'work'].merge(
        persons_df, left_on='person_id', right_index=True)

    logger.info("Running atwork_subtour_frequency with %d work tours" % len(choosers))

    nest_spec = config.get_logit_model_settings(
        atwork_subtour_frequency_settings)
    constants = config.get_model_constants(atwork_subtour_frequency_settings)

    choice_positions = simulate.simple_simulate(
        choosers=choosers,
        spec=atwork_subtour_frequency_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        trace_label=trace_label,
        trace_choice_name='atwork_subtour_frequency')

    # map positional choices back to alternative names
    choices = pd.Series(
        atwork_subtour_frequency_spec.columns[choice_positions.values],
        index=choice_positions.index)

    tracing.print_summary('atwork_subtour_frequency', choices,
                          value_counts=True)

    # reindex since we are working with a subset of tours;
    # non-work tours get NaN for atwork_subtour_frequency
    tours_df['atwork_subtour_frequency'] = choices.reindex(tours_df.index)
    pipeline.replace_table("tours", tours_df)

    # - create atwork_subtours based on atwork_subtour_frequency choice names
    work_tours = tours_df[tours_df.tour_type == 'work']
    assert not work_tours.atwork_subtour_frequency.isnull().any()

    subtours = process_atwork_subtours(work_tours,
                                       atwork_subtour_frequency_alternatives)

    pipeline.extend_table("tours", subtours)
    tracing.register_traceable_table('tours', subtours)
    pipeline.get_rn_generator().add_channel(subtours, 'tours')

    if trace_hh_id:
        tracing.trace_df(inject.get_table('tours').to_frame(),
                         label=trace_label,
                         columns=['atwork_subtour_frequency'],
                         warn_if_empty=True)
def joint_tour_frequency(households, persons, chunk_size, trace_hh_id):
    """
    This model predicts the frequency of making fully joint trips
    (see the alternatives above).

    Choosers are households flagged by `participates_in_jtf_model`.  Adds
    'joint_tour_frequency' and 'num_hh_joint_tours' to the households table
    and extends the pipeline 'tours' table with the new joint tours.
    Supports estimation mode (choices overridden from survey values).
    """
    trace_label = 'joint_tour_frequency'
    model_settings_file_name = 'joint_tour_frequency.yaml'

    estimator = estimation.manager.begin_estimation('joint_tour_frequency')

    model_settings = config.read_model_settings(model_settings_file_name)

    alternatives = simulate.read_model_alts(
        'joint_tour_frequency_alternatives.csv', set_index='alt')

    # - only interested in households with more than one cdap travel_active person and
    # - at least one non-preschooler
    households = households.to_frame()
    multi_person_households = households[
        households.participates_in_jtf_model].copy()

    # - only interested in persons in multi_person_households
    # FIXME - gratuitous pathological efficiency move, just let yaml specify persons?
    persons = persons.to_frame()
    persons = persons[persons.household_id.isin(multi_person_households.index)]

    logger.info(
        "Running joint_tour_frequency with %d multi-person households" %
        multi_person_households.shape[0])

    # - preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:
        locals_dict = {
            'persons': persons,
            'hh_time_window_overlap': hh_time_window_overlap
        }
        expressions.assign_columns(df=multi_person_households,
                                   model_settings=preprocessor_settings,
                                   locals_dict=locals_dict,
                                   trace_label=trace_label)

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    coefficients_df = simulate.read_model_coefficients(model_settings)
    model_spec = simulate.eval_coefficients(model_spec, coefficients_df, estimator)

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    if estimator:
        estimator.write_spec(model_settings)
        estimator.write_model_settings(model_settings, model_settings_file_name)
        estimator.write_coefficients(coefficients_df, model_settings)
        estimator.write_choosers(multi_person_households)

    choices = simulate.simple_simulate(
        choosers=multi_person_households,
        spec=model_spec,
        nest_spec=nest_spec,
        locals_d=constants,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='joint_tour_frequency',
        estimator=estimator)

    # convert indexes to alternative names
    choices = pd.Series(model_spec.columns[choices.values], index=choices.index)

    if estimator:
        estimator.write_choices(choices)
        choices = estimator.get_survey_values(choices, 'households',
                                              'joint_tour_frequency')
        estimator.write_override_choices(choices)
        estimator.end_estimation()

    # - create joint_tours based on joint_tour_frequency choices

    # - we need a person_id in order to generate the tour index (and for register_traceable_table)
    # - but we don't know the tour participants yet
    # - so we arbitrarily choose the first person in the household
    # - to be point person for the purpose of generating an index and setting origin
    temp_point_persons = persons.loc[persons.PNUM == 1]
    temp_point_persons['person_id'] = temp_point_persons.index
    temp_point_persons = temp_point_persons.set_index('household_id')
    temp_point_persons = temp_point_persons[['person_id', 'home_zone_id']]

    joint_tours = \
        process_joint_tours(choices, alternatives, temp_point_persons)

    tours = pipeline.extend_table("tours", joint_tours)

    tracing.register_traceable_table('tours', joint_tours)
    pipeline.get_rn_generator().add_channel('tours', joint_tours)

    # - annotate households

    # we expect there to be an alt with no tours - which we can use to backfill non-travelers
    # FIX: select the first alternative whose tour counts sum to zero
    # (previously `(alternatives.sum(axis=1) == 0).index[0]` which simply
    # returned the first row's label regardless of its tour counts)
    no_tours_alt = alternatives.index[alternatives.sum(axis=1) == 0][0]
    households['joint_tour_frequency'] = choices.reindex(
        households.index).fillna(no_tours_alt).astype(str)

    households['num_hh_joint_tours'] = joint_tours.groupby('household_id').size().\
        reindex(households.index).fillna(0).astype(np.int8)

    pipeline.replace_table("households", households)

    tracing.print_summary('joint_tour_frequency',
                          households.joint_tour_frequency,
                          value_counts=True)

    if trace_hh_id:
        tracing.trace_df(households,
                         label="joint_tour_frequency.households")

        tracing.trace_df(joint_tours,
                         label="joint_tour_frequency.joint_tours",
                         slicer='household_id')

    if estimator:
        # sanity check: modeled joint tours should match the survey tours
        survey_tours = estimation.manager.get_survey_table('tours')
        survey_tours = survey_tours[survey_tours.tour_category == 'joint']

        print(f"len(survey_tours) {len(survey_tours)}")
        print(f"len(joint_tours) {len(joint_tours)}")

        different = False
        survey_tours_not_in_tours = survey_tours[~survey_tours.index.
                                                 isin(joint_tours.index)]
        if len(survey_tours_not_in_tours) > 0:
            print(f"survey_tours_not_in_tours\n{survey_tours_not_in_tours}")
            different = True
        tours_not_in_survey_tours = joint_tours[~joint_tours.index.
                                                isin(survey_tours.index)]
        # FIX: was testing len(survey_tours_not_in_tours), so modeled tours
        # missing from the survey were never reported
        if len(tours_not_in_survey_tours) > 0:
            print(f"tours_not_in_survey_tours\n{tours_not_in_survey_tours}")
            different = True
        assert not different
def parking_location(trips, trips_merged, land_use, network_los, chunk_size,
                     trace_hh_id):
    """
    Given a set of trips, each trip needs to have a parking location if
    it is eligible for remote parking.

    Writes the chosen parking location column onto the pipeline 'trips'
    table and optionally saves the alternatives sample table.
    """
    trace_label = 'parking_location'
    model_settings = config.read_model_settings('parking_location_choice.yaml')

    alt_destination_col_name = model_settings['ALT_DEST_COL_NAME']
    preprocessor_settings = model_settings.get('PREPROCESSOR', None)

    trips_df = trips.to_frame()
    trips_merged_df = trips_merged.to_frame()
    land_use_df = land_use.to_frame()

    # expression evaluation context: network LOS plus model constants
    locals_dict = dict(network_los=network_los)
    locals_dict.update(config.get_model_constants(model_settings))

    if preprocessor_settings:
        expressions.assign_columns(df=trips_merged_df,
                                   model_settings=preprocessor_settings,
                                   locals_dict=locals_dict,
                                   trace_label=trace_label)

    parking_locations, save_sample_df = run_parking_destination(
        model_settings,
        trips_merged_df,
        land_use_df,
        chunk_size=chunk_size,
        trace_hh_id=trace_hh_id,
        trace_label=trace_label,
    )

    assign_in_place(trips_df,
                    parking_locations.to_frame(alt_destination_col_name))
    pipeline.replace_table("trips", trips_df)

    if trace_hh_id:
        tracing.trace_df(trips_df,
                         label=trace_label,
                         slicer='trip_id',
                         index_label='trip_id',
                         warn_if_empty=True)

    if save_sample_df is not None:
        # samples are expected only for intermediate trips
        intermediate_trips = trips_df[trips_df.trip_num < trips_df.trip_count]
        assert len(save_sample_df.index.get_level_values(0).unique()) == \
            len(intermediate_trips)

        sample_table_name = model_settings.get(
            'PARKING_LOCATION_SAMPLE_TABLE_NAME')
        assert sample_table_name is not None

        logger.info("adding %s samples to %s" %
                    (len(save_sample_df), sample_table_name))

        # lest they try to put tour samples into the same table
        if pipeline.is_table(sample_table_name):
            raise RuntimeError("sample table %s already exists" %
                               sample_table_name)
        pipeline.extend_table(sample_table_name, save_sample_df)
def trip_destination(trips, tours_merged, chunk_size, trace_hh_id):
    """
    Choose a destination for all 'intermediate' trips based on trip purpose.

    Final trips already have a destination (the primary tour destination for
    outbound trips, and home for inbound trips.)

    Failed trips may be flagged, have their leg-mates flagged, and be cleaned
    up (depending on the CLEANUP setting).  Writes the result back to the
    pipeline 'trips' table and optionally saves the alternatives sample table.
    """
    trace_label = 'trip_destination'
    model_settings_file_name = 'trip_destination.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)

    # CLEANUP controls whether failed trips are removed from the trips table
    CLEANUP = model_settings.get('CLEANUP', True)
    fail_some_trips_for_testing = model_settings.get(
        'fail_some_trips_for_testing', False)

    trips_df = trips.to_frame()
    tours_merged_df = tours_merged.to_frame()

    estimator = estimation.manager.begin_estimation('trip_destination')
    if estimator:
        estimator.write_coefficients(model_settings=model_settings)
        # estimator.write_spec(model_settings, tag='SAMPLE_SPEC')
        estimator.write_spec(model_settings, tag='SPEC')
        estimator.set_alt_id(model_settings["ALT_DEST_COL_NAME"])
        estimator.write_table(inject.get_injectable('size_terms'),
                              'size_terms', append=False)
        estimator.write_table(inject.get_table('land_use').to_frame(),
                              'landuse', append=False)
        estimator.write_model_settings(model_settings, model_settings_file_name)

    logger.info("Running %s with %d trips", trace_label, trips_df.shape[0])

    trips_df, save_sample_df = run_trip_destination(
        trips_df,
        tours_merged_df,
        estimator=estimator,
        chunk_size=chunk_size,
        trace_hh_id=trace_hh_id,
        trace_label=trace_label,
        fail_some_trips_for_testing=fail_some_trips_for_testing)

    # testing feature t0 make sure at least one trip fails so trip_purpose_and_destination model is run
    if config.setting('testing_fail_trip_destination', False) and not trips_df.failed.any():
        if (trips_df.trip_num < trips_df.trip_count).sum() == 0:
            raise RuntimeError(
                f"can't honor 'testing_fail_trip_destination' setting because no intermediate trips")

        fail_o = trips_df[trips_df.trip_num < trips_df.trip_count].origin.max()
        trips_df.failed = (trips_df.origin == fail_o) & \
                          (trips_df.trip_num < trips_df.trip_count)

    if trips_df.failed.any():
        logger.warning("%s %s failed trips", trace_label, trips_df.failed.sum())
        if inject.get_injectable('pipeline_file_prefix', None):
            file_name = f"{trace_label}_failed_trips_{inject.get_injectable('pipeline_file_prefix')}"
        else:
            file_name = f"{trace_label}_failed_trips"
        logger.info("writing failed trips to %s", file_name)
        tracing.write_csv(trips_df[trips_df.failed],
                          file_name=file_name,
                          transpose=False)

    if estimator:
        estimator.end_estimation()
        # no trips should have failed since we overwrite choices and sample should have not failed trips
        assert not trips_df.failed.any()

    if CLEANUP:
        if trips_df.failed.any():
            # mark the other trips on a leg with a failed trip so the whole leg is dropped
            flag_failed_trip_leg_mates(trips_df, 'failed')

            if save_sample_df is not None:
                save_sample_df.drop(trips_df.index[trips_df.failed],
                                    level='trip_id', inplace=True)

            trips_df = cleanup_failed_trips(trips_df)

        trips_df.drop(columns='failed', inplace=True, errors='ignore')

    pipeline.replace_table("trips", trips_df)

    if trace_hh_id:
        tracing.trace_df(trips_df,
                         label=trace_label,
                         slicer='trip_id',
                         index_label='trip_id',
                         warn_if_empty=True)

    if save_sample_df is not None:
        # might be none if want_sample_table but there are no intermediate trips
        # expect samples only for intermediate trip destinations
        assert len(save_sample_df.index.get_level_values(0).unique()) == \
            len(trips_df[trips_df.trip_num < trips_df.trip_count])

        sample_table_name = model_settings.get('DEST_CHOICE_SAMPLE_TABLE_NAME')
        assert sample_table_name is not None

        logger.info("adding %s samples to %s" %
                    (len(save_sample_df), sample_table_name))

        # lest they try to put tour samples into the same table
        if pipeline.is_table(sample_table_name):
            raise RuntimeError("sample table %s already exists" %
                               sample_table_name)
        pipeline.extend_table(sample_table_name, save_sample_df)
def stop_frequency(tours, tours_merged, stop_frequency_alts, network_los,
                   chunk_size, trace_hh_id):
    """
    stop frequency model

    For each tour, choose a number of intermediate inbound stops and outbound stops.
    Create a trip table with inbound and outbound trips.

    Thus, a tour with stop_frequency '2out_0in' will have two outbound and zero inbound stops,
    and four corresponding trips: three outbound, and one inbound.

    Adds stop_frequency str column to trips, with fields

    creates trips table with columns:

    ::

        - person_id
        - household_id
        - tour_id
        - primary_purpose
        - atwork
        - trip_num
        - outbound
        - trip_count
    """
    trace_label = 'stop_frequency'
    model_settings_file_name = 'stop_frequency.yaml'

    model_settings = config.read_model_settings(model_settings_file_name)

    tours = tours.to_frame()
    tours_merged = tours_merged.to_frame()
    assert not tours_merged.household_id.isnull().any()
    assert not (tours_merged.origin == -1).any()
    assert not (tours_merged.destination == -1).any()

    nest_spec = config.get_logit_model_settings(model_settings)
    constants = config.get_model_constants(model_settings)

    # - run preprocessor to annotate tours_merged
    # FIX: initialize so the trace block below cannot hit a NameError when
    # no preprocessor is configured
    annotations = None
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:
        # hack: preprocessor adds origin column in place if it does not exist already
        assert 'origin' in tours_merged
        assert 'destination' in tours_merged
        od_skim_stack_wrapper = network_los.get_default_skim_dict().wrap(
            'origin', 'destination')
        skims = [od_skim_stack_wrapper]

        locals_dict = {
            "od_skims": od_skim_stack_wrapper,
            'network_los': network_los
        }
        locals_dict.update(constants)

        simulate.set_skim_wrapper_targets(tours_merged, skims)

        # this should be pre-slice as some expressions may count tours by type
        annotations = expressions.compute_columns(
            df=tours_merged,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)

        assign_in_place(tours_merged, annotations)

    tracing.print_summary('stop_frequency segments',
                          tours_merged.primary_purpose,
                          value_counts=True)

    spec_segments = model_settings.get('SPEC_SEGMENTS')
    assert spec_segments is not None, f"SPEC_SEGMENTS setting not found in model settings: {model_settings_file_name}"
    segment_col = model_settings.get('SEGMENT_COL')
    assert segment_col is not None, f"SEGMENT_COL setting not found in model settings: {model_settings_file_name}"

    nest_spec = config.get_logit_model_settings(model_settings)

    choices_list = []
    for segment_settings in spec_segments:

        segment_name = segment_settings[segment_col]
        segment_value = segment_settings[segment_col]

        chooser_segment = tours_merged[tours_merged[segment_col] == segment_value]

        if len(chooser_segment) == 0:
            # FIX: use the module logger (was root `logging`), consistent
            # with the rest of this file
            logger.info(f"{trace_label} skipping empty segment {segment_name}")
            continue

        logger.info(
            f"{trace_label} running segment {segment_name} with {chooser_segment.shape[0]} chooser rows")

        estimator = estimation.manager.begin_estimation(
            model_name=segment_name, bundle_name='stop_frequency')

        segment_spec = simulate.read_model_spec(
            file_name=segment_settings['SPEC'])
        assert segment_spec is not None, "spec for segment_type %s not found" % segment_name

        coefficients_file_name = segment_settings['COEFFICIENTS']
        coefficients_df = simulate.read_model_coefficients(
            file_name=coefficients_file_name)
        segment_spec = simulate.eval_coefficients(segment_spec,
                                                  coefficients_df, estimator)

        if estimator:
            estimator.write_spec(segment_settings, bundle_directory=False)
            estimator.write_model_settings(model_settings,
                                           model_settings_file_name,
                                           bundle_directory=True)
            estimator.write_coefficients(coefficients_df, segment_settings)
            estimator.write_choosers(chooser_segment)

            estimator.set_chooser_id(chooser_segment.index.name)

        choices = simulate.simple_simulate(
            choosers=chooser_segment,
            spec=segment_spec,
            nest_spec=nest_spec,
            locals_d=constants,
            chunk_size=chunk_size,
            trace_label=tracing.extend_trace_label(trace_label, segment_name),
            trace_choice_name='stops',
            estimator=estimator)

        # convert indexes to alternative names
        choices = pd.Series(segment_spec.columns[choices.values],
                            index=choices.index)

        if estimator:
            estimator.write_choices(choices)
            choices = estimator.get_survey_values(
                choices, 'tours', 'stop_frequency')  # override choices
            estimator.write_override_choices(choices)
            estimator.end_estimation()

        choices_list.append(choices)

    choices = pd.concat(choices_list)

    tracing.print_summary('stop_frequency', choices, value_counts=True)

    # add stop_frequency choices to tours table
    assign_in_place(tours, choices.to_frame('stop_frequency'))

    # FIXME should have added this when tours created?
    assert 'primary_purpose' not in tours
    if 'primary_purpose' not in tours.columns:
        # if not already there, then it will have been added by annotate tours preprocessor
        assign_in_place(tours, tours_merged[['primary_purpose']])

    pipeline.replace_table("tours", tours)

    # create trips table
    trips = process_trips(tours, stop_frequency_alts)
    trips = pipeline.extend_table("trips", trips)
    tracing.register_traceable_table('trips', trips)
    pipeline.get_rn_generator().add_channel('trips', trips)

    # NOTE(review): 'estimator' here is the one left over from the last
    # segment iteration — confirm all segments share the same estimation mode
    if estimator:
        # make sure they created trips with the expected tour_ids
        columns = ['person_id', 'household_id', 'tour_id', 'outbound']

        survey_trips = estimation.manager.get_survey_table(table_name='trips')

        different = False
        survey_trips_not_in_trips = survey_trips[~survey_trips.index.
                                                 isin(trips.index)]
        if len(survey_trips_not_in_trips) > 0:
            print(f"survey_trips_not_in_trips\n{survey_trips_not_in_trips}")
            different = True
        trips_not_in_survey_trips = trips[~trips.index.isin(survey_trips.index)]
        # FIX: was testing len(survey_trips_not_in_trips), so modeled trips
        # missing from the survey were never reported
        if len(trips_not_in_survey_trips) > 0:
            print(f"trips_not_in_survey_trips\n{trips_not_in_survey_trips}")
            different = True
        assert not different

        survey_trips = \
            estimation.manager.get_survey_values(trips,
                                                 table_name='trips',
                                                 column_names=columns)

        trips_differ = (trips[columns] != survey_trips[columns]).any(axis=1)

        if trips_differ.any():
            print("trips_differ\n%s" % trips_differ)
            print("%s of %s tours differ" %
                  (trips_differ.sum(), len(trips_differ)))
            print("differing survey_trips\n%s" % survey_trips[trips_differ])
            print("differing modeled_trips\n%s" % trips[columns][trips_differ])

        assert (not trips_differ.any())

    if trace_hh_id:
        tracing.trace_df(tours,
                         label="stop_frequency.tours",
                         slicer='person_id',
                         columns=None)

        tracing.trace_df(trips,
                         label="stop_frequency.trips",
                         slicer='person_id',
                         columns=None)

        # FIX: only trace annotations when the preprocessor actually produced
        # them (previously raised NameError when no preprocessor configured)
        if annotations is not None:
            tracing.trace_df(annotations,
                             label="stop_frequency.annotations",
                             columns=None)

        tracing.trace_df(tours_merged,
                         label="stop_frequency.tours_merged",
                         slicer='person_id',
                         columns=None)
def non_mandatory_tour_destination(
        tours,
        persons_merged,
        skim_dict, skim_stack,
        chunk_size,
        trace_hh_id):

    """
    Given the tour generation from the above, each tour needs to have a destination,
    so in this case tours are the choosers (with the associated person that's making the tour)

    Parameters
    ----------
    tours : pipeline table wrapper (supports .to_frame())
        all tours; only rows with tour_category == 'non_mandatory' are modeled here
    persons_merged : pipeline table wrapper (supports .to_frame())
        chooser person attributes passed through to tour_destination.run_tour_destination
    skim_dict, skim_stack :
        skim objects passed through to run_tour_destination (opaque here)
    chunk_size :
        chunking parameter passed through to run_tour_destination
    trace_hh_id :
        household id to trace; falsy disables tracing
    """

    trace_label = 'non_mandatory_tour_destination'
    model_settings_file_name = 'non_mandatory_tour_destination.yaml'
    model_settings = config.read_model_settings(model_settings_file_name)

    # logsums are computed/saved only if the model settings name a logsum column
    logsum_column_name = model_settings.get('DEST_CHOICE_LOGSUM_COLUMN_NAME')
    want_logsums = logsum_column_name is not None

    # sample table is written only if the global setting is on AND the model
    # settings name a destination-choice sample table
    sample_table_name = model_settings.get('DEST_CHOICE_SAMPLE_TABLE_NAME')
    want_sample_table = config.setting('want_dest_choice_sample_tables') and sample_table_name is not None

    tours = tours.to_frame()
    persons_merged = persons_merged.to_frame()

    # choosers are tours - in a sense tours are choosing their destination
    non_mandatory_tours = tours[tours.tour_category == 'non_mandatory']

    # - nothing to do if there are no non_mandatory tours
    if non_mandatory_tours.shape[0] == 0:
        tracing.no_results(trace_label)
        return

    estimator = estimation.manager.begin_estimation('non_mandatory_tour_destination')
    if estimator:
        # write estimation bundle inputs: coefficients, spec, size terms, land use
        estimator.write_coefficients(simulate.read_model_coefficients(model_settings))
        # estimator.write_spec(model_settings, tag='SAMPLE_SPEC')
        estimator.write_spec(model_settings, tag='SPEC')
        estimator.set_alt_id(model_settings["ALT_DEST_COL_NAME"])
        estimator.write_table(inject.get_injectable('size_terms'), 'size_terms', append=False)
        estimator.write_table(inject.get_table('land_use').to_frame(), 'landuse', append=False)
        estimator.write_model_settings(model_settings, model_settings_file_name)

    choices_df, save_sample_df = tour_destination.run_tour_destination(
        non_mandatory_tours,
        persons_merged,
        want_logsums,
        want_sample_table,
        model_settings,
        skim_dict,
        skim_stack,
        estimator,
        chunk_size, trace_hh_id, trace_label)

    if estimator:
        # override the modeled choices with survey values, then close the bundle
        estimator.write_choices(choices_df.choice)
        choices_df.choice = estimator.get_survey_values(choices_df.choice, 'tours', 'destination')
        estimator.write_override_choices(choices_df.choice)
        estimator.end_estimation()

    non_mandatory_tours['destination'] = choices_df.choice

    # write destination back into the full tours table (only non_mandatory rows change)
    assign_in_place(tours, non_mandatory_tours[['destination']])

    if want_logsums:
        non_mandatory_tours[logsum_column_name] = choices_df['logsum']
        assign_in_place(tours, non_mandatory_tours[[logsum_column_name]])

    pipeline.replace_table("tours", tours)

    if want_sample_table:
        # expect exactly one chooser (level 0 of the sample MultiIndex) per choice row
        assert len(save_sample_df.index.get_level_values(0).unique()) == len(choices_df)
        pipeline.extend_table(sample_table_name, save_sample_df)

    if trace_hh_id:
        tracing.trace_df(tours[tours.tour_category == 'non_mandatory'],
                         label="non_mandatory_tour_destination",
                         slicer='person_id',
                         index_label='tour',
                         columns=None,
                         warn_if_empty=True)
def trip_destination(
        trips,
        tours_merged,
        chunk_size, trace_hh_id):
    """
    Choose a destination for all 'intermediate' trips based on trip purpose.

    Final trips already have a destination (the primary tour destination for
    outbound trips, and home for inbound trips.)

    Parameters
    ----------
    trips : pipeline table wrapper (supports .to_frame())
    tours_merged : pipeline table wrapper (supports .to_frame())
    chunk_size :
        chunking parameter passed through to run_trip_destination
    trace_hh_id :
        household id to trace; falsy disables tracing
    """
    trace_label = 'trip_destination'
    model_settings = config.read_model_settings('trip_destination.yaml')
    # CLEANUP: drop failed trips (and flag their leg mates) rather than keeping them
    CLEANUP = model_settings.get('CLEANUP', True)
    fail_some_trips_for_testing = model_settings.get('fail_some_trips_for_testing', False)

    trips_df = trips.to_frame()
    tours_merged_df = tours_merged.to_frame()

    logger.info("Running %s with %d trips", trace_label, trips_df.shape[0])

    trips_df, save_sample_df = run_trip_destination(
        trips_df,
        tours_merged_df,
        chunk_size=chunk_size,
        trace_hh_id=trace_hh_id,
        trace_label=trace_label,
        fail_some_trips_for_testing=fail_some_trips_for_testing)

    # testing feature to make sure at least one trip fails so trip_purpose_and_destination model is run
    if config.setting('testing_fail_trip_destination', False) and not trips_df.failed.any():
        # deliberately fail all intermediate trips departing from one (arbitrary, max-id) origin
        fail_o = trips_df[trips_df.trip_num < trips_df.trip_count].origin.max()
        trips_df.failed = (trips_df.origin == fail_o) & \
                          (trips_df.trip_num < trips_df.trip_count)

    if trips_df.failed.any():
        logger.warning("%s %s failed trips", trace_label, trips_df.failed.sum())
        file_name = "%s_failed_trips" % trace_label
        logger.info("writing failed trips to %s", file_name)
        tracing.write_csv(trips_df[trips_df.failed], file_name=file_name, transpose=False)

    if CLEANUP:
        if trips_df.failed.any():
            # a failed trip invalidates the other trips on the same leg
            flag_failed_trip_leg_mates(trips_df, 'failed')

            if save_sample_df is not None:
                # drop destination samples belonging to the failed trips too
                save_sample_df.drop(trips_df.index[trips_df.failed], level='trip_id', inplace=True)

            trips_df = cleanup_failed_trips(trips_df)

        # 'failed' is a working column; don't persist it to the pipeline
        trips_df.drop(columns='failed', inplace=True, errors='ignore')

    pipeline.replace_table("trips", trips_df)

    if trace_hh_id:
        tracing.trace_df(trips_df,
                         label=trace_label,
                         slicer='trip_id',
                         index_label='trip_id',
                         warn_if_empty=True)

    if save_sample_df is not None:
        # might be none if want_sample_table but there are no intermediate trips
        # expect samples only for intermediate trip destinations
        assert len(save_sample_df.index.get_level_values(0).unique()) == \
               len(trips_df[trips_df.trip_num < trips_df.trip_count])

        sample_table_name = model_settings.get('DEST_CHOICE_SAMPLE_TABLE_NAME')
        assert sample_table_name is not None

        logger.info("adding %s samples to %s" % (len(save_sample_df), sample_table_name))

        # lest they try to put tour samples into the same table
        if pipeline.is_table(sample_table_name):
            raise RuntimeError("sample table %s already exists" % sample_table_name)
        pipeline.extend_table(sample_table_name, save_sample_df)
def non_mandatory_tour_frequency(persons, persons_merged,
                                 chunk_size,
                                 trace_hh_id):
    """
    This model predicts the frequency of making non-mandatory trips
    (alternatives for this model come from a separate csv file which is
    configured by the user) - these trips include escort, shopping, othmaint,
    othdiscr, eatout, and social trips in various combination.

    Parameters
    ----------
    persons : pipeline table wrapper (supports .to_frame())
        updated in place with a 'non_mandatory_tour_frequency' column and
        annotate_persons expressions, then replaced in the pipeline
    persons_merged : pipeline table wrapper (supports .to_frame())
        choosers; filtered to cdap_activity in ['M', 'N']
    chunk_size :
        chunking parameter passed through to interaction_simulate
    trace_hh_id :
        household id to trace; falsy disables tracing
    """

    trace_label = 'non_mandatory_tour_frequency'
    model_settings_file_name = 'non_mandatory_tour_frequency.yaml'

    model_settings = config.read_model_settings(model_settings_file_name)

    # FIXME kind of tacky both that we know to add this here and del it below
    # 'tot_tours' is used in model_spec expressions
    alternatives = simulate.read_model_alts(
        'non_mandatory_tour_frequency_alternatives.csv',
        set_index=None)
    alternatives['tot_tours'] = alternatives.sum(axis=1)

    # filter based on results of CDAP
    choosers = persons_merged.to_frame()
    choosers = choosers[choosers.cdap_activity.isin(['M', 'N'])]

    # - preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_dict = {'person_max_window': person_max_window}

        expressions.assign_columns(
            df=choosers,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)

    logger.info("Running non_mandatory_tour_frequency with %d persons", len(choosers))

    constants = config.get_model_constants(model_settings)

    model_spec = simulate.read_model_spec(file_name=model_settings['SPEC'])
    spec_segments = model_settings.get('SPEC_SEGMENTS', {})

    # segment by person type and pick the right spec for each person type
    choices_list = []
    for segment_settings in spec_segments:

        segment_name = segment_settings['NAME']
        ptype = segment_settings['PTYPE']

        # pick the spec column for the segment
        segment_spec = model_spec[[segment_name]]

        chooser_segment = choosers[choosers.ptype == ptype]

        logger.info("Running segment '%s' of size %d", segment_name, len(chooser_segment))

        if len(chooser_segment) == 0:
            # skip empty segments
            continue

        estimator = \
            estimation.manager.begin_estimation(model_name=segment_name, bundle_name='non_mandatory_tour_frequency')

        coefficients_df = simulate.read_model_coefficients(segment_settings)
        segment_spec = simulate.eval_coefficients(segment_spec, coefficients_df, estimator)

        if estimator:
            # write per-segment estimation bundle inputs
            estimator.write_spec(model_settings, bundle_directory=True)
            estimator.write_model_settings(model_settings, model_settings_file_name, bundle_directory=True)
            # preserving coefficients file name makes bringing back updated coefficients more straightforward
            estimator.write_coefficients(coefficients_df, segment_settings)
            estimator.write_choosers(chooser_segment)
            estimator.write_alternatives(alternatives, bundle_directory=True)

            # FIXME #interaction_simulate_estimation_requires_chooser_id_in_df_column
            # shuold we do it here or have interaction_simulate do it?
            # chooser index must be duplicated in column or it will be omitted from interaction_dataset
            # estimation requires that chooser_id is either in index or a column of interaction_dataset
            # so it can be reformatted (melted) and indexed by chooser_id and alt_id
            assert chooser_segment.index.name == 'person_id'
            assert 'person_id' not in chooser_segment.columns
            chooser_segment['person_id'] = chooser_segment.index

            # FIXME set_alt_id - do we need this for interaction_simulate estimation bundle tables?
            estimator.set_alt_id('alt_id')

            estimator.set_chooser_id(chooser_segment.index.name)

        choices = interaction_simulate(
            chooser_segment,
            alternatives,
            spec=segment_spec,
            locals_d=constants,
            chunk_size=chunk_size,
            trace_label='non_mandatory_tour_frequency.%s' % segment_name,
            trace_choice_name='non_mandatory_tour_frequency',
            estimator=estimator)

        if estimator:
            # override modeled choices with survey values and close the bundle
            estimator.write_choices(choices)
            choices = estimator.get_survey_values(
                choices, 'persons', 'non_mandatory_tour_frequency')
            estimator.write_override_choices(choices)
            estimator.end_estimation()

        choices_list.append(choices)

        # FIXME - force garbage collection?
        force_garbage_collect()

    del alternatives['tot_tours']  # del tot_tours column we added above

    # The choice value 'non_mandatory_tour_frequency' assigned by interaction_simulate
    # is the index value of the chosen alternative in the alternatives table.
    choices = pd.concat(choices_list).sort_index()

    # add non_mandatory_tour_frequency column to persons
    persons = persons.to_frame()

    # we expect there to be an alt with no tours - which we can use to backfill non-travelers
    # NOTE(review): index[0] of this boolean series is simply the FIRST alternative's
    # index label, not the first True position - this is only correct if the zero-tour
    # alternative is the first row of the alternatives table; confirm against the csv
    no_tours_alt = (alternatives.sum(axis=1) == 0).index[0]

    # need to reindex as we only handled persons with cdap_activity in ['M', 'N']
    persons['non_mandatory_tour_frequency'] = \
        choices.reindex(persons.index).fillna(no_tours_alt).astype(np.int8)

    """
    We have now generated non-mandatory tour frequencies, but they are attributes of the person table
    Now we create a "tours" table which has one row per tour that has been generated
    (and the person id it is associated with)

    But before we do that, we run an additional probablilistic step to extend/increase tour counts
    beyond the strict limits of the tour_frequency alternatives chosen above
    (which are currently limited to at most 2 escort tours and 1 each of shopping, othmaint,
    othdiscr, eatout, and social tours)

    The choice value 'non_mandatory_tour_frequency' assigned by interaction_simulate
    is simply the index value of the chosen alternative in the alternatives table.

    get counts of each of the tour type alternatives (so we can extend)
               escort  shopping  othmaint  othdiscr  eatout  social
    parent_id
    2588676         2         0         0         1       1       0
    2588677         0         1         0         1       0       0
    """

    # counts of each of the tour type alternatives (so we can extend)
    modeled_tour_counts = alternatives.loc[choices]
    modeled_tour_counts.index = choices.index  # assign person ids to the index

    # - extend_tour_counts - probabalistic
    extended_tour_counts = \
        extend_tour_counts(choosers, modeled_tour_counts.copy(), alternatives,
                           trace_hh_id,
                           tracing.extend_trace_label(trace_label, 'extend_tour_counts'))

    num_modeled_tours = modeled_tour_counts.sum().sum()
    num_extended_tours = extended_tour_counts.sum().sum()
    logger.info("extend_tour_counts increased tour count by %s from %s to %s" %
                (num_extended_tours - num_modeled_tours, num_modeled_tours, num_extended_tours))

    """
    create the non_mandatory tours based on extended_tour_counts
    """
    # NOTE(review): 'estimator' here is the loop variable from the LAST non-empty
    # segment above - this assumes estimation is enabled for all segments or none,
    # and that at least one segment ran; confirm
    if estimator:
        override_tour_counts = \
            estimation.manager.get_survey_values(extended_tour_counts,
                                                 table_name='persons',
                                                 column_names=['_%s' % c for c in extended_tour_counts.columns])
        override_tour_counts = \
            override_tour_counts.rename(columns={('_%s' % c): c for c in extended_tour_counts.columns})
        logger.info(
            "estimation get_survey_values override_tour_counts %s changed cells" %
            (override_tour_counts != extended_tour_counts).sum().sum())
        extended_tour_counts = override_tour_counts

    """
    create the non_mandatory tours based on extended_tour_counts
    """
    non_mandatory_tours = process_non_mandatory_tours(persons, extended_tour_counts)
    assert len(non_mandatory_tours) == extended_tour_counts.sum().sum()

    if estimator:

        # make sure they created the right tours
        survey_tours = estimation.manager.get_survey_table('tours').sort_index()
        non_mandatory_survey_tours = survey_tours[survey_tours.tour_category == 'non_mandatory']
        assert len(non_mandatory_survey_tours) == len(non_mandatory_tours)
        assert non_mandatory_survey_tours.index.equals(non_mandatory_tours.sort_index().index)

        # make sure they created tours with the expected tour_ids
        columns = ['person_id', 'household_id', 'tour_type', 'tour_category']
        survey_tours = \
            estimation.manager.get_survey_values(non_mandatory_tours,
                                                 table_name='tours',
                                                 column_names=columns)

        tours_differ = (non_mandatory_tours[columns] != survey_tours[columns]).any(axis=1)

        if tours_differ.any():
            print("tours_differ\n%s" % tours_differ)
            print("%s of %s tours differ" % (tours_differ.sum(), len(tours_differ)))
            print("differing survey_tours\n%s" % survey_tours[tours_differ])
            print("differing modeled_tours\n%s" % non_mandatory_tours[columns][tours_differ])

        assert (not tours_differ.any())

    # register the new tours with the pipeline, tracer, and random-number generator
    pipeline.extend_table("tours", non_mandatory_tours)
    tracing.register_traceable_table('tours', non_mandatory_tours)
    pipeline.get_rn_generator().add_channel('tours', non_mandatory_tours)

    expressions.assign_columns(
        df=persons,
        model_settings=model_settings.get('annotate_persons'),
        trace_label=trace_label)

    pipeline.replace_table("persons", persons)

    tracing.print_summary('non_mandatory_tour_frequency',
                          persons.non_mandatory_tour_frequency, value_counts=True)

    if trace_hh_id:
        tracing.trace_df(
            non_mandatory_tours,
            label="non_mandatory_tour_frequency.non_mandatory_tours",
            warn_if_empty=True)

        tracing.trace_df(
            choosers,
            label="non_mandatory_tour_frequency.choosers",
            warn_if_empty=True)

        tracing.trace_df(
            persons,
            label="non_mandatory_tour_frequency.annotated_persons",
            warn_if_empty=True)
def non_mandatory_tour_frequency(persons, persons_merged,
                                 chunk_size,
                                 trace_hh_id):
    """
    This model predicts the frequency of making non-mandatory trips
    (alternatives for this model come from a separate csv file which is
    configured by the user) - these trips include escort, shopping, othmaint,
    othdiscr, eatout, and social trips in various combination.

    Parameters
    ----------
    persons : pipeline table wrapper (supports .to_frame())
        updated in place with a 'non_mandatory_tour_frequency' column and
        annotate_persons expressions, then replaced in the pipeline
    persons_merged : pipeline table wrapper (supports .to_frame())
        choosers; filtered to cdap_activity in ['M', 'N']
    chunk_size :
        chunking parameter passed through to interaction_simulate
    trace_hh_id :
        household id to trace; falsy disables tracing
    """

    trace_label = 'non_mandatory_tour_frequency'
    model_settings = config.read_model_settings('non_mandatory_tour_frequency.yaml')
    model_spec = simulate.read_model_spec(file_name='non_mandatory_tour_frequency.csv')

    alternatives = simulate.read_model_alts(
        config.config_file_path('non_mandatory_tour_frequency_alternatives.csv'),
        set_index=None)

    choosers = persons_merged.to_frame()

    # FIXME kind of tacky both that we know to add this here and del it below
    # 'tot_tours' is used in model_spec expressions
    alternatives['tot_tours'] = alternatives.sum(axis=1)

    # - preprocessor
    preprocessor_settings = model_settings.get('preprocessor', None)
    if preprocessor_settings:

        locals_dict = {
            'person_max_window': person_max_window
        }

        expressions.assign_columns(
            df=choosers,
            model_settings=preprocessor_settings,
            locals_dict=locals_dict,
            trace_label=trace_label)

    # filter based on results of CDAP
    choosers = choosers[choosers.cdap_activity.isin(['M', 'N'])]
    logger.info("Running non_mandatory_tour_frequency with %d persons", len(choosers))

    constants = config.get_model_constants(model_settings)

    choices_list = []
    # segment by person type and pick the right spec for each person type
    for ptype, segment in choosers.groupby('ptype'):

        name = PTYPE_NAME[ptype]

        # pick the spec column for the segment
        spec = model_spec[[name]]

        # drop any zero-valued rows
        spec = spec[spec[name] != 0]

        logger.info("Running segment '%s' of size %d", name, len(segment))

        choices = interaction_simulate(
            segment,
            alternatives,
            spec=spec,
            locals_d=constants,
            chunk_size=chunk_size,
            trace_label='non_mandatory_tour_frequency.%s' % name,
            trace_choice_name='non_mandatory_tour_frequency')

        choices_list.append(choices)

        # FIXME - force garbage collection?
        # force_garbage_collect()

    choices = pd.concat(choices_list)

    del alternatives['tot_tours']  # del tot_tours column we added above

    # - add non_mandatory_tour_frequency column to persons
    persons = persons.to_frame()
    # need to reindex as we only handled persons with cdap_activity in ['M', 'N']
    # (we expect there to be an alt with no tours - which we can use to backfill non-travelers)
    # NOTE(review): index[0] of this boolean series is simply the first alternative's
    # index label, not the first True position - correct only if the zero-tour
    # alternative is the first row of the alternatives table; confirm against the csv
    no_tours_alt = (alternatives.sum(axis=1) == 0).index[0]
    persons['non_mandatory_tour_frequency'] = \
        choices.reindex(persons.index).fillna(no_tours_alt).astype(np.int8)

    """
    We have now generated non-mandatory tours, but they are attributes of the person table
    Now we create a "tours" table which has one row per tour that has been generated
    (and the person id it is associated with)
    """

    # - get counts of each of the alternatives (so we can extend)
    # (choices is just the index values for the chosen alts)
    """
               escort  shopping  othmaint  othdiscr  eatout  social
    parent_id
    2588676         2         0         0         1       1       0
    2588677         0         1         0         1       0       0
    """
    tour_counts = alternatives.loc[choices]
    tour_counts.index = choices.index  # assign person ids to the index

    prev_tour_count = tour_counts.sum().sum()

    # - extend_tour_counts (probabilistically bump counts beyond the alt limits)
    tour_counts = extend_tour_counts(choosers, tour_counts, alternatives,
                                     trace_hh_id,
                                     tracing.extend_trace_label(trace_label, 'extend_tour_counts'))

    extended_tour_count = tour_counts.sum().sum()

    # FIX: use the module-level logger (as every other log call in this file does)
    # rather than logging.info, which logs to the root logger and bypasses the
    # package's logging configuration
    logger.info("extend_tour_counts increased nmtf tour count by %s from %s to %s" %
                (extended_tour_count - prev_tour_count, prev_tour_count, extended_tour_count))

    # - create the non_mandatory tours
    non_mandatory_tours = process_non_mandatory_tours(persons, tour_counts)
    assert len(non_mandatory_tours) == extended_tour_count

    # register the new tours with the pipeline, tracer, and random-number generator
    pipeline.extend_table("tours", non_mandatory_tours)
    tracing.register_traceable_table('tours', non_mandatory_tours)
    pipeline.get_rn_generator().add_channel('tours', non_mandatory_tours)

    expressions.assign_columns(
        df=persons,
        model_settings=model_settings.get('annotate_persons'),
        trace_label=trace_label)

    pipeline.replace_table("persons", persons)

    tracing.print_summary('non_mandatory_tour_frequency',
                          persons.non_mandatory_tour_frequency,
                          value_counts=True)

    if trace_hh_id:
        tracing.trace_df(non_mandatory_tours,
                         label="non_mandatory_tour_frequency.non_mandatory_tours",
                         warn_if_empty=True)

        tracing.trace_df(choosers,
                         label="non_mandatory_tour_frequency.choosers",
                         warn_if_empty=True)

        tracing.trace_df(persons,
                         label="non_mandatory_tour_frequency.annotated_persons",
                         warn_if_empty=True)