def _location_sample(
        segment_name, choosers, alternatives, skims, estimator,
        model_settings, alt_dest_col_name, chunk_size, chunk_tag, trace_label):
    """
    Select a sample of alternative locations for each chooser.

    Logsum calculations are expensive, so rather than scoring persons * all
    zones we draw <SAMPLE_SIZE> weighted choices per chooser; repeated draws
    of the same zone are collapsed into a pick_count column, e.g.

        person_id,  dest_zone_id, rand,            pick_count
        23750,      14,           0.565502716034,  4
        23750,      16,           0.711135838871,  6
        ...
        23751,      12,           0.408038878552,  1
        23751,      14,           0.972732479292,  2

    Returns
    -------
    pandas.DataFrame
        one row per (chooser, sampled alternative) with prob and pick_count
    """
    assert not choosers.empty

    logger.info("Running %s with %d persons" % (trace_label, len(choosers.index)))

    sample_size = model_settings["SAMPLE_SIZE"]
    if config.setting('disable_destination_sampling', False) or \
            (estimator and estimator.want_unsampled_alternatives):
        # FIXME interaction_sample will return unsampled complete alternatives with probs and pick_count
        logger.info(
            "Estimation mode for %s using unsampled alternatives short_circuit_choices" % (trace_label, ))
        # sample_size of zero tells interaction_sample to return every alternative
        sample_size = 0

    # expression-evaluation context: skims, segment tag, plus model constants
    eval_locals = {'skims': skims, 'segment_size': segment_name}
    eval_locals.update(config.get_model_constants(model_settings))

    sample_spec = simulate.spec_for_segment(model_settings, spec_id='SAMPLE_SPEC',
                                            segment_name=segment_name, estimator=estimator)

    # here since presumably we want this when called for either sample or presample
    log_alt_losers = config.setting('log_alt_losers', False)

    return interaction_sample(
        choosers,
        alternatives,
        spec=sample_spec,
        sample_size=sample_size,
        alt_col_name=alt_dest_col_name,
        log_alt_losers=log_alt_losers,
        skims=skims,
        locals_d=eval_locals,
        chunk_size=chunk_size,
        chunk_tag=chunk_tag,
        trace_label=trace_label)
def _destination_sample(
        primary_purpose, trips, alternatives, model_settings, size_term_matrix,
        skims, alt_dest_col_name, estimator, chunk_size, chunk_tag, trace_label):
    """
    Draw a weighted sample of candidate destinations for each trip.

    Note: trips with no viable destination receive no sample rows (because we
    call interaction_sample with allow_zero_probs=True); all other trips get
    one or more rows with pick_count summing to sample_size.

    Returns
    -------
    pandas.DataFrame
        indexed by trip_id, e.g.

                   alt_dest      prob  pick_count
        trip_id
        102829169      2898  0.002333           1
        102829169      2901  0.004976           1
        102829169      3193  0.002628           1
    """
    sample_spec = simulate.spec_for_segment(model_settings, spec_id='DESTINATION_SAMPLE_SPEC',
                                            segment_name=primary_purpose, estimator=estimator)

    sample_size = model_settings['SAMPLE_SIZE']
    if config.setting('disable_destination_sampling', False) or \
            (estimator and estimator.want_unsampled_alternatives):
        # FIXME interaction_sample will return unsampled complete alternatives with probs and pick_count
        logger.info(
            "Estimation mode for %s using unsampled alternatives short_circuit_choices" % (trace_label, ))
        sample_size = 0

    eval_locals = config.get_model_constants(model_settings).copy()

    # size_terms of destination zones are purpose-specific, and trips have various purposes
    # so the relevant size_term for each interaction_sample row
    # cannot be determined until after choosers are joined with alternatives
    # (unless we iterate over trip.purpose - which we could, though we are already iterating over trip_num)
    # so, instead, expressions determine row-specific size_term by a call to: size_terms.get(df.alt_dest, df.purpose)
    eval_locals['size_terms'] = size_term_matrix
    eval_locals.update(skims)

    log_alt_losers = config.setting('log_alt_losers', False)

    return interaction_sample(
        choosers=trips,
        alternatives=alternatives,
        sample_size=sample_size,
        alt_col_name=alt_dest_col_name,
        log_alt_losers=log_alt_losers,
        allow_zero_probs=True,
        spec=sample_spec,
        skims=skims,
        locals_d=eval_locals,
        chunk_size=chunk_size,
        chunk_tag=chunk_tag,
        trace_label=trace_label)
def _destination_sample(
        spec_segment_name, choosers, destination_size_terms, skims, estimator,
        model_settings, alt_dest_col_name, chunk_size, chunk_tag, trace_label):
    """
    Draw a weighted sample of destination alternatives for each tour.

    Returns the sampled alternatives (with prob and pick_count) annotated
    with each chooser's person_id for merges in subsequent steps.
    """
    segment_spec = simulate.spec_for_segment(model_settings, spec_id='SAMPLE_SPEC',
                                             segment_name=spec_segment_name,
                                             estimator=estimator)

    logger.info("running %s with %d tours", trace_label, len(choosers))

    sample_size = model_settings['SAMPLE_SIZE']
    if config.setting('disable_destination_sampling', False) or \
            (estimator and estimator.want_unsampled_alternatives):
        # FIXME interaction_sample will return unsampled complete alternatives with probs and pick_count
        logger.info(
            "Estimation mode for %s using unsampled alternatives short_circuit_choices" % (trace_label, ))
        sample_size = 0

    eval_locals = {'skims': skims}
    constants = config.get_model_constants(model_settings)
    if constants is not None:
        eval_locals.update(constants)

    log_alt_losers = config.setting('log_alt_losers', False)

    choices = interaction_sample(
        choosers,
        alternatives=destination_size_terms,
        sample_size=sample_size,
        alt_col_name=alt_dest_col_name,
        log_alt_losers=log_alt_losers,
        spec=segment_spec,
        skims=skims,
        locals_d=eval_locals,
        chunk_size=chunk_size,
        chunk_tag=chunk_tag,
        trace_label=trace_label)

    # remember person_id in chosen alts so we can merge with persons in subsequent steps
    # (broadcasts person_id onto all alternatives sharing the same tour_id index value)
    choices['person_id'] = choosers.person_id

    return choices
def run_od_sample(
        spec_segment_name, tours, model_settings, network_los,
        destination_size_terms, estimator, chunk_size, trace_label):
    """
    Sample origin-destination alternatives for tours, presampling at the
    TAZ level for multizone systems unless presampling is disabled.
    """
    model_spec = simulate.spec_for_segment(model_settings, spec_id='SAMPLE_SPEC',
                                           segment_name=spec_segment_name,
                                           estimator=estimator)

    choosers = tours

    # FIXME - MEMORY HACK - only include columns actually used in spec
    chooser_columns = model_settings['SIMULATE_CHOOSER_COLUMNS']
    choosers = choosers[chooser_columns]

    # interaction_sample requires that choosers.index.is_monotonic_increasing
    if not choosers.index.is_monotonic_increasing:
        logger.debug(
            f"run_destination_sample {trace_label} sorting choosers because not monotonic_increasing"
        )
        choosers = choosers.sort_index()

    # by default, enable presampling for multizone systems, unless they disable it in settings file
    pre_sample_taz = not (network_los.zone_system == los.ONE_ZONE)
    if pre_sample_taz and not config.setting('want_dest_choice_presampling', True):
        pre_sample_taz = False
        logger.info(f"Disabled destination zone presampling for {trace_label} "
                    f"because 'want_dest_choice_presampling' setting is False")

    if pre_sample_taz:
        logger.info("Running %s destination_presample with %d tours" % (trace_label, len(tours)))
        sampler = od_presample
    else:
        sampler = od_sample

    return sampler(spec_segment_name, choosers, model_settings, network_los,
                   destination_size_terms, estimator, chunk_size, trace_label)
def trip_destination_simulate(
        primary_purpose, trips, destination_sample, model_settings, want_logsums,
        size_term_matrix, skim_hotel, estimator, chunk_size, trace_hh_id, trace_label):
    """
    Chose destination from destination_sample (with od_logsum and dp_logsum columns added)

    Returns
    -------
    destinations : pandas.DataFrame
        indexed like trips, with the chosen destination in the 'choice' column
        (plus a 'logsum' column when want_logsums); trips whose only viable
        outcome was the zero-prob NO_DESTINATION sentinel are dropped
    """
    trace_label = tracing.extend_trace_label(trace_label, 'trip_dest_simulate')
    chunk_tag = 'trip_destination.simulate'

    spec = simulate.spec_for_segment(model_settings, spec_id='DESTINATION_SPEC',
                                     segment_name=primary_purpose, estimator=estimator)

    if estimator:
        estimator.write_choosers(trips)

    alt_dest_col_name = model_settings['ALT_DEST_COL_NAME']

    logger.info("Running trip_destination_simulate with %d trips", len(trips))

    skims = skim_hotel.sample_skims(presample=False)

    # evaluation context: model constants plus purpose-specific size terms and skims
    locals_dict = config.get_model_constants(model_settings).copy()
    locals_dict.update({'size_terms': size_term_matrix})
    locals_dict.update(skims)

    log_alt_losers = config.setting('log_alt_losers', False)
    destinations = interaction_sample_simulate(
        choosers=trips,
        alternatives=destination_sample,
        spec=spec,
        choice_column=alt_dest_col_name,
        log_alt_losers=log_alt_losers,
        want_logsums=want_logsums,
        allow_zero_probs=True, zero_prob_choice_val=NO_DESTINATION,
        skims=skims,
        locals_d=locals_dict,
        chunk_size=chunk_size, chunk_tag=chunk_tag,
        trace_label=trace_label,
        trace_choice_name='trip_dest',
        estimator=estimator)

    if not want_logsums:
        # for consistency, always return a dataframe with canonical column name
        assert isinstance(destinations, pd.Series)
        destinations = destinations.to_frame('choice')

    if estimator:
        # need to overwrite choices here before any failed choices are suppressed
        estimator.write_choices(destinations.choice)

        destinations.choice = estimator.get_survey_values(destinations.choice, 'trips', 'destination')
        estimator.write_override_choices(destinations.choice)

    # drop any failed zero_prob destinations
    if (destinations.choice == NO_DESTINATION).any():
        # BUGFIX: count failures on the choice column, not the whole DataFrame -
        # comparing the DataFrame to NO_DESTINATION yields a per-column Series
        # (and would also match the logsum column), not the number of failed trips
        logger.debug("dropping %s failed destinations", (destinations.choice == NO_DESTINATION).sum())
        destinations = destinations[destinations.choice != NO_DESTINATION]

    return destinations
def run_destination_simulate(
        spec_segment_name, tours, persons_merged, destination_sample, want_logsums,
        model_settings, network_los, destination_size_terms, estimator,
        chunk_size, trace_label):
    """
    run destination_simulate on tour_destination_sample
    annotated with mode_choice logsum to select a destination from sample alternatives
    """
    model_spec = simulate.spec_for_segment(model_settings, spec_id='SPEC',
                                           segment_name=spec_segment_name,
                                           estimator=estimator)

    # FIXME - MEMORY HACK - only include columns actually used in spec (omit them pre-merge)
    chooser_columns = model_settings['SIMULATE_CHOOSER_COLUMNS']
    persons_merged = persons_merged[[c for c in persons_merged.columns if c in chooser_columns]]
    tours = tours[[c for c in tours.columns if c in chooser_columns or c == 'person_id']]
    choosers = pd.merge(tours, persons_merged,
                        left_on='person_id', right_index=True, how='left')

    # interaction_sample requires that choosers.index.is_monotonic_increasing
    if not choosers.index.is_monotonic_increasing:
        logger.debug(
            f"run_destination_simulate {trace_label} sorting choosers because not monotonic_increasing"
        )
        choosers = choosers.sort_index()

    if estimator:
        estimator.write_choosers(choosers)

    alt_dest_col_name = model_settings['ALT_DEST_COL_NAME']
    origin_col_name = model_settings['CHOOSER_ORIG_COL_NAME']

    # alternatives are pre-sampled and annotated with logsums and pick_count
    # but we have to merge size_terms column into alt sample list
    destination_sample['size_term'] = \
        reindex(destination_size_terms.size_term, destination_sample[alt_dest_col_name])

    tracing.dump_df(DUMP, destination_sample, trace_label, 'alternatives')

    constants = config.get_model_constants(model_settings)

    logger.info("Running tour_destination_simulate with %d persons", len(choosers))

    # create wrapper with keys for this lookup - in this case there is a home_zone_id in the choosers
    # and a zone_id in the alternatives which get merged during interaction
    # the skims will be available under the name "skims" for any @ expressions
    skim_dict = network_los.get_default_skim_dict()
    skims = skim_dict.wrap(origin_col_name, alt_dest_col_name)

    locals_d = {
        'skims': skims,
    }
    if constants is not None:
        locals_d.update(constants)

    tracing.dump_df(DUMP, choosers, trace_label, 'choosers')

    choices = interaction_sample_simulate(
        choosers,
        destination_sample,
        spec=model_spec,
        choice_column=alt_dest_col_name,
        want_logsums=want_logsums,
        skims=skims,
        locals_d=locals_d,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='destination',
        estimator=estimator)

    if not want_logsums:
        # for consistency, always return a dataframe with canonical column name
        assert isinstance(choices, pd.Series)
        choices = choices.to_frame('choice')

    return choices
def run_destination_sample(
        spec_segment_name, tours, households_merged, model_settings,
        network_los, destination_size_terms, estimator, chunk_size, trace_label):
    """
    Sample destination alternatives for tours merged with their households,
    returning sampled alternatives annotated with each chooser's person_id.
    """
    segment_spec = simulate.spec_for_segment(model_settings, spec_id='SAMPLE_SPEC',
                                             segment_name=spec_segment_name,
                                             estimator=estimator)

    # choosers are tours - in a sense tours are choosing their destination
    choosers = pd.merge(tours, households_merged,
                        left_on='household_id', right_index=True, how='left')

    # FIXME - MEMORY HACK - only include columns actually used in spec
    chooser_columns = model_settings['SIMULATE_CHOOSER_COLUMNS']
    choosers = choosers[chooser_columns]

    alt_dest_col_name = model_settings["ALT_DEST_COL_NAME"]

    logger.info("running %s with %d tours", trace_label, len(choosers))

    sample_size = model_settings["SAMPLE_SIZE"]
    if estimator:
        # FIXME interaction_sample will return unsampled complete alternatives with probs and pick_count
        logger.info(
            "Estimation mode for %s using unsampled alternatives short_circuit_choices" % (trace_label, ))
        sample_size = 0

    # create wrapper with keys for this lookup - in this case there is a workplace_zone_id
    # in the choosers and a zone_id in the alternatives which get merged during interaction
    # the skims will be available under the name "skims" for any @ expressions
    origin_col_name = model_settings['CHOOSER_ORIG_COL_NAME']
    dest_column_name = destination_size_terms.index.name

    # (logit.interaction_dataset suffixes duplicate chooser column with '_chooser')
    if (origin_col_name == dest_column_name):
        origin_col_name = f'{origin_col_name}_chooser'

    skim_dict = network_los.get_default_skim_dict()
    skims = skim_dict.wrap(origin_col_name, dest_column_name)

    eval_locals = {'skims': skims}
    constants = config.get_model_constants(model_settings)
    if constants is not None:
        eval_locals.update(constants)

    choices = interaction_sample(
        choosers,
        alternatives=destination_size_terms,
        sample_size=sample_size,
        alt_col_name=alt_dest_col_name,
        spec=segment_spec,
        skims=skims,
        locals_d=eval_locals,
        chunk_size=chunk_size,
        trace_label=trace_label)

    # remember person_id in chosen alts so we can merge with persons in subsequent steps
    # (broadcasts person_id onto all alternatives sharing the same tour_id index value)
    choices['person_id'] = choosers.person_id

    return choices
def run_destination_simulate(
        spec_segment_name, tours, persons_merged, destination_sample, want_logsums,
        model_settings, skim_dict, destination_size_terms, estimator,
        chunk_size, trace_label):
    """
    run destination_simulate on tour_destination_sample
    annotated with mode_choice logsum to select a destination from sample alternatives
    """
    segment_spec = simulate.spec_for_segment(model_settings, spec_id='SPEC',
                                             segment_name=spec_segment_name,
                                             estimator=estimator)

    # merge persons into tours
    choosers = pd.merge(tours, persons_merged,
                        left_on='person_id', right_index=True, how='left')

    # FIXME - MEMORY HACK - only include columns actually used in spec
    chooser_columns = model_settings['SIMULATE_CHOOSER_COLUMNS']
    choosers = choosers[chooser_columns]

    if estimator:
        estimator.write_choosers(choosers)

    alt_dest_col_name = model_settings["ALT_DEST_COL_NAME"]
    origin_col_name = model_settings['CHOOSER_ORIG_COL_NAME']

    # alternatives are pre-sampled and annotated with logsums and pick_count
    # but we have to merge size_terms column into alt sample list
    destination_sample['size_term'] = \
        reindex(destination_size_terms.size_term, destination_sample[alt_dest_col_name])

    constants = config.get_model_constants(model_settings)

    logger.info("Running tour_destination_simulate with %d persons", len(choosers))

    # create wrapper with keys for this lookup - in this case there is a TAZ in the choosers
    # and a TAZ in the alternatives which get merged during interaction
    # the skims will be available under the name "skims" for any @ expressions
    skims = skim_dict.wrap(origin_col_name, alt_dest_col_name)

    eval_locals = {
        'skims': skims,
    }
    if constants is not None:
        eval_locals.update(constants)

    # NOTE(review): unlike the sibling run_destination_simulate variant, this one
    # returns the raw interaction_sample_simulate result without converting a
    # Series to a 'choice' DataFrame - preserved as-is
    return interaction_sample_simulate(
        choosers,
        destination_sample,
        spec=segment_spec,
        choice_column=alt_dest_col_name,
        want_logsums=want_logsums,
        skims=skims,
        locals_d=eval_locals,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='destination',
        estimator=estimator)
def run_location_simulate(
        segment_name, persons_merged, location_sample_df, network_los,
        dest_size_terms, want_logsums, estimator, model_settings,
        chunk_size, chunk_tag, trace_label):
    """
    run location model on location_sample annotated with mode_choice logsum
    to select a dest zone from sample alternatives

    Returns
    -------
    choices : pandas.DataFrame indexed by persons_merged_df.index
        choice : location choices (zone ids)
        logsum : float logsum of choice utilities across alternatives

    logsums optional & only returned if DEST_CHOICE_LOGSUM_COLUMN_NAME specified in model_settings
    """
    assert not persons_merged.empty

    # FIXME - MEMORY HACK - only include columns actually used in spec
    chooser_columns = model_settings['SIMULATE_CHOOSER_COLUMNS']
    choosers = persons_merged[chooser_columns]

    alt_dest_col_name = model_settings['ALT_DEST_COL_NAME']

    # alternatives are pre-sampled and annotated with logsums and pick_count
    # but we have to merge additional alt columns into alt sample list
    alternatives = pd.merge(
        location_sample_df, dest_size_terms,
        left_on=alt_dest_col_name, right_index=True, how="left")

    logger.info("Running %s with %d persons" % (trace_label, len(choosers)))

    # create wrapper with keys for this lookup - in this case there is a home_zone_id in the choosers
    # and a zone_id in the alternatives which get merged during interaction
    # the skims will be available under the name "skims" for any @ expressions
    skim_dict = network_los.get_default_skim_dict()
    skims = skim_dict.wrap('home_zone_id', alt_dest_col_name)

    eval_locals = {'skims': skims, 'segment_size': segment_name}
    constants = config.get_model_constants(model_settings)
    if constants is not None:
        eval_locals.update(constants)

    if estimator:
        # write choosers after annotation
        estimator.write_choosers(choosers)
        estimator.set_alt_id(alt_dest_col_name)
        estimator.write_interaction_sample_alternatives(alternatives)

    segment_spec = simulate.spec_for_segment(model_settings, spec_id='SPEC',
                                             segment_name=segment_name, estimator=estimator)

    log_alt_losers = config.setting('log_alt_losers', False)

    choices = interaction_sample_simulate(
        choosers,
        alternatives,
        spec=segment_spec,
        choice_column=alt_dest_col_name,
        log_alt_losers=log_alt_losers,
        want_logsums=want_logsums,
        skims=skims,
        locals_d=eval_locals,
        chunk_size=chunk_size,
        chunk_tag=chunk_tag,
        trace_label=trace_label,
        trace_choice_name=model_settings['DEST_CHOICE_COLUMN_NAME'],
        estimator=estimator)

    if not want_logsums:
        # for consistency, always return a dataframe with canonical column name
        assert isinstance(choices, pd.Series)
        choices = choices.to_frame('choice')

    assert isinstance(choices, pd.DataFrame)

    return choices
def run_location_sample(
        segment_name, persons_merged, network_los, dest_size_terms,
        estimator, model_settings, chunk_size, trace_label):
    """
    select a sample of alternative locations.

    Logsum calculations are expensive, so rather than scoring persons * all
    zones we draw <SAMPLE_SIZE> weighted choices per chooser; repeated draws
    of the same zone are collapsed into a pick_count column, e.g.

        person_id,  dest_zone_id, rand,            pick_count
        23750,      14,           0.565502716034,  4
        23750,      16,           0.711135838871,  6
        ...
        23751,      12,           0.408038878552,  1
        23751,      14,           0.972732479292,  2
    """
    assert not persons_merged.empty

    # FIXME - MEMORY HACK - only include columns actually used in spec
    chooser_columns = model_settings['SIMULATE_CHOOSER_COLUMNS']
    choosers = persons_merged[chooser_columns]

    alternatives = dest_size_terms

    alt_dest_col_name = model_settings['ALT_DEST_COL_NAME']

    logger.info("Running %s with %d persons" % (trace_label, len(choosers.index)))

    sample_size = model_settings["SAMPLE_SIZE"]
    if estimator:
        # FIXME interaction_sample will return unsampled complete alternatives with probs and pick_count
        logger.info("Estimation mode for %s using unsampled alternatives short_circuit_choices" % (trace_label,))
        sample_size = 0

    # create wrapper with keys for this lookup - in this case there is a home_zone_id in the choosers
    # and a zone_id in the alternatives which get merged during interaction
    # (logit.interaction_dataset suffixes duplicate chooser column with '_chooser')
    # the skims will be available under the name "skims" for any @ expressions
    skim_dict = network_los.get_default_skim_dict()
    skims = skim_dict.wrap('home_zone_id', 'zone_id')

    eval_locals = {
        'skims': skims,
        'segment_size': segment_name
    }
    eval_locals.update(config.get_model_constants(model_settings))

    sample_spec = simulate.spec_for_segment(model_settings, spec_id='SAMPLE_SPEC',
                                            segment_name=segment_name, estimator=estimator)

    return interaction_sample(
        choosers,
        alternatives,
        sample_size=sample_size,
        alt_col_name=alt_dest_col_name,
        spec=sample_spec,
        skims=skims,
        locals_d=eval_locals,
        chunk_size=chunk_size,
        trace_label=trace_label)
def run_od_simulate(
        spec_segment_name, tours, od_sample, want_logsums, model_settings,
        network_los, destination_size_terms, estimator, chunk_size, trace_label):
    """
    run simulate OD choices on tour_od_sample annotated with mode_choice
    logsum to select a tour OD from sample alternatives
    """
    model_spec = simulate.spec_for_segment(model_settings, spec_id='SPEC',
                                           segment_name=spec_segment_name,
                                           estimator=estimator)

    # merge persons into tours
    choosers = tours

    # FIXME - MEMORY HACK - only include columns actually used in spec
    chooser_columns = model_settings['SIMULATE_CHOOSER_COLUMNS']
    choosers = choosers[chooser_columns]

    # interaction_sample requires that choosers.index.is_monotonic_increasing
    if not choosers.index.is_monotonic_increasing:
        logger.debug(
            f"run_destination_simulate {trace_label} sorting choosers because not monotonic_increasing"
        )
        choosers = choosers.sort_index()

    if estimator:
        estimator.write_choosers(choosers)

    origin_col_name = model_settings['ORIG_COL_NAME']
    dest_col_name = model_settings['DEST_COL_NAME']
    alt_dest_col_name = model_settings['ALT_DEST_COL_NAME']
    origin_attr_cols = model_settings['ORIGIN_ATTR_COLS_TO_USE']

    alt_od_col_name = get_od_id_col(origin_col_name, dest_col_name)
    od_sample[alt_od_col_name] = create_od_id_col(od_sample, origin_col_name, dest_col_name)

    # alternatives are pre-sampled and annotated with logsums and pick_count
    # but we have to merge size_terms column into alt sample list
    od_sample['size_term'] = \
        reindex(destination_size_terms.size_term, od_sample[alt_dest_col_name])

    # also have to add origin attribute columns
    lu = inject.get_table('land_use').to_frame(columns=origin_attr_cols)
    od_sample = pd.merge(od_sample, lu,
                         left_on=origin_col_name, right_index=True, how='left')

    tracing.dump_df(DUMP, od_sample, trace_label, 'alternatives')

    constants = config.get_model_constants(model_settings)

    logger.info("Running tour_destination_simulate with %d persons", len(choosers))

    # create wrapper with keys for this lookup - in this case there is an origin ID
    # column and a destination ID columns in the alternatives table.
    # the skims will be available under the name "skims" for any @ expressions
    skim_dict = network_los.get_default_skim_dict()
    skims = skim_dict.wrap(origin_col_name, dest_col_name)

    locals_d = {
        'skims': skims,
    }
    if constants is not None:
        locals_d.update(constants)

    tracing.dump_df(DUMP, choosers, trace_label, 'choosers')

    choices = interaction_sample_simulate(
        choosers,
        od_sample,
        spec=model_spec,
        choice_column=alt_od_col_name,
        want_logsums=want_logsums,
        skims=skims,
        locals_d=locals_d,
        chunk_size=chunk_size,
        trace_label=trace_label,
        trace_choice_name='origin_destination',
        estimator=estimator)

    if not want_logsums:
        choices = choices.to_frame('choice')

    # split the combined od id back into separate origin/destination columns
    choices = _get_od_cols_from_od_id(choices, origin_col_name, dest_col_name)

    return choices
def _od_sample(
        spec_segment_name, choosers, network_los, destination_size_terms,
        origin_id_col, dest_id_col, skims, estimator, model_settings,
        alt_od_col_name, chunk_size, chunk_tag, trace_label):
    """
    Draw a weighted sample of combined origin-destination alternatives
    for each chooser from the cross-product of origins and destinations.
    """
    model_spec = simulate.spec_for_segment(model_settings, spec_id='SAMPLE_SPEC',
                                           segment_name=spec_segment_name,
                                           estimator=estimator)

    # default the combined OD id column name if the caller did not supply one
    if alt_od_col_name is None:
        alt_col_name = get_od_id_col(origin_id_col, dest_id_col)
    else:
        alt_col_name = alt_od_col_name

    logger.info("running %s with %d tours", trace_label, len(choosers))

    sample_size = model_settings['SAMPLE_SIZE']
    if config.setting('disable_destination_sampling', False) or \
            (estimator and estimator.want_unsampled_alternatives):
        # FIXME interaction_sample will return unsampled complete alternatives
        # with probs and pick_count
        logger.info(("Estimation mode for %s using unsampled alternatives "
                     "short_circuit_choices") % trace_label)
        sample_size = 0

    eval_locals = {'skims': skims}
    constants = config.get_model_constants(model_settings)
    if constants is not None:
        eval_locals.update(constants)

    origin_filter = model_settings.get('ORIG_FILTER', None)
    origin_attr_cols = model_settings['ORIGIN_ATTR_COLS_TO_USE']

    od_alts_df = _create_od_alts_from_dest_size_terms(
        destination_size_terms, spec_segment_name,
        od_id_col=alt_col_name,
        origin_id_col=origin_id_col,
        dest_id_col=dest_id_col,
        origin_filter=origin_filter,
        origin_attr_cols=origin_attr_cols)

    # multizone: skim origins are TAZs, so map the (maz) origin ids for lookup
    if skims.orig_key == ORIG_TAZ:
        od_alts_df[ORIG_TAZ] = map_maz_to_taz(od_alts_df[origin_id_col], network_los)
    elif skims.orig_key not in od_alts_df:
        logger.error("Alts df is missing origin skim key column.")

    return interaction_sample(
        choosers,
        alternatives=od_alts_df,
        sample_size=sample_size,
        alt_col_name=alt_col_name,
        spec=model_spec,
        skims=skims,
        locals_d=eval_locals,
        chunk_size=chunk_size,
        chunk_tag=chunk_tag,
        trace_label=trace_label)