def household_activity_choices(indiv_utils, interaction_coefficients, hhsize,
                               trace_hh_id=None, trace_label=None):
    """
    Calculate household utilities for each activity pattern alternative for households of hhsize
    The resulting activity pattern for each household will be coded as a string of activity codes.
    e.g. 'MNHH' for a 4 person household with activities Mandatory, NonMandatory, Home, Home

    Parameters
    ----------
    indiv_utils : pandas.DataFrame
        CDAP utilities for each individual, ignoring interactions
        ind_utils has index of _persons_index_ and a column for each alternative
        i.e. three columns 'M' (Mandatory), 'N' (NonMandatory), 'H' (Home)

    interaction_coefficients : pandas.DataFrame
        Rules and coefficients for generating interaction specs for different household sizes

    hhsize : int
        the size of household for which activity pattern should be calculated (1..MAX_HHSIZE)

    trace_hh_id : optional
        id of a household to trace; trace tables are written only when this is truthy

    trace_label : str, optional
        prefix used for the trace table names, also passed through to the helpers called here

    Returns
    -------
    choices : pandas.Series
        the chosen cdap activity pattern for each household represented as a string (e.g. 'MNH')
        with same index (_hh_index_) as utils.
        An empty float64 Series is returned when utils has no rows.
    """

    if hhsize == 1:
        # for 1 person households, there are no interactions to account for
        # and the household utils are the same as the individual utils
        choosers = None
        # extract the individual utilities for individuals from hhsize 1 households
        utils = indiv_utils.loc[indiv_utils[_hh_size_] == 1, [_hh_id_, 'M', 'N', 'H']]
        # index on household_id, not person_id
        set_hh_index(utils)
    else:
        choosers = hh_choosers(indiv_utils, hhsize=hhsize)

        spec = build_cdap_spec(interaction_coefficients, hhsize,
                               trace_spec=(trace_hh_id in choosers.index),
                               trace_label=trace_label)

        utils = simulate.eval_utilities(spec, choosers, trace_label=trace_label)

    if len(utils.index) == 0:
        # nothing to choose; explicit dtype keeps the result stable across pandas versions
        return pd.Series(dtype='float64')

    probs = logit.utils_to_probs(utils, trace_label=trace_label)

    # select an activity pattern alternative for each household based on probability
    # result is a series indexed on _hh_index_ with the (0 based) index of the column from probs
    idx_choices, rands = logit.make_choices(probs, trace_label=trace_label)

    # convert choice expressed as index into alternative name from util column label
    choices = pd.Series(utils.columns[idx_choices].values, index=utils.index)

    if trace_hh_id:

        # choosers only exist for multi-person households
        if hhsize > 1:
            tracing.trace_df(choosers, '%s.hhsize%d_choosers' % (trace_label, hhsize),
                             column_labels=['expression', 'person'])

        tracing.trace_df(utils, '%s.hhsize%d_utils' % (trace_label, hhsize),
                         column_labels=['expression', 'household'])
        tracing.trace_df(probs, '%s.hhsize%d_probs' % (trace_label, hhsize),
                         column_labels=['expression', 'household'])
        tracing.trace_df(choices, '%s.hhsize%d_activity_choices' % (trace_label, hhsize),
                         column_labels=['expression', 'household'])
        tracing.trace_df(rands, '%s.hhsize%d_rands' % (trace_label, hhsize),
                         columns=[None, 'rand'])

    return choices
def build_virtual_path(self, recipe, path_type, orig, dest, tod, demographic_segment,
                       want_choices, trace_label,
                       filter_targets=None, trace=False, override_choices=None):
    """
    Build transit virtual paths for the requested origin/destination pairs and return
    either logsums (optionally with a chosen path) or times, depending on the units
    reported by self.units_for_recipe(recipe).

    Parameters
    ----------
    recipe : str
        used to look up 'TVPB_SETTINGS.{recipe}...' settings and the units of the result
    path_type : str
        used to look up 'TVPB_SETTINGS.{recipe}.path_types.{path_type}' settings
    orig, dest : pandas.Series
        origins and destinations, one row per requested pair; their values populate the
        'omaz' and 'dmaz' columns and orig.index indexes the result
        (presumably MAZ ids - confirm against callers)
    tod : str or pandas.Series
        a scalar str applying to every row, or a per-row series aligned with orig
    demographic_segment : pandas.Series or None
        per-row series aligned with orig, or None
    want_choices : bool
        whether to also choose a path for each row; only allowed when units is 'utility'
    trace_label : str
    filter_targets : boolean mask, optional
        when given, orig, dest, non-scalar tod, demographic_segment and override_choices
        are all sliced by it before any work is done
    trace : bool
        tracing pass; requires filter_targets
    override_choices : pandas.Series, optional
        choices from a previous call; when filter_targets is given it must be supplied
        if and only if want_choices is set

    Returns
    -------
    results : pandas.DataFrame or pandas.Series
        units == 'utility': DataFrame indexed like orig with a 'logsum' column and, when
        want_choices, also the 'btap', 'atap', 'path_set' and 'path_num' of the chosen path.
        units == 'time': Series with one value per requested pair, 0.0 where path_df has
        no row for the pair.

    Raises
    ------
    AssertionError
        if the argument combinations described above are violated, or if units is
        neither 'utility' nor 'time'
    """

    trace_label = tracing.extend_trace_label(trace_label, 'build_virtual_path')

    # Tracing is implemented as a separate, second call that operates ONLY on filter_targets
    assert not (trace and filter_targets is None)
    if filter_targets is not None:
        assert filter_targets.any()

        # slice orig and dest
        orig = orig[filter_targets]
        dest = dest[filter_targets]
        assert len(orig) > 0
        assert len(dest) > 0

        # slice tod and demographic_segment if not scalar
        if not isinstance(tod, str):
            tod = tod[filter_targets]
        if demographic_segment is not None:
            demographic_segment = demographic_segment[filter_targets]
            assert len(demographic_segment) > 0

        # slice choices
        # (requires actual choices from the previous call lest rands change on second call)
        assert want_choices == (override_choices is not None)
        if want_choices:
            override_choices = override_choices[filter_targets]

    units = self.units_for_recipe(recipe)
    assert units == 'utility' or not want_choices, \
        "'want_choices' only supported if units is utility"

    access_mode = self.network_los.setting(
        f'TVPB_SETTINGS.{recipe}.path_types.{path_type}.access')
    egress_mode = self.network_los.setting(
        f'TVPB_SETTINGS.{recipe}.path_types.{path_type}.egress')
    path_types_settings = self.network_los.setting(
        f'TVPB_SETTINGS.{recipe}.path_types.{path_type}')
    attributes_as_columns = \
        self.network_los.setting(f'TVPB_SETTINGS.{recipe}.tap_tap_settings.attributes_as_columns', [])

    path_info = {
        'path_type': path_type,
        'access_mode': access_mode,
        'egress_mode': egress_mode
    }

    # maz od pairs requested
    with memo("#TVPB build_virtual_path maz_od_df"):
        maz_od_df = pd.DataFrame({
            'idx': orig.index.values,
            'omaz': orig.values,
            'dmaz': dest.values,
            'seq': range(len(orig))
        })
        chunk.log_df(trace_label, "maz_od_df", maz_od_df)
        self.trace_maz_tap(maz_od_df, access_mode, egress_mode)

    # for location choice, there will be multiple alt dest rows per chooser and duplicate orig.index values
    # but tod and demographic_segment should be the same for all chooser rows (unique orig index values)
    # knowing this allows us to eliminate redundant computations (e.g. utilities of maz_tap pairs)
    duplicated = orig.index.duplicated(keep='first')
    chooser_attributes = pd.DataFrame(index=orig.index[~duplicated])
    if not isinstance(tod, str):
        chooser_attributes['tod'] = tod.loc[~duplicated]
    elif 'tod' in attributes_as_columns:
        chooser_attributes['tod'] = tod
    else:
        # scalar tod that is not wanted as a column travels in path_info instead
        path_info['tod'] = tod
    if demographic_segment is not None:
        chooser_attributes['demographic_segment'] = demographic_segment.loc[~duplicated]

    with memo("#TVPB build_virtual_path access_df"):
        access_df = self.compute_maz_tap_utilities(
            recipe,
            maz_od_df,
            chooser_attributes,
            leg='access',
            mode=access_mode,
            trace_label=trace_label, trace=trace)
    chunk.log_df(trace_label, "access_df", access_df)

    with memo("#TVPB build_virtual_path egress_df"):
        egress_df = self.compute_maz_tap_utilities(
            recipe,
            maz_od_df,
            chooser_attributes,
            leg='egress',
            mode=egress_mode,
            trace_label=trace_label, trace=trace)
    chunk.log_df(trace_label, "egress_df", egress_df)

    # path_info for use by expressions (e.g. penalty for drive access if no parking at access tap)
    with memo("#TVPB build_virtual_path compute_tap_tap"):
        transit_df = self.compute_tap_tap(
            recipe,
            maz_od_df,
            access_df,
            egress_df,
            chooser_attributes,
            path_info=path_info,
            trace_label=trace_label, trace=trace)
    chunk.log_df(trace_label, "transit_df", transit_df)

    with memo("#TVPB build_virtual_path best_paths"):
        path_df = self.best_paths(
            recipe, path_type,
            maz_od_df, access_df, egress_df, transit_df,
            trace_label, trace)
    chunk.log_df(trace_label, "path_df", path_df)

    # now that we have created path_df, we are done with the dataframes for the separate legs
    del access_df
    chunk.log_df(trace_label, "access_df", None)
    del egress_df
    chunk.log_df(trace_label, "egress_df", None)
    del transit_df
    chunk.log_df(trace_label, "transit_df", None)

    if units == 'utility':

        # logsums
        with memo("#TVPB build_virtual_path logsums"):
            # one row per seq with utilities in columns
            # path_num 0-based to align with logit.make_choices 0-based choice indexes
            path_df['path_num'] = path_df.groupby('seq').cumcount()
            chunk.log_df(trace_label, "path_df", path_df)

            utilities_df = path_df[['seq', 'path_num', units]].set_index(['seq', 'path_num']).unstack()
            utilities_df.columns = utilities_df.columns.droplevel()  # for legibility

            # add rows missing because no access or egress availability
            utilities_df = pd.concat([pd.DataFrame(index=maz_od_df.seq), utilities_df], axis=1)
            utilities_df = utilities_df.fillna(UNAVAILABLE)  # set utilities for missing paths to UNAVAILABLE

            chunk.log_df(trace_label, "utilities_df", utilities_df)

            with warnings.catch_warnings(record=True) as w:
                # Cause all warnings to always be triggered.
                # most likely "divide by zero encountered in log" caused by all transit sets non-viable
                warnings.simplefilter("always")

                paths_nest_nesting_coefficient = path_types_settings.get('paths_nest_nesting_coefficient', 1)
                exp_utilities = np.exp(utilities_df.values / paths_nest_nesting_coefficient)
                # floor the logsum at UNAVAILABLE
                logsums = np.maximum(np.log(np.nansum(exp_utilities, axis=1)), UNAVAILABLE)

                if len(w) > 0:
                    # re-emit the captured warnings through the logger, tagged with trace_label
                    for wrn in w:
                        logger.warning(
                            f"{trace_label} - {type(wrn).__name__} ({wrn.message})")

                    # debugging switch: set True to dump the rows whose exponentiated
                    # utilities sum to zero and then abort the run
                    DUMP = False
                    if DUMP:
                        zero_utilities_df = utilities_df[np.nansum(np.exp(utilities_df.values), axis=1) == 0]
                        zero_utilities_df.to_csv(config.output_file_path('warning_utilities_df.csv'), index=True)
                        # explicit abort (previously a bare undefined name raising NameError)
                        raise RuntimeError(
                            f"{trace_label} - aborting after dumping warning_utilities_df.csv")

        if want_choices:

            # orig index to identify appropriate random number channel to use making choices
            utilities_df.index = orig.index

            with memo("#TVPB build_virtual_path make_choices"):

                probs = logit.utils_to_probs(utilities_df, allow_zero_probs=True, trace_label=trace_label)
                chunk.log_df(trace_label, "probs", probs)

                if trace:
                    # reuse the choices made by the previous (untraced) call
                    choices = override_choices

                    utilities_df['choices'] = choices
                    self.trace_df(utilities_df, trace_label, 'utilities_df')

                    probs['choices'] = choices
                    self.trace_df(probs, trace_label, 'probs')
                else:

                    choices, rands = logit.make_choices(probs, allow_bad_probs=True, trace_label=trace_label)

                    chunk.log_df(trace_label, "rands", rands)
                    del rands
                    chunk.log_df(trace_label, "rands", None)

                del probs
                chunk.log_df(trace_label, "probs", None)

            # we need to get path_set, btap, atap from path_df row with same seq and path_num
            # drop seq join column, but keep path_num of choice to override_choices when tracing
            columns_to_cache = ['btap', 'atap', 'path_set', 'path_num']
            logsum_df = \
                pd.merge(pd.DataFrame({'seq': range(len(orig)), 'path_num': choices.values}),
                         path_df[['seq'] + columns_to_cache],
                         on=['seq', 'path_num'], how='left')\
                .drop(columns=['seq'])\
                .set_index(orig.index)

            logsum_df['logsum'] = logsums

        else:

            assert len(logsums) == len(orig)
            logsum_df = pd.DataFrame({'logsum': logsums}, index=orig.index)

        chunk.log_df(trace_label, "logsum_df", logsum_df)

        del utilities_df
        chunk.log_df(trace_label, "utilities_df", None)

        if trace:
            self.trace_df(logsum_df, trace_label, 'logsum_df')

        chunk.log_df(trace_label, "logsum_df", logsum_df)
        results = logsum_df

    else:
        assert units == 'time'

        # return a series
        results = pd.Series(path_df[units].values, index=path_df['idx'])

        # zero-fill rows for O-D pairs where no best path exists because there was no tap-tap transit availability
        results = reindex(results, maz_od_df.idx).fillna(0.0)

        chunk.log_df(trace_label, "results", results)

    assert len(results) == len(orig)

    del path_df
    chunk.log_df(trace_label, "path_df", None)

    # diagnostic
    # maz_od_df['DIST'] = self.network_los.get_default_skim_dict().get('DIST').get(maz_od_df.omaz, maz_od_df.dmaz)
    # maz_od_df[units] = results.logsum if units == 'utility' else results.values
    # print(f"maz_od_df\n{maz_od_df}")

    return results
def household_activity_choices(indiv_utils, interaction_coefficients, hhsize,
                               trace_hh_id=None, trace_label=None):
    """
    Calculate household utilities for each activity pattern alternative for households of hhsize
    The resulting activity pattern for each household will be coded as a string of activity codes.
    e.g. 'MNHH' for a 4 person household with activities Mandatory, NonMandatory, Home, Home

    Parameters
    ----------
    indiv_utils : pandas.DataFrame
        CDAP utilities for each individual, ignoring interactions
        ind_utils has index of _persons_index_ and a column for each alternative
        i.e. three columns 'M' (Mandatory), 'N' (NonMandatory), 'H' (Home)

    interaction_coefficients : pandas.DataFrame
        Rules and coefficients for generating interaction specs for different household sizes

    hhsize : int
        the size of household for which activity pattern should be calculated (1..MAX_HHSIZE)

    trace_hh_id : optional
        id of a household to trace; trace tables are written only when this is truthy

    trace_label : str, optional
        prefix used for the trace table names, also passed through to the helpers called here

    Returns
    -------
    choices : pandas.Series
        the chosen cdap activity pattern for each household represented as a string (e.g. 'MNH')
        with same index (_hh_index_) as utils.
        An empty float64 Series is returned when utils has no rows.
    """

    if hhsize == 1:
        # for 1 person households, there are no interactions to account for
        # and the household utils are the same as the individual utils
        choosers = None
        # extract the individual utilities for individuals from hhsize 1 households
        utils = indiv_utils.loc[indiv_utils[_hh_size_] == 1, [_hh_id_, 'M', 'N', 'H']]
        # index on household_id, not person_id
        set_hh_index(utils)
    else:
        choosers = hh_choosers(indiv_utils, hhsize=hhsize)

        spec = build_cdap_spec(interaction_coefficients, hhsize,
                               trace_spec=(trace_hh_id in choosers.index),
                               trace_label=trace_label)

        utils = simulate.eval_utilities(spec, choosers, trace_label=trace_label)

    if len(utils.index) == 0:
        # Give the empty result an explicit dtype: a bare pd.Series() warns on
        # pandas 1.x and changes its default dtype to object on pandas 2.x.
        return pd.Series(dtype='float64')

    probs = logit.utils_to_probs(utils, trace_label=trace_label)

    # select an activity pattern alternative for each household based on probability
    # result is a series indexed on _hh_index_ with the (0 based) index of the column from probs
    idx_choices, rands = logit.make_choices(probs, trace_label=trace_label)

    # convert choice expressed as index into alternative name from util column label
    choices = pd.Series(utils.columns[idx_choices].values, index=utils.index)

    if trace_hh_id:

        # choosers only exist for multi-person households
        if hhsize > 1:
            tracing.trace_df(choosers, '%s.hhsize%d_choosers' % (trace_label, hhsize),
                             column_labels=['expression', 'person'])

        tracing.trace_df(utils, '%s.hhsize%d_utils' % (trace_label, hhsize),
                         column_labels=['expression', 'household'])
        tracing.trace_df(probs, '%s.hhsize%d_probs' % (trace_label, hhsize),
                         column_labels=['expression', 'household'])
        tracing.trace_df(choices, '%s.hhsize%d_activity_choices' % (trace_label, hhsize),
                         column_labels=['expression', 'household'])
        tracing.trace_df(rands, '%s.hhsize%d_rands' % (trace_label, hhsize),
                         columns=[None, 'rand'])

    return choices
def choose_tour_leg_pattern(trip_segment, patterns, spec, trace_label='trace_label'):
    """
    Choose a pattern for each tour leg.

    Alternatives generated from trip_segment are joined to their choosers, scored with
    spec, merged with patterns and summed to one 'utility' per
    (TOUR_ID, OUTBOUND, PATTERN_ID). The per-leg utilities are padded to a rectangular
    array, converted to probabilities and one alternative is drawn per tour leg.

    Parameters
    ----------
    trip_segment : pandas.DataFrame
        choosers; the generated alternatives must share its index name
    patterns : pandas.DataFrame
        candidate patterns, merged on TRIP_ID and STOP_TIME_DURATION
    spec : pandas.DataFrame
        utility spec; must have exactly one column
    trace_label : str

    Returns
    -------
    choices : pandas.Series
        PATTERN_ID of the chosen alternative, indexed by TOUR_LEG_ID

    Raises
    ------
    RuntimeError
        if spec has more than one column
    """

    def _label(suffix):
        # trace table name for this step
        return tracing.extend_trace_label(trace_label, suffix)

    alts = generate_alternatives(trip_segment, STOP_TIME_DURATION).sort_index()
    tracing_on = tracing.has_trace_targets(trip_segment)

    if tracing_on:
        tracing.trace_df(trip_segment, _label('choosers'))
        tracing.trace_df(alts, _label('alternatives'), transpose=False)

    if len(spec.columns) > 1:
        raise RuntimeError('spec must have only one column')

    # - join choosers and alts
    # vanilla interaction_simulate would cross join choosers and alternatives;
    # here the alternatives are already sparsely repeated per chooser and carry the
    # chooser index (with duplicate values), so a left join onto the choosers suffices
    assert alts.index.name == trip_segment.index.name
    joined = alts.join(trip_segment, how='left', rsuffix='_chooser')
    chunk.log_df(trace_label, 'interaction_df', joined)

    # default to no trace rows, override when there is something to trace
    trace_rows = trace_ids = None
    if tracing_on:
        trace_rows, trace_ids = tracing.interaction_trace_rows(joined, trip_segment)
        tracing.trace_df(joined, _label('interaction_df'), transpose=False)

    utils, eval_results = interaction_simulate.eval_interaction_utilities(
        spec, joined, None, trace_label, trace_rows, estimator=None)

    utils = pd.concat([joined[STOP_TIME_DURATION], utils], axis=1)
    chunk.log_df(trace_label, 'interaction_utilities', utils)

    utils = pd.merge(
        utils.reset_index(),
        patterns[patterns[TRIP_ID].isin(utils.index)],
        on=[TRIP_ID, STOP_TIME_DURATION],
        how='left',
    )

    if tracing_on:
        tracing.trace_interaction_eval_results(eval_results, trace_ids, _label('eval'))
        tracing.trace_df(utils, _label('interaction_utilities'), transpose=False)

    del joined
    chunk.log_df(trace_label, 'interaction_df', None)

    # one summed utility per (tour, direction, pattern)
    utils = utils.groupby([TOUR_ID, OUTBOUND, PATTERN_ID], as_index=False)[['utility']].sum()
    utils[TOUR_LEG_ID] = utils.apply(generate_tour_leg_id, axis=1)

    tour_choosers = utils.set_index(TOUR_LEG_ID)
    utils = tour_choosers[['utility']].copy()

    # The utilities are ragged: each chooser has its own number of alternatives.
    # To get one row per chooser and one column per alternative, every chooser's run
    # is padded at its END with dummy utilities so low that they are never chosen.

    # number of alternatives per chooser
    counts = utils.groupby(utils.index).size().values
    chunk.log_df(trace_label, 'sample_counts', counts)

    # widest alternative set of any chooser
    max_sample_count = counts.max()

    # offsets of the end and start of each chooser's run in the ragged utilities
    last_row_offsets = counts.cumsum()
    first_row_offsets = np.insert(last_row_offsets[:-1], 0, 0)

    # each chooser's end offset, repeated once per dummy utility it needs
    pad_positions = np.repeat(last_row_offsets, max_sample_count - counts)

    del counts
    chunk.log_df(trace_label, 'sample_counts', None)

    # pad each alternative set to the same size
    padded = np.insert(utils['utility'].values, pad_positions, -999)
    del pad_positions

    del utils
    chunk.log_df(trace_label, 'interaction_utilities', None)

    # one row per chooser, one column per alternative
    padded = padded.reshape(-1, max_sample_count)
    chunk.log_df(trace_label, 'padded_utilities', padded)

    utilities_df = pd.DataFrame(padded, index=tour_choosers.index.unique())
    chunk.log_df(trace_label, 'utilities_df', utilities_df)

    del padded
    chunk.log_df(trace_label, 'padded_utilities', None)

    if tracing_on:
        tracing.trace_df(utilities_df, _label('utilities'),
                         column_labels=['alternative', 'utility'])

    # probs has the same shape as utilities_df
    probs = logit.utils_to_probs(
        utilities_df, trace_label=trace_label, trace_choosers=trip_segment)
    chunk.log_df(trace_label, 'probs', probs)

    del utilities_df
    chunk.log_df(trace_label, 'utilities_df', None)

    if tracing_on:
        tracing.trace_df(probs, _label('probs'),
                         column_labels=['alternative', 'probability'])

    # positions holds, per chooser, the column offset of the chosen alternative in probs
    positions, rands = logit.make_choices(
        probs, trace_label=trace_label, trace_choosers=trip_segment)

    chunk.log_df(trace_label, 'positions', positions)
    chunk.log_df(trace_label, 'rands', rands)

    del probs
    chunk.log_df(trace_label, 'probs', None)

    # shouldn't have chosen any of the dummy pad utilities
    assert positions.max() < max_sample_count

    # translate each column offset into a row of tour_choosers by adding the offset
    # of the first row of that chooser's run, then pick out the PATTERN_ID there
    choices = tour_choosers[PATTERN_ID].take(positions + first_row_offsets)
    chunk.log_df(trace_label, 'choices', choices)

    if tracing_on:
        tracing.trace_df(choices, _label('choices'), columns=[None, PATTERN_ID])
        tracing.trace_df(rands, _label('rands'), columns=[None, 'rand'])

    return choices