def eval_nl_logsums(choosers, spec, nest_spec, locals_d, trace_label=None):
    """
    Compute nested-logit logsums for choosers without making choices.

    Runs the same utility pipeline as eval_nl (evaluate spec expressions,
    compute leaf utilities, compute nested exponentiated utilities) and
    returns the log of the root nest's exponentiated utility.

    Parameters
    ----------
    choosers : pandas.DataFrame
    spec : pandas.DataFrame
        Its index holds the expressions handed to eval_variables.
    nest_spec
        Nesting structure passed through to compute_nested_exp_utilities.
    locals_d : dict
        Evaluation environment passed to eval_variables.
    trace_label : str, optional
        Extended with 'nl_logsums' and used as the prefix for trace output.

    Returns
    -------
    logsums : pandas.Series
        Index will be that of `choosers`, values will be the log of the
        `root` entry of the nested exponentiated utilities.
    """
    trace_label = tracing.extend_trace_label(trace_label, 'nl_logsums')
    variability_check_enabled = tracing.check_for_variability()

    tick = tracing.print_elapsed_time()

    # columns of the evaluated expressions line up with the spec index
    expr_values = eval_variables(spec.index, choosers, locals_d)
    tick = tracing.print_elapsed_time("eval_variables", tick, debug=True)

    if variability_check_enabled:
        _check_for_variability(expr_values, trace_label)
        tick = tracing.print_elapsed_time("_check_for_variability",
                                          tick, debug=True)

    # utilities of the leaf alternatives
    leaf_utilities = compute_utilities(expr_values, spec)
    tick = tracing.print_elapsed_time("expression_values.dot",
                                      tick, debug=True)

    # exponentiated utilities for leaves and nests alike
    exp_utilities = compute_nested_exp_utilities(leaf_utilities, nest_spec)
    tick = tracing.print_elapsed_time("compute_nested_exp_utilities",
                                      tick, debug=True)

    root_log = np.log(exp_utilities.root)
    logsums = pd.Series(root_log, index=choosers.index)
    tick = tracing.print_elapsed_time("logsums", tick, debug=True)

    if not trace_label:
        return logsums

    # attach the logsum so it shows up next to the utilities in the trace
    exp_utilities['logsum'] = logsums

    tracing.trace_df(choosers, '%s.choosers' % trace_label)
    tracing.trace_df(leaf_utilities,
                     '%s.raw_utilities' % trace_label,
                     column_labels=['alternative', 'utility'])
    tracing.trace_df(exp_utilities,
                     '%s.nested_exp_utilities' % trace_label,
                     column_labels=['alternative', 'utility'])
    tracing.trace_df(logsums,
                     '%s.logsums' % trace_label,
                     column_labels=['alternative', 'logsum'])

    return logsums
def asim_households(asim_store, households_sample_size, trace_hh_id):
    """
    Load the households table (optionally sampled) and register it.

    Parameters
    ----------
    asim_store
        Store object indexed by table name; "households" is read from it.
    households_sample_size : int
        Number of households to sample; the full table is used when this
        is not positive or not smaller than the table length.
    trace_hh_id
        Household id being traced, or a falsy value when not tracing.

    Returns
    -------
    pandas.DataFrame
        The households frame that was registered with orca.
    """
    all_households = asim_store["households"]

    if trace_hh_id and households_sample_size == 1:
        # tracing a single household: keep just that one
        # (the result is empty when the id is not in the store)
        households = tracing.slice_ids(all_households, trace_hh_id)
    elif not (households_sample_size > 0 and
              len(all_households.index) > households_sample_size):
        # no sampling requested, or the store is not larger than the sample
        households = all_households
    else:
        households = asim_simulate.random_rows(all_households,
                                               households_sample_size)

        if trace_hh_id:
            missed_by_sample = trace_hh_id not in households.index
            if missed_by_sample and trace_hh_id in all_households.index:
                # swap the first sampled household for the traced one
                print("replacing household %s with %s in household sample" %
                      (households.index[0], trace_hh_id))
                traced = tracing.slice_ids(all_households, trace_hh_id)
                households = pd.concat([traced, households[1:]])

    print("loaded households %s" % (households.shape,))

    # register the concrete dataframe under the table name
    orca.add_table('asim_households', households)

    rng = asim_utils.get_rn_generator()
    rng.add_channel(households, 'asim_households')

    if trace_hh_id:
        tracing.register_traceable_table('asim_households', households)
        tracing.trace_df(households, "asim_households", warn_if_empty=True)

    return households
def asim_persons(asim_store, households_sample_size, asim_households,
                 trace_hh_id):
    """
    Load the persons table, restricted to the sampled households, and
    register it.

    Parameters
    ----------
    asim_store
        Store object indexed by table name; "persons" is read from it.
    households_sample_size : int
        When positive, persons are filtered to households present in
        `asim_households`.
    asim_households
        Households table whose index supplies the household ids to keep.
    trace_hh_id
        Household id being traced, or a falsy value when not tracing.

    Returns
    -------
    pandas.DataFrame
        The persons frame that was registered with orca.
    """
    persons = asim_store["persons"]

    if households_sample_size > 0:
        # retain every person belonging to a household in the sample
        in_sample = persons.household_id.isin(asim_households.index)
        persons = persons[in_sample]

    print("loaded asim asim_persons %s" % (persons.shape,))

    # register the concrete dataframe under the table name
    orca.add_table('asim_persons', persons)

    rng = asim_utils.get_rn_generator()
    rng.add_channel(persons, 'asim_persons')

    if trace_hh_id:
        tracing.register_traceable_table('asim_persons', persons)
        tracing.trace_df(persons, "asim_persons", warn_if_empty=True)

    return persons
def eval_mnl_logsums(choosers, spec, locals_d, trace_label=None):
    """
    like eval_mnl except return logsums instead of making choices

    Parameters
    ----------
    choosers : pandas.DataFrame
    spec : pandas.DataFrame
        Its index holds the expressions handed to eval_variables.
    locals_d : dict
        Evaluation environment passed to eval_variables.
    trace_label : str, optional
        Extended with 'mnl' and used as the prefix for trace output.

    Returns
    -------
    logsums : pandas.Series
        Index will be that of `choosers`, values will be
        logsum across spec column values
    """

    trace_label = tracing.extend_trace_label(trace_label, 'mnl')
    check_for_variability = tracing.check_for_variability()

    print("running eval_mnl_logsums")

    expression_values = eval_variables(spec.index, choosers, locals_d)

    if check_for_variability:
        _check_for_variability(expression_values, trace_label)

    # utility values
    utilities = compute_utilities(expression_values, spec)

    # logsum is log of exponentiated utilities summed across
    # columns of each chooser row
    # (.values replaces DataFrame.as_matrix(), which was removed in pandas 1.0)
    utils_arr = utilities.values.astype('float')
    logsums = np.log(np.exp(utils_arr).sum(axis=1))
    logsums = pd.Series(logsums, index=choosers.index)

    if trace_label:
        # add logsum to utilities for tracing
        utilities['logsum'] = logsums

        tracing.trace_df(choosers, '%s.choosers' % trace_label)
        tracing.trace_df(utilities, '%s.utilities' % trace_label,
                         column_labels=['alternative', 'utility'])
        tracing.trace_df(logsums, '%s.logsums' % trace_label,
                         column_labels=['alternative', 'logsum'])
        tracing.trace_df(expression_values,
                         '%s.expression_values' % trace_label,
                         column_labels=['expression', None])

    return logsums
def compute_logsums(choosers, logsum_spec, logsum_settings,
                    skim_dict, skim_stack, alt_col_name,
                    chunk_size, trace_hh_id, trace_label):
    """
    Compute logsums for choosers via simple_simulate_logsums.

    Parameters
    ----------
    choosers : pandas.DataFrame
    logsum_spec
        Spec forwarded to simple_simulate_logsums.
    logsum_settings
        Settings from which the nest spec and model constants are read.
    skim_dict
        Wrapped on ('TAZ', alt_col_name) and exposed as "od_skims".
    skim_stack
        Wrapped twice (out_period and in_period) and exposed as
        "odt_skims" and "dot_skims".
    alt_col_name : str
        Name of the alternative column used as a skim key.
    chunk_size
        Forwarded to simple_simulate_logsums.
    trace_hh_id
        When truthy, the spec is written to the trace output.
    trace_label : str
        Extended with 'compute_logsums'.

    Returns
    -------
    logsums: pandas series
        computed logsums with same index as choosers
    """
    trace_label = tracing.extend_trace_label(trace_label, 'compute_logsums')

    nest_spec = get_logit_model_settings(logsum_settings)
    constants = get_model_constants(logsum_settings)

    print("Running compute_logsums with %d choosers" % len(choosers.index))

    if trace_hh_id:
        spec_label = tracing.extend_trace_label(trace_label, 'spec')
        tracing.trace_df(logsum_spec, spec_label,
                         slicer='NONE', transpose=False)

    # skim wrappers: outbound by period, inbound by period, and plain o-d
    outbound = skim_stack.wrap(left_key='TAZ', right_key=alt_col_name,
                               skim_key="out_period")
    inbound = skim_stack.wrap(left_key=alt_col_name, right_key='TAZ',
                              skim_key="in_period")
    od_pair = skim_dict.wrap('TAZ', alt_col_name)

    locals_d = {
        "odt_skims": outbound,
        "dot_skims": inbound,
        "od_skims": od_pair,
    }
    if constants is not None:
        locals_d.update(constants)

    return asim_simulate.simple_simulate_logsums(
        choosers,
        logsum_spec,
        nest_spec,
        skims=[outbound, inbound, od_pair],
        locals_d=locals_d,
        chunk_size=chunk_size,
        trace_label=trace_label)
def _interaction_simulate(choosers, alternatives, spec,
                          skims=None, locals_d=None, sample_size=None,
                          trace_label=None, trace_choice_name=None):
    """
    Run a MNL simulation in the situation in which alternatives must
    be merged with choosers because there are interaction terms or
    because alternatives are being sampled.

    Parameters are same as for public function interaction_simulate

    Notes
    -----
    Intermediate objects built along the way:

    spec : dataframe
        one row per spec expression and one col with utility coefficient

    interaction_df : dataframe
        cross join (cartesian product) of choosers with alternatives
        combines columns of choosers and alternatives
        len(df) == len(choosers) * sample_size
        index values (non-unique) are index values from alternatives df

    interaction_utilities : dataframe
        one utility column and one row per row of interaction_df,
        sharing interaction_df's (non-unique) index

    utilities : dataframe
        interaction_utilities reshaped to one row per chooser and
        one column per (sampled) alternative

    probs : dataframe
        utilities exponentiated and converted to probabilities,
        same shape as utilities

    positions : series
        for each chooser, the integer column offset in probs of the
        chosen alternative

    When `skims` is truthy the alternatives index is copied into a column
    of `alternatives` in place, so the caller's dataframe gains that column.

    Returns
    -------
    ret : pandas.Series
        A series where index should match the index of the choosers DataFrame
        and values will match the index of the alternatives DataFrame -
        choices are simulated in the standard Monte Carlo fashion

    Raises
    ------
    RuntimeError
        If `spec` has more than one column.
    """

    trace_label = tracing.extend_trace_label(trace_label,
                                             'interaction_simulate')
    have_trace_targets = trace_label and tracing.has_trace_targets(choosers)

    if have_trace_targets:
        tracing.trace_df(choosers,
                         tracing.extend_trace_label(trace_label, 'choosers'))
        tracing.trace_df(alternatives,
                         tracing.extend_trace_label(trace_label,
                                                    'alternatives'),
                         slicer='NONE', transpose=False)

    if len(spec.columns) > 1:
        raise RuntimeError('spec must have only one column')

    sample_size = sample_size or len(alternatives)

    if sample_size > len(alternatives):
        # Logger.warn is a deprecated alias; use warning with lazy %-args
        logger.warning("clipping sample size %s to len(alternatives) %s",
                       sample_size, len(alternatives))
        sample_size = min(sample_size, len(alternatives))

    # if using skims, copy index into the dataframe, so it will be
    # available as the "destination" for the skims dereference below
    if skims:
        alternatives[alternatives.index.name] = alternatives.index

    # cross join choosers and alternatives (cartesian product)
    # for every chooser, there will be a row for each alternative
    # index values (non-unique) are from alternatives df
    interaction_df = interaction_dataset(choosers, alternatives, sample_size)

    if skims:
        add_skims(interaction_df, skims)

    # evaluate expressions from the spec multiply by coefficients and sum
    # interaction_utilities is a df with one utility column and
    # one row per row in interaction_df
    if have_trace_targets:
        trace_rows, trace_ids = tracing.interaction_trace_rows(
            interaction_df, choosers)

        tracing.trace_df(interaction_df[trace_rows],
                         tracing.extend_trace_label(trace_label,
                                                    'interaction_df'),
                         slicer='NONE', transpose=False)
    else:
        trace_rows = trace_ids = None

    interaction_utilities, trace_eval_results = eval_interaction_utilities(
        spec, interaction_df, locals_d, trace_label, trace_rows)

    if have_trace_targets:
        tracing.trace_interaction_eval_results(
            trace_eval_results, trace_ids,
            tracing.extend_trace_label(trace_label, 'eval'))

        tracing.trace_df(interaction_utilities[trace_rows],
                         tracing.extend_trace_label(trace_label,
                                                    'interaction_utilities'),
                         slicer='NONE', transpose=False)

    # reshape utilities (one utility column and one row per interaction row)
    # to a dataframe with one row per chooser and one column per alternative
    # (.values replaces DataFrame.as_matrix(), which was removed in pandas 1.0)
    utilities = pd.DataFrame(
        interaction_utilities.values.reshape(len(choosers), sample_size),
        index=choosers.index)

    if have_trace_targets:
        tracing.trace_df(utilities,
                         tracing.extend_trace_label(trace_label, 'utilities'),
                         column_labels=['alternative', 'utility'])

    # convert to probabilities (utilities exponentiated and normalized)
    # probs is same shape as utilities
    probs = utils_to_probs(utilities,
                           trace_label=trace_label, trace_choosers=choosers)

    if have_trace_targets:
        tracing.trace_df(probs,
                         tracing.extend_trace_label(trace_label, 'probs'),
                         column_labels=['alternative', 'probability'])

    # make choices
    # positions is series with the chosen alternative represented as a
    # column index in probs, i.e. an offset into the chooser's sample
    positions = make_choices(probs,
                             trace_label=trace_label, trace_choosers=choosers)

    # need to get from an integer offset into the alternative sample to the
    # alternative index: chooser i's alternatives occupy rows
    # [i * sample_size, (i + 1) * sample_size) of interaction_utilities,
    # so offsets is the row of the first alternative of each chooser
    offsets = np.arange(len(positions)) * sample_size

    # one element per chooser row, in same order as choosers
    choices = interaction_utilities.index.take(positions + offsets)

    # create a series with index from choosers and the index of the
    # chosen alternative
    choices = pd.Series(choices, index=choosers.index)

    if have_trace_targets:
        tracing.trace_df(choices,
                         tracing.extend_trace_label(trace_label, 'choices'),
                         columns=[None, trace_choice_name])

    return choices
def eval_nl(choosers, spec, nest_spec, locals_d=None,
            trace_label=None, trace_choice_name=None):
    """
    Simulate nested-logit choices when the spec involves no
    alternative-specific data (no interaction terms, no sampling of
    alternatives).

    Parameters
    ----------
    choosers : pandas.DataFrame
    spec : pandas.DataFrame
        Variable expressions in the index, one coefficient column per
        alternative.
    nest_spec:
        dictionary specifying nesting structure and nesting coefficients
        (from the model spec yaml file)
    locals_d : Dict
        Local variables forming the environment for evaluating
        expressions that begin with @
    trace_label: str
        Label for trace log entries and dump file names when household
        tracing is enabled. No tracing occurs if label is empty or None.
    trace_choice_name: str
        Column label used in the trace csv dump of choices

    Returns
    -------
    choices : pandas.Series
        Index will be that of `choosers`, values will match the columns
        of `spec`.
    """
    trace_label = tracing.extend_trace_label(trace_label, 'nl')
    variability_check_enabled = tracing.check_for_variability()

    # one column per spec expression, named after the spec index values
    design = eval_variables(spec.index, choosers, locals_d)

    if variability_check_enabled:
        _check_for_variability(design, trace_label)

    # leaf utilities: matrix product of evaluated expressions with the
    # per-alternative coefficients (relies on design columns matching
    # the spec index), one row per chooser and one column per alternative
    leaf_utilities = design.dot(spec)

    # exponentiated utilities of leaves and nests
    exp_utilities = compute_nested_exp_utilities(leaf_utilities, nest_spec)

    # probability of each node relative to its siblings in the same nest
    sibling_probs = compute_nested_probabilities(exp_utilities, nest_spec,
                                                 trace_label=trace_label)

    # flattened leaf probabilities
    leaf_probs = compute_base_probabilities(sibling_probs, nest_spec)

    # rows whose probabilities stray from summing to one are reported here
    # so the message is clear; make_choices raises if probs don't sum to 1
    BAD_PROB_THRESHOLD = 0.001
    row_count = len(leaf_probs.index)
    deviation = leaf_probs.sum(axis=1).sub(np.ones(row_count)).abs()
    no_choices = deviation > BAD_PROB_THRESHOLD * np.ones(row_count)

    if no_choices.any():
        bad_label = tracing.extend_trace_label(trace_label, 'eval_nl')
        report_bad_choices(no_choices, leaf_probs, bad_label,
                           tag='bad_probs',
                           msg="base_probabilities all zero")

    choices = make_choices(leaf_probs, trace_label, trace_choosers=choosers)

    if not trace_label:
        return choices

    tracing.trace_df(choosers, '%s.choosers' % trace_label)
    tracing.trace_df(leaf_utilities,
                     '%s.raw_utilities' % trace_label,
                     column_labels=['alternative', 'utility'])
    tracing.trace_df(exp_utilities,
                     '%s.nested_exp_utilities' % trace_label,
                     column_labels=['alternative', 'utility'])
    tracing.trace_df(sibling_probs,
                     '%s.nested_probabilities' % trace_label,
                     column_labels=['alternative', 'probability'])
    tracing.trace_df(leaf_probs,
                     '%s.base_probabilities' % trace_label,
                     column_labels=['alternative', 'probability'])
    tracing.trace_df(choices,
                     '%s.choices' % trace_label,
                     columns=[None, trace_choice_name])
    tracing.trace_df(design,
                     '%s.model_design' % trace_label,
                     column_labels=['expression', None])

    return choices
def eval_mnl(choosers, spec, locals_d=None,
             trace_label=None, trace_choice_name=None):
    """
    Simulate multinomial-logit choices when the spec involves no
    alternative-specific data (no interaction terms, no sampling of
    alternatives).

    Every spec row contributes a partial utility to each alternative:
    the row's expression is evaluated per chooser and multiplied by the
    row's coefficient for that alternative. Summing over rows (a matrix
    product) gives one utility per chooser and alternative.

    Parameters
    ----------
    choosers : pandas.DataFrame
    spec : pandas.DataFrame
        Variable expressions in the index, one coefficient column per
        alternative.
    locals_d : Dict
        Local variables forming the environment for evaluating
        expressions that begin with @
    trace_label: str
        Label for trace log entries and dump file names when household
        tracing is enabled. No tracing occurs if label is empty or None.
    trace_choice_name: str
        Column label used in the trace csv dump of choices

    Returns
    -------
    choices : pandas.Series
        Index will be that of `choosers`, values will match the columns
        of `spec`.
    """
    trace_label = tracing.extend_trace_label(trace_label, 'mnl')
    variability_check_enabled = tracing.check_for_variability()

    design = eval_variables(spec.index, choosers, locals_d)

    if variability_check_enabled:
        _check_for_variability(design, trace_label)

    # pandas dot relies on the design columns matching the spec index;
    # result has one row per chooser and one column per alternative
    utilities = design.dot(spec)

    probs = utils_to_probs(utilities,
                           trace_label=trace_label,
                           trace_choosers=choosers)
    choices = make_choices(probs,
                           trace_label=trace_label,
                           trace_choosers=choosers)

    if not trace_label:
        return choices

    tracing.trace_df(choosers, '%s.choosers' % trace_label)
    tracing.trace_df(utilities,
                     '%s.utilities' % trace_label,
                     column_labels=['alternative', 'utility'])
    tracing.trace_df(probs,
                     '%s.probs' % trace_label,
                     column_labels=['alternative', 'probability'])
    tracing.trace_df(choices,
                     '%s.choices' % trace_label,
                     columns=[None, trace_choice_name])
    tracing.trace_df(design,
                     '%s.model_design' % trace_label,
                     column_labels=['expression', None])

    return choices
def eval_nl(choosers, spec, nest_spec, locals_d,
            trace_label=None, trace_choice_name=None):
    """
    Simulate nested-logit choices when the spec involves no
    alternative-specific data (no interaction terms, no sampling of
    alternatives).

    Parameters
    ----------
    choosers : pandas.DataFrame
    spec : pandas.DataFrame
        Variable expressions in the index, one coefficient column per
        alternative.
    nest_spec:
        dictionary specifying nesting structure and nesting coefficients
        (from the model spec yaml file)
    locals_d : Dict or None
        Local variables forming the environment for evaluating
        expressions that begin with @
    trace_label: str
        Label for trace log entries and dump file names when household
        tracing is enabled. No tracing occurs if label is empty or None.
    trace_choice_name: str
        Column label used in the trace csv dump of choices

    Returns
    -------
    choices : pandas.Series
        Index will be that of `choosers`, values will match the columns
        of `spec`.
    """
    trace_label = tracing.extend_trace_label(trace_label, 'nl')
    variability_check_enabled = tracing.check_for_variability()

    tick = tracing.print_elapsed_time()

    # one column per spec expression, named after the spec index values
    expr_values = eval_variables(spec.index, choosers, locals_d)
    tick = tracing.print_elapsed_time("eval_variables", tick, debug=True)

    if variability_check_enabled:
        _check_for_variability(expr_values, trace_label)
        tick = tracing.print_elapsed_time("_check_for_variability",
                                          tick, debug=True)

    # utilities of the leaf alternatives
    leaf_utilities = compute_utilities(expr_values, spec)
    tick = tracing.print_elapsed_time("expression_values.dot",
                                      tick, debug=True)

    # exponentiated utilities of leaves and nests
    exp_utilities = compute_nested_exp_utilities(leaf_utilities, nest_spec)
    tick = tracing.print_elapsed_time("compute_nested_exp_utilities",
                                      tick, debug=True)

    # probability of each node relative to its siblings in the same nest
    sibling_probs = compute_nested_probabilities(exp_utilities, nest_spec,
                                                 trace_label=trace_label)
    tick = tracing.print_elapsed_time("compute_nested_probabilities",
                                      tick, debug=True)

    # flattened leaf probabilities
    leaf_probs = compute_base_probabilities(sibling_probs, nest_spec)
    tick = tracing.print_elapsed_time("compute_base_probabilities",
                                      tick, debug=True)

    # rows whose probabilities stray from summing to one are reported here
    # so the message is clear; make_choices raises if probs don't sum to 1
    BAD_PROB_THRESHOLD = 0.001
    row_count = len(leaf_probs.index)
    deviation = leaf_probs.sum(axis=1).sub(np.ones(row_count)).abs()
    no_choices = deviation > BAD_PROB_THRESHOLD * np.ones(row_count)

    if no_choices.any():
        bad_label = tracing.extend_trace_label(trace_label, 'eval_nl')
        logit.report_bad_choices(no_choices, leaf_probs, bad_label,
                                 tag='bad_probs',
                                 msg="base_probabilities all zero")

    tick = tracing.print_elapsed_time("report_bad_choices", tick, debug=True)

    choices, rands = logit.make_choices(leaf_probs, trace_label,
                                        trace_choosers=choosers)
    tick = tracing.print_elapsed_time("logit.make_choices", tick, debug=True)

    if not trace_label:
        return choices

    tracing.trace_df(choosers, '%s.choosers' % trace_label)
    tracing.trace_df(leaf_utilities,
                     '%s.raw_utilities' % trace_label,
                     column_labels=['alternative', 'utility'])
    tracing.trace_df(exp_utilities,
                     '%s.nested_exp_utilities' % trace_label,
                     column_labels=['alternative', 'utility'])
    tracing.trace_df(sibling_probs,
                     '%s.nested_probabilities' % trace_label,
                     column_labels=['alternative', 'probability'])
    tracing.trace_df(leaf_probs,
                     '%s.base_probabilities' % trace_label,
                     column_labels=['alternative', 'probability'])
    tracing.trace_df(choices,
                     '%s.choices' % trace_label,
                     columns=[None, trace_choice_name])
    tracing.trace_df(rands,
                     '%s.rands' % trace_label,
                     columns=[None, 'rand'])
    tracing.trace_df(expr_values,
                     '%s.expression_values' % trace_label,
                     column_labels=['expression', None])

    return choices
def _interaction_sample(choosers, alternatives, spec, sample_size,
                        alt_col_name, skims=None, locals_d=None,
                        trace_label=None):
    """
    Run a MNL simulation in the situation in which alternatives must
    be merged with choosers because there are interaction terms or
    because alternatives are being sampled.

    Parameters are same as for public function interaction_simulate

    Notes
    -----
    Intermediate objects built along the way:

    spec : dataframe
        one row per spec expression and one col with utility coefficient

    interaction_df : dataframe
        cross join (cartesian product) of choosers with alternatives
        combines columns of choosers and alternatives
        len(df) == len(choosers) * len(alternatives)
        index values (non-unique) are index values from alternatives df

    interaction_utilities : dataframe
        one utility column and one row per row of interaction_df,
        sharing interaction_df's (non-unique) index

    utilities : dataframe
        interaction_utilities reshaped to one row per chooser and
        one column per alternative

    probs : dataframe
        utilities exponentiated and converted to probabilities,
        same shape as utilities

    When `skims` is truthy the alternatives index is copied into a column
    of `alternatives` in place, so the caller's dataframe gains that column.

    NOTE(review): the trace label suffix is 'interaction_simulate' even
    though this is the sampling routine - kept as is so existing trace
    file names do not change; confirm whether that is intended.

    Returns
    -------
    choices_df : pandas.DataFrame
        A DataFrame indexed by the choosers index name, as produced by
        make_sample_choices with a pick_count column added and duplicate
        (chooser, alternative) picks collapsed to a single row.

        pick_count : int
            number of duplicate picks for chooser, alt

    Raises
    ------
    RuntimeError
        If `spec` has more than one column.
    """

    trace_label = tracing.extend_trace_label(trace_label,
                                             'interaction_simulate')
    have_trace_targets = trace_label and tracing.has_trace_targets(choosers)

    if alt_col_name is None:
        alt_col_name = 'alt_%s' % alternatives.index.name

    if have_trace_targets:
        tracing.trace_df(choosers,
                         tracing.extend_trace_label(trace_label, 'choosers'))
        tracing.trace_df(alternatives,
                         tracing.extend_trace_label(trace_label,
                                                    'alternatives'),
                         slicer='NONE', transpose=False)

    if len(spec.columns) > 1:
        raise RuntimeError('spec must have only one column')

    alternative_count = len(alternatives)

    # if using skims, copy index into the dataframe, so it will be
    # available as the "destination" for the skims dereference below
    if skims:
        alternatives[alternatives.index.name] = alternatives.index

    # cross join choosers and alternatives (cartesian product)
    # for every chooser, there will be a row for each alternative
    # index values (non-unique) are from alternatives df
    interaction_df = logit.interaction_dataset(choosers, alternatives,
                                               alternative_count)

    # exact integer check; the former division form raised
    # ZeroDivisionError for empty choosers and compared against a float
    assert len(interaction_df.index) == \
        alternative_count * len(choosers.index)

    if skims:
        asim_utils.add_skims(interaction_df, skims)

    # evaluate expressions from the spec multiply by coefficients and sum
    # interaction_utilities is a df with one utility column and
    # one row per row in interaction_df
    if have_trace_targets:
        trace_rows, trace_ids = tracing.interaction_trace_rows(
            interaction_df, choosers, alternative_count)

        tracing.trace_df(interaction_df[trace_rows],
                         tracing.extend_trace_label(trace_label,
                                                    'interaction_df'),
                         slicer='NONE', transpose=False)
    else:
        trace_rows = trace_ids = None

    interaction_utilities, trace_eval_results = eval_interaction_utilities(
        spec, interaction_df, locals_d, trace_label, trace_rows)

    if have_trace_targets:
        tracing.trace_interaction_eval_results(
            trace_eval_results, trace_ids,
            tracing.extend_trace_label(trace_label, 'eval'))

        tracing.trace_df(interaction_utilities[trace_rows],
                         tracing.extend_trace_label(trace_label,
                                                    'interaction_utilities'),
                         slicer='NONE', transpose=False)

    tracing.dump_df(DUMP, interaction_utilities, trace_label,
                    'interaction_utilities')

    # FIXME - do this in numpy, not pandas?
    # reshape utilities (one utility column and one row per
    # row in interaction_utilities) to a dataframe with one
    # row per chooser and one column per alternative
    # (.values replaces DataFrame.as_matrix(), which was removed in pandas 1.0)
    utilities = pd.DataFrame(
        interaction_utilities.values.reshape(len(choosers),
                                             alternative_count),
        index=choosers.index)

    if have_trace_targets:
        tracing.trace_df(utilities,
                         tracing.extend_trace_label(trace_label, 'utilities'),
                         column_labels=['alternative', 'utility'])

    tracing.dump_df(DUMP, utilities, trace_label, 'utilities')

    # FIXME - do this in numpy, not pandas?
    # convert to probabilities (utilities exponentiated and normalized)
    # probs is same shape as utilities
    probs = logit.utils_to_probs(utilities,
                                 trace_label=trace_label,
                                 trace_choosers=choosers)

    if have_trace_targets:
        tracing.trace_df(probs,
                         tracing.extend_trace_label(trace_label, 'probs'),
                         column_labels=['alternative', 'probability'])

    choices_df = make_sample_choices(
        choosers, probs, interaction_utilities,
        sample_size, alternative_count, alt_col_name, trace_label)

    # make_sample_choices should return choosers index as choices_df column
    assert choosers.index.name in choices_df.columns

    # pick_count and pick_dup
    # pick_count is number of duplicate picks
    # pick_dup flag is True for all but first of duplicates
    pick_group = choices_df.groupby([choosers.index.name, alt_col_name])

    # number each item in each group from 0 to the length of that group - 1.
    choices_df['pick_count'] = pick_group.cumcount(ascending=True)
    # flag duplicate rows after first
    choices_df['pick_dup'] = choices_df['pick_count'] > 0
    # add reverse cumcount to get total pick_count: for a group of size k,
    # forward count i plus reverse count (k - 1 - i) plus 1 equals k
    # (conveniently faster than groupby.count + merge)
    choices_df['pick_count'] += pick_group.cumcount(ascending=False) + 1

    # drop the duplicates
    choices_df = choices_df[~choices_df['pick_dup']]
    del choices_df['pick_dup']

    # set index after groupby so we can trace on it
    choices_df.set_index(choosers.index.name, inplace=True)

    tracing.dump_df(DUMP, choices_df, trace_label, 'choices_df')

    if have_trace_targets:
        tracing.trace_df(choices_df,
                         tracing.extend_trace_label(trace_label,
                                                    'sampled_alternatives'),
                         transpose=False,
                         column_labels=['sample_alt', 'alternative'])

    return choices_df
def workplace_location_simulate(asim_persons_merged,
                                workplace_location_sample,
                                workplace_location_spec,
                                workplace_location_settings,
                                skim_dict,
                                destination_size_terms,
                                chunk_size,
                                trace_hh_id):
    """
    Choose a workplace TAZ for each person from the pre-sampled
    alternatives in workplace_location_sample.

    The sample is merged with destination size terms, choices are made with
    interaction_sample_simulate, and the result is stored as the
    "workplace_taz" column of the "asim_persons" table. Nothing is returned.
    """
    # A workplace location is generated for every person for now; whether
    # it is actually used downstream should depend on CDAP and mandatory
    # tour generation.
    persons = asim_persons_merged.to_frame()

    alt_col_name = workplace_location_settings["ALT_COL_NAME"]

    # the sample needs additional alternative columns merged in from the
    # size terms
    sample_df = workplace_location_sample.to_frame()
    size_terms_df = destination_size_terms.to_frame()
    alternatives = pd.merge(sample_df, size_terms_df,
                            left_on=alt_col_name, right_index=True,
                            how="left")

    tracing.dump_df(DUMP, alternatives,
                    'workplace_location_simulate', 'alternatives')

    constants = asim_utils.get_model_constants(workplace_location_settings)

    sample_pool_size = len(size_terms_df.index)

    print("Running workplace_location_simulate with %d persons" %
          len(persons))

    # skim lookup keyed on the chooser TAZ and the alternative TAZ column;
    # exposed to @ expressions under the name "skims"
    skims = skim_dict.wrap("TAZ", alt_col_name)

    locals_d = {
        'skims': skims,
        'sample_pool_size': float(sample_pool_size),
    }
    if constants is not None:
        locals_d.update(constants)

    # FIXME - MEMORY HACK - only include columns actually used in spec
    persons = persons[workplace_location_settings['SIMULATE_CHOOSER_COLUMNS']]

    tracing.dump_df(DUMP, persons, 'workplace_location_simulate', 'choosers')

    # label is only set when a household is being traced
    simulate_trace_label = trace_hh_id and 'workplace_location'

    choices = interaction_sample_simulate(
        persons,
        alternatives,
        spec=workplace_location_spec,
        choice_column=alt_col_name,
        skims=skims,
        locals_d=locals_d,
        chunk_size=chunk_size,
        trace_label=simulate_trace_label,
        trace_choice_name='workplace_location')

    # FIXME - no need to reindex since we didn't slice choosers

    tracing.print_summary('workplace_taz', choices, describe=True)

    orca.add_column("asim_persons", "workplace_taz", choices)

    asim_utils.add_dependent_columns("asim_persons", "persons_workplace")

    if trace_hh_id:
        dependent_columns = orca.get_table('persons_workplace').columns
        trace_columns = ['workplace_taz'] + dependent_columns
        merged_persons = orca.get_table('asim_persons_merged').to_frame()
        tracing.trace_df(merged_persons,
                         label="workplace_location",
                         columns=trace_columns,
                         warn_if_empty=True)