def test_build_cdap_spec_hhsize2(people, cdap_indiv_and_hhsize1, cdap_interaction_coefficients):
    """Check the utilities computed from the generated CDAP spec for hhsize 2."""
    hhsize = 2

    # individual utilities need cdap_rank assigned on the people table first
    cdap.assign_cdap_rank(people)
    person_utils = cdap.individual_utilities(people, cdap_indiv_and_hhsize1, locals_d=None)

    # chooser table and interaction spec for households of the requested size
    household_choosers = cdap.hh_choosers(person_utils, hhsize=hhsize)
    household_spec = cdap.build_cdap_spec(
        cdap_interaction_coefficients, hhsize=hhsize, cache=False)

    expression_values = simulate.eval_variables(household_spec.index, household_choosers)
    utils = simulate.compute_utilities(expression_values, household_spec)

    alternatives = ['HH', 'HM', 'HN', 'MH', 'MM', 'MN', 'NH', 'NM', 'NN']
    expected_rows = [
        [0, 3, 0, 3, 7, 3, 0, 3, 0],  # household 3
        [0, 0, 1, 1, 1, 2, 0, 0, 2],  # household 4
    ]
    expected = pd.DataFrame(expected_rows, index=[3, 4], columns=alternatives).astype('float')

    pdt.assert_frame_equal(utils, expected, check_names=False)
def individual_utilities(persons, cdap_indiv_spec, locals_d, trace_hh_id=None, trace_label=None):
    """
    Compute the stand-alone CDAP utilities of every person.

    Parameters
    ----------
    persons : pandas.DataFrame
        DataFrame of individual persons data. Must supply the columns named by
        _hh_id_, _ptype_ and _hh_size_, plus 'cdap_rank'.
    cdap_indiv_spec : pandas.DataFrame
        CDAP spec applied to individuals; its index holds the expressions to evaluate.
    locals_d : dict or None
        Passed through unchanged to eval_variables.
    trace_hh_id : optional
        When truthy, the expression values and utilities are written via tracing.trace_df.
    trace_label : str, optional
        Prefix for the names of the traced tables.

    Returns
    -------
    utilities : pandas.DataFrame
        Will have index of `persons` and columns for each of the alternatives,
        plus the 'useful columns' [_hh_id_, _ptype_, 'cdap_rank', _hh_size_].
    """
    # evaluate every spec expression per person, then weight by the spec coefficients
    expression_values = eval_variables(cdap_indiv_spec.index, persons, locals_d)
    utilities = expression_values.dot(cdap_indiv_spec)

    # carry these person columns along; they are needed to build household interactions
    carried_columns = [_hh_id_, _ptype_, 'cdap_rank', _hh_size_]
    utilities[carried_columns] = persons[carried_columns]

    if not trace_hh_id:
        return utilities

    tracing.trace_df(expression_values,
                     '%s.individual_vars' % trace_label,
                     column_labels=['expression', 'person'])
    tracing.trace_df(utilities,
                     '%s.indiv_utils' % trace_label,
                     column_labels=['activity', 'person'])

    return utilities
def test_build_cdap_spec_hhsize2(people, model_settings):
    """Check the utilities computed from the generated CDAP spec for hhsize 2."""
    hhsize = 2

    # load the individual spec and the interaction coefficients from the config files
    cdap_indiv_and_hhsize1 = simulate.read_model_spec(file_name='cdap_indiv_and_hhsize1.csv')

    coefficients_path = config.config_file_path('cdap_interaction_coefficients.csv')
    raw_coefficients = pd.read_csv(coefficients_path, comment='#')
    interaction_coefficients = cdap.preprocess_interaction_coefficients(raw_coefficients)

    person_type_map = model_settings.get('PERSON_TYPE_MAP', {})

    # individual utilities need cdap_rank assigned on the people table first
    cdap.assign_cdap_rank(people, person_type_map)
    person_utils = cdap.individual_utilities(people, cdap_indiv_and_hhsize1, locals_d=None)

    household_choosers = cdap.hh_choosers(person_utils, hhsize=hhsize)
    household_spec = cdap.build_cdap_spec(interaction_coefficients, hhsize=hhsize, cache=False)

    expression_values = simulate.eval_variables(household_spec.index, household_choosers)
    utils = simulate.compute_utilities(expression_values, household_spec)

    alternatives = ['HH', 'HM', 'HN', 'MH', 'MM', 'MN', 'NH', 'NM', 'NN']
    expected_rows = [
        [0, 3, 0, 3, 7, 3, 0, 3, 0],  # household 3
        [0, 0, 1, 1, 1, 2, 0, 0, 2],  # household 4
    ]
    expected = pd.DataFrame(expected_rows, index=[3, 4], columns=alternatives).astype('float')

    pdt.assert_frame_equal(utils, expected, check_names=False)
def test_build_cdap_spec_hhsize2(people, cdap_indiv_and_hhsize1, cdap_interaction_coefficients):
    """Compare the hhsize 2 utilities from the generated CDAP spec with known values."""
    hhsize = 2

    # expected utilities, one row per household and one column per joint alternative
    joint_alternatives = ['HH', 'HM', 'HN', 'MH', 'MM', 'MN', 'NH', 'NM', 'NN']
    expected = pd.DataFrame(
        [
            [0, 3, 0, 3, 7, 3, 0, 3, 0],  # household 3
            [0, 0, 1, 1, 1, 2, 0, 0, 2],  # household 4
        ],
        index=[3, 4],
        columns=joint_alternatives,
    ).astype('float')

    # cdap_rank has to be assigned before the individual utilities are computed
    cdap.assign_cdap_rank(people)
    individual = cdap.individual_utilities(people, cdap_indiv_and_hhsize1, locals_d=None)

    hh_rows = cdap.hh_choosers(individual, hhsize=hhsize)
    hh_spec = cdap.build_cdap_spec(cdap_interaction_coefficients, hhsize=hhsize, cache=False)

    spec_values = simulate.eval_variables(hh_spec.index, hh_rows)
    utils = simulate.compute_utilities(spec_values, hh_spec)

    pdt.assert_frame_equal(utils, expected, check_names=False)
def test_build_cdap_spec_hhsize2(people, model_settings):
    """Check the utilities computed from the generated CDAP spec for hhsize 2."""
    hhsize = 2

    # load the individual spec and the interaction coefficients from the config files
    cdap_indiv_and_hhsize1 = simulate.read_model_spec(file_name='cdap_indiv_and_hhsize1.csv')

    coefficients_path = config.config_file_path('cdap_interaction_coefficients.csv')
    raw_coefficients = pd.read_csv(coefficients_path, comment='#')
    interaction_coefficients = cdap.preprocess_interaction_coefficients(raw_coefficients)

    person_type_map = model_settings.get('PERSON_TYPE_MAP', {})

    # NOTE(review): the extent of this chunk_log block was reconstructed from
    # whitespace-collapsed source - confirm which statements belong inside it.
    with chunk.chunk_log('test_build_cdap_spec_hhsize2', base=True):

        cdap.assign_cdap_rank(people, person_type_map)
        person_utils = cdap.individual_utilities(people, cdap_indiv_and_hhsize1, locals_d=None)

        household_choosers = cdap.hh_choosers(person_utils, hhsize=hhsize)
        household_spec = cdap.build_cdap_spec(
            interaction_coefficients, hhsize=hhsize, cache=False)

        # pandas.dot depends on column names of expression_values matching spec index values
        # expressions should have been uniquified when spec was read
        assert household_spec.index.is_unique

        expression_values = simulate.eval_variables(household_spec.index, household_choosers)
        assert (household_spec.index.values == expression_values.columns.values).all()

    utils = expression_values.dot(household_spec)

    alternatives = ['HH', 'HM', 'HN', 'MH', 'MM', 'MN', 'NH', 'NM', 'NN']
    expected_rows = [
        [0, 3, 0, 3, 7, 3, 0, 3, 0],  # household 3
        [0, 0, 1, 1, 1, 2, 0, 0, 2],  # household 4
    ]
    expected = pd.DataFrame(expected_rows, index=[3, 4], columns=alternatives).astype('float')

    pdt.assert_frame_equal(utils, expected, check_names=False)
def extra_hh_member_choices(persons, cdap_fixed_relative_proportions, locals_d,
                            trace_hh_id, trace_label):
    """
    Choose activities for the 'extra' household members that cdap did not handle.

    Following the CTRAMP HouseholdCoordinatedDailyActivityPatternModel, "a separate,
    simple cross-sectional distribution is looked up for the remaining household members"

    The cdap_fixed_relative_proportions spec is evaluated like an activitysim logit
    utility spec, EXCEPT that the resulting values are relative proportions, not
    utilities (i.e. they are normalized to probabilities summing to 1.0 without being
    exponentiated first).

    Parameters
    ----------
    persons : pandas.DataFrame
        Table of persons data indexed on _persons_index_
        We expect, at least, columns [_hh_id_, _ptype_]; 'cdap_rank' is read here.
    cdap_fixed_relative_proportions
        spec to compute/specify the relative proportions of each activity (M, N, H)
        that should be used to choose activities for additional household members
        not handled by CDAP.
    locals_d : Dict
        dictionary of local variables that eval_variables adds to the environment
        for an evaluation of an expression that begins with @
    trace_hh_id
        when truthy, proportions, probs, choices and rands are written via tracing.trace_df
    trace_label : str
        trace label, extended here with 'extra_hh_member_choices'

    Returns
    -------
    choices : pandas.Series
        list of alternatives chosen for all extra members, indexed by _persons_index_
    """
    trace_label = tracing.extend_trace_label(trace_label, 'extra_hh_member_choices')

    # extra household members are those ranked beyond MAX_HHSIZE
    is_extra_member = persons['cdap_rank'] > MAX_HHSIZE
    extra_members = persons[is_extra_member]
    if not len(extra_members.index):
        return pd.Series(dtype='float64')

    # evaluate the expressions of the spec for the extra members
    expression_values = simulate.eval_variables(
        cdap_fixed_relative_proportions.index, extra_members, locals_d)

    # the spec yields relative proportions by ptype, not utilities
    proportions = expression_values.dot(cdap_fixed_relative_proportions)

    # scale each row by its total to turn the proportions into probabilities
    row_totals = proportions.sum(axis=1)
    probs = proportions.div(row_totals, axis=0)

    # pick one alternative per person based on probability
    # idx_choices is a series (indexed on _persons_index_ ) holding the integer
    # (0 based) index of the chosen column from probs
    idx_choices, rands = logit.make_choices(probs, trace_label=trace_label)

    # translate the chosen column positions into activity names
    chosen_activities = probs.columns[idx_choices].values
    choices = pd.Series(chosen_activities, index=probs.index)

    if trace_hh_id:
        person_labels = ['expression', 'person']
        tracing.trace_df(proportions,
                         '%s.extra_hh_member_choices_proportions' % trace_label,
                         column_labels=person_labels)
        tracing.trace_df(probs,
                         '%s.extra_hh_member_choices_probs' % trace_label,
                         column_labels=person_labels)
        tracing.trace_df(choices,
                         '%s.extra_hh_member_choices_choices' % trace_label,
                         column_labels=person_labels)
        tracing.trace_df(rands,
                         '%s.extra_hh_member_choices_rands' % trace_label,
                         columns=[None, 'rand'])

    return choices
def extra_hh_member_choices(persons, cdap_fixed_relative_proportions, locals_d,
                            trace_hh_id, trace_label):
    """
    Generate the activity choices for the 'extra' household members who weren't handled by cdap

    Following the CTRAMP HouseholdCoordinatedDailyActivityPatternModel, "a separate,
    simple cross-sectional distribution is looked up for the remaining household members"

    The cdap_fixed_relative_proportions spec is handled like an activitysim logit utility
    spec, EXCEPT that the values computed are relative proportions, not utilities
    (i.e. values are not exponentiated before being normalized to probabilities summing to 1.0)

    Parameters
    ----------
    persons : pandas.DataFrame
        Table of persons data indexed on _persons_index_
        We expect, at least, columns [_hh_id_, _ptype_]; 'cdap_rank' is read here.
    cdap_fixed_relative_proportions
        spec to compute/specify the relative proportions of each activity (M, N, H)
        that should be used to choose activities for additional household members
        not handled by CDAP.
    locals_d : Dict
        dictionary of local variables that eval_variables adds to the environment
        for an evaluation of an expression that begins with @
    trace_hh_id
        when truthy, proportions, probs, choices and rands are written via tracing.trace_df
    trace_label : str
        trace label, extended here with 'extra_hh_member_choices'

    Returns
    -------
    choices : pandas.Series
        list of alternatives chosen for all extra members, indexed by _persons_index_
        (an empty float64 Series when there are no extra members)
    """
    trace_label = tracing.extend_trace_label(trace_label, 'extra_hh_member_choices')

    # extra household members have cdap_rank > MAX_HHSIZE
    choosers = persons[persons['cdap_rank'] > MAX_HHSIZE]

    if len(choosers.index) == 0:
        # An explicit dtype avoids the pandas warning about dtype-less empty Series
        # and keeps the dtype of the empty result the same across pandas versions.
        return pd.Series(dtype='float64')

    # eval the expression file
    values = simulate.eval_variables(cdap_fixed_relative_proportions.index, choosers, locals_d)

    # cdap_fixed_relative_proportions computes relative proportions by ptype, not utilities
    proportions = values.dot(cdap_fixed_relative_proportions)

    # convert relative proportions to probability
    probs = proportions.div(proportions.sum(axis=1), axis=0)

    # select an activity pattern alternative for each person based on probability
    # idx_choices is a series (indexed on _persons_index_ ) with the chosen alternative
    # represented as the integer (0 based) index of the chosen column from probs
    idx_choices, rands = logit.make_choices(probs, trace_label=trace_label)

    # convert choice from column index to activity name
    choices = pd.Series(probs.columns[idx_choices].values, index=probs.index)

    if trace_hh_id:
        tracing.trace_df(proportions, '%s.extra_hh_member_choices_proportions' % trace_label,
                         column_labels=['expression', 'person'])
        tracing.trace_df(probs, '%s.extra_hh_member_choices_probs' % trace_label,
                         column_labels=['expression', 'person'])
        tracing.trace_df(choices, '%s.extra_hh_member_choices_choices' % trace_label,
                         column_labels=['expression', 'person'])
        tracing.trace_df(rands, '%s.extra_hh_member_choices_rands' % trace_label,
                         columns=[None, 'rand'])

    return choices
def household_activity_choices(indiv_utils, interaction_coefficients, hhsize,
                               trace_hh_id=None, trace_label=None):
    """
    Calculate household utilities for each activity pattern alternative for households of hhsize

    The resulting activity pattern for each household will be coded as a string of activity
    codes. e.g. 'MNHH' for a 4 person household with activities Mandatory, NonMandatory,
    Home, Home

    Parameters
    ----------
    indiv_utils : pandas.DataFrame
        CDAP utilities for each individual, ignoring interactions
        ind_utils has index of _persons_index_ and a column for each alternative
        i.e. three columns 'M' (Mandatory), 'N' (NonMandatory), 'H' (Home)
    interaction_coefficients : pandas.DataFrame
        Rules and coefficients for generating interaction specs for different household sizes
    hhsize : int
        the size of household for which activity pattern should be calculated (1..MAX_HHSIZE)
    trace_hh_id : optional
        when truthy, choosers, expression values, utils, probs, choices and rands are
        written via tracing.trace_df
    trace_label : str, optional
        prefix for the names of the traced tables

    Returns
    -------
    choices : pandas.Series
        the chosen cdap activity pattern for each household represented as a string
        (e.g. 'MNH') with same index (_hh_index_) as utils
        (an empty float64 Series when there are no households of this size)
    """
    if hhsize == 1:
        # for 1 person households, there are no interactions to account for
        # and the household utils are the same as the individual utils
        choosers = expression_values = None

        # extract the individual utilities for individuals from hhsize 1 households
        utils = indiv_utils.loc[indiv_utils[_hh_size_] == 1, [_hh_id_, 'M', 'N', 'H']]

        # index on household_id, not person_id
        set_hh_index(utils)
    else:
        choosers = hh_choosers(indiv_utils, hhsize=hhsize)

        spec = build_cdap_spec(interaction_coefficients, hhsize,
                               trace_spec=(trace_hh_id in choosers.index),
                               trace_label=trace_label)

        expression_values = eval_variables(spec.index, choosers)

        utils = expression_values.dot(spec).astype('float')

    if len(utils.index) == 0:
        # An explicit dtype avoids the pandas warning about dtype-less empty Series
        # and keeps the dtype of the empty result the same across pandas versions.
        return pd.Series(dtype='float64')

    probs = logit.utils_to_probs(utils, trace_label=trace_label)

    # select an activity pattern alternative for each household based on probability
    # result is a series indexed on _hh_index_ with the (0 based) index of the column from probs
    idx_choices, rands = logit.make_choices(probs, trace_label=trace_label)

    # convert choice expressed as index into alternative name from util column label
    choices = pd.Series(utils.columns[idx_choices].values, index=utils.index)

    if trace_hh_id:

        # choosers and expression values only exist when interactions were evaluated
        if hhsize > 1:
            tracing.trace_df(choosers, '%s.hhsize%d_choosers' % (trace_label, hhsize),
                             column_labels=['expression', 'person'])
            tracing.trace_df(expression_values, '%s.hhsize%d_vars' % (trace_label, hhsize),
                             column_labels=['expression', 'person'])

        tracing.trace_df(utils, '%s.hhsize%d_utils' % (trace_label, hhsize),
                         column_labels=['expression', 'household'])
        tracing.trace_df(probs, '%s.hhsize%d_probs' % (trace_label, hhsize),
                         column_labels=['expression', 'household'])
        tracing.trace_df(choices, '%s.hhsize%d_activity_choices' % (trace_label, hhsize),
                         column_labels=['expression', 'household'])
        tracing.trace_df(rands, '%s.hhsize%d_rands' % (trace_label, hhsize),
                         columns=[None, 'rand'])

    return choices