def test_build_cdap_spec_hhsize2(people, cdap_indiv_and_hhsize1, cdap_interaction_coefficients):
    """
    Check the household utilities computed from the generated CDAP spec
    for two-person households against the expected values for
    households 3 and 4.
    """
    household_size = 2

    # ranks must be assigned on `people` before individual utilities are computed
    cdap.assign_cdap_rank(people)
    person_utils = cdap.individual_utilities(
        people, cdap_indiv_and_hhsize1, locals_d=None)

    household_choosers = cdap.hh_choosers(person_utils, hhsize=household_size)
    household_spec = cdap.build_cdap_spec(
        cdap_interaction_coefficients, hhsize=household_size, cache=False)

    spec_values = simulate.eval_variables(household_spec.index, household_choosers)
    actual = simulate.compute_utilities(spec_values, household_spec)

    # one column per two-person activity pattern
    pattern_columns = ['HH', 'HM', 'HN', 'MH', 'MM', 'MN', 'NH', 'NM', 'NN']
    expected_rows = [
        [0, 3, 0, 3, 7, 3, 0, 3, 0],  # household 3
        [0, 0, 1, 1, 1, 2, 0, 0, 2],  # household 4
    ]
    expected = pd.DataFrame(expected_rows, index=[3, 4], columns=pattern_columns)
    expected = expected.astype('float')

    pdt.assert_frame_equal(actual, expected, check_names=False)
def individual_utilities(persons, cdap_indiv_spec, locals_d, trace_hh_id=None, trace_label=None):
    """
    Compute the CDAP utilities of every individual person.

    Parameters
    ----------
    persons : pandas.DataFrame
        DataFrame of individual persons data.
    cdap_indiv_spec : pandas.DataFrame
        CDAP spec applied to individuals.
    locals_d : dict or None
        Passed through unchanged to `eval_variables`.
    trace_hh_id : optional
        When truthy, the evaluated variables and the resulting utilities
        are handed to `tracing.trace_df`.
    trace_label : str, optional
        Prefix used to build the labels of the traced tables.

    Returns
    -------
    utilities : pandas.DataFrame
        Will have index of `persons` and columns for each of the alternatives,
        plus some 'useful columns' [_hh_id_, _ptype_, 'cdap_rank', _hh_size_]
    """

    # evaluate the spec expressions per person, then convert them to utilities
    expression_values = eval_variables(cdap_indiv_spec.index, persons, locals_d)
    person_utils = compute_utilities(expression_values, cdap_indiv_spec)

    # carry over the person columns needed later to build household interactions
    carried_columns = [_hh_id_, _ptype_, 'cdap_rank', _hh_size_]
    person_utils[carried_columns] = persons[carried_columns]

    if not trace_hh_id:
        return person_utils

    tracing.trace_df(
        expression_values,
        '%s.individual_vars' % trace_label,
        column_labels=['expression', 'person'])
    tracing.trace_df(
        person_utils,
        '%s.indiv_utils' % trace_label,
        column_labels=['activity', 'person'])

    return person_utils
def test_build_cdap_spec_hhsize2(people, model_settings):
    """
    Check the household utilities computed from the generated CDAP spec
    for two-person households against the expected values for
    households 3 and 4, reading the individual spec and the interaction
    coefficients from their csv files.
    """
    household_size = 2

    individual_spec = simulate.read_model_spec(file_name='cdap_indiv_and_hhsize1.csv')

    # '#' starts a comment in the coefficients file
    coefficients_path = config.config_file_path('cdap_interaction_coefficients.csv')
    raw_coefficients = pd.read_csv(coefficients_path, comment='#')
    coefficients = cdap.preprocess_interaction_coefficients(raw_coefficients)

    # fall back to an empty mapping when the setting is absent
    ptype_map = model_settings.get('PERSON_TYPE_MAP', {})

    # ranks must be assigned on `people` before individual utilities are computed
    cdap.assign_cdap_rank(people, ptype_map)
    person_utils = cdap.individual_utilities(people, individual_spec, locals_d=None)

    household_choosers = cdap.hh_choosers(person_utils, hhsize=household_size)
    household_spec = cdap.build_cdap_spec(
        coefficients, hhsize=household_size, cache=False)

    spec_values = simulate.eval_variables(household_spec.index, household_choosers)
    actual = simulate.compute_utilities(spec_values, household_spec)

    # one column per two-person activity pattern
    pattern_columns = ['HH', 'HM', 'HN', 'MH', 'MM', 'MN', 'NH', 'NM', 'NN']
    expected_rows = [
        [0, 3, 0, 3, 7, 3, 0, 3, 0],  # household 3
        [0, 0, 1, 1, 1, 2, 0, 0, 2],  # household 4
    ]
    expected = pd.DataFrame(expected_rows, index=[3, 4], columns=pattern_columns)
    expected = expected.astype('float')

    pdt.assert_frame_equal(actual, expected, check_names=False)
def test_build_cdap_spec_hhsize2(people,
                                 cdap_indiv_and_hhsize1,
                                 cdap_interaction_coefficients):
    """
    Build the CDAP spec for households of size two, compute the household
    utilities with it, and compare them with the expected utilities of
    households 3 and 4.
    """
    size = 2

    # assign ranks first: the individual utilities are computed from `people` afterwards
    cdap.assign_cdap_rank(people)
    utils_by_person = cdap.individual_utilities(
        people,
        cdap_indiv_and_hhsize1,
        locals_d=None,
    )
    choosers_by_hh = cdap.hh_choosers(utils_by_person, hhsize=size)

    size2_spec = cdap.build_cdap_spec(
        cdap_interaction_coefficients,
        hhsize=size,
        cache=False,
    )
    evaluated = simulate.eval_variables(size2_spec.index, choosers_by_hh)
    computed = simulate.compute_utilities(evaluated, size2_spec)

    hh3_utils = [0, 3, 0, 3, 7, 3, 0, 3, 0]  # household 3
    hh4_utils = [0, 0, 1, 1, 1, 2, 0, 0, 2]  # household 4
    wanted = pd.DataFrame(
        [hh3_utils, hh4_utils],
        index=[3, 4],
        columns=['HH', 'HM', 'HN', 'MH', 'MM', 'MN', 'NH', 'NM', 'NN'],
    ).astype('float')

    pdt.assert_frame_equal(computed, wanted, check_names=False)
def household_activity_choices(indiv_utils, interaction_coefficients, hhsize,
                               trace_hh_id=None, trace_label=None):
    """
    Choose an activity pattern for every household of size hhsize.

    Household utilities are computed for each activity pattern alternative,
    converted to probabilities, and one alternative is drawn per household.
    A pattern is coded as a string of activity codes, one per person,
    e.g. 'MNHH' for a 4 person household with activities
    Mandatory, NonMandatory, Home, Home.

    Parameters
    ----------
    indiv_utils : pandas.DataFrame
        CDAP utilities for each individual, ignoring interactions.
        Has index of _persons_index_ and a column for each alternative,
        i.e. three columns 'M' (Mandatory), 'N' (NonMandatory), 'H' (Home)
    interaction_coefficients : pandas.DataFrame
        Rules and coefficients for generating interaction specs for
        different household sizes
    hhsize : int
        the size of household for which the activity pattern should be
        calculated (1..MAX_HHSIZE)
    trace_hh_id : optional
        When truthy, intermediate tables are handed to `tracing.trace_df`.
    trace_label : str, optional
        Prefix used to build the labels of the traced tables.

    Returns
    -------
    choices : pandas.Series
        the chosen cdap activity pattern for each household, represented as
        a string (e.g. 'MNH'), with the same index (_hh_index_) as the
        household utilities. An empty Series when there are no households.
    """

    if hhsize == 1:
        # one person households have no interactions to account for,
        # so the household utils are simply the individual utils
        household_choosers = spec_values = None

        # keep only the individuals that live in hhsize 1 households
        lives_alone = indiv_utils[_hh_size_] == 1
        hh_utils = indiv_utils.loc[lives_alone, [_hh_id_, 'M', 'N', 'H']]

        # index on household_id, not person_id
        set_hh_index(hh_utils)
    else:
        household_choosers = hh_choosers(indiv_utils, hhsize=hhsize)

        trace_this_spec = trace_hh_id in household_choosers.index
        hh_spec = build_cdap_spec(interaction_coefficients, hhsize,
                                  trace_spec=trace_this_spec,
                                  trace_label=trace_label)

        spec_values = eval_variables(hh_spec.index, household_choosers)
        hh_utils = compute_utilities(spec_values, hh_spec)

    # nothing to choose when there are no households of this size
    if not len(hh_utils.index):
        return pd.Series()

    hh_probs = logit.utils_to_probs(hh_utils, trace_label=trace_label)

    # select an activity pattern alternative for each household based on probability
    # result is a series indexed on _hh_index_ with the (0 based) index of the column from probs
    chosen_positions, rands = logit.make_choices(hh_probs, trace_label=trace_label)

    # translate each chosen column position into the alternative name (util column label)
    chosen_patterns = hh_utils.columns[chosen_positions].values
    choices = pd.Series(chosen_patterns, index=hh_utils.index)

    if trace_hh_id:
        label_args = (trace_label, hhsize)

        # choosers and vars only exist when interactions were evaluated
        if hhsize > 1:
            tracing.trace_df(household_choosers,
                             '%s.hhsize%d_choosers' % label_args,
                             column_labels=['expression', 'person'])
            tracing.trace_df(spec_values,
                             '%s.hhsize%d_vars' % label_args,
                             column_labels=['expression', 'person'])

        household_tables = (
            (hh_utils, '%s.hhsize%d_utils'),
            (hh_probs, '%s.hhsize%d_probs'),
            (choices, '%s.hhsize%d_activity_choices'),
        )
        for table, label_template in household_tables:
            tracing.trace_df(table,
                             label_template % label_args,
                             column_labels=['expression', 'household'])

        tracing.trace_df(rands,
                         '%s.hhsize%d_rands' % label_args,
                         columns=[None, 'rand'])

    return choices