Esempio n. 1
0
def test_build_cdap_spec_hhsize2(people, cdap_indiv_and_hhsize1,
                                 cdap_interaction_coefficients):
    """Check the utilities produced by the generated hhsize 2 CDAP spec."""

    hhsize = 2

    # individual utilities need cdap_rank assigned to the persons first
    cdap.assign_cdap_rank(people)
    person_utilities = cdap.individual_utilities(
        people, cdap_indiv_and_hhsize1, locals_d=None)

    # chooser table for households of the requested size
    household_choosers = cdap.hh_choosers(person_utilities, hhsize=hhsize)

    # build the spec fresh rather than reading a cached copy
    interaction_spec = cdap.build_cdap_spec(
        cdap_interaction_coefficients, hhsize=hhsize, cache=False)

    expression_values = simulate.eval_variables(
        interaction_spec.index, household_choosers)
    utils = simulate.compute_utilities(expression_values, interaction_spec)

    # one column per two-person activity pattern alternative
    alternatives = ['HH', 'HM', 'HN', 'MH', 'MM', 'MN', 'NH', 'NM', 'NN']
    expected_rows = [
        [0, 3, 0, 3, 7, 3, 0, 3, 0],  # household 3
        [0, 0, 1, 1, 1, 2, 0, 0, 2],  # household 4
    ]
    expected = pd.DataFrame(
        expected_rows, index=[3, 4], columns=alternatives).astype('float')

    pdt.assert_frame_equal(utils, expected, check_names=False)
Esempio n. 2
0
def individual_utilities(persons,
                         cdap_indiv_spec,
                         locals_d,
                         trace_hh_id=None,
                         trace_label=None):
    """
    Compute the single-person CDAP utilities for every row of persons.

    Parameters
    ----------
    persons : pandas.DataFrame
        DataFrame of individual persons data. Must provide the columns
        [_hh_id_, _ptype_, 'cdap_rank', _hh_size_], which are copied to the result.
    cdap_indiv_spec : pandas.DataFrame
        CDAP spec applied to individuals; its index holds the expressions to evaluate.
    locals_d : dict or None
        Passed through unchanged to eval_variables.
    trace_hh_id : optional
        When truthy, the expression values and the utilities are written with
        tracing.trace_df.
    trace_label : str, optional
        Prefix used to name the trace output.

    Returns
    -------
    utilities : pandas.DataFrame
        Will have index of `persons` and columns for each of the alternatives,
        plus some 'useful columns' [_hh_id_, _ptype_, 'cdap_rank', _hh_size_]

    """

    # evaluate the spec expressions, then weight them by the spec coefficients
    expression_values = eval_variables(cdap_indiv_spec.index, persons, locals_d)
    utilities = expression_values.dot(cdap_indiv_spec)

    # carry over the person columns needed later to build household interactions
    carried_columns = [_hh_id_, _ptype_, 'cdap_rank', _hh_size_]
    utilities[carried_columns] = persons[carried_columns]

    if not trace_hh_id:
        return utilities

    tracing.trace_df(expression_values,
                     '%s.individual_vars' % trace_label,
                     column_labels=['expression', 'person'])
    tracing.trace_df(utilities,
                     '%s.indiv_utils' % trace_label,
                     column_labels=['activity', 'person'])

    return utilities
Esempio n. 3
0
def test_build_cdap_spec_hhsize2(people, model_settings):
    """Check the utilities produced by the generated hhsize 2 CDAP spec."""

    hhsize = 2

    # spec and coefficients are read from the configs rather than fixtures
    indiv_spec = simulate.read_model_spec(
        file_name='cdap_indiv_and_hhsize1.csv')

    coefficients_path = config.config_file_path(
        'cdap_interaction_coefficients.csv')
    raw_coefficients = pd.read_csv(coefficients_path, comment='#')
    interaction_coefficients = cdap.preprocess_interaction_coefficients(
        raw_coefficients)

    # individual utilities need cdap_rank assigned to the persons first
    person_type_map = model_settings.get('PERSON_TYPE_MAP', {})
    cdap.assign_cdap_rank(people, person_type_map)
    person_utilities = cdap.individual_utilities(
        people, indiv_spec, locals_d=None)

    # chooser table for households of the requested size
    household_choosers = cdap.hh_choosers(person_utilities, hhsize=hhsize)

    # build the spec fresh rather than reading a cached copy
    interaction_spec = cdap.build_cdap_spec(
        interaction_coefficients, hhsize=hhsize, cache=False)

    expression_values = simulate.eval_variables(
        interaction_spec.index, household_choosers)
    utils = simulate.compute_utilities(expression_values, interaction_spec)

    # one column per two-person activity pattern alternative
    alternatives = ['HH', 'HM', 'HN', 'MH', 'MM', 'MN', 'NH', 'NM', 'NN']
    expected_rows = [
        [0, 3, 0, 3, 7, 3, 0, 3, 0],  # household 3
        [0, 0, 1, 1, 1, 2, 0, 0, 2],  # household 4
    ]
    expected = pd.DataFrame(
        expected_rows, index=[3, 4], columns=alternatives).astype('float')

    pdt.assert_frame_equal(utils, expected, check_names=False)
Esempio n. 4
0
def test_build_cdap_spec_hhsize2(people, cdap_indiv_and_hhsize1, cdap_interaction_coefficients):
    """Check the utilities produced by the generated hhsize 2 CDAP spec."""

    hhsize = 2

    # individual utilities need cdap_rank assigned to the persons first
    cdap.assign_cdap_rank(people)
    person_utilities = cdap.individual_utilities(people, cdap_indiv_and_hhsize1, locals_d=None)

    # chooser table for households of the requested size
    household_choosers = cdap.hh_choosers(person_utilities, hhsize=hhsize)

    # build the spec fresh rather than reading a cached copy
    interaction_spec = cdap.build_cdap_spec(
        cdap_interaction_coefficients, hhsize=hhsize, cache=False)

    expression_values = simulate.eval_variables(interaction_spec.index, household_choosers)
    utils = simulate.compute_utilities(expression_values, interaction_spec)

    # one column per two-person activity pattern alternative
    alternatives = ['HH', 'HM', 'HN', 'MH', 'MM', 'MN', 'NH', 'NM', 'NN']
    expected_rows = [
        [0, 3, 0, 3, 7, 3, 0, 3, 0],  # household 3
        [0, 0, 1, 1, 1, 2, 0, 0, 2],  # household 4
    ]
    expected = pd.DataFrame(expected_rows, index=[3, 4], columns=alternatives).astype('float')

    pdt.assert_frame_equal(utils, expected, check_names=False)
Esempio n. 5
0
def test_build_cdap_spec_hhsize2(people, model_settings):
    """Check the utilities produced by the generated hhsize 2 CDAP spec."""

    hhsize = 2

    # spec and coefficients are read from the configs rather than fixtures
    indiv_spec = simulate.read_model_spec(file_name='cdap_indiv_and_hhsize1.csv')

    coefficients_path = config.config_file_path('cdap_interaction_coefficients.csv')
    raw_coefficients = pd.read_csv(coefficients_path, comment='#')
    interaction_coefficients = cdap.preprocess_interaction_coefficients(raw_coefficients)

    person_type_map = model_settings.get('PERSON_TYPE_MAP', {})

    with chunk.chunk_log('test_build_cdap_spec_hhsize2', base=True):
        # individual utilities need cdap_rank assigned to the persons first
        cdap.assign_cdap_rank(people, person_type_map)
        person_utilities = cdap.individual_utilities(people, indiv_spec, locals_d=None)

        household_choosers = cdap.hh_choosers(person_utilities, hhsize=hhsize)

        interaction_spec = cdap.build_cdap_spec(
            interaction_coefficients, hhsize=hhsize, cache=False)

        # DataFrame.dot aligns the expression value columns with the spec index,
        # so the spec expressions must be unique (uniquified when the spec was read)
        assert interaction_spec.index.is_unique

        expression_values = simulate.eval_variables(interaction_spec.index, household_choosers)
        labels_match = interaction_spec.index.values == expression_values.columns.values
        assert labels_match.all()

    utils = expression_values.dot(interaction_spec)

    # one column per two-person activity pattern alternative
    alternatives = ['HH', 'HM', 'HN', 'MH', 'MM', 'MN', 'NH', 'NM', 'NN']
    expected_rows = [
        [0, 3, 0, 3, 7, 3, 0, 3, 0],  # household 3
        [0, 0, 1, 1, 1, 2, 0, 0, 2],  # household 4
    ]
    expected = pd.DataFrame(expected_rows, index=[3, 4], columns=alternatives).astype('float')

    pdt.assert_frame_equal(utils, expected, check_names=False)
Esempio n. 6
0
def extra_hh_member_choices(persons, cdap_fixed_relative_proportions, locals_d,
                            trace_hh_id, trace_label):
    """
    Choose activities for the 'extra' household members that cdap did not handle.

    Following the CTRAMP HouseholdCoordinatedDailyActivityPatternModel, "a separate,
    simple cross-sectional distribution is looked up for the remaining household members"

    The cdap_fixed_relative_proportions spec is evaluated like an activitysim logit
    utility spec, EXCEPT that the results are relative proportions rather than utilities:
    they are normalized to sum to 1.0 per person without being exponentiated first.

    Parameters
    ----------
    persons : pandas.DataFrame
        Table of persons data indexed on _persons_index_
        We expect, at least, columns [_hh_id_, _ptype_]; 'cdap_rank' is also read here.
    cdap_fixed_relative_proportions
        spec to compute/specify the relative proportions of each activity (M, N, H)
        that should be used to choose activities for additional household members
        not handled by CDAP.
    locals_d : Dict
        dictionary of local variables that eval_variables adds to the environment
        for an evaluation of an expression that begins with @
    trace_hh_id
        when truthy, proportions, probs, choices and rands are written with tracing.trace_df
    trace_label : str
        label extended with 'extra_hh_member_choices' and used to name trace output

    Returns
    -------
    choices : pandas.Series
        list of alternatives chosen for all extra members, indexed by _persons_index_
        (an empty float64 Series when there are no extra members)
    """

    trace_label = tracing.extend_trace_label(trace_label, 'extra_hh_member_choices')

    # extra household members have cdap_rank > MAX_HHSIZE
    is_extra_member = persons['cdap_rank'] > MAX_HHSIZE
    extra_members = persons[is_extra_member]

    if not len(extra_members.index):
        return pd.Series(dtype='float64')

    # evaluate the spec expressions, then weight them by the spec values;
    # the result is a relative proportion (not a utility) for each activity
    spec = cdap_fixed_relative_proportions
    expression_values = simulate.eval_variables(spec.index, extra_members, locals_d)
    proportions = expression_values.dot(spec)

    # normalize each person's proportions so they sum to 1.0
    row_totals = proportions.sum(axis=1)
    probs = proportions.div(row_totals, axis=0)

    # idx_choices is a series (indexed on _persons_index_) holding the 0-based
    # position of the chosen column in probs
    idx_choices, rands = logit.make_choices(probs, trace_label=trace_label)

    # translate column positions into activity names
    activity_names = probs.columns[idx_choices].values
    choices = pd.Series(activity_names, index=probs.index)

    if not trace_hh_id:
        return choices

    traced_tables = (
        (proportions, '%s.extra_hh_member_choices_proportions'),
        (probs, '%s.extra_hh_member_choices_probs'),
        (choices, '%s.extra_hh_member_choices_choices'),
    )
    for table, name_template in traced_tables:
        tracing.trace_df(table, name_template % trace_label,
                         column_labels=['expression', 'person'])

    tracing.trace_df(rands, '%s.extra_hh_member_choices_rands' % trace_label,
                     columns=[None, 'rand'])

    return choices
Esempio n. 7
0
def extra_hh_member_choices(persons, cdap_fixed_relative_proportions, locals_d,
                            trace_hh_id, trace_label):
    """
    Generate the activity choices for the 'extra' household members who weren't handled by cdap

    Following the CTRAMP HouseholdCoordinatedDailyActivityPatternModel, "a separate,
    simple cross-sectional distribution is looked up for the remaining household members"

    The cdap_fixed_relative_proportions spec is handled like an activitysim logit utility spec,
    EXCEPT that the values computed are relative proportions, not utilities
    (i.e. values are not exponentiated before being normalized to probabilities summing to 1.0)

    Parameters
    ----------
    persons : pandas.DataFrame
        Table of persons data indexed on _persons_index_
        We expect, at least, columns [_hh_id_, _ptype_]; 'cdap_rank' is also read here.
    cdap_fixed_relative_proportions
        spec to compute/specify the relative proportions of each activity (M, N, H)
        that should be used to choose activities for additional household members
        not handled by CDAP.
    locals_d : Dict
        dictionary of local variables that eval_variables adds to the environment
        for an evaluation of an expression that begins with @
    trace_hh_id
        when truthy, proportions, probs, choices and rands are written with tracing.trace_df
    trace_label : str
        label extended with 'extra_hh_member_choices' and used to name trace output

    Returns
    -------
    choices : pandas.Series
        list of alternatives chosen for all extra members, indexed by _persons_index_
        (an empty float64 Series when there are no extra members)
    """

    trace_label = tracing.extend_trace_label(trace_label, 'extra_hh_member_choices')

    # extra household members have cdap_rank > MAX_HHSIZE
    choosers = persons[persons['cdap_rank'] > MAX_HHSIZE]

    if len(choosers.index) == 0:
        # An explicit dtype keeps the empty result float64 on every pandas version:
        # a bare pd.Series() warns on pandas 1.x and becomes object dtype on 2.x.
        return pd.Series(dtype='float64')

    # eval the expression file
    values = simulate.eval_variables(cdap_fixed_relative_proportions.index, choosers, locals_d)

    # cdap_fixed_relative_proportions computes relative proportions by ptype, not utilities
    proportions = values.dot(cdap_fixed_relative_proportions)

    # convert relative proportions to probability
    probs = proportions.div(proportions.sum(axis=1), axis=0)

    # select an activity pattern alternative for each person based on probability
    # idx_choices is a series (indexed on _persons_index_ ) with the chosen alternative represented
    # as the integer (0 based) index of the chosen column from probs
    idx_choices, rands = logit.make_choices(probs, trace_label=trace_label)

    # convert choice from column index to activity name
    choices = pd.Series(probs.columns[idx_choices].values, index=probs.index)

    if trace_hh_id:
        tracing.trace_df(proportions, '%s.extra_hh_member_choices_proportions' % trace_label,
                         column_labels=['expression', 'person'])
        tracing.trace_df(probs, '%s.extra_hh_member_choices_probs' % trace_label,
                         column_labels=['expression', 'person'])
        tracing.trace_df(choices, '%s.extra_hh_member_choices_choices' % trace_label,
                         column_labels=['expression', 'person'])
        tracing.trace_df(rands, '%s.extra_hh_member_choices_rands' % trace_label,
                         columns=[None, 'rand'])

    return choices
Esempio n. 8
0
def household_activity_choices(indiv_utils,
                               interaction_coefficients,
                               hhsize,
                               trace_hh_id=None,
                               trace_label=None):
    """
    Calculate household utilities for each activity pattern alternative for households of hhsize
    The resulting activity pattern for each household will be coded as a string of activity codes.
    e.g. 'MNHH' for a 4 person household with activities Mandatory, NonMandatory, Home, Home

    Parameters
    ----------
    indiv_utils : pandas.DataFrame
        CDAP utilities for each individual, ignoring interactions
        ind_utils has index of _persons_index_ and a column for each alternative
        i.e. three columns 'M' (Mandatory), 'N' (NonMandatory), 'H' (Home)

    interaction_coefficients : pandas.DataFrame
        Rules and coefficients for generating interaction specs for different household sizes

    hhsize : int
        the size of household for which activity pattern should be calculated (1..MAX_HHSIZE)

    trace_hh_id : optional
        when truthy, choosers/vars (hhsize > 1 only), utils, probs, choices and rands are
        written with tracing.trace_df; for hhsize > 1 the spec is also traced when this id
        is in the choosers index

    trace_label : str, optional
        label used to name trace output

    Returns
    -------
    choices : pandas.Series
        the chosen cdap activity pattern for each household represented as a string (e.g. 'MNH')
        with same index (_hh_index_) as utils
        (an empty float64 Series when there are no households to choose for)

    """

    if hhsize == 1:
        # for 1 person households, there are no interactions to account for
        # and the household utils are the same as the individual utils
        choosers = expression_values = None
        # extract the individual utilities for individuals from hhsize 1 households
        utils = indiv_utils.loc[indiv_utils[_hh_size_] == 1,
                                [_hh_id_, 'M', 'N', 'H']]
        # index on household_id, not person_id
        set_hh_index(utils)
    else:

        choosers = hh_choosers(indiv_utils, hhsize=hhsize)

        spec = build_cdap_spec(interaction_coefficients,
                               hhsize,
                               trace_spec=(trace_hh_id in choosers.index),
                               trace_label=trace_label)

        expression_values = eval_variables(spec.index, choosers)

        utils = expression_values.dot(spec).astype('float')

    if len(utils.index) == 0:
        # An explicit dtype keeps the empty result float64 on every pandas version:
        # a bare pd.Series() warns on pandas 1.x and becomes object dtype on 2.x.
        return pd.Series(dtype='float64')

    probs = logit.utils_to_probs(utils, trace_label=trace_label)

    # select an activity pattern alternative for each household based on probability
    # result is a series indexed on _hh_index_ with the (0 based) index of the column from probs
    idx_choices, rands = logit.make_choices(probs, trace_label=trace_label)

    # convert choice expressed as index into alternative name from util column label
    choices = pd.Series(utils.columns[idx_choices].values, index=utils.index)

    if trace_hh_id:

        if hhsize > 1:
            # choosers and expression values only exist when interactions were evaluated
            tracing.trace_df(choosers,
                             '%s.hhsize%d_choosers' % (trace_label, hhsize),
                             column_labels=['expression', 'person'])
            tracing.trace_df(expression_values,
                             '%s.hhsize%d_vars' % (trace_label, hhsize),
                             column_labels=['expression', 'person'])

        tracing.trace_df(utils,
                         '%s.hhsize%d_utils' % (trace_label, hhsize),
                         column_labels=['expression', 'household'])
        tracing.trace_df(probs,
                         '%s.hhsize%d_probs' % (trace_label, hhsize),
                         column_labels=['expression', 'household'])
        tracing.trace_df(choices,
                         '%s.hhsize%d_activity_choices' %
                         (trace_label, hhsize),
                         column_labels=['expression', 'household'])
        tracing.trace_df(rands,
                         '%s.hhsize%d_rands' % (trace_label, hhsize),
                         columns=[None, 'rand'])

    return choices