예제 #1
0
def eval_nl_logsums(choosers, spec, nest_spec, locals_d, trace_label=None):
    """
    Nested-logit counterpart of eval_nl that yields logsums rather than
    simulated choices.

    Parameters
    ----------
    choosers : pandas.DataFrame
        Rows to evaluate; its index becomes the index of the result.
    spec : pandas.DataFrame
        Model spec; `spec.index` is handed to eval_variables and the whole
        frame to compute_utilities.
    nest_spec
        Nesting description, passed through to compute_nested_exp_utilities.
    locals_d : dict
        Passed through to eval_variables.
    trace_label : str, optional
        Extended with 'nl_logsums'; trace dumps are only written when the
        extended label is truthy.

    Returns
    -------
    logsums : pandas.Series
        Indexed like `choosers`; each value is the natural log of the
        `root` entry of the nested exponentiated utilities.
    """

    trace_label = tracing.extend_trace_label(trace_label, 'nl_logsums')
    variability_check_enabled = tracing.check_for_variability()

    timer = tracing.print_elapsed_time()

    # one column per spec expression (column names match spec.index)
    expr_vals = eval_variables(spec.index, choosers, locals_d)
    timer = tracing.print_elapsed_time("eval_variables", timer, debug=True)

    if variability_check_enabled:
        _check_for_variability(expr_vals, trace_label)
        timer = tracing.print_elapsed_time("_check_for_variability",
                                           timer,
                                           debug=True)

    # utilities of the leaf alternatives
    leaf_utils = compute_utilities(expr_vals, spec)
    timer = tracing.print_elapsed_time("expression_values.dot",
                                       timer,
                                       debug=True)

    # exponentiated utilities for leaves and nests alike
    nest_exp_utils = compute_nested_exp_utilities(leaf_utils, nest_spec)
    timer = tracing.print_elapsed_time("compute_nested_exp_utilities",
                                       timer,
                                       debug=True)

    # the logsum is the log of the root nest's exponentiated utility
    logsums = pd.Series(np.log(nest_exp_utils.root), index=choosers.index)
    timer = tracing.print_elapsed_time("logsums", timer, debug=True)

    if not trace_label:
        return logsums

    # attach the logsum so it shows up alongside the nest utilities
    nest_exp_utils['logsum'] = logsums

    tracing.trace_df(choosers, '%s.choosers' % trace_label)

    # (file-name pattern, object to dump, label for the value column)
    dumps = (
        ('%s.raw_utilities', leaf_utils, 'utility'),
        ('%s.nested_exp_utilities', nest_exp_utils, 'utility'),
        ('%s.logsums', logsums, 'logsum'),
    )
    for pattern, frame, value_label in dumps:
        tracing.trace_df(frame,
                         pattern % trace_label,
                         column_labels=['alternative', value_label])

    return logsums
예제 #2
0
def asim_households(asim_store, households_sample_size, trace_hh_id):
    """
    Load the "households" table from the store, optionally reduce it to a
    sample, and register the result with orca and the random number generator.

    Selection rules, in order:

    * tracing a household with a sample size of exactly 1: keep only the
      traced household (via tracing.slice_ids);
    * a positive sample size smaller than the table: draw that many random
      rows, and if the traced household exists in the full table but was not
      drawn, put it in place of the first sampled row;
    * otherwise: use the full table.

    Returns
    -------
    pandas.DataFrame
        The households frame that was registered as 'asim_households'.
    """

    all_households = asim_store["households"]

    trace_only = trace_hh_id and households_sample_size == 1

    if trace_only:
        # only the traced household (empty if it is not in the store)
        households = tracing.slice_ids(all_households, trace_hh_id)

    elif 0 < households_sample_size < len(all_households.index):
        households = asim_simulate.random_rows(all_households,
                                               households_sample_size)

        # did the random draw miss a traced household that does exist?
        missed_trace_hh = (trace_hh_id
                           and trace_hh_id not in households.index
                           and trace_hh_id in all_households.index)
        if missed_trace_hh:
            # swap the traced household in for the first sampled row
            print(
                "replacing household %s with %s in household sample" %
                (households.index[0], trace_hh_id))
            traced = tracing.slice_ids(all_households, trace_hh_id)
            households = pd.concat([traced, households[1:]])

    else:
        households = all_households

    print("loaded households %s" % (households.shape,))

    # register the concrete dataframe under the table name
    orca.add_table('asim_households', households)

    asim_utils.get_rn_generator().add_channel(households, 'asim_households')

    if trace_hh_id:
        tracing.register_traceable_table('asim_households', households)
        tracing.trace_df(households, "asim_households", warn_if_empty=True)

    return households
예제 #3
0
def asim_persons(asim_store, households_sample_size, asim_households,
                 trace_hh_id):
    """
    Load the "persons" table from the store and register it with orca and
    the random number generator.

    When `households_sample_size` is positive, only persons whose
    `household_id` appears in the index of `asim_households` are kept.

    Returns
    -------
    pandas.DataFrame
        The persons frame that was registered as 'asim_persons'.
    """

    persons = asim_store["persons"]

    if households_sample_size > 0:
        # restrict to members of the sampled households
        in_sampled_hh = persons.household_id.isin(asim_households.index)
        persons = persons[in_sampled_hh]

    print("loaded asim asim_persons %s" % (persons.shape,))

    # register the concrete dataframe under the table name
    orca.add_table('asim_persons', persons)

    rng = asim_utils.get_rn_generator()
    rng.add_channel(persons, 'asim_persons')

    if trace_hh_id:
        tracing.register_traceable_table('asim_persons', persons)
        tracing.trace_df(persons, "asim_persons", warn_if_empty=True)

    return persons
예제 #4
0
def eval_mnl_logsums(choosers, spec, locals_d, trace_label=None):
    """
    like eval_mnl except return logsums instead of making choices

    Parameters
    ----------
    choosers : pandas.DataFrame
        Rows to evaluate; the result is indexed like this frame.
    spec : pandas.DataFrame
        Model spec; `spec.index` is passed to eval_variables and the whole
        frame to compute_utilities.
    locals_d : dict
        Passed through to eval_variables.
    trace_label : str, optional
        Extended with 'mnl'; trace dumps are only written when the extended
        label is truthy.

    Returns
    -------
    logsums : pandas.Series
        Index will be that of `choosers`, values will be
        logsum across spec column values
    """

    trace_label = tracing.extend_trace_label(trace_label, 'mnl')
    check_for_variability = tracing.check_for_variability()

    print("running eval_mnl_logsums")

    expression_values = eval_variables(spec.index, choosers, locals_d)

    if check_for_variability:
        _check_for_variability(expression_values, trace_label)

    # utility values
    utilities = compute_utilities(expression_values, spec)

    # logsum is log of exponentiated utilities summed across
    # columns of each chooser row
    # (.values rather than as_matrix(): as_matrix was deprecated and then
    # removed from pandas, while .values works on old and new versions)
    utils_arr = utilities.values.astype('float')
    logsums = np.log(np.exp(utils_arr).sum(axis=1))
    logsums = pd.Series(logsums, index=choosers.index)

    if trace_label:
        # add logsum to utilities for tracing
        utilities['logsum'] = logsums

        tracing.trace_df(choosers, '%s.choosers' % trace_label)
        tracing.trace_df(utilities,
                         '%s.utilities' % trace_label,
                         column_labels=['alternative', 'utility'])
        tracing.trace_df(logsums,
                         '%s.logsums' % trace_label,
                         column_labels=['alternative', 'logsum'])
        tracing.trace_df(expression_values,
                         '%s.expression_values' % trace_label,
                         column_labels=['expression', None])

    return logsums
예제 #5
0
def compute_logsums(choosers, logsum_spec, logsum_settings, skim_dict,
                    skim_stack, alt_col_name, chunk_size, trace_hh_id,
                    trace_label):
    """
    Set up skim wrappers and expression locals, then delegate to
    asim_simulate.simple_simulate_logsums.

    Parameters
    ----------
    choosers
        Rows to compute logsums for (only `choosers.index` is read here).
    logsum_spec
        Spec passed to simple_simulate_logsums; also dumped via trace_df
        when `trace_hh_id` is truthy.
    logsum_settings
        Source of the nest spec (get_logit_model_settings) and of the
        optional constants (get_model_constants).
    skim_dict
        Wrapped with keys ('TAZ', alt_col_name) and exposed as `od_skims`.
    skim_stack
        Wrapped twice: TAZ -> alt_col_name keyed on "out_period"
        (`odt_skims`) and alt_col_name -> TAZ keyed on "in_period"
        (`dot_skims`).
    alt_col_name
        Name used as the non-TAZ key of every skim wrapper.
    chunk_size
        Passed through to simple_simulate_logsums.
    trace_hh_id
        When truthy, the spec is written to the trace output.
    trace_label
        Extended with 'compute_logsums' and passed down.

    Returns
    -------
    logsums: pandas series
        computed logsums with same index as choosers
    """

    trace_label = tracing.extend_trace_label(trace_label, 'compute_logsums')

    nest_spec = get_logit_model_settings(logsum_settings)
    constants = get_model_constants(logsum_settings)

    print("Running compute_logsums with %d choosers" % len(choosers.index))

    if trace_hh_id:
        spec_label = tracing.extend_trace_label(trace_label, 'spec')
        tracing.trace_df(logsum_spec,
                         spec_label,
                         slicer='NONE',
                         transpose=False)

    # skim accessors: outbound by period, inbound by period, and plain O-D
    outbound = skim_stack.wrap(left_key='TAZ',
                               right_key=alt_col_name,
                               skim_key="out_period")
    inbound = skim_stack.wrap(left_key=alt_col_name,
                              right_key='TAZ',
                              skim_key="in_period")
    plain_od = skim_dict.wrap('TAZ', alt_col_name)

    skims = [outbound, inbound, plain_od]

    # names available to spec expressions
    locals_d = dict(odt_skims=outbound, dot_skims=inbound, od_skims=plain_od)
    if constants is not None:
        locals_d.update(constants)

    return asim_simulate.simple_simulate_logsums(choosers,
                                                 logsum_spec,
                                                 nest_spec,
                                                 skims=skims,
                                                 locals_d=locals_d,
                                                 chunk_size=chunk_size,
                                                 trace_label=trace_label)
예제 #6
0
def _interaction_simulate(choosers,
                          alternatives,
                          spec,
                          skims=None,
                          locals_d=None,
                          sample_size=None,
                          trace_label=None,
                          trace_choice_name=None):
    """
    Run a MNL simulation in the situation in which alternatives must
    be merged with choosers because there are interaction terms or
    because alternatives are being sampled.

    Parameters are same as for public function interaction_simulate

    spec : dataframe
        one row per spec expression and one col with utility coefficient
        (RuntimeError is raised if it has more than one column)

    interaction_df : dataframe
        cross join (cartesian product) of choosers with alternatives
        combines columns of choosers and alternatives
        len(df) == len(choosers) * sample_size
        (sample_size defaults to, and is clipped at, len(alternatives))
        index values (non-unique) are index values from alternatives df

    interaction_utilities : dataframe
        the utility of each alternative is sum of the partial utilities determined by the
        various spec expressions and their corresponding coefficients
        yielding a dataframe  with len(interaction_df) rows and one utility column
        having the same index as interaction_df (non-unique values from alternatives df)

    utilities : dataframe
        interaction_utilities reshaped as a dataframe
        with one row per chooser and one column per sampled alternative

    probs : dataframe
        utilities exponentiated and converted to probabilities
        same shape as utilities, one row per chooser and one column for alternative

    positions : series
        choices among alternatives with the chosen alternative represented
        as the integer index of the selected alternative column in probs

    choices : series
        series with the alternative chosen for each chooser
        the index is same as choosers
        and the series value is the alternative df index of chosen alternative

    Returns
    -------
    ret : pandas.Series
        A series where index should match the index of the choosers DataFrame
        and values will match the index of the alternatives DataFrame -
        choices are simulated in the standard Monte Carlo fashion

    Raises
    ------
    RuntimeError
        if spec has more than one column
    """

    trace_label = tracing.extend_trace_label(trace_label,
                                             'interaction_simulate')
    have_trace_targets = trace_label and tracing.has_trace_targets(choosers)

    if have_trace_targets:
        tracing.trace_df(choosers,
                         tracing.extend_trace_label(trace_label, 'choosers'))
        tracing.trace_df(alternatives,
                         tracing.extend_trace_label(trace_label,
                                                    'alternatives'),
                         slicer='NONE',
                         transpose=False)

    if len(spec.columns) > 1:
        raise RuntimeError('spec must have only one column')

    sample_size = sample_size or len(alternatives)

    if sample_size > len(alternatives):
        # logger.warning is the supported spelling; logger.warn is a
        # deprecated alias
        logger.warning("clipping sample size %s to len(alternatives) %s" %
                       (sample_size, len(alternatives)))
        sample_size = len(alternatives)

    # if using skims, copy index into the dataframe, so it will be
    # available as the "destination" for the skims dereference below
    # NOTE: this adds a column to the caller's alternatives frame in place
    if skims:
        alternatives[alternatives.index.name] = alternatives.index

    # cross join choosers and alternatives (cartesian product)
    # for every chooser, there will be a row for each alternative
    # index values (non-unique) are from alternatives df
    interaction_df = interaction_dataset(choosers, alternatives, sample_size)

    if skims:
        add_skims(interaction_df, skims)

    # evaluate expressions from the spec multiply by coefficients and sum
    # spec is df with one row per spec expression and one col with utility coefficient
    # interaction_utilities is a df with one utility column and one row per
    # row in interaction_df
    if have_trace_targets:
        trace_rows, trace_ids = tracing.interaction_trace_rows(
            interaction_df, choosers)

        tracing.trace_df(interaction_df[trace_rows],
                         tracing.extend_trace_label(trace_label,
                                                    'interaction_df'),
                         slicer='NONE',
                         transpose=False)
    else:
        trace_rows = trace_ids = None

    interaction_utilities, trace_eval_results \
        = eval_interaction_utilities(spec, interaction_df, locals_d, trace_label, trace_rows)

    if have_trace_targets:
        tracing.trace_interaction_eval_results(
            trace_eval_results, trace_ids,
            tracing.extend_trace_label(trace_label, 'eval'))

        tracing.trace_df(interaction_utilities[trace_rows],
                         tracing.extend_trace_label(trace_label,
                                                    'interaction_utilities'),
                         slicer='NONE',
                         transpose=False)

    # reshape utilities (one utility column and one row per row in interaction_df)
    # to a dataframe with one row per chooser and one column per alternative
    # (.values rather than as_matrix(): as_matrix was deprecated and then
    # removed from pandas, while .values works on old and new versions)
    utilities = pd.DataFrame(
        interaction_utilities.values.reshape(len(choosers), sample_size),
        index=choosers.index)

    if have_trace_targets:
        tracing.trace_df(utilities,
                         tracing.extend_trace_label(trace_label, 'utilities'),
                         column_labels=['alternative', 'utility'])

    # convert to probabilities (utilities exponentiated and normalized to probs)
    # probs is same shape as utilities, one row per chooser and one column for alternative
    probs = utils_to_probs(utilities,
                           trace_label=trace_label,
                           trace_choosers=choosers)

    if have_trace_targets:
        tracing.trace_df(probs,
                         tracing.extend_trace_label(trace_label, 'probs'),
                         column_labels=['alternative', 'probability'])

    # make choices
    # positions is series with the chosen alternative represented as a column index in probs
    # which is an integer between zero and num alternatives in the alternative sample
    positions = make_choices(probs,
                             trace_label=trace_label,
                             trace_choosers=choosers)

    # need to get from an integer offset into the alternative sample to the alternative index
    # that is, we want the index value of the row that is offset by <position> rows into the
    # tranche of this choosers alternatives created by cross join of alternatives and choosers

    # offsets is the offset into interaction_df of first row of chooser alternatives
    offsets = np.arange(len(positions)) * sample_size
    # resulting index has one element per chooser row and is in same order as choosers
    choices = interaction_utilities.index.take(positions + offsets)

    # create a series with index from choosers and the index of the chosen alternative
    choices = pd.Series(choices, index=choosers.index)

    if have_trace_targets:
        tracing.trace_df(choices,
                         tracing.extend_trace_label(trace_label, 'choices'),
                         columns=[None, trace_choice_name])

    return choices
예제 #7
0
def eval_nl(choosers,
            spec,
            nest_spec,
            locals_d=None,
            trace_label=None,
            trace_choice_name=None):
    """
    Simulate nested-logit choices for a spec with no alternative-specific
    data (no interaction terms, no sampling of alternatives).

    Parameters
    ----------
    choosers : pandas.DataFrame
    spec : pandas.DataFrame
        Variable expressions in the index, one coefficient column per
        alternative.
    nest_spec:
        Nesting structure and nesting coefficients; passed to the
        compute_nested_* helpers and compute_base_probabilities.
    locals_d : Dict
        Local variables made available to expression evaluation
        (passed to eval_variables).
    trace_label: str
        Label for trace log entries and dump file names; it is extended
        with 'nl' and dumps are written only when the result is truthy.
    trace_choice_name: str
        Column label used in the trace dump of choices.

    Returns
    -------
    choices : pandas.Series
        Index will be that of `choosers`, values will match the columns
        of `spec`.
    """

    trace_label = tracing.extend_trace_label(trace_label, 'nl')
    variability_check_enabled = tracing.check_for_variability()

    # one column per spec expression (column names match spec.index)
    design = eval_variables(spec.index, choosers, locals_d)

    if variability_check_enabled:
        _check_for_variability(design, trace_label)

    # leaf utilities: expression values times per-alternative coefficients,
    # giving one row per chooser and one column per alternative
    # (DataFrame.dot aligns design columns with spec index values)
    leaf_utilities = design.dot(spec)

    # exponentiated utilities for leaves and nests
    nest_exp_utils = compute_nested_exp_utilities(leaf_utilities, nest_spec)

    # probability of each node relative to its siblings in the same nest
    nest_probs = compute_nested_probabilities(nest_exp_utils,
                                              nest_spec,
                                              trace_label=trace_label)

    # flattened probabilities of the leaf alternatives
    leaf_probs = compute_base_probabilities(nest_probs, nest_spec)

    # rows whose probabilities do not sum to (about) 1 are reported here so
    # the message is clear before make_choices gets to see them
    BAD_PROB_THRESHOLD = 0.001
    row_totals = leaf_probs.sum(axis=1)
    bad_rows = (row_totals - 1.0).abs() > BAD_PROB_THRESHOLD

    if bad_rows.any():
        bad_label = tracing.extend_trace_label(trace_label, 'eval_nl')
        report_bad_choices(bad_rows,
                           leaf_probs,
                           bad_label,
                           tag='bad_probs',
                           msg="base_probabilities all zero")

    choices = make_choices(leaf_probs, trace_label, trace_choosers=choosers)

    if not trace_label:
        return choices

    tracing.trace_df(choosers, '%s.choosers' % trace_label)

    # (file-name pattern, frame to dump, label for the value column)
    per_alternative_dumps = (
        ('%s.raw_utilities', leaf_utilities, 'utility'),
        ('%s.nested_exp_utilities', nest_exp_utils, 'utility'),
        ('%s.nested_probabilities', nest_probs, 'probability'),
        ('%s.base_probabilities', leaf_probs, 'probability'),
    )
    for pattern, frame, value_label in per_alternative_dumps:
        tracing.trace_df(frame,
                         pattern % trace_label,
                         column_labels=['alternative', value_label])

    tracing.trace_df(choices,
                     '%s.choices' % trace_label,
                     columns=[None, trace_choice_name])
    tracing.trace_df(design,
                     '%s.model_design' % trace_label,
                     column_labels=['expression', None])

    return choices
예제 #8
0
def eval_mnl(choosers,
             spec,
             locals_d=None,
             trace_label=None,
             trace_choice_name=None):
    """
    Simulate multinomial-logit choices for a spec with no
    alternative-specific data (no interaction terms, no sampling).

    Every spec row contributes a partial utility to each alternative: the
    row's expression is evaluated per chooser and multiplied by that row's
    coefficient for the alternative. Summing over rows (a matrix product)
    gives one utility per chooser and alternative.

    Parameters
    ----------
    choosers : pandas.DataFrame
    spec : pandas.DataFrame
        Variable expressions in the index, one coefficient column per
        alternative.
    locals_d : Dict
        Local variables made available to expression evaluation
        (passed to eval_variables).
    trace_label: str
        Label for trace log entries and dump file names; it is extended
        with 'mnl' and dumps are written only when the result is truthy.
    trace_choice_name: str
        Column label used in the trace dump of choices.

    Returns
    -------
    choices : pandas.Series
        Index will be that of `choosers`, values will match the columns
        of `spec`.
    """

    trace_label = tracing.extend_trace_label(trace_label, 'mnl')
    variability_check_enabled = tracing.check_for_variability()

    design = eval_variables(spec.index, choosers, locals_d)

    if variability_check_enabled:
        _check_for_variability(design, trace_label)

    # one row per chooser, one column per alternative
    # (DataFrame.dot aligns design columns with spec index values)
    utilities = design.dot(spec)

    probs = utils_to_probs(utilities,
                           trace_label=trace_label,
                           trace_choosers=choosers)
    choices = make_choices(probs,
                           trace_label=trace_label,
                           trace_choosers=choosers)

    if not trace_label:
        return choices

    tracing.trace_df(choosers, '%s.choosers' % trace_label)

    # (file-name pattern, frame to dump, label for the value column)
    per_alternative_dumps = (
        ('%s.utilities', utilities, 'utility'),
        ('%s.probs', probs, 'probability'),
    )
    for pattern, frame, value_label in per_alternative_dumps:
        tracing.trace_df(frame,
                         pattern % trace_label,
                         column_labels=['alternative', value_label])

    tracing.trace_df(choices,
                     '%s.choices' % trace_label,
                     columns=[None, trace_choice_name])
    tracing.trace_df(design,
                     '%s.model_design' % trace_label,
                     column_labels=['expression', None])

    return choices
예제 #9
0
def eval_nl(choosers,
            spec,
            nest_spec,
            locals_d,
            trace_label=None,
            trace_choice_name=None):
    """
    Simulate nested-logit choices for a spec with no alternative-specific
    data (no interaction terms, no sampling of alternatives), reporting
    elapsed time after each stage.

    Parameters
    ----------
    choosers : pandas.DataFrame
    spec : pandas.DataFrame
        Variable expressions in the index, one coefficient column per
        alternative.
    nest_spec:
        Nesting structure and nesting coefficients; passed to the
        compute_nested_* helpers and compute_base_probabilities.
    locals_d : Dict or None
        Local variables made available to expression evaluation
        (passed to eval_variables).
    trace_label: str
        Label for trace log entries and dump file names; it is extended
        with 'nl' and dumps are written only when the result is truthy.
    trace_choice_name: str
        Column label used in the trace dump of choices.

    Returns
    -------
    choices : pandas.Series
        Index will be that of `choosers`, values will match the columns
        of `spec`.
    """

    trace_label = tracing.extend_trace_label(trace_label, 'nl')
    variability_check_enabled = tracing.check_for_variability()

    timer = tracing.print_elapsed_time()

    # one column per spec expression (column names match spec.index)
    expr_vals = eval_variables(spec.index, choosers, locals_d)
    timer = tracing.print_elapsed_time("eval_variables", timer, debug=True)

    if variability_check_enabled:
        _check_for_variability(expr_vals, trace_label)
    # timing for this stage is reported whether or not the check ran
    timer = tracing.print_elapsed_time("_check_for_variability",
                                       timer,
                                       debug=True)

    # utilities of the leaf alternatives
    leaf_utilities = compute_utilities(expr_vals, spec)
    timer = tracing.print_elapsed_time("expression_values.dot",
                                       timer,
                                       debug=True)

    # exponentiated utilities for leaves and nests
    nest_exp_utils = compute_nested_exp_utilities(leaf_utilities, nest_spec)
    timer = tracing.print_elapsed_time("compute_nested_exp_utilities",
                                       timer,
                                       debug=True)

    # probability of each node relative to its siblings in the same nest
    nest_probs = compute_nested_probabilities(nest_exp_utils,
                                              nest_spec,
                                              trace_label=trace_label)
    timer = tracing.print_elapsed_time("compute_nested_probabilities",
                                       timer,
                                       debug=True)

    # flattened probabilities of the leaf alternatives
    leaf_probs = compute_base_probabilities(nest_probs, nest_spec)
    timer = tracing.print_elapsed_time("compute_base_probabilities",
                                       timer,
                                       debug=True)

    # rows whose probabilities do not sum to (about) 1 are reported here so
    # the message is clear before make_choices gets to see them
    BAD_PROB_THRESHOLD = 0.001
    row_totals = leaf_probs.sum(axis=1)
    bad_rows = (row_totals - 1.0).abs() > BAD_PROB_THRESHOLD

    if bad_rows.any():
        bad_label = tracing.extend_trace_label(trace_label, 'eval_nl')
        logit.report_bad_choices(bad_rows,
                                 leaf_probs,
                                 bad_label,
                                 tag='bad_probs',
                                 msg="base_probabilities all zero")

    timer = tracing.print_elapsed_time("report_bad_choices",
                                       timer,
                                       debug=True)

    choices, rands = logit.make_choices(leaf_probs,
                                        trace_label,
                                        trace_choosers=choosers)
    timer = tracing.print_elapsed_time("logit.make_choices",
                                       timer,
                                       debug=True)

    if not trace_label:
        return choices

    tracing.trace_df(choosers, '%s.choosers' % trace_label)

    # (file-name pattern, frame to dump, label for the value column)
    per_alternative_dumps = (
        ('%s.raw_utilities', leaf_utilities, 'utility'),
        ('%s.nested_exp_utilities', nest_exp_utils, 'utility'),
        ('%s.nested_probabilities', nest_probs, 'probability'),
        ('%s.base_probabilities', leaf_probs, 'probability'),
    )
    for pattern, frame, value_label in per_alternative_dumps:
        tracing.trace_df(frame,
                         pattern % trace_label,
                         column_labels=['alternative', value_label])

    tracing.trace_df(choices,
                     '%s.choices' % trace_label,
                     columns=[None, trace_choice_name])
    tracing.trace_df(rands,
                     '%s.rands' % trace_label,
                     columns=[None, 'rand'])
    tracing.trace_df(expr_vals,
                     '%s.expression_values' % trace_label,
                     column_labels=['expression', None])

    return choices
예제 #10
0
def _interaction_sample(choosers,
                        alternatives,
                        spec,
                        sample_size,
                        alt_col_name,
                        skims=None,
                        locals_d=None,
                        trace_label=None):
    """
    Run a MNL simulation in the situation in which alternatives must
    be merged with choosers because there are interaction terms or
    because alternatives are being sampled.

    Parameters
    ----------
    choosers : pandas.DataFrame
        One row per chooser; its (named) index identifies the chooser
        in the returned dataframe.
    alternatives : pandas.DataFrame
        One row per alternative. NOTE: when `skims` is truthy the index
        is copied into a column of this frame in place (the caller's
        dataframe is modified).
    spec : pandas.DataFrame
        One row per spec expression and exactly one column holding the
        utility coefficient.
    sample_size : int
        Passed through to make_sample_choices.
    alt_col_name : str or None
        Name of the column holding the chosen alternative. Defaults to
        'alt_<alternatives index name>' when None.
    skims : optional
        If truthy, passed to asim_utils.add_skims together with the
        interaction dataframe before the spec is evaluated.
    locals_d : dict or None
        Passed through to eval_interaction_utilities.
    trace_label : str or None
        Base label for tracing; extended with 'interaction_simulate'.

    Intermediate values
    -------------------
    interaction_df : dataframe
        cross join (cartesian product) of choosers with alternatives
        combines columns of choosers and alternatives
        len(df) == len(choosers) * len(alternatives)
        index values (non-unique) are index values from alternatives df

    interaction_utilities : dataframe
        the utility of each alternative is sum of the partial
        utilities determined by the various spec expressions and
        their corresponding coefficients yielding a dataframe
        with len(interaction_df) rows and one utility column
        having the same index as interaction_df (non-unique values
        from alternatives df)

    utilities : dataframe
        interaction_utilities reshaped as a dataframe
        with one row per chooser and one column per alternative

    probs : dataframe
        utilities exponentiated and converted to probabilities
        same shape as utilities, one row per chooser and one column
        per alternative

    Returns
    -------
    choices_df : pandas.DataFrame

        A DataFrame where index should match the index of the choosers
        DataFrame and columns alt_col_name, prob, rand, pick_count

        prob: float
            the probability of the chosen alternative
        rand: float
            the rand that did the choosing
        pick_count : int
            number of duplicate picks for chooser, alt

    Raises
    ------
    RuntimeError
        If `spec` has more than one column.
    """

    trace_label = tracing.extend_trace_label(trace_label,
                                             'interaction_simulate')
    have_trace_targets = trace_label and tracing.has_trace_targets(choosers)

    if alt_col_name is None:
        alt_col_name = 'alt_%s' % alternatives.index.name

    if have_trace_targets:
        tracing.trace_df(choosers,
                         tracing.extend_trace_label(trace_label, 'choosers'))
        tracing.trace_df(alternatives,
                         tracing.extend_trace_label(trace_label,
                                                    'alternatives'),
                         slicer='NONE',
                         transpose=False)

    if len(spec.columns) > 1:
        raise RuntimeError('spec must have only one column')

    alternative_count = len(alternatives)

    # if using skims, copy index into the dataframe, so it will be
    # available as the "destination" for the skims dereference below
    if skims:
        alternatives[alternatives.index.name] = alternatives.index

    # cross join choosers and alternatives (cartesian product)
    # for every chooser, there will be a row for each alternative
    # index values (non-unique) are from alternatives df
    interaction_df = logit.interaction_dataset(choosers, alternatives,
                                               alternative_count)

    # sanity check on the cross join size; written as a product so it is
    # exact under both integer and true division and does not divide by
    # zero when there are no choosers
    assert alternative_count * len(choosers.index) == len(interaction_df.index)

    if skims:
        asim_utils.add_skims(interaction_df, skims)

    # evaluate expressions from the spec multiply by coefficients and sum
    # spec is df with one row per spec expression and one col
    # with utility coefficient column names of interaction_df match spec
    # index values utilities has utility value for element in the
    # cross product of choosers and alternatives interaction_utilities is
    # a df with one utility column and one row per row in interaction_df
    if have_trace_targets:
        trace_rows, trace_ids \
            = tracing.interaction_trace_rows(
                interaction_df, choosers, alternative_count)

        tracing.trace_df(interaction_df[trace_rows],
                         tracing.extend_trace_label(trace_label,
                                                    'interaction_df'),
                         slicer='NONE',
                         transpose=False)
    else:
        trace_rows = trace_ids = None

    interaction_utilities, trace_eval_results \
        = eval_interaction_utilities(
            spec, interaction_df, locals_d, trace_label, trace_rows)

    if have_trace_targets:
        tracing.trace_interaction_eval_results(
            trace_eval_results, trace_ids,
            tracing.extend_trace_label(trace_label, 'eval'))

        tracing.trace_df(interaction_utilities[trace_rows],
                         tracing.extend_trace_label(trace_label,
                                                    'interaction_utilities'),
                         slicer='NONE',
                         transpose=False)

    tracing.dump_df(DUMP, interaction_utilities, trace_label,
                    'interaction_utilities')

    # FIXME - do this in numpy, not pandas?
    # reshape utilities (one utility column and one row per
    # row in interaction_utilities) to a dataframe with one
    # row per chooser and one column per alternative
    # (.values rather than .as_matrix(), which was removed in pandas 1.0)
    utilities = pd.DataFrame(
        interaction_utilities.values.reshape(len(choosers),
                                             alternative_count),
        index=choosers.index)

    if have_trace_targets:
        tracing.trace_df(utilities,
                         tracing.extend_trace_label(trace_label, 'utilities'),
                         column_labels=['alternative', 'utility'])

    tracing.dump_df(DUMP, utilities, trace_label, 'utilities')

    # FIXME - do this in numpy, not pandas?
    # convert to probabilities (utilities exponentiated
    # and normalized to probs) probs is same shape as utilities,
    # one row per chooser and one column for alternative
    probs = logit.utils_to_probs(utilities,
                                 trace_label=trace_label,
                                 trace_choosers=choosers)

    if have_trace_targets:
        tracing.trace_df(probs,
                         tracing.extend_trace_label(trace_label, 'probs'),
                         column_labels=['alternative', 'probability'])

    choices_df = make_sample_choices(choosers, probs, interaction_utilities,
                                     sample_size, alternative_count,
                                     alt_col_name, trace_label)

    # make_sample_choices should return choosers index as choices_df column
    assert choosers.index.name in choices_df.columns

    # pick_count and pick_dup
    # pick_count is number of duplicate picks
    # pick_dup flag is True for all but first of duplicates
    pick_group = choices_df.groupby([choosers.index.name, alt_col_name])

    # number each item in each group from 0 to the length of that group - 1.
    choices_df['pick_count'] = pick_group.cumcount(ascending=True)
    # flag duplicate rows after first
    choices_df['pick_dup'] = choices_df['pick_count'] > 0
    # add reverse cumcount to get total pick_count
    # (conveniently faster than groupby.count + merge)
    choices_df['pick_count'] += pick_group.cumcount(ascending=False) + 1

    # drop the duplicates
    choices_df = choices_df[~choices_df['pick_dup']]
    del choices_df['pick_dup']

    # set index after groupby so we can trace on it
    choices_df.set_index(choosers.index.name, inplace=True)

    tracing.dump_df(DUMP, choices_df, trace_label, 'choices_df')

    if have_trace_targets:
        tracing.trace_df(choices_df,
                         tracing.extend_trace_label(trace_label,
                                                    'sampled_alternatives'),
                         transpose=False,
                         column_labels=['sample_alt', 'alternative'])

    return choices_df
예제 #11
0
def workplace_location_simulate(asim_persons_merged,
                                workplace_location_sample,
                                workplace_location_spec,
                                workplace_location_settings,
                                skim_dict,
                                destination_size_terms,
                                chunk_size,
                                trace_hh_id):
    """
    Pick a work_taz for each person from the alternatives listed in
    workplace_location_sample (per the original author, that sample is
    already annotated with mode_choice logsums and pick_count).

    The chosen alternative is stored as column "workplace_taz" on the
    "asim_persons" table, after which the dependent columns registered
    as "persons_workplace" are added.
    """

    dump_label = 'workplace_location_simulate'

    # A location is generated for every person for now; whether it is
    # actually used downstream is expected to depend on CDAP and
    # mandatory tour generation.
    persons = asim_persons_merged.to_frame()

    alt_col_name = workplace_location_settings["ALT_COL_NAME"]

    # The sample only carries the alternative id, so attach the
    # remaining alternative attributes (size terms) by a left join on
    # that id.
    sample_df = workplace_location_sample.to_frame()
    size_terms_df = destination_size_terms.to_frame()
    alternatives = pd.merge(sample_df,
                            size_terms_df,
                            how="left",
                            left_on=alt_col_name,
                            right_index=True)

    tracing.dump_df(DUMP, alternatives, dump_label, 'alternatives')

    constants = asim_utils.get_model_constants(workplace_location_settings)

    # size of the full pool of destinations the sample was drawn from
    pool_size = len(size_terms_df.index)

    print("Running workplace_location_simulate with %d persons" % len(
        persons))

    # Skim wrapper keyed on the chooser's "TAZ" and the alternative's
    # TAZ column; exposed to spec expressions under the name "skims".
    skims = skim_dict.wrap("TAZ", alt_col_name)

    locals_d = dict(skims=skims, sample_pool_size=float(pool_size))
    if constants is not None:
        locals_d.update(constants)

    # FIXME - MEMORY HACK - only include columns actually used in spec
    persons = persons[workplace_location_settings['SIMULATE_CHOOSER_COLUMNS']]

    tracing.dump_df(DUMP, persons, dump_label, 'choosers')

    choices = interaction_sample_simulate(
        persons,
        alternatives,
        spec=workplace_location_spec,
        choice_column=alt_col_name,
        skims=skims,
        locals_d=locals_d,
        chunk_size=chunk_size,
        trace_label=trace_hh_id and 'workplace_location',
        trace_choice_name='workplace_location')

    # No reindex is needed here because the choosers were never sliced
    # by row.

    tracing.print_summary('workplace_taz', choices, describe=True)

    orca.add_column("asim_persons", "workplace_taz", choices)

    asim_utils.add_dependent_columns("asim_persons", "persons_workplace")

    if trace_hh_id:
        dependent_columns = orca.get_table('persons_workplace').columns
        trace_columns = ['workplace_taz'] + dependent_columns
        merged_persons = orca.get_table('asim_persons_merged').to_frame()
        tracing.trace_df(merged_persons,
                         label="workplace_location",
                         columns=trace_columns,
                         warn_if_empty=True)