Beispiel #1
0
def eval_nl_logsums(choosers, spec, nest_spec, locals_d, trace_label=None):
    """
    Compute nested-logit logsums for the choosers instead of making choices.

    The spec expressions are evaluated against the choosers, turned into raw
    leaf utilities, and rolled up into nested exponentiated utilities; the
    logsum is the natural log of the ``root`` column of that result.

    Parameters
    ----------
    choosers : pandas.DataFrame
    spec : pandas.DataFrame
        Its index is passed to eval_variables as the expressions to evaluate.
    nest_spec :
        Nesting structure handed to compute_nested_exp_utilities.
    locals_d : dict or None
        Passed through unchanged to eval_variables.
    trace_label : str or None
        Extended with 'nl_logsums'; when the extended label is truthy the
        intermediate tables are dumped with tracing.trace_df.

    Returns
    -------
    logsums : pandas.Series
        Index will be that of `choosers`, values will be nest logsum
        based on spec column values
    """

    trace_label = tracing.extend_trace_label(trace_label, 'nl_logsums')
    variability_check_enabled = tracing.check_for_variability()

    timer = tracing.print_elapsed_time()

    # one column per spec expression (column names match spec index values)
    expression_values = eval_variables(spec.index, choosers, locals_d)
    timer = tracing.print_elapsed_time("eval_variables", timer, debug=True)

    if variability_check_enabled:
        _check_for_variability(expression_values, trace_label)
        timer = tracing.print_elapsed_time(
            "_check_for_variability", timer, debug=True)

    # leaf utilities
    raw_utilities = compute_utilities(expression_values, spec)
    timer = tracing.print_elapsed_time(
        "expression_values.dot", timer, debug=True)

    # exponentiated utilities for leaves and nests
    nested_exp_utilities = compute_nested_exp_utilities(
        raw_utilities, nest_spec)
    timer = tracing.print_elapsed_time(
        "compute_nested_exp_utilities", timer, debug=True)

    root_log = np.log(nested_exp_utilities.root)
    logsums = pd.Series(root_log, index=choosers.index)
    tracing.print_elapsed_time("logsums", timer, debug=True)

    if not trace_label:
        return logsums

    # include the logsum alongside the nested utilities in the trace output
    nested_exp_utilities['logsum'] = logsums

    trace_dumps = [
        (choosers,
         '%s.choosers' % trace_label,
         {}),
        (raw_utilities,
         '%s.raw_utilities' % trace_label,
         {'column_labels': ['alternative', 'utility']}),
        (nested_exp_utilities,
         '%s.nested_exp_utilities' % trace_label,
         {'column_labels': ['alternative', 'utility']}),
        (logsums,
         '%s.logsums' % trace_label,
         {'column_labels': ['alternative', 'logsum']}),
    ]
    for table, label, options in trace_dumps:
        tracing.trace_df(table, label, **options)

    return logsums
Beispiel #2
0
def eval_mnl_logsums(choosers, spec, locals_d, trace_label=None):
    """
    like eval_mnl except return logsums instead of making choices

    Parameters
    ----------
    choosers : pandas.DataFrame
    spec : pandas.DataFrame
        Its index is passed to eval_variables as the expressions to evaluate;
        the frame itself is passed to compute_utilities.
    locals_d : dict or None
        Passed through unchanged to eval_variables.
    trace_label : str or None
        Extended with 'mnl'; when the extended label is truthy the
        intermediate tables are dumped with tracing.trace_df.

    Returns
    -------
    logsums : pandas.Series
        Index will be that of `choosers`, values will be
        logsum across spec column values
    """

    trace_label = tracing.extend_trace_label(trace_label, 'mnl')
    check_for_variability = tracing.check_for_variability()

    # NOTE(review): this writes to stdout on every call; consider routing it
    # through the module logger at debug level instead.
    print("running eval_mnl_logsums")

    expression_values = eval_variables(spec.index, choosers, locals_d)

    if check_for_variability:
        _check_for_variability(expression_values, trace_label)

    # utility values
    utilities = compute_utilities(expression_values, spec)

    # logsum is log of exponentiated utilities summed across
    # columns of each chooser row
    # (.values replaces DataFrame.as_matrix(), which was removed in pandas 1.0
    # and returns the same ndarray on older versions)
    utils_arr = utilities.values.astype('float')
    # NOTE(review): np.exp can overflow for very large utilities
    logsums = np.log(np.exp(utils_arr).sum(axis=1))
    logsums = pd.Series(logsums, index=choosers.index)

    if trace_label:
        # add logsum to utilities for tracing
        utilities['logsum'] = logsums

        tracing.trace_df(choosers, '%s.choosers' % trace_label)
        tracing.trace_df(utilities,
                         '%s.utilities' % trace_label,
                         column_labels=['alternative', 'utility'])
        tracing.trace_df(logsums,
                         '%s.logsums' % trace_label,
                         column_labels=['alternative', 'logsum'])
        tracing.trace_df(expression_values,
                         '%s.expression_values' % trace_label,
                         column_labels=['expression', None])

    return logsums
Beispiel #3
0
def eval_interaction_utilities(spec, df, locals_d, trace_label, trace_rows):
    """
    Compute the utilities for a single-alternative spec evaluated in the context of df

    We could compute the utilities for interaction datasets just as we do for simple_simulate
    specs with multiple alternative columns by calling eval_variables and then computing the
    utilities by matrix-multiplication of eval results with the utility coefficients in the
    spec alternative columns.

    But interaction simulate computes the utilities of each alternative in the context of a
    separate row in interaction dataset df, and so there is only one alternative in spec.
    This turns out to be quite a bit faster (in this special case) than the pandas dot function.

    For efficiency, we combine eval_variables and multiplication of coefficients into a single step,
    so we don't have to create a separate column for each partial utility. Instead, we simply
    multiply the eval result by a single alternative coefficient and sum the partial utilities.

    Parameters
    ----------
    spec : pandas.DataFrame
        one row per spec expression and one col with utility coefficient
        (asserted to have exactly one column)

    df : pandas.DataFrame
        cross join (cartesian product) of choosers with alternatives
        combines columns of choosers and alternatives
        len(df) == len(choosers) * len(alternatives)
        index values (non-unique) are index values from alternatives df

    locals_d : dict or None
        Names made available to expressions that start with '@'. A copy is
        used, so the caller's dict is not modified.

    trace_label : str
        Prefix for the log messages emitted here.

    trace_rows : numpy.ndarray or None
        Row selector for df. When it is not None and has any truthy element,
        per-expression values are collected for the selected rows.

    Returns
    -------
    utilities : pandas.DataFrame
        Will have the index of `df` and a single 'utility' column holding the
        sum of the partial utilities (expression value * coefficient)

    trace_eval_results : pandas.DataFrame or None
        None when tracing is not active; otherwise the traced rows of df
        concatenated column-wise with each expression value, each partial
        utility and the total utility.
    """
    assert (len(spec.columns) == 1)

    # avoid altering caller's passed-in locals_d parameter (they may be looping)
    locals_d = locals_d.copy() if locals_d is not None else {}
    # expose this function's arguments (spec, df, ...) to '@' expressions
    locals_d.update(locals())

    def to_series(x):
        # broadcast a scalar expression result to one value per row of df
        if np.isscalar(x):
            return pd.Series([x] * len(df), index=df.index)
        return x

    if trace_rows is not None and trace_rows.any():
        # trace_rows is used below to slice both df and expression results
        assert isinstance(trace_rows, np.ndarray)
        trace_eval_results = []
    else:
        trace_eval_results = None

    check_for_variability = tracing.check_for_variability()

    # need to be able to identify which variables causes an error, which keeps
    # this from being expressed more parsimoniously

    utilities = pd.DataFrame({'utility': 0.0}, index=df.index)
    no_variability = has_missing_vals = 0

    for expr, coefficient in zip(spec.index, spec.iloc[:, 0]):
        try:

            if expr.startswith('@'):
                # NOTE(review): eval() runs arbitrary Python from the spec;
                # the spec must come from a trusted source.
                v = to_series(eval(expr[1:], globals(), locals_d))
            else:
                v = df.eval(expr)

            if check_for_variability and v.std() == 0:
                logger.info("%s: no variability (%s) in: %s",
                            trace_label, v.iloc[0], expr)
                no_variability += 1

            # FIXME - how likely is this to happen? Not sure it is really a problem?
            if check_for_variability and np.count_nonzero(
                    v.isnull().values) > 0:
                logger.info("%s: missing values in: %s", trace_label, expr)
                has_missing_vals += 1

            utilities.utility += (v * coefficient).astype('float')

            if trace_eval_results is not None:
                trace_eval_results.append((expr, v[trace_rows]))
                trace_eval_results.append(
                    ('partial utility (coefficient = %s)' % coefficient,
                     v[trace_rows] * coefficient))

        except Exception:
            # log which expression failed, then re-raise the original error
            logger.exception("Variable evaluation failed for: %s", str(expr))
            raise

    if no_variability > 0:
        logger.warning("%s: %s columns have no variability",
                       trace_label, no_variability)

    if has_missing_vals > 0:
        logger.warning("%s: %s columns have missing values",
                       trace_label, has_missing_vals)

    if trace_eval_results is not None:

        trace_eval_results.append(
            ('total utility', utilities.utility[trace_rows]))

        # DataFrame.from_items was removed in pandas 1.0. Key the columns by
        # position and assign the labels afterwards: unlike a label-keyed
        # dict this keeps duplicate labels (e.g. two expressions sharing a
        # coefficient) and the original column order.
        trace_labels = [label for label, _ in trace_eval_results]
        trace_columns = dict(
            enumerate(values for _, values in trace_eval_results))
        trace_eval_results = pd.DataFrame(trace_columns)
        trace_eval_results.columns = trace_labels
        trace_eval_results.index = df[trace_rows].index

        # add df columns to trace_results
        trace_eval_results = pd.concat([df[trace_rows], trace_eval_results],
                                       axis=1)

    return utilities, trace_eval_results
Beispiel #4
0
def eval_nl(choosers,
            spec,
            nest_spec,
            locals_d=None,
            trace_label=None,
            trace_choice_name=None):
    """
    Run a nested-logit simulation for a spec with no alternative-specific
    data (no interactions with alternative properties, no sampling of
    alternatives).

    Parameters
    ----------
    choosers : pandas.DataFrame
    spec : pandas.DataFrame
        A table of variable specifications and coefficient values.
        Variable expressions should be in the table index and the table
        should have a column for each alternative.
    nest_spec:
        dictionary specifying nesting structure and nesting coefficients
        (from the model spec yaml file)
    locals_d : Dict
        This is a dictionary of local variables that will be the environment
        for an evaluation of an expression that begins with @
    trace_label: str
        This is the label to be used  for trace log file entries and dump file names
        when household tracing enabled. No tracing occurs if label is empty or None.
    trace_choice_name: str
        This is the column label to be used in trace file csv dump of choices

    Returns
    -------
    choices : pandas.Series
        Index will be that of `choosers`, values will match the columns
        of `spec`.
    """

    trace_label = tracing.extend_trace_label(trace_label, 'nl')
    variability_check_enabled = tracing.check_for_variability()

    # one column per spec expression (column names match spec index values)
    model_design = eval_variables(spec.index, choosers, locals_d)

    if variability_check_enabled:
        _check_for_variability(model_design, trace_label)

    # Leaf utilities: the matrix product sums each spec row's partial utility
    # per alternative, giving one row per chooser and one column per
    # alternative. DataFrame.dot aligns model_design columns with spec index.
    raw_utilities = model_design.dot(spec)

    # exponentiated utilities of leaves and nests
    nested_exp_utilities = compute_nested_exp_utilities(
        raw_utilities, nest_spec)

    # probabilities of alternatives relative to siblings sharing the same nest
    nested_probabilities = compute_nested_probabilities(
        nested_exp_utilities, nest_spec, trace_label=trace_label)

    # global (flattened) leaf probabilities based on relative nest coefficients
    base_probabilities = compute_base_probabilities(nested_probabilities,
                                                    nest_spec)

    # Row probabilities may all be zero because nests were allowed to have
    # all-zero probs. Report that clearly here; make_choices will raise an
    # error if probs don't sum to 1.
    bad_prob_threshold = 0.001
    row_totals = base_probabilities.sum(axis=1)
    no_choices = (row_totals - 1.0).abs() > bad_prob_threshold

    if no_choices.any():
        report_bad_choices(no_choices,
                           base_probabilities,
                           tracing.extend_trace_label(trace_label, 'eval_nl'),
                           tag='bad_probs',
                           msg="base_probabilities all zero")

    choices = make_choices(base_probabilities,
                           trace_label,
                           trace_choosers=choosers)

    if not trace_label:
        return choices

    trace_dumps = [
        (choosers,
         '%s.choosers' % trace_label,
         {}),
        (raw_utilities,
         '%s.raw_utilities' % trace_label,
         {'column_labels': ['alternative', 'utility']}),
        (nested_exp_utilities,
         '%s.nested_exp_utilities' % trace_label,
         {'column_labels': ['alternative', 'utility']}),
        (nested_probabilities,
         '%s.nested_probabilities' % trace_label,
         {'column_labels': ['alternative', 'probability']}),
        (base_probabilities,
         '%s.base_probabilities' % trace_label,
         {'column_labels': ['alternative', 'probability']}),
        (choices,
         '%s.choices' % trace_label,
         {'columns': [None, trace_choice_name]}),
        (model_design,
         '%s.model_design' % trace_label,
         {'column_labels': ['expression', None]}),
    ]
    for table, label, options in trace_dumps:
        tracing.trace_df(table, label, **options)

    return choices
Beispiel #5
0
def eval_mnl(choosers,
             spec,
             locals_d=None,
             trace_label=None,
             trace_choice_name=None):
    """
    Run a simulation for a spec with no alternative-specific data (no
    interactions with alternative properties, no sampling of alternatives).

    Each spec row contributes a partial utility to every alternative: an
    expression (often a boolean 0-1 trigger) plus one coefficient per
    alternative column. The utility of each alternative is the matrix product
    of the evaluated expressions with those coefficients, giving one row per
    chooser and one column per alternative.

    Parameters
    ----------
    choosers : pandas.DataFrame
    spec : pandas.DataFrame
        A table of variable specifications and coefficient values.
        Variable expressions should be in the table index and the table
        should have a column for each alternative.
    locals_d : Dict
        This is a dictionary of local variables that will be the environment
        for an evaluation of an expression that begins with @
    trace_label: str
        This is the label to be used  for trace log file entries and dump file names
        when household tracing enabled. No tracing occurs if label is empty or None.
    trace_choice_name: str
        This is the column label to be used in trace file csv dump of choices

    Returns
    -------
    choices : pandas.Series
        Index will be that of `choosers`, values will match the columns
        of `spec`.
    """

    trace_label = tracing.extend_trace_label(trace_label, 'mnl')
    variability_check_enabled = tracing.check_for_variability()

    model_design = eval_variables(spec.index, choosers, locals_d)

    if variability_check_enabled:
        _check_for_variability(model_design, trace_label)

    # DataFrame.dot aligns model_design columns with spec index values, so
    # the expression labels must match on both sides.
    utilities = model_design.dot(spec)

    probs = utils_to_probs(
        utilities, trace_label=trace_label, trace_choosers=choosers)
    choices = make_choices(
        probs, trace_label=trace_label, trace_choosers=choosers)

    if not trace_label:
        return choices

    trace_dumps = [
        (choosers,
         '%s.choosers' % trace_label,
         {}),
        (utilities,
         '%s.utilities' % trace_label,
         {'column_labels': ['alternative', 'utility']}),
        (probs,
         '%s.probs' % trace_label,
         {'column_labels': ['alternative', 'probability']}),
        (choices,
         '%s.choices' % trace_label,
         {'columns': [None, trace_choice_name]}),
        (model_design,
         '%s.model_design' % trace_label,
         {'column_labels': ['expression', None]}),
    ]
    for table, label, options in trace_dumps:
        tracing.trace_df(table, label, **options)

    return choices
Beispiel #6
0
def eval_nl(choosers,
            spec,
            nest_spec,
            locals_d,
            trace_label=None,
            trace_choice_name=None):
    """
    Run a nested-logit simulation for a spec with no alternative-specific
    data (no interactions with alternative properties, no sampling of
    alternatives). Elapsed time is reported after each stage via
    tracing.print_elapsed_time.

    Parameters
    ----------
    choosers : pandas.DataFrame
    spec : pandas.DataFrame
        A table of variable specifications and coefficient values.
        Variable expressions should be in the table index and the table
        should have a column for each alternative.
    nest_spec:
        dictionary specifying nesting structure and nesting coefficients
        (from the model spec yaml file)
    locals_d : Dict or None
        This is a dictionary of local variables that will be the environment
        for an evaluation of an expression that begins with @
    trace_label: str
        This is the label to be used  for trace log file entries
        and dump file names
        when household tracing enabled. No tracing occurs if
        label is empty or None.
    trace_choice_name: str
        This is the column label to be used in trace file csv dump of choices

    Returns
    -------
    choices : pandas.Series
        Index will be that of `choosers`, values will match the columns
        of `spec`.
    """

    trace_label = tracing.extend_trace_label(trace_label, 'nl')
    variability_check_enabled = tracing.check_for_variability()

    timer = tracing.print_elapsed_time()

    # one column per spec expression (column names match spec index values)
    expression_values = eval_variables(spec.index, choosers, locals_d)
    timer = tracing.print_elapsed_time("eval_variables", timer, debug=True)

    if variability_check_enabled:
        _check_for_variability(expression_values, trace_label)
    # timing is reported whether or not the check ran
    timer = tracing.print_elapsed_time(
        "_check_for_variability", timer, debug=True)

    # leaf utilities
    raw_utilities = compute_utilities(expression_values, spec)
    timer = tracing.print_elapsed_time(
        "expression_values.dot", timer, debug=True)

    # exponentiated utilities of leaves and nests
    nested_exp_utilities = compute_nested_exp_utilities(
        raw_utilities, nest_spec)
    timer = tracing.print_elapsed_time(
        "compute_nested_exp_utilities", timer, debug=True)

    # probabilities of alternatives relative to siblings sharing the same nest
    nested_probabilities = compute_nested_probabilities(
        nested_exp_utilities, nest_spec, trace_label=trace_label)
    timer = tracing.print_elapsed_time(
        "compute_nested_probabilities", timer, debug=True)

    # global (flattened) leaf probabilities based on relative nest coefficients
    base_probabilities = compute_base_probabilities(nested_probabilities,
                                                    nest_spec)
    timer = tracing.print_elapsed_time(
        "compute_base_probabilities", timer, debug=True)

    # Row probabilities may all be zero because nests were allowed to have
    # all-zero probs. Report that clearly here; make_choices will raise an
    # error if probs don't sum to 1.
    bad_prob_threshold = 0.001
    row_totals = base_probabilities.sum(axis=1)
    no_choices = (row_totals - 1.0).abs() > bad_prob_threshold

    if no_choices.any():
        bad_choices_label = tracing.extend_trace_label(trace_label, 'eval_nl')
        logit.report_bad_choices(no_choices,
                                 base_probabilities,
                                 bad_choices_label,
                                 tag='bad_probs',
                                 msg="base_probabilities all zero")

    timer = tracing.print_elapsed_time(
        "report_bad_choices", timer, debug=True)

    choices, rands = logit.make_choices(base_probabilities,
                                        trace_label,
                                        trace_choosers=choosers)
    tracing.print_elapsed_time("logit.make_choices", timer, debug=True)

    if not trace_label:
        return choices

    trace_dumps = [
        (choosers,
         '%s.choosers' % trace_label,
         {}),
        (raw_utilities,
         '%s.raw_utilities' % trace_label,
         {'column_labels': ['alternative', 'utility']}),
        (nested_exp_utilities,
         '%s.nested_exp_utilities' % trace_label,
         {'column_labels': ['alternative', 'utility']}),
        (nested_probabilities,
         '%s.nested_probabilities' % trace_label,
         {'column_labels': ['alternative', 'probability']}),
        (base_probabilities,
         '%s.base_probabilities' % trace_label,
         {'column_labels': ['alternative', 'probability']}),
        (choices,
         '%s.choices' % trace_label,
         {'columns': [None, trace_choice_name]}),
        (rands,
         '%s.rands' % trace_label,
         {'columns': [None, 'rand']}),
        (expression_values,
         '%s.expression_values' % trace_label,
         {'column_labels': ['expression', None]}),
    ]
    for table, label, options in trace_dumps:
        tracing.trace_df(table, label, **options)

    return choices